Use sync.Map instead of LRU for the per-job regexp cache

The preceding commit added lru.Cache for the compiled-regexp cache to
match the style in pkg/sqlite/regex.go. That file's use case is
different: a small bounded cache serving a read-dominated workload. The
auto-tag regexp cache is job-scoped (so eviction buys us nothing) and is
hit by every worker on every candidate (so the LRU's per-Get mutex
becomes a point of contention, measurable under the parallel worker pool).

sync.Map's read-optimised path avoids the contention without changing
any observable behavior. Kept as a separate commit so it can be
reverted independently if upstream prefers the LRU approach — the
first commit stands on its own either way.
This commit is contained in:
abdusalam.dihan 2026-04-19 21:49:40 +01:00
parent cd64433dc5
commit 93a05fce8f

View file

@ -6,17 +6,9 @@ import (
"strings"
"sync"
lru "github.com/hashicorp/golang-lru/v2"
"github.com/stashapp/stash/pkg/models"
)
// regexpCacheSize bounds the compiled-regexp LRU. Sized generously so that
// for realistic libraries (up to ~100 k performers/studios/tags combined,
// each optionally with a unicode and ASCII variant) the cache never evicts
// during one auto-tag job. LRU is used for consistency with
// pkg/sqlite/regex.go; eviction only kicks in for libraries far past that.
const regexpCacheSize = 200_000
const singleFirstCharacterRegex = `^[\p{L}][.\-_ ]`
var singleFirstCharacterRE = regexp.MustCompile(singleFirstCharacterRegex)
@ -145,8 +137,12 @@ type Cache struct {
tagByPrefix map[string][]cachedTag
tagAlwaysCheck []cachedTag
regexpCacheOnce sync.Once
regexpCache *lru.Cache[regexpCacheKey, *regexp.Regexp]
// regexpCache maps regexpCacheKey → *regexp.Regexp. sync.Map rather
// than the hashicorp LRU used in pkg/sqlite/regex.go: this cache is
// job-scoped (so LRU's eviction buys us nothing) and is hit by every
// worker on every candidate (so a single-mutex Get becomes the
// bottleneck). sync.Map's read-optimised path sidesteps that.
regexpCache sync.Map
}
// cachedStudio bundles a studio with its aliases so PathToStudio can match
@ -347,17 +343,13 @@ func (c *Cache) nameRegexp(name string, useUnicode bool) *regexp.Regexp {
return nameToRegexp(name, useUnicode)
}
c.regexpCacheOnce.Do(func() {
c.regexpCache, _ = lru.New[regexpCacheKey, *regexp.Regexp](regexpCacheSize)
})
key := regexpCacheKey{name: name, useUnicode: useUnicode}
if r, ok := c.regexpCache.Get(key); ok {
return r
if r, ok := c.regexpCache.Load(key); ok {
return r.(*regexp.Regexp)
}
r := nameToRegexp(name, useUnicode)
c.regexpCache.Add(key, r)
return r
actual, _ := c.regexpCache.LoadOrStore(key, r)
return actual.(*regexp.Regexp)
}
// getSingleLetterPerformers returns all performers with names that start with single character words.