From 93a05fce8fc71ee57e1a32ce9505e4a30218a822 Mon Sep 17 00:00:00 2001 From: "abdusalam.dihan" Date: Sun, 19 Apr 2026 21:49:40 +0100 Subject: [PATCH] Use sync.Map instead of LRU for the per-job regexp cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The preceding commit added lru.Cache for the compiled-regexp cache to match the style in pkg/sqlite/regex.go. That file's use case is different: a small bounded cache serving a read-dominated workload. The auto-tag regexp cache is job-scoped (so eviction buys us nothing) and hit by every worker on every candidate (so the LRU's per-Get mutex becomes a point of contention, measurable under the parallel worker pool). sync.Map's read-optimised path avoids the contention without changing matching results; the only behavioral difference is that the cache is no longer capped at 200,000 entries, a limit that was sized so it would never be reached within one auto-tag job. Kept as a separate commit so it can be reverted independently if upstream prefers the LRU approach — the first commit stands on its own either way. --- pkg/match/cache.go | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/pkg/match/cache.go b/pkg/match/cache.go index 198fc09a5..2988520e0 100644 --- a/pkg/match/cache.go +++ b/pkg/match/cache.go @@ -6,17 +6,9 @@ import ( "strings" "sync" - lru "github.com/hashicorp/golang-lru/v2" "github.com/stashapp/stash/pkg/models" ) -// regexpCacheSize bounds the compiled-regexp LRU. Sized generously so that -// for realistic libraries (up to ~100 k performers/studios/tags combined, -// each optionally with a unicode and ASCII variant) the cache never evicts -// during one auto-tag job. LRU is used for consistency with -// pkg/sqlite/regex.go; eviction only kicks in for libraries far past that. 
-const regexpCacheSize = 200_000 - const singleFirstCharacterRegex = `^[\p{L}][.\-_ ]` var singleFirstCharacterRE = regexp.MustCompile(singleFirstCharacterRegex) @@ -145,8 +137,12 @@ type Cache struct { tagByPrefix map[string][]cachedTag tagAlwaysCheck []cachedTag - regexpCacheOnce sync.Once - regexpCache *lru.Cache[regexpCacheKey, *regexp.Regexp] + // regexpCache maps regexpCacheKey → *regexp.Regexp. sync.Map rather + // than the hashicorp LRU used in pkg/sqlite/regex.go: this cache is + // job-scoped (so LRU's eviction buys us nothing) and is hit by every + // worker on every candidate (so a single-mutex Get becomes the + // bottleneck). sync.Map's read-optimised path sidesteps that. + regexpCache sync.Map } // cachedStudio bundles a studio with its aliases so PathToStudio can match @@ -347,17 +343,13 @@ func (c *Cache) nameRegexp(name string, useUnicode bool) *regexp.Regexp { return nameToRegexp(name, useUnicode) } - c.regexpCacheOnce.Do(func() { - c.regexpCache, _ = lru.New[regexpCacheKey, *regexp.Regexp](regexpCacheSize) - }) - key := regexpCacheKey{name: name, useUnicode: useUnicode} - if r, ok := c.regexpCache.Get(key); ok { - return r + if r, ok := c.regexpCache.Load(key); ok { + return r.(*regexp.Regexp) } r := nameToRegexp(name, useUnicode) - c.regexpCache.Add(key, r) - return r + actual, _ := c.regexpCache.LoadOrStore(key, r) + return actual.(*regexp.Regexp) } // getSingleLetterPerformers returns all performers with names that start with single character words.