Use sync.Map instead of LRU for the per-job regexp cache

The preceding commit added lru.Cache for the compiled-regexp cache to match the style in pkg/sqlite/regex.go. That file's use case is different: a small bounded cache serving a read-dominated workload. The auto-tag regexp cache is job-scoped (so eviction buys us nothing) and is hit by every worker on every candidate (so the LRU's per-Get mutex becomes a point of contention, one that is measurable under the parallel worker pool). sync.Map's read-optimised path avoids that contention without changing any observable behavior. This change is kept as a separate commit so that it can be reverted independently if upstream prefers the LRU approach — the first commit stands on its own either way.
2026-05-09 05:05:29 +02:00 · 2026-04-19 21:49:40 +01:00 · 2026-04-19 21:49:40 +01:00 · 93a05fce8f
commit 93a05fce8f
parent cd64433dc5
1 changed files with 10 additions and 18 deletions
--- a/pkg/match/cache.go
+++ b/pkg/match/cache.go
@@ -6,17 +6,9 @@ import (
 	"strings"
 	"sync"

-	lru "github.com/hashicorp/golang-lru/v2"
 	"github.com/stashapp/stash/pkg/models"
 )

-// regexpCacheSize bounds the compiled-regexp LRU. Sized generously so that
-// for realistic libraries (up to ~100 k performers/studios/tags combined,
-// each optionally with a unicode and ASCII variant) the cache never evicts
-// during one auto-tag job. LRU is used for consistency with
-// pkg/sqlite/regex.go; eviction only kicks in for libraries far past that.
-const regexpCacheSize = 200_000
-
 const singleFirstCharacterRegex = `^[\p{L}][.\-_ ]`

 var singleFirstCharacterRE = regexp.MustCompile(singleFirstCharacterRegex)
@@ -145,8 +137,12 @@ type Cache struct {
 	tagByPrefix          map[string][]cachedTag
 	tagAlwaysCheck       []cachedTag

-	regexpCacheOnce sync.Once
-	regexpCache     *lru.Cache[regexpCacheKey, *regexp.Regexp]
+	// regexpCache maps regexpCacheKey → *regexp.Regexp. sync.Map rather
+	// than the hashicorp LRU used in pkg/sqlite/regex.go: this cache is
+	// job-scoped (so LRU's eviction buys us nothing) and is hit by every
+	// worker on every candidate (so a single-mutex Get becomes the
+	// bottleneck). sync.Map's read-optimised path sidesteps that.
+	regexpCache sync.Map
 }

 // cachedStudio bundles a studio with its aliases so PathToStudio can match
@@ -347,17 +343,13 @@ func (c *Cache) nameRegexp(name string, useUnicode bool) *regexp.Regexp {
 		return nameToRegexp(name, useUnicode)
 	}

-	c.regexpCacheOnce.Do(func() {
-		c.regexpCache, _ = lru.New[regexpCacheKey, *regexp.Regexp](regexpCacheSize)
-	})
-
 	key := regexpCacheKey{name: name, useUnicode: useUnicode}
-	if r, ok := c.regexpCache.Get(key); ok {
-		return r
+	if r, ok := c.regexpCache.Load(key); ok {
+		return r.(*regexp.Regexp)
 	}
 	r := nameToRegexp(name, useUnicode)
-	c.regexpCache.Add(key, r)
-	return r
+	actual, _ := c.regexpCache.LoadOrStore(key, r)
+	return actual.(*regexp.Regexp)
 }

 // getSingleLetterPerformers returns all performers with names that start with single character words.