mirror of
https://github.com/stashapp/stash.git
synced 2026-05-09 05:05:29 +02:00
Use sync.Map instead of LRU for the per-job regexp cache
The preceding commit added lru.Cache for the compiled-regexp cache to match the style in pkg/sqlite/regex.go. That file's use case is different: a small bounded cache serving a read-dominated workload. The auto-tag regexp cache is job-scoped (so eviction buys us nothing) and is hit by every worker on every candidate (so the LRU's per-Get mutex becomes a point of contention, measurable under the parallel worker pool). sync.Map's read-optimised path avoids that contention without changing any observable behaviour. Kept as a separate commit so it can be reverted independently if upstream prefers the LRU approach — the first commit stands on its own either way.
This commit is contained in:
parent
cd64433dc5
commit
93a05fce8f
1 changed files with 10 additions and 18 deletions
|
|
@ -6,17 +6,9 @@ import (
|
|||
"strings"
|
||||
"sync"
|
||||
|
||||
lru "github.com/hashicorp/golang-lru/v2"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
// regexpCacheSize bounds the compiled-regexp LRU. Sized generously so that
|
||||
// for realistic libraries (up to ~100 k performers/studios/tags combined,
|
||||
// each optionally with a unicode and ASCII variant) the cache never evicts
|
||||
// during one auto-tag job. LRU is used for consistency with
|
||||
// pkg/sqlite/regex.go; eviction only kicks in for libraries far past that.
|
||||
const regexpCacheSize = 200_000
|
||||
|
||||
const singleFirstCharacterRegex = `^[\p{L}][.\-_ ]`
|
||||
|
||||
var singleFirstCharacterRE = regexp.MustCompile(singleFirstCharacterRegex)
|
||||
|
|
@ -145,8 +137,12 @@ type Cache struct {
|
|||
tagByPrefix map[string][]cachedTag
|
||||
tagAlwaysCheck []cachedTag
|
||||
|
||||
regexpCacheOnce sync.Once
|
||||
regexpCache *lru.Cache[regexpCacheKey, *regexp.Regexp]
|
||||
// regexpCache maps regexpCacheKey → *regexp.Regexp. sync.Map rather
|
||||
// than the hashicorp LRU used in pkg/sqlite/regex.go: this cache is
|
||||
// job-scoped (so LRU's eviction buys us nothing) and is hit by every
|
||||
// worker on every candidate (so a single-mutex Get becomes the
|
||||
// bottleneck). sync.Map's read-optimised path sidesteps that.
|
||||
regexpCache sync.Map
|
||||
}
|
||||
|
||||
// cachedStudio bundles a studio with its aliases so PathToStudio can match
|
||||
|
|
@ -347,17 +343,13 @@ func (c *Cache) nameRegexp(name string, useUnicode bool) *regexp.Regexp {
|
|||
return nameToRegexp(name, useUnicode)
|
||||
}
|
||||
|
||||
c.regexpCacheOnce.Do(func() {
|
||||
c.regexpCache, _ = lru.New[regexpCacheKey, *regexp.Regexp](regexpCacheSize)
|
||||
})
|
||||
|
||||
key := regexpCacheKey{name: name, useUnicode: useUnicode}
|
||||
if r, ok := c.regexpCache.Get(key); ok {
|
||||
return r
|
||||
if r, ok := c.regexpCache.Load(key); ok {
|
||||
return r.(*regexp.Regexp)
|
||||
}
|
||||
r := nameToRegexp(name, useUnicode)
|
||||
c.regexpCache.Add(key, r)
|
||||
return r
|
||||
actual, _ := c.regexpCache.LoadOrStore(key, r)
|
||||
return actual.(*regexp.Regexp)
|
||||
}
|
||||
|
||||
// getSingleLetterPerformers returns all performers with names that start with single character words.
|
||||
|
|
|
|||
Loading…
Reference in a new issue