diff --git a/main.go b/main.go index 393a4f3e3..fa9e4d790 100644 --- a/main.go +++ b/main.go @@ -35,10 +35,11 @@ func main() { manager.Initialize() api.Start(uiBox, loginUIBox) - // stop any profiling at exit - defer pprof.StopCPUProfile() blockForever() + // stop any profiling at exit + pprof.StopCPUProfile() + manager.GetInstance().Shutdown(0) } diff --git a/pkg/match/path.go b/pkg/match/path.go index e80601fcd..5ae799de9 100644 --- a/pkg/match/path.go +++ b/pkg/match/path.go @@ -5,6 +5,7 @@ import ( "path/filepath" "regexp" "strings" + "unicode" "github.com/stashapp/stash/pkg/gallery" "github.com/stashapp/stash/pkg/image" @@ -12,7 +13,12 @@ import ( "github.com/stashapp/stash/pkg/scene" ) -const separatorChars = `.\-_ ` +const ( + separatorChars = `.\-_ ` + + reNotLetterWordUnicode = `[^\p{L}\w\d]` + reNotLetterWord = `[^\w\d]` +) func getPathQueryRegex(name string) string { // escape specific regex characters @@ -68,22 +74,22 @@ func getPathWords(path string) []string { return ret } +// https://stackoverflow.com/a/53069799 +func allASCII(s string) bool { + for i := 0; i < len(s); i++ { + if s[i] > unicode.MaxASCII { + return false + } + } + return true +} + // nameMatchesPath returns the index in the path for the right-most match. // Returns -1 if not found. 
func nameMatchesPath(name, path string) int { - // escape specific regex characters - name = regexp.QuoteMeta(name) - - name = strings.ToLower(name) - path = strings.ToLower(path) - - // handle path separators - const separator = `[` + separatorChars + `]` - - reStr := strings.ReplaceAll(name, " ", separator+"*") - reStr = `(?:^|_|[^\p{L}\w\d])` + reStr + `(?:$|_|[^\p{L}\w\d])` - - re := regexp.MustCompile(reStr) + // #2363 - optimisation: only use unicode character regexp if path contains + // unicode characters + re := nameToRegexp(name, !allASCII(path)) found := re.FindAllStringIndex(path, -1) if found == nil { @@ -93,6 +99,39 @@ func nameMatchesPath(name, path string) int { return found[len(found)-1][0] } +// nameToRegexp compiles a regexp pattern to match paths from the given name. +// Set useUnicode to true if this regexp is to be used on any strings with unicode characters. +func nameToRegexp(name string, useUnicode bool) *regexp.Regexp { + // escape specific regex characters + name = regexp.QuoteMeta(name) + + name = strings.ToLower(name) + + // handle path separators + const separator = `[` + separatorChars + `]` + + // performance optimisation: only use \p{L} if useUnicode is true + notWord := reNotLetterWord + if useUnicode { + notWord = reNotLetterWordUnicode + } + + reStr := strings.ReplaceAll(name, " ", separator+"*") + reStr = `(?:^|_|` + notWord + `)` + reStr + `(?:$|_|` + notWord + `)` + + re := regexp.MustCompile(reStr) + return re +} + +func regexpMatchesPath(r *regexp.Regexp, path string) int { + path = strings.ToLower(path) + found := r.FindAllStringIndex(path, -1) + if found == nil { + return -1 + } + return found[len(found)-1][0] +} + func PathToPerformers(path string, performerReader models.PerformerReader) ([]*models.Performer, error) { words := getPathWords(path) performers, err := performerReader.QueryForAutoTag(words) @@ -208,8 +247,13 @@ func PathToScenes(name string, paths []string, sceneReader models.SceneReader) ( } var ret 
[]*models.Scene + + // paths may have unicode characters + const useUnicode = true + + r := nameToRegexp(name, useUnicode) for _, p := range scenes { - if nameMatchesPath(name, p.Path) != -1 { + if regexpMatchesPath(r, p.Path) != -1 { ret = append(ret, p) } } @@ -240,8 +284,13 @@ func PathToImages(name string, paths []string, imageReader models.ImageReader) ( } var ret []*models.Image + + // paths may have unicode characters + const useUnicode = true + + r := nameToRegexp(name, useUnicode) for _, p := range images { - if nameMatchesPath(name, p.Path) != -1 { + if regexpMatchesPath(r, p.Path) != -1 { ret = append(ret, p) } } @@ -272,8 +321,13 @@ func PathToGalleries(name string, paths []string, galleryReader models.GalleryRe } var ret []*models.Gallery + + // paths may have unicode characters + const useUnicode = true + + r := nameToRegexp(name, useUnicode) for _, p := range gallerys { - if nameMatchesPath(name, p.Path.String) != -1 { + if regexpMatchesPath(r, p.Path.String) != -1 { ret = append(ret, p) } } diff --git a/pkg/sqlite/performer.go b/pkg/sqlite/performer.go index 8a67db052..b256d7d66 100644 --- a/pkg/sqlite/performer.go +++ b/pkg/sqlite/performer.go @@ -21,7 +21,11 @@ WHERE performers_tags.tag_id = ? GROUP BY performers_tags.performer_id ` -const singleFirstCharacterRegex = `^[\w\p{L}][.\-_ ]` +// KNOWN ISSUE: using \p{L} to find single unicode character names results in +// very slow queries. +// The suggested solution is to cache single-character names and not include them +// in the autotag query. +const singleFirstCharacterRegex = `^[\w][.\-_ ]` type performerQueryBuilder struct { repository