mirror of
https://github.com/stashapp/stash.git
synced 2025-12-06 08:26:00 +01:00
Fix Unicode case-insensitive search for keyword queries
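Use a custom lower_unicode() SQL function (backed by Go's Unicode-aware strings.ToLower) to lower-case both the searched columns and the search term before matching, instead of relying on the built-in case-insensitivity of SQLite's LIKE operator, which only covers ASCII characters.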
This commit is contained in:
parent
869cbd496b
commit
46ae6800fd
6 changed files with 127 additions and 9 deletions
|
|
@ -30,6 +30,7 @@ func (d *CustomSQLiteDriver) Open(dsn string) (driver.Conn, error) {
|
||||||
"durationToTinyInt": durationToTinyIntFn,
|
"durationToTinyInt": durationToTinyIntFn,
|
||||||
"basename": basenameFn,
|
"basename": basenameFn,
|
||||||
"phash_distance": phashDistanceFn,
|
"phash_distance": phashDistanceFn,
|
||||||
|
"lower_unicode": lowerUnicodeFn,
|
||||||
}
|
}
|
||||||
|
|
||||||
for name, fn := range funcs {
|
for name, fn := range funcs {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package sqlite
|
package sqlite
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
@ -35,3 +36,27 @@ func durationToTinyIntFn(str string) (int64, error) {
|
||||||
// basenameFn returns the last path element of str, as computed by
// filepath.Base. It is registered with the SQLite driver under the SQL
// function name "basename". The returned error is always nil.
func basenameFn(str string) (string, error) {
	return filepath.Base(str), nil
}
|
||||||
|
|
||||||
|
// lowerUnicodeFn is a custom SQLite function that lower-cases its argument
// with Go's Unicode-aware strings.ToLower, enabling case-insensitive searches
// that properly handle non-ASCII characters.
//
// The argument is taken as interface{} so that non-text values can be passed:
//   - nil (SQL NULL) yields the empty string
//   - string and []byte values are lower-cased
//   - int64 values are rendered in base 10
//   - any other type is formatted with %v and then lower-cased
//
// The returned error is always nil.
func lowerUnicodeFn(str interface{}) (string, error) {
	switch v := str.(type) {
	case nil:
		// handle NULL values
		return "", nil
	case string:
		return strings.ToLower(v), nil
	case int64:
		// convert int64 to string (for phash fingerprints); decimal digits
		// and the sign have no case, so lower-casing would be a no-op
		return strconv.FormatInt(v, 10), nil
	case []byte:
		// handle BLOB type if needed
		return strings.ToLower(string(v)), nil
	default:
		// for any other type, try converting to string
		return strings.ToLower(fmt.Sprintf("%v", v)), nil
	}
}
|
||||||
|
|
|
||||||
|
|
@ -2432,6 +2432,86 @@ func TestPerformerStore_FindByStashIDStatus(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPerformerQueryUnicodeSearchCaseInsensitive(t *testing.T) {
|
||||||
|
withTxn(func(ctx context.Context) error {
|
||||||
|
qb := db.Performer
|
||||||
|
|
||||||
|
// test cases with various Unicode characters
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
performerName string
|
||||||
|
searchTerm string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"Cyrillic lowercase search",
|
||||||
|
"Анна",
|
||||||
|
"анна",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Cyrillic uppercase search",
|
||||||
|
"мария",
|
||||||
|
"МАРИЯ",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Accented Latin lowercase",
|
||||||
|
"Zoë",
|
||||||
|
"zoë",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Accented Latin uppercase",
|
||||||
|
"chloé",
|
||||||
|
"CHLOÉ",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Greek lowercase search",
|
||||||
|
"Έλενα",
|
||||||
|
"έλενα",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
// create performer with unicode name
|
||||||
|
performer := models.Performer{
|
||||||
|
Name: tc.performerName,
|
||||||
|
}
|
||||||
|
err := qb.Create(ctx, &models.CreatePerformerInput{Performer: &performer})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Error creating performer: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
// search using different case
|
||||||
|
findFilter := &models.FindFilterType{
|
||||||
|
Q: &tc.searchTerm,
|
||||||
|
}
|
||||||
|
|
||||||
|
performers, _, err := qb.Query(ctx, nil, findFilter)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Error querying performers: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
// should find the performer regardless of case
|
||||||
|
found := false
|
||||||
|
for _, p := range performers {
|
||||||
|
if p.ID == performer.ID {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.True(t, found)
|
||||||
|
|
||||||
|
// clean up
|
||||||
|
if err := qb.Destroy(ctx, performer.ID); err != nil {
|
||||||
|
t.Fatalf("Error cleaning up performer: %s", err.Error())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// TODO Update
|
// TODO Update
|
||||||
// TODO Destroy
|
// TODO Destroy
|
||||||
// TODO Find
|
// TODO Find
|
||||||
|
|
|
||||||
|
|
@ -181,12 +181,22 @@ func (qb *queryBuilder) addFilter(f *filterBuilder) error {
|
||||||
func (qb *queryBuilder) parseQueryString(columns []string, q string) {
|
func (qb *queryBuilder) parseQueryString(columns []string, q string) {
|
||||||
specs := models.ParseSearchString(q)
|
specs := models.ParseSearchString(q)
|
||||||
|
|
||||||
|
// helper to wrap column with coalesce if it doesn't already have it
|
||||||
|
wrapColumn := func(column string) string {
|
||||||
|
// if column already has COALESCE or CAST, don't wrap again
|
||||||
|
if strings.HasPrefix(strings.ToUpper(strings.TrimSpace(column)), "COALESCE") ||
|
||||||
|
strings.HasPrefix(strings.ToUpper(strings.TrimSpace(column)), "CAST") {
|
||||||
|
return column
|
||||||
|
}
|
||||||
|
return coalesce(column)
|
||||||
|
}
|
||||||
|
|
||||||
for _, t := range specs.MustHave {
|
for _, t := range specs.MustHave {
|
||||||
var clauses []string
|
var clauses []string
|
||||||
|
|
||||||
for _, column := range columns {
|
for _, column := range columns {
|
||||||
clauses = append(clauses, column+" LIKE ?")
|
clauses = append(clauses, "lower_unicode("+wrapColumn(column)+") LIKE ?")
|
||||||
qb.addArg(like(t))
|
qb.addArg(likeLower(t))
|
||||||
}
|
}
|
||||||
|
|
||||||
qb.addWhere("(" + strings.Join(clauses, " OR ") + ")")
|
qb.addWhere("(" + strings.Join(clauses, " OR ") + ")")
|
||||||
|
|
@ -194,8 +204,8 @@ func (qb *queryBuilder) parseQueryString(columns []string, q string) {
|
||||||
|
|
||||||
for _, t := range specs.MustNot {
|
for _, t := range specs.MustNot {
|
||||||
for _, column := range columns {
|
for _, column := range columns {
|
||||||
qb.addWhere(coalesce(column) + " NOT LIKE ?")
|
qb.addWhere("lower_unicode(" + wrapColumn(column) + ") NOT LIKE ?")
|
||||||
qb.addArg(like(t))
|
qb.addArg(likeLower(t))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -204,8 +214,8 @@ func (qb *queryBuilder) parseQueryString(columns []string, q string) {
|
||||||
|
|
||||||
for _, column := range columns {
|
for _, column := range columns {
|
||||||
for _, v := range set {
|
for _, v := range set {
|
||||||
clauses = append(clauses, column+" LIKE ?")
|
clauses = append(clauses, "lower_unicode("+wrapColumn(column)+") LIKE ?")
|
||||||
qb.addArg(like(v))
|
qb.addArg(likeLower(v))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -958,7 +958,7 @@ func (qb *SceneStore) makeQuery(ctx context.Context, sceneFilter *models.SceneFi
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
filepathColumn := "folders.path || '" + string(filepath.Separator) + "' || files.basename"
|
filepathColumn := "COALESCE(folders.path, '') || '" + string(filepath.Separator) + "' || COALESCE(files.basename, '')"
|
||||||
searchColumns := []string{"scenes.title", "scenes.details", filepathColumn, "files_fingerprints.fingerprint", "scene_markers.title"}
|
searchColumns := []string{"scenes.title", "scenes.details", filepathColumn, "files_fingerprints.fingerprint", "scene_markers.title"}
|
||||||
query.parseQueryString(searchColumns, *q)
|
query.parseQueryString(searchColumns, *q)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -359,6 +359,8 @@ func coalesce(column string) string {
|
||||||
return fmt.Sprintf("COALESCE(%s, '')", column)
|
return fmt.Sprintf("COALESCE(%s, '')", column)
|
||||||
}
|
}
|
||||||
|
|
||||||
// like wraps v in SQL LIKE wildcards ("%v%") so that the pattern matches v
// anywhere inside the compared value. The case of v is left untouched.
func like(v string) string {
	return "%" + v + "%"
}

// likeLower wraps a string with wildcard characters and converts it to
// lowercase, for use in case-insensitive LIKE queries with the
// lower_unicode() SQL function.
func likeLower(v string) string {
	return "%" + strings.ToLower(v) + "%"
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue