From ccd75731b76017cf3a0821b924340b4443600c14 Mon Sep 17 00:00:00 2001 From: bnkai <48220860+bnkai@users.noreply.github.com> Date: Sun, 24 May 2020 09:19:22 +0300 Subject: [PATCH] Change scrape matching (studio, movies, tag, performers) to case insensitive (#556) * Change scrape matching (studio, movies, tag, performers) to case insensitive * * fix collate order * * make filename parser findbyname calls case insensitive * * add unit testing for Tags GetFindbyName/s --- pkg/manager/filename_parser.go | 16 +++--- pkg/manager/task_import.go | 12 ++--- pkg/models/querybuilder_movies.go | 16 ++++-- pkg/models/querybuilder_performer.go | 9 +++- pkg/models/querybuilder_studio.go | 8 ++- pkg/models/querybuilder_tag.go | 16 ++++-- pkg/models/querybuilder_tag_test.go | 75 +++++++++++++++++++++++++++- pkg/models/setup_test.go | 30 +++++++++-- pkg/scraper/scrapers.go | 8 +-- 9 files changed, 153 insertions(+), 37 deletions(-) diff --git a/pkg/manager/filename_parser.go b/pkg/manager/filename_parser.go index 9ef98c2aa..eae6ba365 100644 --- a/pkg/manager/filename_parser.go +++ b/pkg/manager/filename_parser.go @@ -405,7 +405,7 @@ func (m parseMapper) parse(scene *models.Scene) *sceneHolder { } type performerQueryer interface { - FindByNames(names []string, tx *sqlx.Tx) ([]*models.Performer, error) + FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*models.Performer, error) } type sceneQueryer interface { @@ -413,15 +413,15 @@ type sceneQueryer interface { } type tagQueryer interface { - FindByName(name string, tx *sqlx.Tx) (*models.Tag, error) + FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Tag, error) } type studioQueryer interface { - FindByName(name string, tx *sqlx.Tx) (*models.Studio, error) + FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Studio, error) } type movieQueryer interface { - FindByName(name string, tx *sqlx.Tx) (*models.Movie, error) + FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Movie, error) } type 
SceneFilenameParser struct { @@ -546,7 +546,7 @@ func (p *SceneFilenameParser) queryPerformer(performerName string) *models.Perfo } // perform an exact match and grab the first - performers, _ := p.performerQuery.FindByNames([]string{performerName}, nil) + performers, _ := p.performerQuery.FindByNames([]string{performerName}, nil, true) var ret *models.Performer if len(performers) > 0 { @@ -568,7 +568,7 @@ func (p *SceneFilenameParser) queryStudio(studioName string) *models.Studio { return ret } - ret, _ := p.studioQuery.FindByName(studioName, nil) + ret, _ := p.studioQuery.FindByName(studioName, nil, true) // add result to cache p.studioCache[studioName] = ret @@ -585,7 +585,7 @@ func (p *SceneFilenameParser) queryMovie(movieName string) *models.Movie { return ret } - ret, _ := p.movieQuery.FindByName(movieName, nil) + ret, _ := p.movieQuery.FindByName(movieName, nil, true) // add result to cache p.movieCache[movieName] = ret @@ -603,7 +603,7 @@ func (p *SceneFilenameParser) queryTag(tagName string) *models.Tag { } // match tag name exactly - ret, _ := p.tagQuery.FindByName(tagName, nil) + ret, _ := p.tagQuery.FindByName(tagName, nil, true) // add result to cache p.tagCache[tagName] = ret diff --git a/pkg/manager/task_import.go b/pkg/manager/task_import.go index 89522f788..efc2d62c2 100644 --- a/pkg/manager/task_import.go +++ b/pkg/manager/task_import.go @@ -410,7 +410,7 @@ func (t *ImportTask) ImportScrapedItems(ctx context.Context) { UpdatedAt: models.SQLiteTimestamp{Timestamp: t.getTimeFromJSONTime(mappingJSON.UpdatedAt)}, } - studio, err := sqb.FindByName(mappingJSON.Studio, tx) + studio, err := sqb.FindByName(mappingJSON.Studio, tx, false) if err != nil { logger.Errorf("[scraped sites] failed to fetch studio: %s", err.Error()) } @@ -532,7 +532,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) { // Populate the studio ID if sceneJSON.Studio != "" { sqb := models.NewStudioQueryBuilder() - studio, err := sqb.FindByName(sceneJSON.Studio, tx) + studio, 
err := sqb.FindByName(sceneJSON.Studio, tx, false) if err != nil { logger.Warnf("[scenes] studio <%s> does not exist: %s", sceneJSON.Studio, err.Error()) } else { @@ -634,7 +634,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) { UpdatedAt: models.SQLiteTimestamp{Timestamp: t.getTimeFromJSONTime(marker.UpdatedAt)}, } - primaryTag, err := tqb.FindByName(marker.PrimaryTag, tx) + primaryTag, err := tqb.FindByName(marker.PrimaryTag, tx, false) if err != nil { logger.Errorf("[scenes] <%s> failed to find primary tag for marker: %s", scene.Checksum, err.Error()) } else { @@ -682,7 +682,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) { func (t *ImportTask) getPerformers(names []string, tx *sqlx.Tx) ([]*models.Performer, error) { pqb := models.NewPerformerQueryBuilder() - performers, err := pqb.FindByNames(names, tx) + performers, err := pqb.FindByNames(names, tx, false) if err != nil { return nil, err } @@ -711,7 +711,7 @@ func (t *ImportTask) getMoviesScenes(input []jsonschema.SceneMovie, sceneID int, var movies []models.MoviesScenes for _, inputMovie := range input { - movie, err := mqb.FindByName(inputMovie.MovieName, tx) + movie, err := mqb.FindByName(inputMovie.MovieName, tx, false) if err != nil { return nil, err } @@ -740,7 +740,7 @@ func (t *ImportTask) getMoviesScenes(input []jsonschema.SceneMovie, sceneID int, func (t *ImportTask) getTags(sceneChecksum string, names []string, tx *sqlx.Tx) ([]*models.Tag, error) { tqb := models.NewTagQueryBuilder() - tags, err := tqb.FindByNames(names, tx) + tags, err := tqb.FindByNames(names, tx, false) if err != nil { return nil, err } diff --git a/pkg/models/querybuilder_movies.go b/pkg/models/querybuilder_movies.go index 9c35017a1..cccd4d180 100644 --- a/pkg/models/querybuilder_movies.go +++ b/pkg/models/querybuilder_movies.go @@ -83,14 +83,22 @@ func (qb *MovieQueryBuilder) FindBySceneID(sceneID int, tx *sqlx.Tx) ([]*Movie, return qb.queryMovies(query, args, tx) } -func (qb *MovieQueryBuilder) 
FindByName(name string, tx *sqlx.Tx) (*Movie, error) { - query := "SELECT * FROM movies WHERE name = ? LIMIT 1" +func (qb *MovieQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Movie, error) { + query := "SELECT * FROM movies WHERE name = ?" + if nocase { + query += " COLLATE NOCASE" + } + query += " LIMIT 1" args := []interface{}{name} return qb.queryMovie(query, args, tx) } -func (qb *MovieQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Movie, error) { - query := "SELECT * FROM movies WHERE name IN " + getInBinding(len(names)) +func (qb *MovieQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Movie, error) { + query := "SELECT * FROM movies WHERE name" + if nocase { + query += " COLLATE NOCASE" + } + query += " IN " + getInBinding(len(names)) var args []interface{} for _, name := range names { args = append(args, name) diff --git a/pkg/models/querybuilder_performer.go b/pkg/models/querybuilder_performer.go index ffe3e5d3a..5b0dca723 100644 --- a/pkg/models/querybuilder_performer.go +++ b/pkg/models/querybuilder_performer.go @@ -95,8 +95,13 @@ func (qb *PerformerQueryBuilder) FindNameBySceneID(sceneID int, tx *sqlx.Tx) ([] return qb.queryPerformers(query, args, tx) } -func (qb *PerformerQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Performer, error) { - query := "SELECT * FROM performers WHERE name IN " + getInBinding(len(names)) +func (qb *PerformerQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Performer, error) { + query := "SELECT * FROM performers WHERE name" + if nocase { + query += " COLLATE NOCASE" + } + query += " IN " + getInBinding(len(names)) + var args []interface{} for _, name := range names { args = append(args, name) diff --git a/pkg/models/querybuilder_studio.go b/pkg/models/querybuilder_studio.go index 058ea2fad..2b65bba2a 100644 --- a/pkg/models/querybuilder_studio.go +++ b/pkg/models/querybuilder_studio.go @@ -79,8 +79,12 @@ func (qb *StudioQueryBuilder) 
FindBySceneID(sceneID int) (*Studio, error) { return qb.queryStudio(query, args, nil) } -func (qb *StudioQueryBuilder) FindByName(name string, tx *sqlx.Tx) (*Studio, error) { - query := "SELECT * FROM studios WHERE name = ? LIMIT 1" +func (qb *StudioQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Studio, error) { + query := "SELECT * FROM studios WHERE name = ?" + if nocase { + query += " COLLATE NOCASE" + } + query += " LIMIT 1" args := []interface{}{name} return qb.queryStudio(query, args, tx) } diff --git a/pkg/models/querybuilder_tag.go b/pkg/models/querybuilder_tag.go index 94cac05cd..35c64c323 100644 --- a/pkg/models/querybuilder_tag.go +++ b/pkg/models/querybuilder_tag.go @@ -111,14 +111,22 @@ func (qb *TagQueryBuilder) FindBySceneMarkerID(sceneMarkerID int, tx *sqlx.Tx) ( return qb.queryTags(query, args, tx) } -func (qb *TagQueryBuilder) FindByName(name string, tx *sqlx.Tx) (*Tag, error) { - query := "SELECT * FROM tags WHERE name = ? LIMIT 1" +func (qb *TagQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Tag, error) { + query := "SELECT * FROM tags WHERE name = ?" 
+ if nocase { + query += " COLLATE NOCASE" + } + query += " LIMIT 1" args := []interface{}{name} return qb.queryTag(query, args, tx) } -func (qb *TagQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Tag, error) { - query := "SELECT * FROM tags WHERE name IN " + getInBinding(len(names)) +func (qb *TagQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Tag, error) { + query := "SELECT * FROM tags WHERE name" + if nocase { + query += " COLLATE NOCASE" + } + query += " IN " + getInBinding(len(names)) var args []interface{} for _, name := range names { args = append(args, name) diff --git a/pkg/models/querybuilder_tag_test.go b/pkg/models/querybuilder_tag_test.go index bfe87d976..faf736284 100644 --- a/pkg/models/querybuilder_tag_test.go +++ b/pkg/models/querybuilder_tag_test.go @@ -3,6 +3,7 @@ package models_test import ( + "strings" "testing" "github.com/stashapp/stash/pkg/models" @@ -32,14 +33,84 @@ func TestMarkerFindBySceneMarkerID(t *testing.T) { assert.Len(t, tags, 0) } +func TestTagFindByName(t *testing.T) { + + tqb := models.NewTagQueryBuilder() + + name := tagNames[tagIdxWithScene] // find a tag by name + + tag, err := tqb.FindByName(name, nil, false) + + if err != nil { + t.Fatalf("Error finding tags: %s", err.Error()) + } + + assert.Equal(t, tagNames[tagIdxWithScene], tag.Name) + + name = tagNames[tagIdxWithDupName] // find a tag by name nocase + + tag, err = tqb.FindByName(name, nil, true) + + if err != nil { + t.Fatalf("Error finding tags: %s", err.Error()) + } + // tagIdxWithDupName and tagIdxWithScene should have similar names ( only diff should be Name vs NaMe) + // match (tag.Name) should be the tagIdxWithScene since its ID is first + assert.Equal(t, tagNames[tagIdxWithScene], tag.Name) + assert.Equal(t, strings.ToLower(tagNames[tagIdxWithDupName]), strings.ToLower(tag.Name)) + +} + +func TestTagFindByNames(t *testing.T) { + var names []string + + tqb := models.NewTagQueryBuilder() + + names = append(names, 
tagNames[tagIdxWithScene]) // find tags by names + + tags, err := tqb.FindByNames(names, nil, false) + if err != nil { + t.Fatalf("Error finding tags: %s", err.Error()) + } + assert.Len(t, tags, 1) + assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name) + + tags, err = tqb.FindByNames(names, nil, true) // find tags by names nocase + if err != nil { + t.Fatalf("Error finding tags: %s", err.Error()) + } + assert.Len(t, tags, 2) // tagIdxWithScene and tagIdxWithDupName + assert.Equal(t, strings.ToLower(tagNames[tagIdxWithScene]), strings.ToLower(tags[0].Name)) + assert.Equal(t, strings.ToLower(tagNames[tagIdxWithScene]), strings.ToLower(tags[1].Name)) + + names = append(names, tagNames[tagIdx1WithScene]) // find tags by names ( 2 names ) + + tags, err = tqb.FindByNames(names, nil, false) + if err != nil { + t.Fatalf("Error finding tags: %s", err.Error()) + } + assert.Len(t, tags, 2) // tagIdxWithScene and tagIdx1WithScene + assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name) + assert.Equal(t, tagNames[tagIdx1WithScene], tags[1].Name) + + tags, err = tqb.FindByNames(names, nil, true) // find tags by names ( 2 names nocase) + if err != nil { + t.Fatalf("Error finding tags: %s", err.Error()) + } + assert.Len(t, tags, 4) // tagIdxWithScene and tagIdxWithDupName , tagIdx1WithScene and tagIdx1WithDupName + assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name) + assert.Equal(t, tagNames[tagIdx1WithScene], tags[1].Name) + assert.Equal(t, tagNames[tagIdx1WithDupName], tags[2].Name) + assert.Equal(t, tagNames[tagIdxWithDupName], tags[3].Name) + +} + // TODO Create // TODO Update // TODO Destroy // TODO Find // TODO FindBySceneID // TODO FindBySceneMarkerID -// TODO FindByName -// TODO FindByNames // TODO Count // TODO All // TODO AllSlim diff --git a/pkg/models/setup_test.go b/pkg/models/setup_test.go index 79f78412e..63412047b 100644 --- a/pkg/models/setup_test.go +++ b/pkg/models/setup_test.go @@ -22,7 +22,8 @@ const totalScenes = 12 const totalPerformers = 3 const 
totalMovies = 1 const totalGalleries = 1 -const totalTags = 5 +const tagsNameNoCase = 2 +const tagsNameCase = 5 const totalStudios = 1 var sceneIDs []int @@ -33,6 +34,8 @@ var tagIDs []int var studioIDs []int var markerIDs []int +var tagNames []string + const sceneIdxWithMovie = 0 const sceneIdxWithGallery = 1 const sceneIdxWithPerformer = 2 @@ -56,6 +59,8 @@ const tagIdx1WithScene = 1 const tagIdx2WithScene = 2 const tagIdxWithPrimaryMarker = 3 const tagIdxWithMarker = 4 +const tagIdx1WithDupName = 5 +const tagIdxWithDupName = 6 const studioIdxWithScene = 0 @@ -130,7 +135,7 @@ func populateDB() error { return err } - if err := createTags(tx, totalTags); err != nil { + if err := createTags(tx, tagsNameCase, tagsNameNoCase); err != nil { tx.Rollback() return err } @@ -341,12 +346,25 @@ func getTagStringValue(index int, field string) string { return "tag_" + strconv.FormatInt(int64(index), 10) + "_" + field } -func createTags(tx *sqlx.Tx, n int) error { +//createTags creates n tags with plain Name and o tags with camel cased NaMe included +func createTags(tx *sqlx.Tx, n int, o int) error { tqb := models.NewTagQueryBuilder() + const namePlain = "Name" + const nameNoCase = "NaMe" + + name := namePlain + + for i := 0; i < n+o; i++ { + index := i + + if i >= n { // i=n tags get dup names if case is not checked + index = n + o - (i + 1) // for the name to be the same the number (index) must be the same also + name = nameNoCase + } // so count backwards to 0 as needed + // tags [ i ] and [ n + o - i - 1 ] should have similar names with only the Name!=NaMe part different - for i := 0; i < n; i++ { tag := models.Tag{ - Name: getTagStringValue(i, "Name"), + Name: getTagStringValue(index, name), } created, err := tqb.Create(tag, tx) @@ -356,6 +374,8 @@ func createTags(tx *sqlx.Tx, n int) error { } tagIDs = append(tagIDs, created.ID) + tagNames = append(tagNames, created.Name) + } return nil diff --git a/pkg/scraper/scrapers.go b/pkg/scraper/scrapers.go index da24a313e..c352dfd20 100644 --- 
a/pkg/scraper/scrapers.go +++ b/pkg/scraper/scrapers.go @@ -154,7 +154,7 @@ func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) { func matchPerformer(p *models.ScrapedScenePerformer) error { qb := models.NewPerformerQueryBuilder() - performers, err := qb.FindByNames([]string{p.Name}, nil) + performers, err := qb.FindByNames([]string{p.Name}, nil, true) if err != nil { return err @@ -173,7 +173,7 @@ func matchPerformer(p *models.ScrapedScenePerformer) error { func matchStudio(s *models.ScrapedSceneStudio) error { qb := models.NewStudioQueryBuilder() - studio, err := qb.FindByName(s.Name, nil) + studio, err := qb.FindByName(s.Name, nil, true) if err != nil { return err @@ -191,7 +191,7 @@ func matchStudio(s *models.ScrapedSceneStudio) error { func matchMovie(m *models.ScrapedSceneMovie) error { qb := models.NewMovieQueryBuilder() - movies, err := qb.FindByNames([]string{m.Name}, nil) + movies, err := qb.FindByNames([]string{m.Name}, nil, true) if err != nil { return err @@ -210,7 +210,7 @@ func matchMovie(m *models.ScrapedSceneMovie) error { func matchTag(s *models.ScrapedSceneTag) error { qb := models.NewTagQueryBuilder() - tag, err := qb.FindByName(s.Name, nil) + tag, err := qb.FindByName(s.Name, nil, true) if err != nil { return err