Change scrape matching (studio, movies, tag, performers) to case insensitive (#556)

* Change scrape matching (studio, movies, tag, performers) to case insensitive
* * fix collate order
* * make filename parser findbyname calls case insensitive
* * add unit tests for Tag FindByName/FindByNames
This commit is contained in:
bnkai 2020-05-24 09:19:22 +03:00 committed by GitHub
parent 32fce9ac6f
commit ccd75731b7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 153 additions and 37 deletions

View file

@ -405,7 +405,7 @@ func (m parseMapper) parse(scene *models.Scene) *sceneHolder {
}
// performerQueryer abstracts looking up performers by a list of names.
// queryPerformer calls FindByNames through p.performerQuery (presumably a
// field of this interface type — confirm against SceneFilenameParser).
// NOTE(review): nocase presumably requests case-insensitive name matching;
// verify against the implementing query builder.
type performerQueryer interface {
FindByNames(names []string, tx *sqlx.Tx) ([]*models.Performer, error)
FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*models.Performer, error)
}
type sceneQueryer interface {
@ -413,15 +413,15 @@ type sceneQueryer interface {
}
// tagQueryer abstracts looking up a single tag by name.
// queryTag calls FindByName through p.tagQuery (presumably a field of this
// interface type — confirm against SceneFilenameParser).
// NOTE(review): nocase presumably requests case-insensitive name matching;
// verify against the implementing query builder.
type tagQueryer interface {
FindByName(name string, tx *sqlx.Tx) (*models.Tag, error)
FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Tag, error)
}
// studioQueryer abstracts looking up a single studio by name.
// queryStudio calls FindByName through p.studioQuery (presumably a field of
// this interface type — confirm against SceneFilenameParser).
// NOTE(review): nocase presumably requests case-insensitive name matching;
// verify against the implementing query builder.
type studioQueryer interface {
FindByName(name string, tx *sqlx.Tx) (*models.Studio, error)
FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Studio, error)
}
// movieQueryer abstracts looking up a single movie by name.
// queryMovie calls FindByName through p.movieQuery (presumably a field of
// this interface type — confirm against SceneFilenameParser).
// NOTE(review): nocase presumably requests case-insensitive name matching;
// verify against the implementing query builder.
type movieQueryer interface {
FindByName(name string, tx *sqlx.Tx) (*models.Movie, error)
FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Movie, error)
}
type SceneFilenameParser struct {
@ -546,7 +546,7 @@ func (p *SceneFilenameParser) queryPerformer(performerName string) *models.Perfo
}
// perform an exact match and grab the first
performers, _ := p.performerQuery.FindByNames([]string{performerName}, nil)
performers, _ := p.performerQuery.FindByNames([]string{performerName}, nil, true)
var ret *models.Performer
if len(performers) > 0 {
@ -568,7 +568,7 @@ func (p *SceneFilenameParser) queryStudio(studioName string) *models.Studio {
return ret
}
ret, _ := p.studioQuery.FindByName(studioName, nil)
ret, _ := p.studioQuery.FindByName(studioName, nil, true)
// add result to cache
p.studioCache[studioName] = ret
@ -585,7 +585,7 @@ func (p *SceneFilenameParser) queryMovie(movieName string) *models.Movie {
return ret
}
ret, _ := p.movieQuery.FindByName(movieName, nil)
ret, _ := p.movieQuery.FindByName(movieName, nil, true)
// add result to cache
p.movieCache[movieName] = ret
@ -603,7 +603,7 @@ func (p *SceneFilenameParser) queryTag(tagName string) *models.Tag {
}
// match tag name exactly
ret, _ := p.tagQuery.FindByName(tagName, nil)
ret, _ := p.tagQuery.FindByName(tagName, nil, true)
// add result to cache
p.tagCache[tagName] = ret

View file

@ -410,7 +410,7 @@ func (t *ImportTask) ImportScrapedItems(ctx context.Context) {
UpdatedAt: models.SQLiteTimestamp{Timestamp: t.getTimeFromJSONTime(mappingJSON.UpdatedAt)},
}
studio, err := sqb.FindByName(mappingJSON.Studio, tx)
studio, err := sqb.FindByName(mappingJSON.Studio, tx, false)
if err != nil {
logger.Errorf("[scraped sites] failed to fetch studio: %s", err.Error())
}
@ -532,7 +532,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) {
// Populate the studio ID
if sceneJSON.Studio != "" {
sqb := models.NewStudioQueryBuilder()
studio, err := sqb.FindByName(sceneJSON.Studio, tx)
studio, err := sqb.FindByName(sceneJSON.Studio, tx, false)
if err != nil {
logger.Warnf("[scenes] studio <%s> does not exist: %s", sceneJSON.Studio, err.Error())
} else {
@ -634,7 +634,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) {
UpdatedAt: models.SQLiteTimestamp{Timestamp: t.getTimeFromJSONTime(marker.UpdatedAt)},
}
primaryTag, err := tqb.FindByName(marker.PrimaryTag, tx)
primaryTag, err := tqb.FindByName(marker.PrimaryTag, tx, false)
if err != nil {
logger.Errorf("[scenes] <%s> failed to find primary tag for marker: %s", scene.Checksum, err.Error())
} else {
@ -682,7 +682,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) {
func (t *ImportTask) getPerformers(names []string, tx *sqlx.Tx) ([]*models.Performer, error) {
pqb := models.NewPerformerQueryBuilder()
performers, err := pqb.FindByNames(names, tx)
performers, err := pqb.FindByNames(names, tx, false)
if err != nil {
return nil, err
}
@ -711,7 +711,7 @@ func (t *ImportTask) getMoviesScenes(input []jsonschema.SceneMovie, sceneID int,
var movies []models.MoviesScenes
for _, inputMovie := range input {
movie, err := mqb.FindByName(inputMovie.MovieName, tx)
movie, err := mqb.FindByName(inputMovie.MovieName, tx, false)
if err != nil {
return nil, err
}
@ -740,7 +740,7 @@ func (t *ImportTask) getMoviesScenes(input []jsonschema.SceneMovie, sceneID int,
func (t *ImportTask) getTags(sceneChecksum string, names []string, tx *sqlx.Tx) ([]*models.Tag, error) {
tqb := models.NewTagQueryBuilder()
tags, err := tqb.FindByNames(names, tx)
tags, err := tqb.FindByNames(names, tx, false)
if err != nil {
return nil, err
}

View file

@ -83,14 +83,22 @@ func (qb *MovieQueryBuilder) FindBySceneID(sceneID int, tx *sqlx.Tx) ([]*Movie,
return qb.queryMovies(query, args, tx)
}
func (qb *MovieQueryBuilder) FindByName(name string, tx *sqlx.Tx) (*Movie, error) {
query := "SELECT * FROM movies WHERE name = ? LIMIT 1"
// FindByName runs a LIMIT 1 lookup on the movies table for a row whose name
// equals the given name and returns whatever qb.queryMovie yields for it.
//
// When nocase is true the bound value is compared with COLLATE NOCASE, making
// the match case insensitive; otherwise the column's default comparison
// is used. tx is passed through to qb.queryMovie unchanged.
func (qb *MovieQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Movie, error) {
	collation := ""
	if nocase {
		collation = " COLLATE NOCASE"
	}

	sql := "SELECT * FROM movies WHERE name = ?" + collation + " LIMIT 1"

	return qb.queryMovie(sql, []interface{}{name}, tx)
}
func (qb *MovieQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Movie, error) {
query := "SELECT * FROM movies WHERE name IN " + getInBinding(len(names))
func (qb *MovieQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Movie, error) {
query := "SELECT * FROM movies WHERE name"
if nocase {
query += " COLLATE NOCASE"
}
query += " IN " + getInBinding(len(names))
var args []interface{}
for _, name := range names {
args = append(args, name)

View file

@ -95,8 +95,13 @@ func (qb *PerformerQueryBuilder) FindNameBySceneID(sceneID int, tx *sqlx.Tx) ([]
return qb.queryPerformers(query, args, tx)
}
func (qb *PerformerQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Performer, error) {
query := "SELECT * FROM performers WHERE name IN " + getInBinding(len(names))
func (qb *PerformerQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Performer, error) {
query := "SELECT * FROM performers WHERE name"
if nocase {
query += " COLLATE NOCASE"
}
query += " IN " + getInBinding(len(names))
var args []interface{}
for _, name := range names {
args = append(args, name)

View file

@ -79,8 +79,12 @@ func (qb *StudioQueryBuilder) FindBySceneID(sceneID int) (*Studio, error) {
return qb.queryStudio(query, args, nil)
}
func (qb *StudioQueryBuilder) FindByName(name string, tx *sqlx.Tx) (*Studio, error) {
query := "SELECT * FROM studios WHERE name = ? LIMIT 1"
// FindByName runs a LIMIT 1 lookup on the studios table for a row whose name
// equals the given name and returns whatever qb.queryStudio yields for it.
//
// When nocase is true the bound value is compared with COLLATE NOCASE, making
// the match case insensitive; otherwise the column's default comparison
// is used. tx is passed through to qb.queryStudio unchanged.
func (qb *StudioQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Studio, error) {
	collation := ""
	if nocase {
		collation = " COLLATE NOCASE"
	}

	sql := "SELECT * FROM studios WHERE name = ?" + collation + " LIMIT 1"

	return qb.queryStudio(sql, []interface{}{name}, tx)
}

View file

@ -111,14 +111,22 @@ func (qb *TagQueryBuilder) FindBySceneMarkerID(sceneMarkerID int, tx *sqlx.Tx) (
return qb.queryTags(query, args, tx)
}
func (qb *TagQueryBuilder) FindByName(name string, tx *sqlx.Tx) (*Tag, error) {
query := "SELECT * FROM tags WHERE name = ? LIMIT 1"
// FindByName runs a LIMIT 1 lookup on the tags table for a row whose name
// equals the given name and returns whatever qb.queryTag yields for it.
//
// When nocase is true the bound value is compared with COLLATE NOCASE, making
// the match case insensitive; otherwise the column's default comparison
// is used. tx is passed through to qb.queryTag unchanged.
func (qb *TagQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Tag, error) {
	collation := ""
	if nocase {
		collation = " COLLATE NOCASE"
	}

	sql := "SELECT * FROM tags WHERE name = ?" + collation + " LIMIT 1"

	return qb.queryTag(sql, []interface{}{name}, tx)
}
func (qb *TagQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Tag, error) {
query := "SELECT * FROM tags WHERE name IN " + getInBinding(len(names))
func (qb *TagQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Tag, error) {
query := "SELECT * FROM tags WHERE name"
if nocase {
query += " COLLATE NOCASE"
}
query += " IN " + getInBinding(len(names))
var args []interface{}
for _, name := range names {
args = append(args, name)

View file

@ -3,6 +3,7 @@
package models_test
import (
"strings"
"testing"
"github.com/stashapp/stash/pkg/models"
@ -32,14 +33,84 @@ func TestMarkerFindBySceneMarkerID(t *testing.T) {
assert.Len(t, tags, 0)
}
// TestTagFindByName checks TagQueryBuilder.FindByName with a case-sensitive
// lookup and then with a case-insensitive lookup of a name that differs from
// an earlier tag only by case.
//
// A nil result is reported as a test failure before tag.Name is read, so a
// lookup that finds no row fails cleanly instead of panicking.
func TestTagFindByName(t *testing.T) {
	tqb := models.NewTagQueryBuilder()

	name := tagNames[tagIdxWithScene] // find a tag by name

	tag, err := tqb.FindByName(name, nil, false)
	if err != nil {
		t.Fatalf("Error finding tags: %s", err.Error())
	}
	if tag == nil {
		t.Fatalf("Tag %q not found", name)
	}

	assert.Equal(t, tagNames[tagIdxWithScene], tag.Name)

	name = tagNames[tagIdxWithDupName] // find a tag by name nocase

	tag, err = tqb.FindByName(name, nil, true)
	if err != nil {
		t.Fatalf("Error finding tags: %s", err.Error())
	}
	if tag == nil {
		t.Fatalf("Tag %q not found (nocase)", name)
	}

	// tagIdxWithDupName and tagIdxWithScene have names that differ only in
	// case (Name vs NaMe). The LIMIT 1 match is expected to be the
	// tagIdxWithScene tag since its ID is first.
	assert.Equal(t, tagNames[tagIdxWithScene], tag.Name)
	assert.Equal(t, strings.ToLower(tagNames[tagIdxWithDupName]), strings.ToLower(tag.Name))
}
// TestTagFindByNames checks TagQueryBuilder.FindByNames with one name and
// with two names, each both case sensitively and case insensitively.
//
// assert.Len only records a failure and returns false; it does not stop the
// test. Every length check therefore aborts via t.FailNow on failure, so the
// following tags[i] accesses cannot panic on a short result slice.
// NOTE(review): the positional assertions assume rows come back in creation
// (ID) order — confirm, since no ORDER BY is visible in the query.
func TestTagFindByNames(t *testing.T) {
	var names []string

	tqb := models.NewTagQueryBuilder()

	names = append(names, tagNames[tagIdxWithScene]) // find tags by names

	tags, err := tqb.FindByNames(names, nil, false)
	if err != nil {
		t.Fatalf("Error finding tags: %s", err.Error())
	}
	if !assert.Len(t, tags, 1) {
		t.FailNow()
	}
	assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name)

	tags, err = tqb.FindByNames(names, nil, true) // find tags by names nocase
	if err != nil {
		t.Fatalf("Error finding tags: %s", err.Error())
	}
	if !assert.Len(t, tags, 2) { // tagIdxWithScene and tagIdxWithDupName
		t.FailNow()
	}
	assert.Equal(t, strings.ToLower(tagNames[tagIdxWithScene]), strings.ToLower(tags[0].Name))
	assert.Equal(t, strings.ToLower(tagNames[tagIdxWithScene]), strings.ToLower(tags[1].Name))

	names = append(names, tagNames[tagIdx1WithScene]) // find tags by names ( 2 names )

	tags, err = tqb.FindByNames(names, nil, false)
	if err != nil {
		t.Fatalf("Error finding tags: %s", err.Error())
	}
	if !assert.Len(t, tags, 2) { // tagIdxWithScene and tagIdx1WithScene
		t.FailNow()
	}
	assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name)
	assert.Equal(t, tagNames[tagIdx1WithScene], tags[1].Name)

	tags, err = tqb.FindByNames(names, nil, true) // find tags by names ( 2 names nocase)
	if err != nil {
		t.Fatalf("Error finding tags: %s", err.Error())
	}
	if !assert.Len(t, tags, 4) { // tagIdxWithScene and tagIdxWithDupName , tagIdx1WithScene and tagIdx1WithDupName
		t.FailNow()
	}
	assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name)
	assert.Equal(t, tagNames[tagIdx1WithScene], tags[1].Name)
	assert.Equal(t, tagNames[tagIdx1WithDupName], tags[2].Name)
	assert.Equal(t, tagNames[tagIdxWithDupName], tags[3].Name)
}
// TODO Create
// TODO Update
// TODO Destroy
// TODO Find
// TODO FindBySceneID
// TODO FindBySceneMarkerID
// TODO FindByName
// TODO FindByNames
// TODO Count
// TODO All
// TODO AllSlim

View file

@ -22,7 +22,8 @@ const totalScenes = 12
const totalPerformers = 3
const totalMovies = 1
const totalGalleries = 1
const totalTags = 5
const tagsNameNoCase = 2
const tagsNameCase = 5
const totalStudios = 1
var sceneIDs []int
@ -33,6 +34,8 @@ var tagIDs []int
var studioIDs []int
var markerIDs []int
var tagNames []string
const sceneIdxWithMovie = 0
const sceneIdxWithGallery = 1
const sceneIdxWithPerformer = 2
@ -56,6 +59,8 @@ const tagIdx1WithScene = 1
const tagIdx2WithScene = 2
const tagIdxWithPrimaryMarker = 3
const tagIdxWithMarker = 4
const tagIdx1WithDupName = 5
const tagIdxWithDupName = 6
const studioIdxWithScene = 0
@ -130,7 +135,7 @@ func populateDB() error {
return err
}
if err := createTags(tx, totalTags); err != nil {
if err := createTags(tx, tagsNameCase, tagsNameNoCase); err != nil {
tx.Rollback()
return err
}
@ -341,12 +346,25 @@ func getTagStringValue(index int, field string) string {
return "tag_" + strconv.FormatInt(int64(index), 10) + "_" + field
}
func createTags(tx *sqlx.Tx, n int) error {
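// createTags creates n tags named with the plain "Name" field, followed by o tags named with the
// mixed-case "NaMe" field, so each of the o tags differs from an earlier tag only by letter case.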
func createTags(tx *sqlx.Tx, n int, o int) error {
tqb := models.NewTagQueryBuilder()
const namePlain = "Name"
const nameNoCase = "NaMe"
name := namePlain
for i := 0; i < n+o; i++ {
index := i
if i >= n { // i<n tags get normal names
name = nameNoCase // i>=n tags get dup names if case is not checked
index = n + o - (i + 1) // for the name to be the same the number (index) must be the same also
} // so count backwards to 0 as needed
// tags [ i ] and [ n + o - i - 1 ] should have similar names with only the Name!=NaMe part different
for i := 0; i < n; i++ {
tag := models.Tag{
Name: getTagStringValue(i, "Name"),
Name: getTagStringValue(index, name),
}
created, err := tqb.Create(tag, tx)
@ -356,6 +374,8 @@ func createTags(tx *sqlx.Tx, n int) error {
}
tagIDs = append(tagIDs, created.ID)
tagNames = append(tagNames, created.Name)
}
return nil

View file

@ -154,7 +154,7 @@ func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
func matchPerformer(p *models.ScrapedScenePerformer) error {
qb := models.NewPerformerQueryBuilder()
performers, err := qb.FindByNames([]string{p.Name}, nil)
performers, err := qb.FindByNames([]string{p.Name}, nil, true)
if err != nil {
return err
@ -173,7 +173,7 @@ func matchPerformer(p *models.ScrapedScenePerformer) error {
func matchStudio(s *models.ScrapedSceneStudio) error {
qb := models.NewStudioQueryBuilder()
studio, err := qb.FindByName(s.Name, nil)
studio, err := qb.FindByName(s.Name, nil, true)
if err != nil {
return err
@ -191,7 +191,7 @@ func matchStudio(s *models.ScrapedSceneStudio) error {
func matchMovie(m *models.ScrapedSceneMovie) error {
qb := models.NewMovieQueryBuilder()
movies, err := qb.FindByNames([]string{m.Name}, nil)
movies, err := qb.FindByNames([]string{m.Name}, nil, true)
if err != nil {
return err
@ -210,7 +210,7 @@ func matchMovie(m *models.ScrapedSceneMovie) error {
func matchTag(s *models.ScrapedSceneTag) error {
qb := models.NewTagQueryBuilder()
tag, err := qb.FindByName(s.Name, nil)
tag, err := qb.FindByName(s.Name, nil, true)
if err != nil {
return err