mirror of
https://github.com/stashapp/stash.git
synced 2026-01-05 23:25:04 +01:00
Make scrape matching (studios, movies, tags, performers) case-insensitive (#556)
* Make scrape matching (studios, movies, tags, performers) case-insensitive * Fix collate order * Make the filename parser's FindByName calls case-insensitive * Add unit tests for the Tag FindByName/FindByNames queries
This commit is contained in:
parent
32fce9ac6f
commit
ccd75731b7
9 changed files with 153 additions and 37 deletions
|
|
@ -405,7 +405,7 @@ func (m parseMapper) parse(scene *models.Scene) *sceneHolder {
|
|||
}
|
||||
|
||||
type performerQueryer interface {
|
||||
FindByNames(names []string, tx *sqlx.Tx) ([]*models.Performer, error)
|
||||
FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*models.Performer, error)
|
||||
}
|
||||
|
||||
type sceneQueryer interface {
|
||||
|
|
@ -413,15 +413,15 @@ type sceneQueryer interface {
|
|||
}
|
||||
|
||||
type tagQueryer interface {
|
||||
FindByName(name string, tx *sqlx.Tx) (*models.Tag, error)
|
||||
FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Tag, error)
|
||||
}
|
||||
|
||||
type studioQueryer interface {
|
||||
FindByName(name string, tx *sqlx.Tx) (*models.Studio, error)
|
||||
FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Studio, error)
|
||||
}
|
||||
|
||||
type movieQueryer interface {
|
||||
FindByName(name string, tx *sqlx.Tx) (*models.Movie, error)
|
||||
FindByName(name string, tx *sqlx.Tx, nocase bool) (*models.Movie, error)
|
||||
}
|
||||
|
||||
type SceneFilenameParser struct {
|
||||
|
|
@ -546,7 +546,7 @@ func (p *SceneFilenameParser) queryPerformer(performerName string) *models.Perfo
|
|||
}
|
||||
|
||||
// perform a case-insensitive match on the full name and grab the first
|
||||
performers, _ := p.performerQuery.FindByNames([]string{performerName}, nil)
|
||||
performers, _ := p.performerQuery.FindByNames([]string{performerName}, nil, true)
|
||||
|
||||
var ret *models.Performer
|
||||
if len(performers) > 0 {
|
||||
|
|
@ -568,7 +568,7 @@ func (p *SceneFilenameParser) queryStudio(studioName string) *models.Studio {
|
|||
return ret
|
||||
}
|
||||
|
||||
ret, _ := p.studioQuery.FindByName(studioName, nil)
|
||||
ret, _ := p.studioQuery.FindByName(studioName, nil, true)
|
||||
|
||||
// add result to cache
|
||||
p.studioCache[studioName] = ret
|
||||
|
|
@ -585,7 +585,7 @@ func (p *SceneFilenameParser) queryMovie(movieName string) *models.Movie {
|
|||
return ret
|
||||
}
|
||||
|
||||
ret, _ := p.movieQuery.FindByName(movieName, nil)
|
||||
ret, _ := p.movieQuery.FindByName(movieName, nil, true)
|
||||
|
||||
// add result to cache
|
||||
p.movieCache[movieName] = ret
|
||||
|
|
@ -603,7 +603,7 @@ func (p *SceneFilenameParser) queryTag(tagName string) *models.Tag {
|
|||
}
|
||||
|
||||
// match the full tag name, ignoring case
|
||||
ret, _ := p.tagQuery.FindByName(tagName, nil)
|
||||
ret, _ := p.tagQuery.FindByName(tagName, nil, true)
|
||||
|
||||
// add result to cache
|
||||
p.tagCache[tagName] = ret
|
||||
|
|
|
|||
|
|
@ -410,7 +410,7 @@ func (t *ImportTask) ImportScrapedItems(ctx context.Context) {
|
|||
UpdatedAt: models.SQLiteTimestamp{Timestamp: t.getTimeFromJSONTime(mappingJSON.UpdatedAt)},
|
||||
}
|
||||
|
||||
studio, err := sqb.FindByName(mappingJSON.Studio, tx)
|
||||
studio, err := sqb.FindByName(mappingJSON.Studio, tx, false)
|
||||
if err != nil {
|
||||
logger.Errorf("[scraped sites] failed to fetch studio: %s", err.Error())
|
||||
}
|
||||
|
|
@ -532,7 +532,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) {
|
|||
// Populate the studio ID
|
||||
if sceneJSON.Studio != "" {
|
||||
sqb := models.NewStudioQueryBuilder()
|
||||
studio, err := sqb.FindByName(sceneJSON.Studio, tx)
|
||||
studio, err := sqb.FindByName(sceneJSON.Studio, tx, false)
|
||||
if err != nil {
|
||||
logger.Warnf("[scenes] studio <%s> does not exist: %s", sceneJSON.Studio, err.Error())
|
||||
} else {
|
||||
|
|
@ -634,7 +634,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) {
|
|||
UpdatedAt: models.SQLiteTimestamp{Timestamp: t.getTimeFromJSONTime(marker.UpdatedAt)},
|
||||
}
|
||||
|
||||
primaryTag, err := tqb.FindByName(marker.PrimaryTag, tx)
|
||||
primaryTag, err := tqb.FindByName(marker.PrimaryTag, tx, false)
|
||||
if err != nil {
|
||||
logger.Errorf("[scenes] <%s> failed to find primary tag for marker: %s", scene.Checksum, err.Error())
|
||||
} else {
|
||||
|
|
@ -682,7 +682,7 @@ func (t *ImportTask) ImportScenes(ctx context.Context) {
|
|||
|
||||
func (t *ImportTask) getPerformers(names []string, tx *sqlx.Tx) ([]*models.Performer, error) {
|
||||
pqb := models.NewPerformerQueryBuilder()
|
||||
performers, err := pqb.FindByNames(names, tx)
|
||||
performers, err := pqb.FindByNames(names, tx, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
@ -711,7 +711,7 @@ func (t *ImportTask) getMoviesScenes(input []jsonschema.SceneMovie, sceneID int,
|
|||
|
||||
var movies []models.MoviesScenes
|
||||
for _, inputMovie := range input {
|
||||
movie, err := mqb.FindByName(inputMovie.MovieName, tx)
|
||||
movie, err := mqb.FindByName(inputMovie.MovieName, tx, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
@ -740,7 +740,7 @@ func (t *ImportTask) getMoviesScenes(input []jsonschema.SceneMovie, sceneID int,
|
|||
|
||||
func (t *ImportTask) getTags(sceneChecksum string, names []string, tx *sqlx.Tx) ([]*models.Tag, error) {
|
||||
tqb := models.NewTagQueryBuilder()
|
||||
tags, err := tqb.FindByNames(names, tx)
|
||||
tags, err := tqb.FindByNames(names, tx, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,14 +83,22 @@ func (qb *MovieQueryBuilder) FindBySceneID(sceneID int, tx *sqlx.Tx) ([]*Movie,
|
|||
return qb.queryMovies(query, args, tx)
|
||||
}
|
||||
|
||||
func (qb *MovieQueryBuilder) FindByName(name string, tx *sqlx.Tx) (*Movie, error) {
|
||||
query := "SELECT * FROM movies WHERE name = ? LIMIT 1"
|
||||
func (qb *MovieQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Movie, error) {
|
||||
query := "SELECT * FROM movies WHERE name = ?"
|
||||
if nocase {
|
||||
query += " COLLATE NOCASE"
|
||||
}
|
||||
query += " LIMIT 1"
|
||||
args := []interface{}{name}
|
||||
return qb.queryMovie(query, args, tx)
|
||||
}
|
||||
|
||||
func (qb *MovieQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Movie, error) {
|
||||
query := "SELECT * FROM movies WHERE name IN " + getInBinding(len(names))
|
||||
func (qb *MovieQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Movie, error) {
|
||||
query := "SELECT * FROM movies WHERE name"
|
||||
if nocase {
|
||||
query += " COLLATE NOCASE"
|
||||
}
|
||||
query += " IN " + getInBinding(len(names))
|
||||
var args []interface{}
|
||||
for _, name := range names {
|
||||
args = append(args, name)
|
||||
|
|
|
|||
|
|
@ -95,8 +95,13 @@ func (qb *PerformerQueryBuilder) FindNameBySceneID(sceneID int, tx *sqlx.Tx) ([]
|
|||
return qb.queryPerformers(query, args, tx)
|
||||
}
|
||||
|
||||
func (qb *PerformerQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Performer, error) {
|
||||
query := "SELECT * FROM performers WHERE name IN " + getInBinding(len(names))
|
||||
func (qb *PerformerQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Performer, error) {
|
||||
query := "SELECT * FROM performers WHERE name"
|
||||
if nocase {
|
||||
query += " COLLATE NOCASE"
|
||||
}
|
||||
query += " IN " + getInBinding(len(names))
|
||||
|
||||
var args []interface{}
|
||||
for _, name := range names {
|
||||
args = append(args, name)
|
||||
|
|
|
|||
|
|
@ -79,8 +79,12 @@ func (qb *StudioQueryBuilder) FindBySceneID(sceneID int) (*Studio, error) {
|
|||
return qb.queryStudio(query, args, nil)
|
||||
}
|
||||
|
||||
func (qb *StudioQueryBuilder) FindByName(name string, tx *sqlx.Tx) (*Studio, error) {
|
||||
query := "SELECT * FROM studios WHERE name = ? LIMIT 1"
|
||||
func (qb *StudioQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Studio, error) {
|
||||
query := "SELECT * FROM studios WHERE name = ?"
|
||||
if nocase {
|
||||
query += " COLLATE NOCASE"
|
||||
}
|
||||
query += " LIMIT 1"
|
||||
args := []interface{}{name}
|
||||
return qb.queryStudio(query, args, tx)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -111,14 +111,22 @@ func (qb *TagQueryBuilder) FindBySceneMarkerID(sceneMarkerID int, tx *sqlx.Tx) (
|
|||
return qb.queryTags(query, args, tx)
|
||||
}
|
||||
|
||||
func (qb *TagQueryBuilder) FindByName(name string, tx *sqlx.Tx) (*Tag, error) {
|
||||
query := "SELECT * FROM tags WHERE name = ? LIMIT 1"
|
||||
func (qb *TagQueryBuilder) FindByName(name string, tx *sqlx.Tx, nocase bool) (*Tag, error) {
|
||||
query := "SELECT * FROM tags WHERE name = ?"
|
||||
if nocase {
|
||||
query += " COLLATE NOCASE"
|
||||
}
|
||||
query += " LIMIT 1"
|
||||
args := []interface{}{name}
|
||||
return qb.queryTag(query, args, tx)
|
||||
}
|
||||
|
||||
func (qb *TagQueryBuilder) FindByNames(names []string, tx *sqlx.Tx) ([]*Tag, error) {
|
||||
query := "SELECT * FROM tags WHERE name IN " + getInBinding(len(names))
|
||||
func (qb *TagQueryBuilder) FindByNames(names []string, tx *sqlx.Tx, nocase bool) ([]*Tag, error) {
|
||||
query := "SELECT * FROM tags WHERE name"
|
||||
if nocase {
|
||||
query += " COLLATE NOCASE"
|
||||
}
|
||||
query += " IN " + getInBinding(len(names))
|
||||
var args []interface{}
|
||||
for _, name := range names {
|
||||
args = append(args, name)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
package models_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
|
|
@ -32,14 +33,84 @@ func TestMarkerFindBySceneMarkerID(t *testing.T) {
|
|||
assert.Len(t, tags, 0)
|
||||
}
|
||||
|
||||
func TestTagFindByName(t *testing.T) {
|
||||
|
||||
tqb := models.NewTagQueryBuilder()
|
||||
|
||||
name := tagNames[tagIdxWithScene] // find a tag by name
|
||||
|
||||
tag, err := tqb.FindByName(name, nil, false)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Error finding tags: %s", err.Error())
|
||||
}
|
||||
|
||||
assert.Equal(t, tagNames[tagIdxWithScene], tag.Name)
|
||||
|
||||
name = tagNames[tagIdxWithDupName] // find a tag by name nocase
|
||||
|
||||
tag, err = tqb.FindByName(name, nil, true)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Error finding tags: %s", err.Error())
|
||||
}
|
||||
// tagIdxWithDupName and tagIdxWithScene should have similar names ( only diff should be Name vs NaMe)
|
||||
// match (tag.Name) should be the tagIdxWithScene since its ID is first
|
||||
assert.Equal(t, tagNames[tagIdxWithScene], tag.Name)
|
||||
assert.Equal(t, strings.ToLower(tagNames[tagIdxWithDupName]), strings.ToLower(tag.Name))
|
||||
|
||||
}
|
||||
|
||||
func TestTagFindByNames(t *testing.T) {
|
||||
var names []string
|
||||
|
||||
tqb := models.NewTagQueryBuilder()
|
||||
|
||||
names = append(names, tagNames[tagIdxWithScene]) // find tags by names
|
||||
|
||||
tags, err := tqb.FindByNames(names, nil, false)
|
||||
if err != nil {
|
||||
t.Fatalf("Error finding tags: %s", err.Error())
|
||||
}
|
||||
assert.Len(t, tags, 1)
|
||||
assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name)
|
||||
|
||||
tags, err = tqb.FindByNames(names, nil, true) // find tags by names nocase
|
||||
if err != nil {
|
||||
t.Fatalf("Error finding tags: %s", err.Error())
|
||||
}
|
||||
assert.Len(t, tags, 2) // tagIdxWithScene and tagIdxWithDupName
|
||||
assert.Equal(t, strings.ToLower(tagNames[tagIdxWithScene]), strings.ToLower(tags[0].Name))
|
||||
assert.Equal(t, strings.ToLower(tagNames[tagIdxWithScene]), strings.ToLower(tags[1].Name))
|
||||
|
||||
names = append(names, tagNames[tagIdx1WithScene]) // find tags by names ( 2 names )
|
||||
|
||||
tags, err = tqb.FindByNames(names, nil, false)
|
||||
if err != nil {
|
||||
t.Fatalf("Error finding tags: %s", err.Error())
|
||||
}
|
||||
assert.Len(t, tags, 2) // tagIdxWithScene and tagIdx1WithScene
|
||||
assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name)
|
||||
assert.Equal(t, tagNames[tagIdx1WithScene], tags[1].Name)
|
||||
|
||||
tags, err = tqb.FindByNames(names, nil, true) // find tags by names ( 2 names nocase)
|
||||
if err != nil {
|
||||
t.Fatalf("Error finding tags: %s", err.Error())
|
||||
}
|
||||
assert.Len(t, tags, 4) // tagIdxWithScene and tagIdxWithDupName , tagIdx1WithScene and tagIdx1WithDupName
|
||||
assert.Equal(t, tagNames[tagIdxWithScene], tags[0].Name)
|
||||
assert.Equal(t, tagNames[tagIdx1WithScene], tags[1].Name)
|
||||
assert.Equal(t, tagNames[tagIdx1WithDupName], tags[2].Name)
|
||||
assert.Equal(t, tagNames[tagIdxWithDupName], tags[3].Name)
|
||||
|
||||
}
|
||||
|
||||
// TODO Create
|
||||
// TODO Update
|
||||
// TODO Destroy
|
||||
// TODO Find
|
||||
// TODO FindBySceneID
|
||||
// TODO FindBySceneMarkerID
|
||||
// TODO FindByName
|
||||
// TODO FindByNames
|
||||
// TODO Count
|
||||
// TODO All
|
||||
// TODO AllSlim
|
||||
|
|
|
|||
|
|
@ -22,7 +22,8 @@ const totalScenes = 12
|
|||
const totalPerformers = 3
|
||||
const totalMovies = 1
|
||||
const totalGalleries = 1
|
||||
const totalTags = 5
|
||||
const tagsNameNoCase = 2
|
||||
const tagsNameCase = 5
|
||||
const totalStudios = 1
|
||||
|
||||
var sceneIDs []int
|
||||
|
|
@ -33,6 +34,8 @@ var tagIDs []int
|
|||
var studioIDs []int
|
||||
var markerIDs []int
|
||||
|
||||
var tagNames []string
|
||||
|
||||
const sceneIdxWithMovie = 0
|
||||
const sceneIdxWithGallery = 1
|
||||
const sceneIdxWithPerformer = 2
|
||||
|
|
@ -56,6 +59,8 @@ const tagIdx1WithScene = 1
|
|||
const tagIdx2WithScene = 2
|
||||
const tagIdxWithPrimaryMarker = 3
|
||||
const tagIdxWithMarker = 4
|
||||
const tagIdx1WithDupName = 5
|
||||
const tagIdxWithDupName = 6
|
||||
|
||||
const studioIdxWithScene = 0
|
||||
|
||||
|
|
@ -130,7 +135,7 @@ func populateDB() error {
|
|||
return err
|
||||
}
|
||||
|
||||
if err := createTags(tx, totalTags); err != nil {
|
||||
if err := createTags(tx, tagsNameCase, tagsNameNoCase); err != nil {
|
||||
tx.Rollback()
|
||||
return err
|
||||
}
|
||||
|
|
@ -341,12 +346,25 @@ func getTagStringValue(index int, field string) string {
|
|||
return "tag_" + strconv.FormatInt(int64(index), 10) + "_" + field
|
||||
}
|
||||
|
||||
func createTags(tx *sqlx.Tx, n int) error {
|
||||
// createTags creates n tags with plain "Name" names plus o tags whose names use the mixed-case "NaMe" spelling
|
||||
func createTags(tx *sqlx.Tx, n int, o int) error {
|
||||
tqb := models.NewTagQueryBuilder()
|
||||
const namePlain = "Name"
|
||||
const nameNoCase = "NaMe"
|
||||
|
||||
name := namePlain
|
||||
|
||||
for i := 0; i < n+o; i++ {
|
||||
index := i
|
||||
|
||||
if i >= n { // i<n tags get normal names
|
||||
name = nameNoCase // i>=n tags get dup names if case is not checked
|
||||
index = n + o - (i + 1) // for the name to be the same the number (index) must be the same also
|
||||
} // so count backwards to 0 as needed
|
||||
// tags [ i ] and [ n + o - i - 1 ] should have similar names with only the Name!=NaMe part different
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
tag := models.Tag{
|
||||
Name: getTagStringValue(i, "Name"),
|
||||
Name: getTagStringValue(index, name),
|
||||
}
|
||||
|
||||
created, err := tqb.Create(tag, tx)
|
||||
|
|
@ -356,6 +374,8 @@ func createTags(tx *sqlx.Tx, n int) error {
|
|||
}
|
||||
|
||||
tagIDs = append(tagIDs, created.ID)
|
||||
tagNames = append(tagNames, created.Name)
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
|||
|
|
@ -154,7 +154,7 @@ func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
|
|||
func matchPerformer(p *models.ScrapedScenePerformer) error {
|
||||
qb := models.NewPerformerQueryBuilder()
|
||||
|
||||
performers, err := qb.FindByNames([]string{p.Name}, nil)
|
||||
performers, err := qb.FindByNames([]string{p.Name}, nil, true)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
@ -173,7 +173,7 @@ func matchPerformer(p *models.ScrapedScenePerformer) error {
|
|||
func matchStudio(s *models.ScrapedSceneStudio) error {
|
||||
qb := models.NewStudioQueryBuilder()
|
||||
|
||||
studio, err := qb.FindByName(s.Name, nil)
|
||||
studio, err := qb.FindByName(s.Name, nil, true)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
@ -191,7 +191,7 @@ func matchStudio(s *models.ScrapedSceneStudio) error {
|
|||
func matchMovie(m *models.ScrapedSceneMovie) error {
|
||||
qb := models.NewMovieQueryBuilder()
|
||||
|
||||
movies, err := qb.FindByNames([]string{m.Name}, nil)
|
||||
movies, err := qb.FindByNames([]string{m.Name}, nil, true)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
@ -210,7 +210,7 @@ func matchMovie(m *models.ScrapedSceneMovie) error {
|
|||
func matchTag(s *models.ScrapedSceneTag) error {
|
||||
qb := models.NewTagQueryBuilder()
|
||||
|
||||
tag, err := qb.FindByName(s.Name, nil)
|
||||
tag, err := qb.FindByName(s.Name, nil, true)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
|
|||
Loading…
Reference in a new issue