From e9d48683f8e4392d366ea0768692670b10ca7f11 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Mon, 11 Oct 2021 23:06:06 +1100 Subject: [PATCH] Autotag scraper (#1817) * Refactor scraper structures * Move matching code into new package * Add autotag scraper * Always check first letter of auto-tag names * Account for nulls Co-authored-by: Kermie --- pkg/autotag/gallery.go | 68 ---- pkg/autotag/image.go | 68 ---- pkg/autotag/performer.go | 19 - pkg/autotag/scene.go | 68 ---- pkg/autotag/studio.go | 38 +- pkg/autotag/tag.go | 36 -- pkg/autotag/tagger.go | 80 +--- pkg/match/path.go | 358 ++++++++++++++++++ pkg/{scraper/matchers.go => match/scraped.go} | 30 +- pkg/scraper/action.go | 12 +- pkg/scraper/autotag.go | 218 +++++++++++ pkg/scraper/config.go | 221 +---------- pkg/scraper/config_scraper.go | 283 ++++++++++++++ pkg/scraper/freeones.go | 7 +- pkg/scraper/scraper.go | 51 +++ pkg/scraper/scrapers.go | 95 ++--- pkg/scraper/stashbox/stash_box.go | 8 +- pkg/scraper/xpath_test.go | 3 +- pkg/sqlite/performer.go | 8 +- pkg/sqlite/studio.go | 5 + pkg/sqlite/tag.go | 5 + .../components/Changelog/versions/v0110.md | 2 + 22 files changed, 1023 insertions(+), 660 deletions(-) create mode 100644 pkg/match/path.go rename pkg/{scraper/matchers.go => match/scraped.go} (67%) create mode 100644 pkg/scraper/autotag.go create mode 100644 pkg/scraper/config_scraper.go create mode 100644 pkg/scraper/scraper.go diff --git a/pkg/autotag/gallery.go b/pkg/autotag/gallery.go index fa3ab3a84..d35b0b05f 100644 --- a/pkg/autotag/gallery.go +++ b/pkg/autotag/gallery.go @@ -1,78 +1,10 @@ package autotag import ( - "fmt" - "path/filepath" - "strings" - "github.com/stashapp/stash/pkg/gallery" "github.com/stashapp/stash/pkg/models" ) -func galleryPathsFilter(paths []string) *models.GalleryFilterType { - if paths == nil { - return nil - } - - sep := string(filepath.Separator) - - var ret *models.GalleryFilterType - var or *models.GalleryFilterType - for _, p := range paths { - newOr := &models.GalleryFilterType{} - if or != nil { - or.Or = newOr - } else { - ret = newOr - } - - or = newOr - - if !strings.HasSuffix(p, sep) { - p = p + sep - } - - or.Path = &models.StringCriterionInput{ - Modifier: models.CriterionModifierEquals, - Value: p + "%", - } - } - - return ret -} - -func getMatchingGalleries(name string, paths []string, galleryReader models.GalleryReader) ([]*models.Gallery, error) { - regex := getPathQueryRegex(name) - organized := false - filter := models.GalleryFilterType{ - Path: &models.StringCriterionInput{ - Value: "(?i)" + regex, - Modifier: models.CriterionModifierMatchesRegex, - }, - Organized: &organized, - } - - filter.And = galleryPathsFilter(paths) - - pp := models.PerPageAll - gallerys, _, err := galleryReader.Query(&filter, &models.FindFilterType{ - PerPage: &pp, - }) - - if err != nil { - return nil, fmt.Errorf("error querying gallerys with regex '%s': %s", regex, err.Error()) - } - - var ret []*models.Gallery - for _, p := range gallerys { - if nameMatchesPath(name, p.Path.String) { - ret = append(ret, p) - } - } - - return ret, nil -} - func getGalleryFileTagger(s *models.Gallery) tagger { return tagger{ ID: s.ID, diff --git a/pkg/autotag/image.go b/pkg/autotag/image.go index ff5816c6f..21745897c 100644 --- a/pkg/autotag/image.go +++ b/pkg/autotag/image.go @@ -1,78 +1,10 @@ package autotag import ( - "fmt" - "path/filepath" - "strings" - "github.com/stashapp/stash/pkg/image" "github.com/stashapp/stash/pkg/models" ) -func imagePathsFilter(paths []string) *models.ImageFilterType { - if paths == nil { - return nil - } - - sep := string(filepath.Separator) - - var ret *models.ImageFilterType - var or *models.ImageFilterType - for _, p := range paths { - newOr := &models.ImageFilterType{} - if or != nil { - or.Or = newOr - } else { - ret = newOr - } - - or = newOr - - if !strings.HasSuffix(p, sep) { - p = p + sep - } - - or.Path = &models.StringCriterionInput{ - Modifier: models.CriterionModifierEquals, - Value: p + "%", - } - } - - return ret -} - -func getMatchingImages(name string, paths []string, imageReader models.ImageReader) ([]*models.Image, error) { - regex := getPathQueryRegex(name) - organized := false - filter := models.ImageFilterType{ - Path: &models.StringCriterionInput{ - Value: "(?i)" + regex, - Modifier: models.CriterionModifierMatchesRegex, - }, - Organized: &organized, - } - - filter.And = imagePathsFilter(paths) - - pp := models.PerPageAll - images, _, err := imageReader.Query(&filter, &models.FindFilterType{ - PerPage: &pp, - }) - - if err != nil { - return nil, fmt.Errorf("error querying images with regex '%s': %s", regex, err.Error()) - } - - var ret []*models.Image - for _, p := range images { - if nameMatchesPath(name, p.Path) { - ret = append(ret, p) - } - } - - return ret, nil -} - func getImageFileTagger(s *models.Image) tagger { return tagger{ ID: s.ID, diff --git a/pkg/autotag/performer.go b/pkg/autotag/performer.go index bdbd497c3..77ec0f558 100644 --- a/pkg/autotag/performer.go +++ b/pkg/autotag/performer.go @@ -7,25 +7,6 @@ import ( "github.com/stashapp/stash/pkg/scene" ) -func getMatchingPerformers(path string, performerReader models.PerformerReader) ([]*models.Performer, error) { - words := getPathWords(path) - performers, err := performerReader.QueryForAutoTag(words) - - if err != nil { - return nil, err - } - - var ret []*models.Performer - for _, p := range performers { - // TODO - commenting out alias handling until both sides work correctly - if nameMatchesPath(p.Name.String, path) { // || nameMatchesPath(p.Aliases.String, path) { - ret = append(ret, p) - } - } - - return ret, nil -} - func getPerformerTagger(p *models.Performer) tagger { return tagger{ ID: p.ID, diff --git a/pkg/autotag/scene.go b/pkg/autotag/scene.go index 272f5a9fe..aca523cb9 100644 --- a/pkg/autotag/scene.go +++ b/pkg/autotag/scene.go @@ -1,78 +1,10 @@ package autotag import ( - "fmt" - "path/filepath" - "strings" - "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/scene" ) -func scenePathsFilter(paths []string) *models.SceneFilterType { - if paths == nil { - return nil - } - - sep := string(filepath.Separator) - - var ret *models.SceneFilterType - var or *models.SceneFilterType - for _, p := range paths { - newOr := &models.SceneFilterType{} - if or != nil { - or.Or = newOr - } else { - ret = newOr - } - - or = newOr - - if !strings.HasSuffix(p, sep) { - p = p + sep - } - - or.Path = &models.StringCriterionInput{ - Modifier: models.CriterionModifierEquals, - Value: p + "%", - } - } - - return ret -} - -func getMatchingScenes(name string, paths []string, sceneReader models.SceneReader) ([]*models.Scene, error) { - regex := getPathQueryRegex(name) - organized := false - filter := models.SceneFilterType{ - Path: &models.StringCriterionInput{ - Value: "(?i)" + regex, - Modifier: models.CriterionModifierMatchesRegex, - }, - Organized: &organized, - } - - filter.And = scenePathsFilter(paths) - - pp := models.PerPageAll - scenes, _, err := sceneReader.Query(&filter, &models.FindFilterType{ - PerPage: &pp, - }) - - if err != nil { - return nil, fmt.Errorf("error querying scenes with regex '%s': %s", regex, err.Error()) - } - - var ret []*models.Scene - for _, p := range scenes { - if nameMatchesPath(name, p.Path) { - ret = append(ret, p) - } - } - - return ret, nil -} - func getSceneFileTagger(s *models.Scene) tagger { return tagger{ ID: s.ID, diff --git a/pkg/autotag/studio.go b/pkg/autotag/studio.go index 1634a0fed..635050df7 100644 --- a/pkg/autotag/studio.go +++ b/pkg/autotag/studio.go @@ -2,46 +2,10 @@ package autotag import ( "database/sql" + "github.com/stashapp/stash/pkg/models" ) -func getMatchingStudios(path string, reader models.StudioReader) ([]*models.Studio, error) { - words := getPathWords(path) - candidates, err := reader.QueryForAutoTag(words) - - if err != nil { - return nil, err - } - - var ret []*models.Studio - for _, c := range candidates { - matches := false - if nameMatchesPath(c.Name.String, path) { - matches = true - } - - if !matches { - aliases, err := reader.GetAliases(c.ID) - if err != nil { - return nil, err - } - - for _, alias := range aliases { - if nameMatchesPath(alias, path) { - matches = true - break - } - } - } - - if matches { - ret = append(ret, c) - } - } - - return ret, nil -} - func addSceneStudio(sceneWriter models.SceneReaderWriter, sceneID, studioID int) (bool, error) { // don't set if already set scene, err := sceneWriter.Find(sceneID) diff --git a/pkg/autotag/tag.go b/pkg/autotag/tag.go index 48de81417..78e12b766 100644 --- a/pkg/autotag/tag.go +++ b/pkg/autotag/tag.go @@ -7,42 +7,6 @@ import ( "github.com/stashapp/stash/pkg/scene" ) -func getMatchingTags(path string, tagReader models.TagReader) ([]*models.Tag, error) { - words := getPathWords(path) - tags, err := tagReader.QueryForAutoTag(words) - - if err != nil { - return nil, err - } - - var ret []*models.Tag - for _, t := range tags { - matches := false - if nameMatchesPath(t.Name, path) { - matches = true - } - - if !matches { - aliases, err := tagReader.GetAliases(t.ID) - if err != nil { - return nil, err - } - for _, alias := range aliases { - if nameMatchesPath(alias, path) { - matches = true - break - } - } - } - - if matches { - ret = append(ret, t) - } - } - - return ret, nil -} - func getTagTaggers(p *models.Tag, aliases []string) []tagger { ret := []tagger{{ ID: p.ID, diff --git a/pkg/autotag/tagger.go b/pkg/autotag/tagger.go index c0555d401..b64e4e507 100644 --- a/pkg/autotag/tagger.go +++ b/pkg/autotag/tagger.go @@ -15,78 +15,12 @@ package autotag import ( "fmt" - "path/filepath" - "regexp" - "strings" "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" ) -const separatorChars = `.\-_ ` - -func getPathQueryRegex(name string) string { - // escape specific regex characters - name = regexp.QuoteMeta(name) - - // handle path separators - const separator = `[` + separatorChars + `]` - - ret := strings.Replace(name, " ", separator+"*", -1) - ret = `(?:^|_|[^\w\d])` + ret + `(?:$|_|[^\w\d])` - return ret -} - -func nameMatchesPath(name, path string) bool { - // escape specific regex characters - name = regexp.QuoteMeta(name) - - name = strings.ToLower(name) - path = strings.ToLower(path) - - // handle path separators - const separator = `[` + separatorChars + `]` - - reStr := strings.Replace(name, " ", separator+"*", -1) - reStr = `(?:^|_|[^\w\d])` + reStr + `(?:$|_|[^\w\d])` - - re := regexp.MustCompile(reStr) - return re.MatchString(path) -} - -func getPathWords(path string) []string { - retStr := path - - // remove the extension - ext := filepath.Ext(retStr) - if ext != "" { - retStr = strings.TrimSuffix(retStr, ext) - } - - // handle path separators - const separator = `(?:_|[^\w\d])+` - re := regexp.MustCompile(separator) - retStr = re.ReplaceAllString(retStr, " ") - - words := strings.Split(retStr, " ") - - // remove any single letter words - var ret []string - for _, w := range words { - if len(w) > 1 { - // #1450 - we need to open up the criteria for matching so that we - // can match where path has no space between subject names - - // ie name = "foo bar" - path = "foobar" - // we post-match afterwards, so we can afford to be a little loose - // with the query - // just use the first two characters - ret = append(ret, w[0:2]) - } - } - - return ret -} - type tagger struct { ID int Type string @@ -105,7 +39,7 @@ func (t *tagger) addLog(otherType, otherName string) { } func (t *tagger) tagPerformers(performerReader models.PerformerReader, addFunc addLinkFunc) error { - others, err := getMatchingPerformers(t.Path, performerReader) + others, err := match.PathToPerformers(t.Path, performerReader) if err != nil { return err } @@ -126,7 +60,7 @@ func (t *tagger) tagPerformers(performerReader models.PerformerReader, addFunc a } func (t *tagger) tagStudios(studioReader models.StudioReader, addFunc addLinkFunc) error { - others, err := getMatchingStudios(t.Path, studioReader) + others, err := match.PathToStudios(t.Path, studioReader) if err != nil { return err } @@ -149,7 +83,7 @@ func (t *tagger) tagStudios(studioReader models.StudioReader, addFunc addLinkFun } func (t *tagger) tagTags(tagReader models.TagReader, addFunc addLinkFunc) error { - others, err := getMatchingTags(t.Path, tagReader) + others, err := match.PathToTags(t.Path, tagReader) if err != nil { return err } @@ -170,7 +104,7 @@ func (t *tagger) tagTags(tagReader models.TagReader, addFunc addLinkFunc) error } func (t *tagger) tagScenes(paths []string, sceneReader models.SceneReader, addFunc addLinkFunc) error { - others, err := getMatchingScenes(t.Name, paths, sceneReader) + others, err := match.PathToScenes(t.Name, paths, sceneReader) if err != nil { return err } @@ -191,7 +125,7 @@ func (t *tagger) tagScenes(paths []string, sceneReader models.SceneReader, addFu } func (t *tagger) tagImages(paths []string, imageReader models.ImageReader, addFunc addLinkFunc) error { - others, err := getMatchingImages(t.Name, paths, imageReader) + others, err := match.PathToImages(t.Name, paths, imageReader) if err != nil { return err } @@ -212,7 +146,7 @@ func (t *tagger) tagImages(paths []string, imageReader models.ImageReader, addFu } func (t *tagger) tagGalleries(paths []string, galleryReader models.GalleryReader, addFunc addLinkFunc) error { - others, err := getMatchingGalleries(t.Name, paths, galleryReader) + others, err := match.PathToGalleries(t.Name, paths, galleryReader) if err != nil { return err } diff --git a/pkg/match/path.go b/pkg/match/path.go new file mode 100644 index 000000000..04f7fe58b --- /dev/null +++ b/pkg/match/path.go @@ -0,0 +1,358 @@ +package match + +import ( + "fmt" + "path/filepath" + "regexp" + "strings" + + "github.com/stashapp/stash/pkg/models" +) + +const separatorChars = `.\-_ ` + +func getPathQueryRegex(name string) string { + // escape specific regex characters + name = regexp.QuoteMeta(name) + + // handle path separators + const separator = `[` + separatorChars + `]` + + ret := strings.Replace(name, " ", separator+"*", -1) + ret = `(?:^|_|[^\w\d])` + ret + `(?:$|_|[^\w\d])` + return ret +} + +func getPathWords(path string) []string { + retStr := path + + // remove the extension + ext := filepath.Ext(retStr) + if ext != "" { + retStr = strings.TrimSuffix(retStr, ext) + } + + // handle path separators + const separator = `(?:_|[^\w\d])+` + re := regexp.MustCompile(separator) + retStr = re.ReplaceAllString(retStr, " ") + + words := strings.Split(retStr, " ") + + // remove any single letter words + var ret []string + for _, w := range words { + if len(w) > 1 { + // #1450 - we need to open up the criteria for matching so that we + // can match where path has no space between subject names - + // ie name = "foo bar" - path = "foobar" + // we post-match afterwards, so we can afford to be a little loose + // with the query + // just use the first two characters + ret = append(ret, w[0:2]) + } + } + + return ret +} + +func nameMatchesPath(name, path string) bool { + // escape specific regex characters + name = regexp.QuoteMeta(name) + + name = strings.ToLower(name) + path = strings.ToLower(path) + + // handle path separators + const separator = `[` + separatorChars + `]` + + reStr := strings.Replace(name, " ", separator+"*", -1) + reStr = `(?:^|_|[^\w\d])` + reStr + `(?:$|_|[^\w\d])` + + re := regexp.MustCompile(reStr) + return re.MatchString(path) +} + +func PathToPerformers(path string, performerReader models.PerformerReader) ([]*models.Performer, error) { + words := getPathWords(path) + performers, err := performerReader.QueryForAutoTag(words) + + if err != nil { + return nil, err + } + + var ret []*models.Performer + for _, p := range performers { + // TODO - commenting out alias handling until both sides work correctly + if nameMatchesPath(p.Name.String, path) { // || nameMatchesPath(p.Aliases.String, path) { + ret = append(ret, p) + } + } + + return ret, nil +} + +func PathToStudios(path string, reader models.StudioReader) ([]*models.Studio, error) { + words := getPathWords(path) + candidates, err := reader.QueryForAutoTag(words) + + if err != nil { + return nil, err + } + + var ret []*models.Studio + for _, c := range candidates { + matches := false + if nameMatchesPath(c.Name.String, path) { + matches = true + } + + if !matches { + aliases, err := reader.GetAliases(c.ID) + if err != nil { + return nil, err + } + + for _, alias := range aliases { + if nameMatchesPath(alias, path) { + matches = true + break + } + } + } + + if matches { + ret = append(ret, c) + } + } + + return ret, nil +} + +func PathToTags(path string, tagReader models.TagReader) ([]*models.Tag, error) { + words := getPathWords(path) + tags, err := tagReader.QueryForAutoTag(words) + + if err != nil { + return nil, err + } + + var ret []*models.Tag + for _, t := range tags { + matches := false + if nameMatchesPath(t.Name, path) { + matches = true + } + + if !matches { + aliases, err := tagReader.GetAliases(t.ID) + if err != nil { + return nil, err + } + for _, alias := range aliases { + if nameMatchesPath(alias, path) { + matches = true + break + } + } + } + + if matches { + ret = append(ret, t) + } + } + + return ret, nil +} + +func scenePathsFilter(paths []string) *models.SceneFilterType { + if paths == nil { + return nil + } + + sep := string(filepath.Separator) + + var ret *models.SceneFilterType + var or *models.SceneFilterType + for _, p := range paths { + newOr := &models.SceneFilterType{} + if or != nil { + or.Or = newOr + } else { + ret = newOr + } + + or = newOr + + if !strings.HasSuffix(p, sep) { + p = p + sep + } + + or.Path = &models.StringCriterionInput{ + Modifier: models.CriterionModifierEquals, + Value: p + "%", + } + } + + return ret +} + +func PathToScenes(name string, paths []string, sceneReader models.SceneReader) ([]*models.Scene, error) { + regex := getPathQueryRegex(name) + organized := false + filter := models.SceneFilterType{ + Path: &models.StringCriterionInput{ + Value: "(?i)" + regex, + Modifier: models.CriterionModifierMatchesRegex, + }, + Organized: &organized, + } + + filter.And = scenePathsFilter(paths) + + pp := models.PerPageAll + scenes, _, err := sceneReader.Query(&filter, &models.FindFilterType{ + PerPage: &pp, + }) + + if err != nil { + return nil, fmt.Errorf("error querying scenes with regex '%s': %s", regex, err.Error()) + } + + var ret []*models.Scene + for _, p := range scenes { + if nameMatchesPath(name, p.Path) { + ret = append(ret, p) + } + } + + return ret, nil +} + +func imagePathsFilter(paths []string) *models.ImageFilterType { + if paths == nil { + return nil + } + + sep := string(filepath.Separator) + + var ret *models.ImageFilterType + var or *models.ImageFilterType + for _, p := range paths { + newOr := &models.ImageFilterType{} + if or != nil { + or.Or = newOr + } else { + ret = newOr + } + + or = newOr + + if !strings.HasSuffix(p, sep) { + p = p + sep + } + + or.Path = &models.StringCriterionInput{ + Modifier: models.CriterionModifierEquals, + Value: p + "%", + } + } + + return ret +} + +func PathToImages(name string, paths []string, imageReader models.ImageReader) ([]*models.Image, error) { + regex := getPathQueryRegex(name) + organized := false + filter := models.ImageFilterType{ + Path: &models.StringCriterionInput{ + Value: "(?i)" + regex, + Modifier: models.CriterionModifierMatchesRegex, + }, + Organized: &organized, + } + + filter.And = imagePathsFilter(paths) + + pp := models.PerPageAll + images, _, err := imageReader.Query(&filter, &models.FindFilterType{ + PerPage: &pp, + }) + + if err != nil { + return nil, fmt.Errorf("error querying images with regex '%s': %s", regex, err.Error()) + } + + var ret []*models.Image + for _, p := range images { + if nameMatchesPath(name, p.Path) { + ret = append(ret, p) + } + } + + return ret, nil +} + +func galleryPathsFilter(paths []string) *models.GalleryFilterType { + if paths == nil { + return nil + } + + sep := string(filepath.Separator) + + var ret *models.GalleryFilterType + var or *models.GalleryFilterType + for _, p := range paths { + newOr := &models.GalleryFilterType{} + if or != nil { + or.Or = newOr + } else { + ret = newOr + } + + or = newOr + + if !strings.HasSuffix(p, sep) { + p = p + sep + } + + or.Path = &models.StringCriterionInput{ + Modifier: models.CriterionModifierEquals, + Value: p + "%", + } + } + + return ret +} + +func PathToGalleries(name string, paths []string, galleryReader models.GalleryReader) ([]*models.Gallery, error) { + regex := getPathQueryRegex(name) + organized := false + filter := models.GalleryFilterType{ + Path: &models.StringCriterionInput{ + Value: "(?i)" + regex, + Modifier: models.CriterionModifierMatchesRegex, + }, + Organized: &organized, + } + + filter.And = galleryPathsFilter(paths) + + pp := models.PerPageAll + gallerys, _, err := galleryReader.Query(&filter, &models.FindFilterType{ + PerPage: &pp, + }) + + if err != nil { + return nil, fmt.Errorf("error querying gallerys with regex '%s': %s", regex, err.Error()) + } + + var ret []*models.Gallery + for _, p := range gallerys { + if nameMatchesPath(name, p.Path.String) { + ret = append(ret, p) + } + } + + return ret, nil +} diff --git a/pkg/scraper/matchers.go b/pkg/match/scraped.go similarity index 67% rename from pkg/scraper/matchers.go rename to pkg/match/scraped.go index f129ec8b8..839fe3786 100644 --- a/pkg/scraper/matchers.go +++ b/pkg/match/scraped.go @@ -1,4 +1,4 @@ -package scraper +package match import ( "strconv" @@ -8,10 +8,10 @@ import ( "github.com/stashapp/stash/pkg/tag" ) -// MatchScrapedPerformer matches the provided performer with the +// ScrapedPerformer matches the provided performer with the // performers in the database and sets the ID field if one is found. -func MatchScrapedPerformer(qb models.PerformerReader, p *models.ScrapedPerformer) error { - if p.Name == nil { +func ScrapedPerformer(qb models.PerformerReader, p *models.ScrapedPerformer) error { + if p.StoredID != nil || p.Name == nil { return nil } @@ -31,9 +31,13 @@ func MatchScrapedPerformer(qb models.PerformerReader, p *models.ScrapedPerformer return nil } -// MatchScrapedStudio matches the provided studio with the studios +// ScrapedStudio matches the provided studio with the studios // in the database and sets the ID field if one is found. -func MatchScrapedStudio(qb models.StudioReader, s *models.ScrapedStudio) error { +func ScrapedStudio(qb models.StudioReader, s *models.ScrapedStudio) error { + if s.StoredID != nil { + return nil + } + st, err := studio.ByName(qb, s.Name) if err != nil { @@ -58,10 +62,10 @@ func MatchScrapedStudio(qb models.StudioReader, s *models.ScrapedStudio) error { return nil } -// MatchScrapedMovie matches the provided movie with the movies +// ScrapedMovie matches the provided movie with the movies // in the database and sets the ID field if one is found. -func MatchScrapedMovie(qb models.MovieReader, m *models.ScrapedMovie) error { - if m.Name == nil { +func ScrapedMovie(qb models.MovieReader, m *models.ScrapedMovie) error { + if m.StoredID != nil || m.Name == nil { return nil } @@ -81,9 +85,13 @@ func MatchScrapedMovie(qb models.MovieReader, m *models.ScrapedMovie) error { return nil } -// MatchScrapedTag matches the provided tag with the tags +// ScrapedTag matches the provided tag with the tags // in the database and sets the ID field if one is found. -func MatchScrapedTag(qb models.TagReader, s *models.ScrapedTag) error { +func ScrapedTag(qb models.TagReader, s *models.ScrapedTag) error { + if s.StoredID != nil { + return nil + } + t, err := tag.ByName(qb, s.Name) if err != nil { diff --git a/pkg/scraper/action.go b/pkg/scraper/action.go index 493163936..b5e67e712 100644 --- a/pkg/scraper/action.go +++ b/pkg/scraper/action.go @@ -19,7 +19,7 @@ func (e scraperAction) IsValid() bool { return false } -type scraper interface { +type scraperActionImpl interface { scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) @@ -36,16 +36,16 @@ type scraper interface { scrapeMovieByURL(url string) (*models.ScrapedMovie, error) } -func getScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) scraper { +func (c config) getScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl { switch scraper.Action { case scraperActionScript: - return newScriptScraper(scraper, config, globalConfig) + return newScriptScraper(scraper, c, globalConfig) case scraperActionStash: - return newStashScraper(scraper, txnManager, config, globalConfig) + return newStashScraper(scraper, txnManager, c, globalConfig) case scraperActionXPath: - return newXpathScraper(scraper, txnManager, config, globalConfig) + return newXpathScraper(scraper, txnManager, c, globalConfig) case scraperActionJson: - return newJsonScraper(scraper, txnManager, config, globalConfig) + return newJsonScraper(scraper, txnManager, c, globalConfig) } panic("unknown scraper action: " + scraper.Action) diff --git a/pkg/scraper/autotag.go b/pkg/scraper/autotag.go new file mode 100644 index 000000000..1f62a4701 --- /dev/null +++ b/pkg/scraper/autotag.go @@ -0,0 +1,218 @@ +package scraper + +import ( + "context" + "errors" + "fmt" + "strconv" + + "github.com/stashapp/stash/pkg/match" + "github.com/stashapp/stash/pkg/models" +) + +// autoTagScraperID is the scraper ID for the built-in AutoTag scraper +const ( + autoTagScraperID = "builtin_autotag" + autoTagScraperName = "Auto Tag" +) + +var errNotSupported = errors.New("not supported") + +type autotagScraper struct { + txnManager models.TransactionManager + globalConfig GlobalConfig +} + +func (s *autotagScraper) matchPerformers(path string, performerReader models.PerformerReader) ([]*models.ScrapedPerformer, error) { + p, err := match.PathToPerformers(path, performerReader) + if err != nil { + return nil, fmt.Errorf("error matching performers: %w", err) + } + + var ret []*models.ScrapedPerformer + for _, pp := range p { + id := strconv.Itoa(pp.ID) + + sp := &models.ScrapedPerformer{ + Name: &pp.Name.String, + StoredID: &id, + } + if pp.Gender.Valid { + sp.Gender = &pp.Gender.String + } + + ret = append(ret, sp) + } + + return ret, nil +} + +func (s *autotagScraper) matchStudio(path string, studioReader models.StudioReader) (*models.ScrapedStudio, error) { + st, err := match.PathToStudios(path, studioReader) + if err != nil { + return nil, fmt.Errorf("error matching studios: %w", err) + } + + if len(st) > 0 { + id := strconv.Itoa(st[0].ID) + return &models.ScrapedStudio{ + Name: st[0].Name.String, + StoredID: &id, + }, nil + } + + return nil, nil +} + +func (s *autotagScraper) matchTags(path string, tagReader models.TagReader) ([]*models.ScrapedTag, error) { + t, err := match.PathToTags(path, tagReader) + if err != nil { + return nil, fmt.Errorf("error matching tags: %w", err) + } + + var ret []*models.ScrapedTag + for _, tt := range t { + id := strconv.Itoa(tt.ID) + + st := &models.ScrapedTag{ + Name: tt.Name, + StoredID: &id, + } + + ret = append(ret, st) + } + + return ret, nil +} + +type autotagSceneScraper struct { + *autotagScraper +} + +func (c *autotagSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) { + return nil, errNotSupported +} + +func (c *autotagSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) { + var ret *models.ScrapedScene + + // populate performers, studio and tags based on scene path + if err := c.txnManager.WithReadTxn(context.Background(), func(r models.ReaderRepository) error { + path := scene.Path + performers, err := c.matchPerformers(path, r.Performer()) + if err != nil { + return err + } + studio, err := c.matchStudio(path, r.Studio()) + if err != nil { + return err + } + + tags, err := c.matchTags(path, r.Tag()) + if err != nil { + return err + } + + if len(performers) > 0 || studio != nil || len(tags) > 0 { + ret = &models.ScrapedScene{ + Performers: performers, + Studio: studio, + Tags: tags, + } + } + + return nil + }); err != nil { + return nil, err + } + + return ret, nil +} + +func (c *autotagSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { + return nil, errNotSupported +} + +func (c *autotagSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) { + return nil, errNotSupported +} + +type autotagGalleryScraper struct { + *autotagScraper +} + +func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) { + if !gallery.Path.Valid { + // not valid for non-path-based galleries + return nil, nil + } + + var ret *models.ScrapedGallery + + // populate performers, studio and tags based on scene path + if err := c.txnManager.WithReadTxn(context.Background(), func(r models.ReaderRepository) error { + path := gallery.Path.String + performers, err := c.matchPerformers(path, r.Performer()) + if err != nil { + return err + } + studio, err := c.matchStudio(path, r.Studio()) + if err != nil { + return err + } + + tags, err := c.matchTags(path, r.Tag()) + if err != nil { + return err + } + + if len(performers) > 0 || studio != nil || len(tags) > 0 { + ret = &models.ScrapedGallery{ + Performers: performers, + Studio: studio, + Tags: tags, + } + } + + return nil + }); err != nil { + return nil, err + } + + return ret, nil +} + +func (c *autotagGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { + return nil, errNotSupported +} + +func (c *autotagGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) { + return nil, errNotSupported +} + +func getAutoTagScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper { + base := autotagScraper{ + txnManager: txnManager, + globalConfig: globalConfig, + } + + supportedScrapes := []models.ScrapeType{ + models.ScrapeTypeFragment, + } + + return scraper{ + ID: autoTagScraperID, + Spec: &models.Scraper{ + ID: autoTagScraperID, + Name: autoTagScraperName, + Scene: &models.ScraperSpec{ + SupportedScrapes: supportedScrapes, + }, + Gallery: &models.ScraperSpec{ + SupportedScrapes: supportedScrapes, + }, + }, + Scene: &autotagSceneScraper{&base}, + Gallery: &autotagGalleryScraper{&base}, + } +} diff --git a/pkg/scraper/config.go b/pkg/scraper/config.go index 78d3fe4fe..ee12c7e90 100644 --- a/pkg/scraper/config.go +++ b/pkg/scraper/config.go @@ -9,8 +9,6 @@ import ( "strings" "gopkg.in/yaml.v2" - - "github.com/stashapp/stash/pkg/models" ) type config struct { @@ -194,7 +192,7 @@ type scraperDriverOptions struct { Headers []*header `yaml:"headers"` } -func loadScraperFromYAML(id string, reader io.Reader) (*config, error) { +func loadConfigFromYAML(id string, reader io.Reader) (*config, error) { ret := &config{} parser := yaml.NewDecoder(reader) @@ -213,7 +211,7 @@ func loadScraperFromYAML(id string, reader io.Reader) (*config, error) { return ret, nil } -func loadScraperFromYAMLFile(path string) (*config, error) { +func loadConfigFromYAMLFile(path string) (*config, error) { file, err := os.Open(path) if err != nil { return nil, err @@ -224,7 +222,7 @@ func loadScraperFromYAMLFile(path string) (*config, error) { id := filepath.Base(path) id = id[:strings.LastIndex(id, ".")] - ret, err := loadScraperFromYAML(id, file) + ret, err := loadConfigFromYAML(id, file) if err != nil { return nil, err } @@ -234,78 +232,6 @@ func loadScraperFromYAMLFile(path string) (*config, error) { return ret, nil } -func (c config) toScraper() *models.Scraper { - ret := models.Scraper{ - ID: c.ID, - Name: c.Name, - } - - performer := models.ScraperSpec{} - if c.PerformerByName != nil { - performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName) - } - if c.PerformerByFragment != nil { - performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment) - } - if len(c.PerformerByURL) > 0 { - performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL) - for _, v := range c.PerformerByURL { - performer.Urls = append(performer.Urls, v.URL...) - } - } - - if len(performer.SupportedScrapes) > 0 { - ret.Performer = &performer - } - - scene := models.ScraperSpec{} - if c.SceneByFragment != nil { - scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment) - } - if c.SceneByName != nil && c.SceneByQueryFragment != nil { - scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeName) - } - if len(c.SceneByURL) > 0 { - scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL) - for _, v := range c.SceneByURL { - scene.Urls = append(scene.Urls, v.URL...) - } - } - - if len(scene.SupportedScrapes) > 0 { - ret.Scene = &scene - } - - gallery := models.ScraperSpec{} - if c.GalleryByFragment != nil { - gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeFragment) - } - if len(c.GalleryByURL) > 0 { - gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeURL) - for _, v := range c.GalleryByURL { - gallery.Urls = append(gallery.Urls, v.URL...) - } - } - - if len(gallery.SupportedScrapes) > 0 { - ret.Gallery = &gallery - } - - movie := models.ScraperSpec{} - if len(c.MovieByURL) > 0 { - movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL) - for _, v := range c.MovieByURL { - movie.Urls = append(movie.Urls, v.URL...) - } - } - - if len(movie.SupportedScrapes) > 0 { - ret.Movie = &movie - } - - return &ret -} - func (c config) supportsPerformers() bool { return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0 } @@ -320,47 +246,6 @@ func (c config) matchesPerformerURL(url string) bool { return false } -func (c config) ScrapePerformerNames(name string, txnManager models.TransactionManager, globalConfig GlobalConfig) ([]*models.ScrapedPerformer, error) { - if c.PerformerByName != nil { - s := getScraper(*c.PerformerByName, txnManager, c, globalConfig) - return s.scrapePerformersByName(name) - } - - return nil, nil -} - -func (c config) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedPerformer, error) { - if c.PerformerByFragment != nil { - s := getScraper(*c.PerformerByFragment, txnManager, c, globalConfig) - return s.scrapePerformerByFragment(scrapedPerformer) - } - - // try to match against URL if present - if scrapedPerformer.URL != nil && *scrapedPerformer.URL != "" { - return c.ScrapePerformerURL(*scrapedPerformer.URL, txnManager, globalConfig) - } - - return nil, nil -} - -func (c config) ScrapePerformerURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedPerformer, error) { - for _, scraper := range c.PerformerByURL { - if scraper.matchesURL(url) { - s := getScraper(scraper.scraperTypeConfig, txnManager, c, globalConfig) - ret, err := s.scrapePerformerByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - return ret, nil - } - } - } - - return nil, nil -} - func (c config) supportsScenes() bool { return (c.SceneByName != nil && c.SceneByQueryFragment != nil) || c.SceneByFragment != nil || len(c.SceneByURL) > 0 } @@ -401,103 +286,3 @@ func (c config) matchesMovieURL(url string) bool { return false } - -func (c config) ScrapeSceneQuery(name string, txnManager models.TransactionManager, globalConfig GlobalConfig) ([]*models.ScrapedScene, error) { - if c.SceneByName != nil { - s := getScraper(*c.SceneByName, txnManager, c, globalConfig) - return s.scrapeScenesByName(name) - } - - return nil, nil -} - -func (c config) ScrapeSceneByScene(scene *models.Scene, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) { - if c.SceneByFragment != nil { - s := getScraper(*c.SceneByFragment, txnManager, c, globalConfig) - return s.scrapeSceneByScene(scene) - } - - return nil, nil -} - -func (c config) ScrapeSceneByFragment(scene models.ScrapedSceneInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) { - if c.SceneByQueryFragment != nil { - s := getScraper(*c.SceneByQueryFragment, txnManager, c, globalConfig) - return s.scrapeSceneByFragment(scene) - } - - return nil, nil -} - -func (c config) ScrapeSceneURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) { - for _, scraper := range c.SceneByURL { - if scraper.matchesURL(url) { - s := getScraper(scraper.scraperTypeConfig, txnManager, c, globalConfig) - ret, err := s.scrapeSceneByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - return ret, nil - } - } - } - - return nil, nil -} - -func (c config) ScrapeGalleryByGallery(gallery *models.Gallery, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) { - if c.GalleryByFragment != nil { - s := getScraper(*c.GalleryByFragment, txnManager, c, globalConfig) - return s.scrapeGalleryByGallery(gallery) - } - - return nil, nil -} - -func (c config) ScrapeGalleryByFragment(gallery models.ScrapedGalleryInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) { - if c.GalleryByFragment != nil { - // TODO - this should be galleryByQueryFragment - s := getScraper(*c.GalleryByFragment, txnManager, c, globalConfig) - return s.scrapeGalleryByFragment(gallery) - } - - return nil, nil -} - -func (c config) ScrapeGalleryURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) { - for _, scraper := range c.GalleryByURL { - if scraper.matchesURL(url) { - s := getScraper(scraper.scraperTypeConfig, txnManager, c, globalConfig) - ret, err := s.scrapeGalleryByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - return ret, nil - } - } - } - - return nil, nil -} - -func (c config) ScrapeMovieURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedMovie, error) { - for _, scraper := range c.MovieByURL { - if scraper.matchesURL(url) { - s := getScraper(scraper.scraperTypeConfig, txnManager, c, globalConfig) - ret, err := s.scrapeMovieByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - return ret, nil - } - } - } - - return nil, nil -} diff --git a/pkg/scraper/config_scraper.go b/pkg/scraper/config_scraper.go new file mode 100644 index 000000000..b14d819e6 --- /dev/null +++ b/pkg/scraper/config_scraper.go @@ -0,0 +1,283 @@ +package scraper + +import "github.com/stashapp/stash/pkg/models" + +type configSceneScraper struct { + *configScraper +} + +func (c *configSceneScraper) matchesURL(url string) bool { + return c.config.matchesSceneURL(url) +} + +func (c *configSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) { + if c.config.SceneByName != nil { + s := c.config.getScraper(*c.config.SceneByName, c.txnManager, c.globalConfig) + return s.scrapeScenesByName(name) + } + + return nil, nil +} + +func (c *configSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) { + if c.config.SceneByFragment != nil { + s := c.config.getScraper(*c.config.SceneByFragment, c.txnManager, c.globalConfig) + return s.scrapeSceneByScene(scene) + } + + return nil, nil +} + +func (c *configSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { + if c.config.SceneByQueryFragment != nil { + s := c.config.getScraper(*c.config.SceneByQueryFragment, c.txnManager, c.globalConfig) + return s.scrapeSceneByFragment(scene) + } + + return nil, nil +} + +func (c *configSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) { + for _, scraper := range c.config.SceneByURL { + if scraper.matchesURL(url) { + s := c.config.getScraper(scraper.scraperTypeConfig, c.txnManager, c.globalConfig) + ret, err := s.scrapeSceneByURL(url) + if err != nil { + return nil, err + } + + if ret != nil { + return ret, nil + } + } + } + + return nil, nil +} + +type configPerformerScraper struct { + *configScraper +} + +func (c *configPerformerScraper) matchesURL(url string) bool { + return c.config.matchesPerformerURL(url) +} + +func (c *configPerformerScraper) scrapeByName(name string) ([]*models.ScrapedPerformer, error) { + if c.config.PerformerByName != nil { + s := c.config.getScraper(*c.config.PerformerByName, c.txnManager, c.globalConfig) + return s.scrapePerformersByName(name) + } + + return nil, nil +} + +func (c *configPerformerScraper) scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { + if c.config.PerformerByFragment != nil { + s := c.config.getScraper(*c.config.PerformerByFragment, c.txnManager, c.globalConfig) + return s.scrapePerformerByFragment(scrapedPerformer) + } + + // try to match against URL if present + if scrapedPerformer.URL != nil && *scrapedPerformer.URL != "" { + return c.scrapeByURL(*scrapedPerformer.URL) + } + + return nil, nil +} + +func (c *configPerformerScraper) scrapeByURL(url string) (*models.ScrapedPerformer, error) { + for _, scraper := range c.config.PerformerByURL { + if scraper.matchesURL(url) { + s := c.config.getScraper(scraper.scraperTypeConfig, c.txnManager, c.globalConfig) + ret, err := s.scrapePerformerByURL(url) + if err != nil { + return nil, err + } + + if ret != nil { + return ret, nil + } + } + } + + return nil, nil +} + +type configGalleryScraper struct { + *configScraper +} + +func (c *configGalleryScraper) matchesURL(url string) bool { + return c.config.matchesGalleryURL(url) +} + +func (c *configGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) { + if c.config.GalleryByFragment != nil { + s := c.config.getScraper(*c.config.GalleryByFragment, c.txnManager, c.globalConfig) + return s.scrapeGalleryByGallery(gallery) + } + + return nil, nil +} + +func (c *configGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { + if c.config.GalleryByFragment != nil { + // TODO - this should be galleryByQueryFragment + s := c.config.getScraper(*c.config.GalleryByFragment, c.txnManager, c.globalConfig) + return s.scrapeGalleryByFragment(gallery) + } + + return nil, nil +} + +func (c *configGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) { + for _, scraper := range c.config.GalleryByURL { + if scraper.matchesURL(url) { + s := c.config.getScraper(scraper.scraperTypeConfig, c.txnManager, c.globalConfig) + ret, err := s.scrapeGalleryByURL(url) + if err != nil { + return nil, err + } + + if ret != nil { + return ret, nil + } + } + } + + return nil, nil +} + +type configMovieScraper struct { + *configScraper +} + +func (c *configMovieScraper) matchesURL(url string) bool { + return c.config.matchesMovieURL(url) +} + +func (c *configMovieScraper) scrapeByURL(url string) (*models.ScrapedMovie, error) { + for _, scraper := range c.config.MovieByURL { + if scraper.matchesURL(url) { + s := c.config.getScraper(scraper.scraperTypeConfig, c.txnManager, c.globalConfig) + ret, err := s.scrapeMovieByURL(url) + if err != nil { + return nil, err + } + + if ret != nil { + return ret, nil + } + } + } + + return nil, nil +} + +type configScraper struct { + config config + txnManager models.TransactionManager + globalConfig GlobalConfig +} + +func createScraperFromConfig(c config, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper { + base := configScraper{ + config: c, + txnManager: txnManager, + globalConfig: globalConfig, + } + + ret := scraper{ + ID: c.ID, + Spec: configScraperSpec(c), + } + + // only set fields if supported + if c.supportsPerformers() { + ret.Performer = &configPerformerScraper{&base} + } + if c.supportsGalleries() { + ret.Gallery = &configGalleryScraper{&base} + } + if c.supportsMovies() { + ret.Movie = &configMovieScraper{&base} + } + if c.supportsScenes() { + ret.Scene = &configSceneScraper{&base} + } + + return ret +} + +func configScraperSpec(c config) *models.Scraper { + ret := models.Scraper{ + ID: c.ID, + Name: c.Name, + } + + performer := models.ScraperSpec{} + if c.PerformerByName != nil { + performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName) + } + if c.PerformerByFragment != nil { + performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment) + } + if len(c.PerformerByURL) > 0 { + performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.PerformerByURL { + performer.Urls = append(performer.Urls, v.URL...) + } + } + + if len(performer.SupportedScrapes) > 0 { + ret.Performer = &performer + } + + scene := models.ScraperSpec{} + if c.SceneByFragment != nil { + scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment) + } + if c.SceneByName != nil && c.SceneByQueryFragment != nil { + scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeName) + } + if len(c.SceneByURL) > 0 { + scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.SceneByURL { + scene.Urls = append(scene.Urls, v.URL...) + } + } + + if len(scene.SupportedScrapes) > 0 { + ret.Scene = &scene + } + + gallery := models.ScraperSpec{} + if c.GalleryByFragment != nil { + gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeFragment) + } + if len(c.GalleryByURL) > 0 { + gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.GalleryByURL { + gallery.Urls = append(gallery.Urls, v.URL...) + } + } + + if len(gallery.SupportedScrapes) > 0 { + ret.Gallery = &gallery + } + + movie := models.ScraperSpec{} + if len(c.MovieByURL) > 0 { + movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.MovieByURL { + movie.Urls = append(movie.Urls, v.URL...) + } + } + + if len(movie.SupportedScrapes) > 0 { + ret.Movie = &movie + } + + return &ret +} diff --git a/pkg/scraper/freeones.go b/pkg/scraper/freeones.go index c229e874a..66db306b1 100644 --- a/pkg/scraper/freeones.go +++ b/pkg/scraper/freeones.go @@ -4,6 +4,7 @@ import ( "strings" "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/models" ) // FreeonesScraperID is the scraper ID for the built-in Freeones scraper @@ -122,13 +123,13 @@ xPathScrapers: # Last updated April 13, 2021 ` -func getFreeonesScraper() config { +func getFreeonesScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper { yml := freeonesScraperConfig - scraper, err := loadScraperFromYAML(FreeonesScraperID, strings.NewReader(yml)) + c, err := loadConfigFromYAML(FreeonesScraperID, strings.NewReader(yml)) if err != nil { logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error()) } - return *scraper + return createScraperFromConfig(*c, txnManager, globalConfig) } diff --git a/pkg/scraper/scraper.go b/pkg/scraper/scraper.go new file mode 100644 index 000000000..b842f3df4 --- /dev/null +++ b/pkg/scraper/scraper.go @@ -0,0 +1,51 @@ +package scraper + +import "github.com/stashapp/stash/pkg/models" + +type urlMatcher interface { + matchesURL(url string) bool +} + +type performerScraper interface { + scrapeByName(name string) ([]*models.ScrapedPerformer, error) + scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) + scrapeByURL(url string) (*models.ScrapedPerformer, error) +} + +type sceneScraper interface { + scrapeByName(name string) ([]*models.ScrapedScene, error) + scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) + scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) + scrapeByURL(url string) (*models.ScrapedScene, error) +} + +type galleryScraper interface { + scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) + scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) + scrapeByURL(url string) (*models.ScrapedGallery, error) +} + +type movieScraper interface { + scrapeByURL(url string) (*models.ScrapedMovie, error) +} + +type scraper struct { + ID string + Spec *models.Scraper + + Performer performerScraper + Scene sceneScraper + Gallery galleryScraper + Movie movieScraper +} + +func matchesURL(maybeURLMatcher interface{}, url string) bool { + if maybeURLMatcher != nil { + matcher, ok := maybeURLMatcher.(urlMatcher) + if ok { + return matcher.matchesURL(url) + } + } + + return false +} diff --git a/pkg/scraper/scrapers.go b/pkg/scraper/scrapers.go index d039a59a7..697155c77 100644 --- a/pkg/scraper/scrapers.go +++ b/pkg/scraper/scrapers.go @@ -10,6 +10,7 @@ import ( "github.com/stashapp/stash/pkg/logger" stash_config "github.com/stashapp/stash/pkg/manager/config" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/utils" ) @@ -32,7 +33,7 @@ func isCDPPathWS(c GlobalConfig) bool { // Cache stores scraper details. type Cache struct { - scrapers []config + scrapers []scraper globalConfig GlobalConfig txnManager models.TransactionManager } @@ -44,7 +45,7 @@ type Cache struct { // Scraper configurations are loaded from yml files in the provided scrapers // directory and any subdirectories. func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) { - scrapers, err := loadScrapers(globalConfig.GetScrapersPath()) + scrapers, err := loadScrapers(globalConfig, txnManager) if err != nil { return nil, err } @@ -56,8 +57,9 @@ func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) ( }, nil } -func loadScrapers(path string) ([]config, error) { - scrapers := make([]config, 0) +func loadScrapers(globalConfig GlobalConfig, txnManager models.TransactionManager) ([]scraper, error) { + path := globalConfig.GetScrapersPath() + scrapers := make([]scraper, 0) logger.Debugf("Reading scraper configs from %s", path) scraperFiles := []string{} @@ -74,14 +76,15 @@ func loadScrapers(path string) ([]config, error) { } // add built-in freeones scraper - scrapers = append(scrapers, getFreeonesScraper()) + scrapers = append(scrapers, getFreeonesScraper(txnManager, globalConfig), getAutoTagScraper(txnManager, globalConfig)) for _, file := range scraperFiles { - scraper, err := loadScraperFromYAMLFile(file) + c, err := loadConfigFromYAMLFile(file) if err != nil { logger.Errorf("Error loading scraper %s: %s", file, err.Error()) } else { - scrapers = append(scrapers, *scraper) + scraper := createScraperFromConfig(*c, txnManager, globalConfig) + scrapers = append(scrapers, scraper) } } @@ -92,7 +95,7 @@ func loadScrapers(path string) ([]config, error) { // In the event of an error during loading, the cache will be left empty. func (c *Cache) ReloadScrapers() error { c.scrapers = nil - scrapers, err := loadScrapers(c.globalConfig.GetScrapersPath()) + scrapers, err := loadScrapers(c.globalConfig, c.txnManager) if err != nil { return err } @@ -114,8 +117,8 @@ func (c Cache) ListPerformerScrapers() []*models.Scraper { var ret []*models.Scraper for _, s := range c.scrapers { // filter on type - if s.supportsPerformers() { - ret = append(ret, s.toScraper()) + if s.Performer != nil { + ret = append(ret, s.Spec) } } @@ -128,8 +131,8 @@ func (c Cache) ListSceneScrapers() []*models.Scraper { var ret []*models.Scraper for _, s := range c.scrapers { // filter on type - if s.supportsScenes() { - ret = append(ret, s.toScraper()) + if s.Scene != nil { + ret = append(ret, s.Spec) } } @@ -142,8 +145,8 @@ func (c Cache) ListGalleryScrapers() []*models.Scraper { var ret []*models.Scraper for _, s := range c.scrapers { // filter on type - if s.supportsGalleries() { - ret = append(ret, s.toScraper()) + if s.Gallery != nil { + ret = append(ret, s.Spec) } } @@ -156,15 +159,15 @@ func (c Cache) ListMovieScrapers() []*models.Scraper { var ret []*models.Scraper for _, s := range c.scrapers { // filter on type - if s.supportsMovies() { - ret = append(ret, s.toScraper()) + if s.Movie != nil { + ret = append(ret, s.Spec) } } return ret } -func (c Cache) findScraper(scraperID string) *config { +func (c Cache) findScraper(scraperID string) *scraper { for _, s := range c.scrapers { if s.ID == scraperID { return &s @@ -180,8 +183,8 @@ func (c Cache) findScraper(scraperID string) *config { func (c Cache) ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) { // find scraper with the provided id s := c.findScraper(scraperID) - if s != nil { - return s.ScrapePerformerNames(query, c.txnManager, c.globalConfig) + if s != nil && s.Performer != nil { + return s.Performer.scrapeByName(query) } return nil, errors.New("Scraper with ID " + scraperID + " not found") @@ -192,8 +195,8 @@ func (c Cache) ScrapePerformerList(scraperID string, query string) ([]*models.Sc func (c Cache) ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { // find scraper with the provided id s := c.findScraper(scraperID) - if s != nil { - ret, err := s.ScrapePerformer(scrapedPerformer, c.txnManager, c.globalConfig) + if s != nil && s.Performer != nil { + ret, err := s.Performer.scrapeByFragment(scrapedPerformer) if err != nil { return nil, err } @@ -216,8 +219,8 @@ func (c Cache) ScrapePerformer(scraperID string, scrapedPerformer models.Scraped // the URL, then nil is returned. func (c Cache) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) { for _, s := range c.scrapers { - if s.matchesPerformerURL(url) { - ret, err := s.ScrapePerformerURL(url, c.txnManager, c.globalConfig) + if matchesURL(s.Performer, url) { + ret, err := s.Performer.scrapeByURL(url) if err != nil { return nil, err } @@ -289,13 +292,13 @@ func (c Cache) postScrapeScene(ret *models.ScrapedScene) error { return err } - if err := MatchScrapedPerformer(pqb, p); err != nil { + if err := match.ScrapedPerformer(pqb, p); err != nil { return err } } for _, p := range ret.Movies { - err := MatchScrapedMovie(mqb, p) + err := match.ScrapedMovie(mqb, p) if err != nil { return err } @@ -308,7 +311,7 @@ func (c Cache) postScrapeScene(ret *models.ScrapedScene) error { ret.Tags = tags if ret.Studio != nil { - err := MatchScrapedStudio(sqb, ret.Studio) + err := match.ScrapedStudio(sqb, ret.Studio) if err != nil { return err } @@ -334,7 +337,7 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error { sqb := r.Studio() for _, p := range ret.Performers { - err := MatchScrapedPerformer(pqb, p) + err := match.ScrapedPerformer(pqb, p) if err != nil { return err } @@ -347,7 +350,7 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error { ret.Tags = tags if ret.Studio != nil { - err := MatchScrapedStudio(sqb, ret.Studio) + err := match.ScrapedStudio(sqb, ret.Studio) if err != nil { return err } @@ -365,14 +368,14 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error { func (c Cache) ScrapeScene(scraperID string, sceneID int) (*models.ScrapedScene, error) { // find scraper with the provided id s := c.findScraper(scraperID) - if s != nil { + if s != nil && s.Scene != nil { // get scene from id scene, err := getScene(sceneID, c.txnManager) if err != nil { return nil, err } - ret, err := s.ScrapeSceneByScene(scene, c.txnManager, c.globalConfig) + ret, err := s.Scene.scrapeByScene(scene) if err != nil { return nil, err @@ -397,8 +400,8 @@ func (c Cache) ScrapeScene(scraperID string, sceneID int) (*models.ScrapedScene, func (c Cache) ScrapeSceneQuery(scraperID string, query string) ([]*models.ScrapedScene, error) { // find scraper with the provided id s := c.findScraper(scraperID) - if s != nil { - return s.ScrapeSceneQuery(query, c.txnManager, c.globalConfig) + if s != nil && s.Scene != nil { + return s.Scene.scrapeByName(query) } return nil, errors.New("Scraper with ID " + scraperID + " not found") @@ -408,8 +411,8 @@ func (c Cache) ScrapeSceneQuery(scraperID string, query string) ([]*models.Scrap func (c Cache) ScrapeSceneFragment(scraperID string, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { // find scraper with the provided id s := c.findScraper(scraperID) - if s != nil { - ret, err := s.ScrapeSceneByFragment(scene, c.txnManager, c.globalConfig) + if s != nil && s.Scene != nil { + ret, err := s.Scene.scrapeByFragment(scene) if err != nil { return nil, err @@ -433,8 +436,8 @@ func (c Cache) ScrapeSceneFragment(scraperID string, scene models.ScrapedSceneIn // the URL, then nil is returned. func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) { for _, s := range c.scrapers { - if s.matchesSceneURL(url) { - ret, err := s.ScrapeSceneURL(url, c.txnManager, c.globalConfig) + if matchesURL(s.Scene, url) { + ret, err := s.Scene.scrapeByURL(url) if err != nil { return nil, err @@ -455,14 +458,14 @@ func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) { // ScrapeGallery uses the scraper with the provided ID to scrape a gallery using existing data. func (c Cache) ScrapeGallery(scraperID string, galleryID int) (*models.ScrapedGallery, error) { s := c.findScraper(scraperID) - if s != nil { + if s != nil && s.Gallery != nil { // get gallery from id gallery, err := getGallery(galleryID, c.txnManager) if err != nil { return nil, err } - ret, err := s.ScrapeGalleryByGallery(gallery, c.txnManager, c.globalConfig) + ret, err := s.Gallery.scrapeByGallery(gallery) if err != nil { return nil, err @@ -484,8 +487,8 @@ func (c Cache) ScrapeGallery(scraperID string, galleryID int) (*models.ScrapedGa // ScrapeGalleryFragment uses the scraper with the provided ID to scrape a gallery. func (c Cache) ScrapeGalleryFragment(scraperID string, gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { s := c.findScraper(scraperID) - if s != nil { - ret, err := s.ScrapeGalleryByFragment(gallery, c.txnManager, c.globalConfig) + if s != nil && s.Gallery != nil { + ret, err := s.Gallery.scrapeByFragment(gallery) if err != nil { return nil, err @@ -509,8 +512,8 @@ func (c Cache) ScrapeGalleryFragment(scraperID string, gallery models.ScrapedGal // the URL, then nil is returned. func (c Cache) ScrapeGalleryURL(url string) (*models.ScrapedGallery, error) { for _, s := range c.scrapers { - if s.matchesGalleryURL(url) { - ret, err := s.ScrapeGalleryURL(url, c.txnManager, c.globalConfig) + if matchesURL(s.Gallery, url) { + ret, err := s.Gallery.scrapeByURL(url) if err != nil { return nil, err @@ -533,15 +536,15 @@ func (c Cache) ScrapeGalleryURL(url string) (*models.ScrapedGallery, error) { // the URL, then nil is returned. func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) { for _, s := range c.scrapers { - if s.matchesMovieURL(url) { - ret, err := s.ScrapeMovieURL(url, c.txnManager, c.globalConfig) + if s.Movie != nil && matchesURL(s.Movie, url) { + ret, err := s.Movie.scrapeByURL(url) if err != nil { return nil, err } if ret.Studio != nil { if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - return MatchScrapedStudio(r.Studio(), ret.Studio) + return match.ScrapedStudio(r.Studio(), ret.Studio) }); err != nil { return nil, err } @@ -587,7 +590,7 @@ ScrapeTag: } } - err := MatchScrapedTag(tqb, t) + err := match.ScrapedTag(tqb, t) if err != nil { return nil, err } diff --git a/pkg/scraper/stashbox/stash_box.go b/pkg/scraper/stashbox/stash_box.go index b6174d7ee..21d2b7ec8 100644 --- a/pkg/scraper/stashbox/stash_box.go +++ b/pkg/scraper/stashbox/stash_box.go @@ -12,8 +12,8 @@ import ( "github.com/Yamashou/gqlgenc/client" "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" - "github.com/stashapp/stash/pkg/scraper" "github.com/stashapp/stash/pkg/scraper/stashbox/graphql" "github.com/stashapp/stash/pkg/utils" ) @@ -644,7 +644,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq RemoteSiteID: &studioID, } - err := scraper.MatchScrapedStudio(r.Studio(), ss.Studio) + err := match.ScrapedStudio(r.Studio(), ss.Studio) if err != nil { return err } @@ -653,7 +653,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq for _, p := range s.Performers { sp := performerFragmentToScrapedScenePerformer(p.Performer) - err := scraper.MatchScrapedPerformer(pqb, sp) + err := match.ScrapedPerformer(pqb, sp) if err != nil { return err } @@ -666,7 +666,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq Name: t.Name, } - err := scraper.MatchScrapedTag(tqb, st) + err := match.ScrapedTag(tqb, st) if err != nil { return err } diff --git a/pkg/scraper/xpath_test.go b/pkg/scraper/xpath_test.go index 4ba98d40d..fbffcc390 100644 --- a/pkg/scraper/xpath_test.go +++ b/pkg/scraper/xpath_test.go @@ -874,7 +874,8 @@ xPathScrapers: globalConfig := mockGlobalConfig{} - performer, err := c.ScrapePerformerURL(ts.URL, nil, globalConfig) + s := createScraperFromConfig(*c, nil, globalConfig) + performer, err := s.Performer.scrapeByURL(ts.URL) if err != nil { t.Errorf("Error scraping performer: %s", err.Error()) diff --git a/pkg/sqlite/performer.go b/pkg/sqlite/performer.go index c8b3f86de..212a706bb 100644 --- a/pkg/sqlite/performer.go +++ b/pkg/sqlite/performer.go @@ -182,11 +182,15 @@ func (qb *performerQueryBuilder) QueryForAutoTag(words []string) ([]*models.Perf var whereClauses []string var args []interface{} + whereClauses = append(whereClauses, "name regexp ?") + args = append(args, "^[\\w][.\\-_ ]") + for _, w := range words { whereClauses = append(whereClauses, "name like ?") args = append(args, w+"%") - whereClauses = append(whereClauses, "aliases like ?") - args = append(args, w+"%") + // TODO - commented out until alias matching works both ways + // whereClauses = append(whereClauses, "aliases like ?") + // args = append(args, w+"%") } where := strings.Join(whereClauses, " OR ") diff --git a/pkg/sqlite/studio.go b/pkg/sqlite/studio.go index 746a89b12..b46737385 100644 --- a/pkg/sqlite/studio.go +++ b/pkg/sqlite/studio.go @@ -133,6 +133,11 @@ func (qb *studioQueryBuilder) QueryForAutoTag(words []string) ([]*models.Studio, var whereClauses []string var args []interface{} + // always include names that begin with a single character + singleFirstCharacterRegex := "^[\\w][.\\-_ ]" + whereClauses = append(whereClauses, "studios.name regexp ? OR COALESCE(studio_aliases.alias, '') regexp ?") + args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex) + for _, w := range words { ww := w + "%" whereClauses = append(whereClauses, "studios.name like ?") diff --git a/pkg/sqlite/tag.go b/pkg/sqlite/tag.go index 0acf748c0..59caa023c 100644 --- a/pkg/sqlite/tag.go +++ b/pkg/sqlite/tag.go @@ -235,6 +235,11 @@ func (qb *tagQueryBuilder) QueryForAutoTag(words []string) ([]*models.Tag, error var whereClauses []string var args []interface{} + // always include names that begin with a single character + singleFirstCharacterRegex := "^[\\w][.\\-_ ]" + whereClauses = append(whereClauses, "tags.name regexp ? OR COALESCE(tag_aliases.alias, '') regexp ?") + args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex) + for _, w := range words { ww := w + "%" whereClauses = append(whereClauses, "tags.name like ?") diff --git a/ui/v2.5/src/components/Changelog/versions/v0110.md b/ui/v2.5/src/components/Changelog/versions/v0110.md index 1fc0897d3..d6b8f5c67 100644 --- a/ui/v2.5/src/components/Changelog/versions/v0110.md +++ b/ui/v2.5/src/components/Changelog/versions/v0110.md @@ -1,5 +1,7 @@ ### ✨ New Features +* Added built-in `Auto Tag` scene scraper to match performers, studio and tags from filename - using AutoTag logic. ([#1817](https://github.com/stashapp/stash/pull/1817)) * Added interface options to disable creating performers/studios/tags from dropdown selectors. ([#1814](https://github.com/stashapp/stash/pull/1814)) ### 🐛 Bug fixes +* Fix auto-tag logic for names which have single-letter words. ([#1817](https://github.com/stashapp/stash/pull/1817)) * Fix huge memory usage spike during clean task. ([#1805](https://github.com/stashapp/stash/pull/1805))