Autotag scraper (#1817)

* Refactor scraper structures
* Move matching code into new package
* Add autotag scraper
* Always check first letter of auto-tag names
* Account for nulls

Co-authored-by: Kermie <kermie@isinthe.house>
This commit is contained in:
WithoutPants 2021-10-11 23:06:06 +11:00 committed by GitHub
parent b5381ff071
commit e9d48683f8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 1023 additions and 660 deletions

View file

@ -1,78 +1,10 @@
package autotag
import (
"fmt"
"path/filepath"
"strings"
"github.com/stashapp/stash/pkg/gallery"
"github.com/stashapp/stash/pkg/models"
)
// galleryPathsFilter builds a chain of gallery filters linked through their
// Or field, one filter per entry in paths. Every entry gets a trailing path
// separator (unless it already ends with one) followed by "%" and is used as
// the value of an Equals path criterion. It returns nil when paths is nil; an
// empty slice also yields nil because no filter is ever created.
func galleryPathsFilter(paths []string) *models.GalleryFilterType {
	if paths == nil {
		return nil
	}

	separator := string(filepath.Separator)

	var head *models.GalleryFilterType
	// slot is where the next filter has to be stored: first the head of the
	// chain, afterwards the Or field of the filter added last.
	slot := &head
	for _, dir := range paths {
		if !strings.HasSuffix(dir, separator) {
			dir += separator
		}

		node := &models.GalleryFilterType{
			Path: &models.StringCriterionInput{
				Modifier: models.CriterionModifierEquals,
				Value:    dir + "%",
			},
		}
		*slot = node
		slot = &node.Or
	}

	return head
}
// getMatchingGalleries returns the galleries whose path matches name.
//
// It queries galleryReader for galleries with Organized set to false whose
// path matches a case-insensitive regex built from name, additionally
// AND-ed with the filter produced from paths, and then re-checks every result
// with nameMatchesPath. The returned slice is nil when nothing matches.
// A query error is wrapped, so callers can still inspect the underlying
// error with errors.Is / errors.As.
func getMatchingGalleries(name string, paths []string, galleryReader models.GalleryReader) ([]*models.Gallery, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.GalleryFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = galleryPathsFilter(paths)

	pp := models.PerPageAll
	galleries, _, err := galleryReader.Query(&filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		// %w renders the same text as the previous %s/err.Error() pair but
		// keeps the error chain intact
		return nil, fmt.Errorf("error querying gallerys with regex '%s': %w", regex, err)
	}

	var ret []*models.Gallery
	for _, g := range galleries {
		if nameMatchesPath(name, g.Path.String) {
			ret = append(ret, g)
		}
	}

	return ret, nil
}
func getGalleryFileTagger(s *models.Gallery) tagger {
return tagger{
ID: s.ID,

View file

@ -1,78 +1,10 @@
package autotag
import (
"fmt"
"path/filepath"
"strings"
"github.com/stashapp/stash/pkg/image"
"github.com/stashapp/stash/pkg/models"
)
// imagePathsFilter builds a chain of image filters linked through their Or
// field, one filter per entry in paths. Every entry gets a trailing path
// separator (unless it already ends with one) followed by "%" and is used as
// the value of an Equals path criterion. It returns nil when paths is nil; an
// empty slice also yields nil because no filter is ever created.
func imagePathsFilter(paths []string) *models.ImageFilterType {
	if paths == nil {
		return nil
	}

	separator := string(filepath.Separator)

	var head *models.ImageFilterType
	// slot is where the next filter has to be stored: first the head of the
	// chain, afterwards the Or field of the filter added last.
	slot := &head
	for _, dir := range paths {
		if !strings.HasSuffix(dir, separator) {
			dir += separator
		}

		node := &models.ImageFilterType{
			Path: &models.StringCriterionInput{
				Modifier: models.CriterionModifierEquals,
				Value:    dir + "%",
			},
		}
		*slot = node
		slot = &node.Or
	}

	return head
}
// getMatchingImages returns the images whose path matches name.
//
// It queries imageReader for images with Organized set to false whose path
// matches a case-insensitive regex built from name, additionally AND-ed with
// the filter produced from paths, and then re-checks every result with
// nameMatchesPath. The returned slice is nil when nothing matches.
// A query error is wrapped, so callers can still inspect the underlying
// error with errors.Is / errors.As.
func getMatchingImages(name string, paths []string, imageReader models.ImageReader) ([]*models.Image, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.ImageFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = imagePathsFilter(paths)

	pp := models.PerPageAll
	images, _, err := imageReader.Query(&filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		// %w renders the same text as the previous %s/err.Error() pair but
		// keeps the error chain intact
		return nil, fmt.Errorf("error querying images with regex '%s': %w", regex, err)
	}

	var ret []*models.Image
	for _, img := range images {
		if nameMatchesPath(name, img.Path) {
			ret = append(ret, img)
		}
	}

	return ret, nil
}
func getImageFileTagger(s *models.Image) tagger {
return tagger{
ID: s.ID,

View file

@ -7,25 +7,6 @@ import (
"github.com/stashapp/stash/pkg/scene"
)
// getMatchingPerformers returns the performers whose name matches path.
// Candidates are fetched with performerReader.QueryForAutoTag using the words
// extracted from path; each candidate's name is then checked against the
// path with nameMatchesPath. Aliases are currently not considered.
func getMatchingPerformers(path string, performerReader models.PerformerReader) ([]*models.Performer, error) {
	candidates, err := performerReader.QueryForAutoTag(getPathWords(path))
	if err != nil {
		return nil, err
	}

	var matched []*models.Performer
	for _, candidate := range candidates {
		// TODO - alias handling (nameMatchesPath on candidate.Aliases.String)
		// stays disabled until both sides work correctly
		if !nameMatchesPath(candidate.Name.String, path) {
			continue
		}
		matched = append(matched, candidate)
	}

	return matched, nil
}
func getPerformerTagger(p *models.Performer) tagger {
return tagger{
ID: p.ID,

View file

@ -1,78 +1,10 @@
package autotag
import (
"fmt"
"path/filepath"
"strings"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/scene"
)
// scenePathsFilter builds a chain of scene filters linked through their Or
// field, one filter per entry in paths. Every entry gets a trailing path
// separator (unless it already ends with one) followed by "%" and is used as
// the value of an Equals path criterion. It returns nil when paths is nil; an
// empty slice also yields nil because no filter is ever created.
func scenePathsFilter(paths []string) *models.SceneFilterType {
	if paths == nil {
		return nil
	}

	separator := string(filepath.Separator)

	var head *models.SceneFilterType
	// slot is where the next filter has to be stored: first the head of the
	// chain, afterwards the Or field of the filter added last.
	slot := &head
	for _, dir := range paths {
		if !strings.HasSuffix(dir, separator) {
			dir += separator
		}

		node := &models.SceneFilterType{
			Path: &models.StringCriterionInput{
				Modifier: models.CriterionModifierEquals,
				Value:    dir + "%",
			},
		}
		*slot = node
		slot = &node.Or
	}

	return head
}
// getMatchingScenes returns the scenes whose path matches name.
//
// It queries sceneReader for scenes with Organized set to false whose path
// matches a case-insensitive regex built from name, additionally AND-ed with
// the filter produced from paths, and then re-checks every result with
// nameMatchesPath. The returned slice is nil when nothing matches.
// A query error is wrapped, so callers can still inspect the underlying
// error with errors.Is / errors.As.
func getMatchingScenes(name string, paths []string, sceneReader models.SceneReader) ([]*models.Scene, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.SceneFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = scenePathsFilter(paths)

	pp := models.PerPageAll
	scenes, _, err := sceneReader.Query(&filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		// %w renders the same text as the previous %s/err.Error() pair but
		// keeps the error chain intact
		return nil, fmt.Errorf("error querying scenes with regex '%s': %w", regex, err)
	}

	var ret []*models.Scene
	for _, s := range scenes {
		if nameMatchesPath(name, s.Path) {
			ret = append(ret, s)
		}
	}

	return ret, nil
}
func getSceneFileTagger(s *models.Scene) tagger {
return tagger{
ID: s.ID,

View file

@ -2,46 +2,10 @@ package autotag
import (
"database/sql"
"github.com/stashapp/stash/pkg/models"
)
// getMatchingStudios returns the studios whose name, or one of whose aliases,
// matches path. Candidates are fetched with reader.QueryForAutoTag using the
// words extracted from path. Aliases are only loaded for a candidate when its
// name does not match.
func getMatchingStudios(path string, reader models.StudioReader) ([]*models.Studio, error) {
	candidates, err := reader.QueryForAutoTag(getPathWords(path))
	if err != nil {
		return nil, err
	}

	var matched []*models.Studio
	for _, studio := range candidates {
		found := nameMatchesPath(studio.Name.String, path)

		if !found {
			aliases, err := reader.GetAliases(studio.ID)
			if err != nil {
				return nil, err
			}

			// stop at the first alias that matches
			for i := 0; i < len(aliases) && !found; i++ {
				found = nameMatchesPath(aliases[i], path)
			}
		}

		if found {
			matched = append(matched, studio)
		}
	}

	return matched, nil
}
func addSceneStudio(sceneWriter models.SceneReaderWriter, sceneID, studioID int) (bool, error) {
// don't set if already set
scene, err := sceneWriter.Find(sceneID)

View file

@ -7,42 +7,6 @@ import (
"github.com/stashapp/stash/pkg/scene"
)
// getMatchingTags returns the tags whose name, or one of whose aliases,
// matches path. Candidates are fetched with tagReader.QueryForAutoTag using
// the words extracted from path. Aliases are only loaded for a candidate when
// its name does not match.
func getMatchingTags(path string, tagReader models.TagReader) ([]*models.Tag, error) {
	candidates, err := tagReader.QueryForAutoTag(getPathWords(path))
	if err != nil {
		return nil, err
	}

	var matched []*models.Tag
	for _, tag := range candidates {
		found := nameMatchesPath(tag.Name, path)

		if !found {
			aliases, err := tagReader.GetAliases(tag.ID)
			if err != nil {
				return nil, err
			}

			// stop at the first alias that matches
			for i := 0; i < len(aliases) && !found; i++ {
				found = nameMatchesPath(aliases[i], path)
			}
		}

		if found {
			matched = append(matched, tag)
		}
	}

	return matched, nil
}
func getTagTaggers(p *models.Tag, aliases []string) []tagger {
ret := []tagger{{
ID: p.ID,

View file

@ -15,78 +15,12 @@ package autotag
import (
"fmt"
"path/filepath"
"regexp"
"strings"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/match"
"github.com/stashapp/stash/pkg/models"
)
// separatorChars lists the characters accepted between the words of a name
// when it is matched against a path. It is written so that it can be placed
// inside a regex character class.
const separatorChars = `.\-_ `

// pathWordSeparatorRE matches runs of characters that delimit words in a
// path: underscores and anything that is not a word character or digit.
// It is compiled once here instead of on every getPathWords call.
var pathWordSeparatorRE = regexp.MustCompile(`(?:_|[^\w\d])+`)

// getPathQueryRegex returns a regular expression that finds name inside a
// path. Regex metacharacters in name are escaped, every space in name may be
// any number of separator characters (including none), and the name has to
// be delimited on both sides by the start/end of the path, an underscore or a
// non-word character.
func getPathQueryRegex(name string) string {
	// escape specific regex characters
	name = regexp.QuoteMeta(name)

	// handle path separators
	const separator = `[` + separatorChars + `]`

	ret := strings.ReplaceAll(name, " ", separator+"*")
	return `(?:^|_|[^\w\d])` + ret + `(?:$|_|[^\w\d])`
}

// nameMatchesPath reports whether name occurs in path as a delimited word
// sequence, ignoring case. It uses the same expression as getPathQueryRegex.
//
// An empty or whitespace-only name never matches: without that guard the
// expression degenerates to "two adjacent delimiters" and would match almost
// any path (for example "a - b").
func nameMatchesPath(name, path string) bool {
	if strings.TrimSpace(name) == "" {
		return false
	}

	// the name is escaped by getPathQueryRegex, so compilation is not
	// expected to fail; treat a failure as "no match" rather than panicking
	re, err := regexp.Compile(getPathQueryRegex(strings.ToLower(name)))
	if err != nil {
		return false
	}

	return re.MatchString(strings.ToLower(path))
}

// getPathWords returns the first two characters of every word in path that
// is longer than one character. The file extension is removed first and
// words are delimited by underscores and non-word characters. The result is
// nil when there are no such words.
func getPathWords(path string) []string {
	// remove the extension (TrimSuffix is a no-op when there is none)
	trimmed := strings.TrimSuffix(path, filepath.Ext(path))

	// handle path separators: every run of them becomes a single space
	words := strings.Fields(pathWordSeparatorRE.ReplaceAllString(trimmed, " "))

	var ret []string
	for _, w := range words {
		// remove any single letter words
		if len(w) < 2 {
			continue
		}

		// #1450 - we need to open up the criteria for matching so that we
		// can match where path has no space between subject names -
		// ie name = "foo bar" - path = "foobar"
		// we post-match afterwards, so we can afford to be a little loose
		// with the query
		// just use the first two characters
		// (\w and \d are ASCII-only in Go, so everything non-ASCII was
		// replaced above and slicing by bytes cannot split a character)
		ret = append(ret, w[:2])
	}

	return ret
}
type tagger struct {
ID int
Type string
@ -105,7 +39,7 @@ func (t *tagger) addLog(otherType, otherName string) {
}
func (t *tagger) tagPerformers(performerReader models.PerformerReader, addFunc addLinkFunc) error {
others, err := getMatchingPerformers(t.Path, performerReader)
others, err := match.PathToPerformers(t.Path, performerReader)
if err != nil {
return err
}
@ -126,7 +60,7 @@ func (t *tagger) tagPerformers(performerReader models.PerformerReader, addFunc a
}
func (t *tagger) tagStudios(studioReader models.StudioReader, addFunc addLinkFunc) error {
others, err := getMatchingStudios(t.Path, studioReader)
others, err := match.PathToStudios(t.Path, studioReader)
if err != nil {
return err
}
@ -149,7 +83,7 @@ func (t *tagger) tagStudios(studioReader models.StudioReader, addFunc addLinkFun
}
func (t *tagger) tagTags(tagReader models.TagReader, addFunc addLinkFunc) error {
others, err := getMatchingTags(t.Path, tagReader)
others, err := match.PathToTags(t.Path, tagReader)
if err != nil {
return err
}
@ -170,7 +104,7 @@ func (t *tagger) tagTags(tagReader models.TagReader, addFunc addLinkFunc) error
}
func (t *tagger) tagScenes(paths []string, sceneReader models.SceneReader, addFunc addLinkFunc) error {
others, err := getMatchingScenes(t.Name, paths, sceneReader)
others, err := match.PathToScenes(t.Name, paths, sceneReader)
if err != nil {
return err
}
@ -191,7 +125,7 @@ func (t *tagger) tagScenes(paths []string, sceneReader models.SceneReader, addFu
}
func (t *tagger) tagImages(paths []string, imageReader models.ImageReader, addFunc addLinkFunc) error {
others, err := getMatchingImages(t.Name, paths, imageReader)
others, err := match.PathToImages(t.Name, paths, imageReader)
if err != nil {
return err
}
@ -212,7 +146,7 @@ func (t *tagger) tagImages(paths []string, imageReader models.ImageReader, addFu
}
func (t *tagger) tagGalleries(paths []string, galleryReader models.GalleryReader, addFunc addLinkFunc) error {
others, err := getMatchingGalleries(t.Name, paths, galleryReader)
others, err := match.PathToGalleries(t.Name, paths, galleryReader)
if err != nil {
return err
}

358
pkg/match/path.go Normal file
View file

@ -0,0 +1,358 @@
package match
import (
"fmt"
"path/filepath"
"regexp"
"strings"
"github.com/stashapp/stash/pkg/models"
)
// separatorChars lists the characters accepted between the words of a name
// when it is matched against a path. It is written so that it can be placed
// inside a regex character class.
const separatorChars = `.\-_ `

// pathWordSeparatorRE matches runs of characters that delimit words in a
// path: underscores and anything that is not a word character or digit.
// It is compiled once here instead of on every getPathWords call.
var pathWordSeparatorRE = regexp.MustCompile(`(?:_|[^\w\d])+`)

// getPathQueryRegex returns a regular expression that finds name inside a
// path. Regex metacharacters in name are escaped, every space in name may be
// any number of separator characters (including none), and the name has to
// be delimited on both sides by the start/end of the path, an underscore or a
// non-word character.
func getPathQueryRegex(name string) string {
	// escape specific regex characters
	name = regexp.QuoteMeta(name)

	// handle path separators
	const separator = `[` + separatorChars + `]`

	ret := strings.ReplaceAll(name, " ", separator+"*")
	return `(?:^|_|[^\w\d])` + ret + `(?:$|_|[^\w\d])`
}

// getPathWords returns the first two characters of every word in path that
// is longer than one character. The file extension is removed first and
// words are delimited by underscores and non-word characters. The result is
// nil when there are no such words.
func getPathWords(path string) []string {
	// remove the extension (TrimSuffix is a no-op when there is none)
	trimmed := strings.TrimSuffix(path, filepath.Ext(path))

	// handle path separators: every run of them becomes a single space
	words := strings.Fields(pathWordSeparatorRE.ReplaceAllString(trimmed, " "))

	var ret []string
	for _, w := range words {
		// remove any single letter words
		if len(w) < 2 {
			continue
		}

		// #1450 - we need to open up the criteria for matching so that we
		// can match where path has no space between subject names -
		// ie name = "foo bar" - path = "foobar"
		// we post-match afterwards, so we can afford to be a little loose
		// with the query
		// just use the first two characters
		// (\w and \d are ASCII-only in Go, so everything non-ASCII was
		// replaced above and slicing by bytes cannot split a character)
		ret = append(ret, w[:2])
	}

	return ret
}

// nameMatchesPath reports whether name occurs in path as a delimited word
// sequence, ignoring case. It uses the same expression as getPathQueryRegex.
//
// An empty or whitespace-only name never matches: without that guard the
// expression degenerates to "two adjacent delimiters" and would match almost
// any path (for example "a - b").
func nameMatchesPath(name, path string) bool {
	if strings.TrimSpace(name) == "" {
		return false
	}

	// the name is escaped by getPathQueryRegex, so compilation is not
	// expected to fail; treat a failure as "no match" rather than panicking
	re, err := regexp.Compile(getPathQueryRegex(strings.ToLower(name)))
	if err != nil {
		return false
	}

	return re.MatchString(strings.ToLower(path))
}
// PathToPerformers returns the performers whose name matches path.
// Candidates are fetched with performerReader.QueryForAutoTag using the words
// extracted from path; each candidate's name is then checked against the
// path with nameMatchesPath. Aliases are currently not considered.
func PathToPerformers(path string, performerReader models.PerformerReader) ([]*models.Performer, error) {
	candidates, err := performerReader.QueryForAutoTag(getPathWords(path))
	if err != nil {
		return nil, err
	}

	var matched []*models.Performer
	for _, candidate := range candidates {
		// TODO - alias handling (nameMatchesPath on candidate.Aliases.String)
		// stays disabled until both sides work correctly
		if !nameMatchesPath(candidate.Name.String, path) {
			continue
		}
		matched = append(matched, candidate)
	}

	return matched, nil
}
// PathToStudios returns the studios whose name, or one of whose aliases,
// matches path. Candidates are fetched with reader.QueryForAutoTag using the
// words extracted from path. Aliases are only loaded for a candidate when its
// name does not match.
func PathToStudios(path string, reader models.StudioReader) ([]*models.Studio, error) {
	candidates, err := reader.QueryForAutoTag(getPathWords(path))
	if err != nil {
		return nil, err
	}

	var matched []*models.Studio
	for _, studio := range candidates {
		found := nameMatchesPath(studio.Name.String, path)

		if !found {
			aliases, err := reader.GetAliases(studio.ID)
			if err != nil {
				return nil, err
			}

			// stop at the first alias that matches
			for i := 0; i < len(aliases) && !found; i++ {
				found = nameMatchesPath(aliases[i], path)
			}
		}

		if found {
			matched = append(matched, studio)
		}
	}

	return matched, nil
}
// PathToTags returns the tags whose name, or one of whose aliases, matches
// path. Candidates are fetched with tagReader.QueryForAutoTag using the words
// extracted from path. Aliases are only loaded for a candidate when its name
// does not match.
func PathToTags(path string, tagReader models.TagReader) ([]*models.Tag, error) {
	candidates, err := tagReader.QueryForAutoTag(getPathWords(path))
	if err != nil {
		return nil, err
	}

	var matched []*models.Tag
	for _, tag := range candidates {
		found := nameMatchesPath(tag.Name, path)

		if !found {
			aliases, err := tagReader.GetAliases(tag.ID)
			if err != nil {
				return nil, err
			}

			// stop at the first alias that matches
			for i := 0; i < len(aliases) && !found; i++ {
				found = nameMatchesPath(aliases[i], path)
			}
		}

		if found {
			matched = append(matched, tag)
		}
	}

	return matched, nil
}
// scenePathsFilter builds a chain of scene filters linked through their Or
// field, one filter per entry in paths. Every entry gets a trailing path
// separator (unless it already ends with one) followed by "%" and is used as
// the value of an Equals path criterion. It returns nil when paths is nil; an
// empty slice also yields nil because no filter is ever created.
func scenePathsFilter(paths []string) *models.SceneFilterType {
	if paths == nil {
		return nil
	}

	separator := string(filepath.Separator)

	var head *models.SceneFilterType
	// slot is where the next filter has to be stored: first the head of the
	// chain, afterwards the Or field of the filter added last.
	slot := &head
	for _, dir := range paths {
		if !strings.HasSuffix(dir, separator) {
			dir += separator
		}

		node := &models.SceneFilterType{
			Path: &models.StringCriterionInput{
				Modifier: models.CriterionModifierEquals,
				Value:    dir + "%",
			},
		}
		*slot = node
		slot = &node.Or
	}

	return head
}
// PathToScenes returns the scenes whose path matches name.
//
// It queries sceneReader for scenes with Organized set to false whose path
// matches a case-insensitive regex built from name, additionally AND-ed with
// the filter produced from paths, and then re-checks every result with
// nameMatchesPath. The returned slice is nil when nothing matches.
// A query error is wrapped, so callers can still inspect the underlying
// error with errors.Is / errors.As.
func PathToScenes(name string, paths []string, sceneReader models.SceneReader) ([]*models.Scene, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.SceneFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = scenePathsFilter(paths)

	pp := models.PerPageAll
	scenes, _, err := sceneReader.Query(&filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		// %w renders the same text as the previous %s/err.Error() pair but
		// keeps the error chain intact
		return nil, fmt.Errorf("error querying scenes with regex '%s': %w", regex, err)
	}

	var ret []*models.Scene
	for _, s := range scenes {
		if nameMatchesPath(name, s.Path) {
			ret = append(ret, s)
		}
	}

	return ret, nil
}
// imagePathsFilter builds a chain of image filters linked through their Or
// field, one filter per entry in paths. Every entry gets a trailing path
// separator (unless it already ends with one) followed by "%" and is used as
// the value of an Equals path criterion. It returns nil when paths is nil; an
// empty slice also yields nil because no filter is ever created.
func imagePathsFilter(paths []string) *models.ImageFilterType {
	if paths == nil {
		return nil
	}

	separator := string(filepath.Separator)

	var head *models.ImageFilterType
	// slot is where the next filter has to be stored: first the head of the
	// chain, afterwards the Or field of the filter added last.
	slot := &head
	for _, dir := range paths {
		if !strings.HasSuffix(dir, separator) {
			dir += separator
		}

		node := &models.ImageFilterType{
			Path: &models.StringCriterionInput{
				Modifier: models.CriterionModifierEquals,
				Value:    dir + "%",
			},
		}
		*slot = node
		slot = &node.Or
	}

	return head
}
// PathToImages returns the images whose path matches name.
//
// It queries imageReader for images with Organized set to false whose path
// matches a case-insensitive regex built from name, additionally AND-ed with
// the filter produced from paths, and then re-checks every result with
// nameMatchesPath. The returned slice is nil when nothing matches.
// A query error is wrapped, so callers can still inspect the underlying
// error with errors.Is / errors.As.
func PathToImages(name string, paths []string, imageReader models.ImageReader) ([]*models.Image, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.ImageFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = imagePathsFilter(paths)

	pp := models.PerPageAll
	images, _, err := imageReader.Query(&filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		// %w renders the same text as the previous %s/err.Error() pair but
		// keeps the error chain intact
		return nil, fmt.Errorf("error querying images with regex '%s': %w", regex, err)
	}

	var ret []*models.Image
	for _, img := range images {
		if nameMatchesPath(name, img.Path) {
			ret = append(ret, img)
		}
	}

	return ret, nil
}
// galleryPathsFilter builds a chain of gallery filters linked through their
// Or field, one filter per entry in paths. Every entry gets a trailing path
// separator (unless it already ends with one) followed by "%" and is used as
// the value of an Equals path criterion. It returns nil when paths is nil; an
// empty slice also yields nil because no filter is ever created.
func galleryPathsFilter(paths []string) *models.GalleryFilterType {
	if paths == nil {
		return nil
	}

	separator := string(filepath.Separator)

	var head *models.GalleryFilterType
	// slot is where the next filter has to be stored: first the head of the
	// chain, afterwards the Or field of the filter added last.
	slot := &head
	for _, dir := range paths {
		if !strings.HasSuffix(dir, separator) {
			dir += separator
		}

		node := &models.GalleryFilterType{
			Path: &models.StringCriterionInput{
				Modifier: models.CriterionModifierEquals,
				Value:    dir + "%",
			},
		}
		*slot = node
		slot = &node.Or
	}

	return head
}
// PathToGalleries returns the galleries whose path matches name.
//
// It queries galleryReader for galleries with Organized set to false whose
// path matches a case-insensitive regex built from name, additionally
// AND-ed with the filter produced from paths, and then re-checks every result
// with nameMatchesPath. The returned slice is nil when nothing matches.
// A query error is wrapped, so callers can still inspect the underlying
// error with errors.Is / errors.As.
func PathToGalleries(name string, paths []string, galleryReader models.GalleryReader) ([]*models.Gallery, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.GalleryFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = galleryPathsFilter(paths)

	pp := models.PerPageAll
	galleries, _, err := galleryReader.Query(&filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		// %w renders the same text as the previous %s/err.Error() pair but
		// keeps the error chain intact
		return nil, fmt.Errorf("error querying gallerys with regex '%s': %w", regex, err)
	}

	var ret []*models.Gallery
	for _, g := range galleries {
		if nameMatchesPath(name, g.Path.String) {
			ret = append(ret, g)
		}
	}

	return ret, nil
}

View file

@ -1,4 +1,4 @@
package scraper
package match
import (
"strconv"
@ -8,10 +8,10 @@ import (
"github.com/stashapp/stash/pkg/tag"
)
// MatchScrapedPerformer matches the provided performer with the
// ScrapedPerformer matches the provided performer with the
// performers in the database and sets the ID field if one is found.
func MatchScrapedPerformer(qb models.PerformerReader, p *models.ScrapedPerformer) error {
if p.Name == nil {
func ScrapedPerformer(qb models.PerformerReader, p *models.ScrapedPerformer) error {
if p.StoredID != nil || p.Name == nil {
return nil
}
@ -31,9 +31,13 @@ func MatchScrapedPerformer(qb models.PerformerReader, p *models.ScrapedPerformer
return nil
}
// MatchScrapedStudio matches the provided studio with the studios
// ScrapedStudio matches the provided studio with the studios
// in the database and sets the ID field if one is found.
func MatchScrapedStudio(qb models.StudioReader, s *models.ScrapedStudio) error {
func ScrapedStudio(qb models.StudioReader, s *models.ScrapedStudio) error {
if s.StoredID != nil {
return nil
}
st, err := studio.ByName(qb, s.Name)
if err != nil {
@ -58,10 +62,10 @@ func MatchScrapedStudio(qb models.StudioReader, s *models.ScrapedStudio) error {
return nil
}
// MatchScrapedMovie matches the provided movie with the movies
// ScrapedMovie matches the provided movie with the movies
// in the database and sets the ID field if one is found.
func MatchScrapedMovie(qb models.MovieReader, m *models.ScrapedMovie) error {
if m.Name == nil {
func ScrapedMovie(qb models.MovieReader, m *models.ScrapedMovie) error {
if m.StoredID != nil || m.Name == nil {
return nil
}
@ -81,9 +85,13 @@ func MatchScrapedMovie(qb models.MovieReader, m *models.ScrapedMovie) error {
return nil
}
// MatchScrapedTag matches the provided tag with the tags
// ScrapedTag matches the provided tag with the tags
// in the database and sets the ID field if one is found.
func MatchScrapedTag(qb models.TagReader, s *models.ScrapedTag) error {
func ScrapedTag(qb models.TagReader, s *models.ScrapedTag) error {
if s.StoredID != nil {
return nil
}
t, err := tag.ByName(qb, s.Name)
if err != nil {

View file

@ -19,7 +19,7 @@ func (e scraperAction) IsValid() bool {
return false
}
type scraper interface {
type scraperActionImpl interface {
scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error)
scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
scrapePerformerByURL(url string) (*models.ScrapedPerformer, error)
@ -36,16 +36,16 @@ type scraper interface {
scrapeMovieByURL(url string) (*models.ScrapedMovie, error)
}
func getScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) scraper {
func (c config) getScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl {
switch scraper.Action {
case scraperActionScript:
return newScriptScraper(scraper, config, globalConfig)
return newScriptScraper(scraper, c, globalConfig)
case scraperActionStash:
return newStashScraper(scraper, txnManager, config, globalConfig)
return newStashScraper(scraper, txnManager, c, globalConfig)
case scraperActionXPath:
return newXpathScraper(scraper, txnManager, config, globalConfig)
return newXpathScraper(scraper, txnManager, c, globalConfig)
case scraperActionJson:
return newJsonScraper(scraper, txnManager, config, globalConfig)
return newJsonScraper(scraper, txnManager, c, globalConfig)
}
panic("unknown scraper action: " + scraper.Action)

218
pkg/scraper/autotag.go Normal file
View file

@ -0,0 +1,218 @@
package scraper
import (
"context"
"errors"
"fmt"
"strconv"
"github.com/stashapp/stash/pkg/match"
"github.com/stashapp/stash/pkg/models"
)
// Identifiers of the built-in AutoTag scraper.
const (
// autoTagScraperID is the scraper ID for the built-in AutoTag scraper
autoTagScraperID = "builtin_autotag"
// autoTagScraperName is the name set on the built-in AutoTag scraper's spec
autoTagScraperName = "Auto Tag"
)
// errNotSupported is returned by the auto-tag scrapers for the scrape
// methods they do not implement.
var errNotSupported = errors.New("not supported")
// autotagScraper holds the dependencies shared by the auto-tag scene and
// gallery scrapers and provides the path matching helpers they use.
type autotagScraper struct {
// txnManager is used to open the read transactions in which matching runs
txnManager models.TransactionManager
globalConfig GlobalConfig
}
// matchPerformers converts the performers matched against path into scraped
// performers. Each result carries the performer's name and its ID (as a
// string) in StoredID; Gender is only set when the stored value is valid.
// The result is nil when no performer matches.
func (s *autotagScraper) matchPerformers(path string, performerReader models.PerformerReader) ([]*models.ScrapedPerformer, error) {
	matched, err := match.PathToPerformers(path, performerReader)
	if err != nil {
		return nil, fmt.Errorf("error matching performers: %w", err)
	}

	var scraped []*models.ScrapedPerformer
	for _, performer := range matched {
		storedID := strconv.Itoa(performer.ID)

		entry := models.ScrapedPerformer{
			Name:     &performer.Name.String,
			StoredID: &storedID,
		}
		if performer.Gender.Valid {
			entry.Gender = &performer.Gender.String
		}

		scraped = append(scraped, &entry)
	}

	return scraped, nil
}
// matchStudio returns the first studio matched against path as a scraped
// studio, with its ID (as a string) in StoredID. It returns nil, nil when no
// studio matches.
func (s *autotagScraper) matchStudio(path string, studioReader models.StudioReader) (*models.ScrapedStudio, error) {
	matched, err := match.PathToStudios(path, studioReader)
	if err != nil {
		return nil, fmt.Errorf("error matching studios: %w", err)
	}

	if len(matched) == 0 {
		return nil, nil
	}

	// only the first match is used
	first := matched[0]
	storedID := strconv.Itoa(first.ID)
	return &models.ScrapedStudio{
		Name:     first.Name.String,
		StoredID: &storedID,
	}, nil
}
// matchTags converts the tags matched against path into scraped tags, each
// carrying the tag's name and its ID (as a string) in StoredID. The result is
// nil when no tag matches.
func (s *autotagScraper) matchTags(path string, tagReader models.TagReader) ([]*models.ScrapedTag, error) {
	matched, err := match.PathToTags(path, tagReader)
	if err != nil {
		return nil, fmt.Errorf("error matching tags: %w", err)
	}

	var scraped []*models.ScrapedTag
	for _, tag := range matched {
		storedID := strconv.Itoa(tag.ID)
		scraped = append(scraped, &models.ScrapedTag{
			Name:     tag.Name,
			StoredID: &storedID,
		})
	}

	return scraped, nil
}
// autotagSceneScraper is the scene side of the auto-tag scraper. Only
// scrapeByScene is implemented; the other scrape methods return
// errNotSupported.
type autotagSceneScraper struct {
	*autotagScraper
}

// scrapeByName is not supported by the auto-tag scraper.
func (c *autotagSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) {
	return nil, errNotSupported
}

// scrapeByScene matches performers, a studio and tags against the scene's
// path inside a read transaction. It returns nil, nil when nothing matches.
func (c *autotagSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) {
	var result *models.ScrapedScene

	// populate performers, studio and tags based on scene path
	lookup := func(r models.ReaderRepository) error {
		scenePath := scene.Path

		matchedPerformers, err := c.matchPerformers(scenePath, r.Performer())
		if err != nil {
			return err
		}

		matchedStudio, err := c.matchStudio(scenePath, r.Studio())
		if err != nil {
			return err
		}

		matchedTags, err := c.matchTags(scenePath, r.Tag())
		if err != nil {
			return err
		}

		// leave the result nil when there is nothing to report
		if len(matchedPerformers) == 0 && matchedStudio == nil && len(matchedTags) == 0 {
			return nil
		}

		result = &models.ScrapedScene{
			Performers: matchedPerformers,
			Studio:     matchedStudio,
			Tags:       matchedTags,
		}
		return nil
	}

	if err := c.txnManager.WithReadTxn(context.Background(), lookup); err != nil {
		return nil, err
	}

	return result, nil
}

// scrapeByFragment is not supported by the auto-tag scraper.
func (c *autotagSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
	return nil, errNotSupported
}

// scrapeByURL is not supported by the auto-tag scraper.
func (c *autotagSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) {
	return nil, errNotSupported
}
// autotagGalleryScraper is the gallery side of the auto-tag scraper. Only
// scrapeByGallery is implemented; the other scrape methods return
// errNotSupported.
type autotagGalleryScraper struct {
	*autotagScraper
}

// scrapeByGallery matches performers, a studio and tags against the
// gallery's path inside a read transaction. It returns nil, nil when the
// gallery has no valid path or when nothing matches.
func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
	if !gallery.Path.Valid {
		// not valid for non-path-based galleries
		return nil, nil
	}

	var result *models.ScrapedGallery

	// populate performers, studio and tags based on gallery path
	lookup := func(r models.ReaderRepository) error {
		galleryPath := gallery.Path.String

		matchedPerformers, err := c.matchPerformers(galleryPath, r.Performer())
		if err != nil {
			return err
		}

		matchedStudio, err := c.matchStudio(galleryPath, r.Studio())
		if err != nil {
			return err
		}

		matchedTags, err := c.matchTags(galleryPath, r.Tag())
		if err != nil {
			return err
		}

		// leave the result nil when there is nothing to report
		if len(matchedPerformers) == 0 && matchedStudio == nil && len(matchedTags) == 0 {
			return nil
		}

		result = &models.ScrapedGallery{
			Performers: matchedPerformers,
			Studio:     matchedStudio,
			Tags:       matchedTags,
		}
		return nil
	}

	if err := c.txnManager.WithReadTxn(context.Background(), lookup); err != nil {
		return nil, err
	}

	return result, nil
}

// scrapeByFragment is not supported by the auto-tag scraper.
func (c *autotagGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
	return nil, errNotSupported
}

// scrapeByURL is not supported by the auto-tag scraper.
func (c *autotagGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) {
	return nil, errNotSupported
}
// getAutoTagScraper assembles the built-in auto-tag scraper. Its spec
// advertises fragment scraping for scenes and galleries, and the scene and
// gallery implementations share a single autotagScraper instance.
func getAutoTagScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
	shared := &autotagScraper{
		txnManager:   txnManager,
		globalConfig: globalConfig,
	}

	// the same slice is referenced by both the scene and the gallery spec
	fragmentOnly := []models.ScrapeType{
		models.ScrapeTypeFragment,
	}

	spec := &models.Scraper{
		ID:   autoTagScraperID,
		Name: autoTagScraperName,
		Scene: &models.ScraperSpec{
			SupportedScrapes: fragmentOnly,
		},
		Gallery: &models.ScraperSpec{
			SupportedScrapes: fragmentOnly,
		},
	}

	return scraper{
		ID:      autoTagScraperID,
		Spec:    spec,
		Scene:   &autotagSceneScraper{shared},
		Gallery: &autotagGalleryScraper{shared},
	}
}

View file

@ -9,8 +9,6 @@ import (
"strings"
"gopkg.in/yaml.v2"
"github.com/stashapp/stash/pkg/models"
)
type config struct {
@ -194,7 +192,7 @@ type scraperDriverOptions struct {
Headers []*header `yaml:"headers"`
}
func loadScraperFromYAML(id string, reader io.Reader) (*config, error) {
func loadConfigFromYAML(id string, reader io.Reader) (*config, error) {
ret := &config{}
parser := yaml.NewDecoder(reader)
@ -213,7 +211,7 @@ func loadScraperFromYAML(id string, reader io.Reader) (*config, error) {
return ret, nil
}
func loadScraperFromYAMLFile(path string) (*config, error) {
func loadConfigFromYAMLFile(path string) (*config, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
@ -224,7 +222,7 @@ func loadScraperFromYAMLFile(path string) (*config, error) {
id := filepath.Base(path)
id = id[:strings.LastIndex(id, ".")]
ret, err := loadScraperFromYAML(id, file)
ret, err := loadConfigFromYAML(id, file)
if err != nil {
return nil, err
}
@ -234,78 +232,6 @@ func loadScraperFromYAMLFile(path string) (*config, error) {
return ret, nil
}
// toScraper describes this configuration as a models.Scraper. For each of
// performer, scene, gallery and movie it collects the scrape types the
// configuration provides, plus the URLs of the by-URL entries, and only
// attaches a spec to the result when at least one scrape type was collected.
func (c config) toScraper() *models.Scraper {
	result := &models.Scraper{
		ID:   c.ID,
		Name: c.Name,
	}

	// support records that spec can handle the given scrape type
	support := func(spec *models.ScraperSpec, t models.ScrapeType) {
		spec.SupportedScrapes = append(spec.SupportedScrapes, t)
	}

	var performerSpec models.ScraperSpec
	if c.PerformerByName != nil {
		support(&performerSpec, models.ScrapeTypeName)
	}
	if c.PerformerByFragment != nil {
		support(&performerSpec, models.ScrapeTypeFragment)
	}
	if len(c.PerformerByURL) > 0 {
		support(&performerSpec, models.ScrapeTypeURL)
		for _, byURL := range c.PerformerByURL {
			performerSpec.Urls = append(performerSpec.Urls, byURL.URL...)
		}
	}
	if len(performerSpec.SupportedScrapes) > 0 {
		result.Performer = &performerSpec
	}

	var sceneSpec models.ScraperSpec
	if c.SceneByFragment != nil {
		support(&sceneSpec, models.ScrapeTypeFragment)
	}
	// scraping scenes by name needs both the name and the query fragment scraper
	if c.SceneByName != nil && c.SceneByQueryFragment != nil {
		support(&sceneSpec, models.ScrapeTypeName)
	}
	if len(c.SceneByURL) > 0 {
		support(&sceneSpec, models.ScrapeTypeURL)
		for _, byURL := range c.SceneByURL {
			sceneSpec.Urls = append(sceneSpec.Urls, byURL.URL...)
		}
	}
	if len(sceneSpec.SupportedScrapes) > 0 {
		result.Scene = &sceneSpec
	}

	var gallerySpec models.ScraperSpec
	if c.GalleryByFragment != nil {
		support(&gallerySpec, models.ScrapeTypeFragment)
	}
	if len(c.GalleryByURL) > 0 {
		support(&gallerySpec, models.ScrapeTypeURL)
		for _, byURL := range c.GalleryByURL {
			gallerySpec.Urls = append(gallerySpec.Urls, byURL.URL...)
		}
	}
	if len(gallerySpec.SupportedScrapes) > 0 {
		result.Gallery = &gallerySpec
	}

	var movieSpec models.ScraperSpec
	if len(c.MovieByURL) > 0 {
		support(&movieSpec, models.ScrapeTypeURL)
		for _, byURL := range c.MovieByURL {
			movieSpec.Urls = append(movieSpec.Urls, byURL.URL...)
		}
	}
	if len(movieSpec.SupportedScrapes) > 0 {
		result.Movie = &movieSpec
	}

	return result
}
// supportsPerformers reports whether any performer scraper (by name, by
// fragment or by URL) is configured.
func (c config) supportsPerformers() bool {
	switch {
	case c.PerformerByName != nil,
		c.PerformerByFragment != nil,
		len(c.PerformerByURL) > 0:
		return true
	}
	return false
}
@ -320,47 +246,6 @@ func (c config) matchesPerformerURL(url string) bool {
return false
}
// ScrapePerformerNames runs the configured PerformerByName scraper for name.
// It returns nil, nil when no such scraper is configured.
func (c config) ScrapePerformerNames(name string, txnManager models.TransactionManager, globalConfig GlobalConfig) ([]*models.ScrapedPerformer, error) {
	if c.PerformerByName == nil {
		return nil, nil
	}

	impl := getScraper(*c.PerformerByName, txnManager, c, globalConfig)
	return impl.scrapePerformersByName(name)
}
// ScrapePerformer scrapes a performer from the given fragment. The
// PerformerByFragment scraper is used when configured; otherwise, if the
// fragment carries a non-empty URL, scraping falls back to
// ScrapePerformerURL. It returns nil, nil when neither applies.
func (c config) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedPerformer, error) {
	switch {
	case c.PerformerByFragment != nil:
		impl := getScraper(*c.PerformerByFragment, txnManager, c, globalConfig)
		return impl.scrapePerformerByFragment(scrapedPerformer)
	case scrapedPerformer.URL != nil && *scrapedPerformer.URL != "":
		// try to match against URL if present
		return c.ScrapePerformerURL(*scrapedPerformer.URL, txnManager, globalConfig)
	}

	return nil, nil
}
// ScrapePerformerURL tries every PerformerByURL entry that matches url, in
// order, and returns the first non-nil result or the first error. It returns
// nil, nil when no entry matches or none produces a result.
func (c config) ScrapePerformerURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedPerformer, error) {
	for _, byURL := range c.PerformerByURL {
		if !byURL.matchesURL(url) {
			continue
		}

		impl := getScraper(byURL.scraperTypeConfig, txnManager, c, globalConfig)
		scraped, err := impl.scrapePerformerByURL(url)
		if err != nil {
			return nil, err
		}

		// an empty result lets the next matching entry have a go
		if scraped != nil {
			return scraped, nil
		}
	}

	return nil, nil
}
// supportsScenes reports whether any scene scraper is configured: by name
// (which needs both SceneByName and SceneByQueryFragment), by fragment or by
// URL.
func (c config) supportsScenes() bool {
	switch {
	case c.SceneByName != nil && c.SceneByQueryFragment != nil,
		c.SceneByFragment != nil,
		len(c.SceneByURL) > 0:
		return true
	}
	return false
}
@ -401,103 +286,3 @@ func (c config) matchesMovieURL(url string) bool {
return false
}
// ScrapeSceneQuery runs the configured SceneByName scraper for name.
// It returns nil, nil when no such scraper is configured.
func (c config) ScrapeSceneQuery(name string, txnManager models.TransactionManager, globalConfig GlobalConfig) ([]*models.ScrapedScene, error) {
	if c.SceneByName == nil {
		return nil, nil
	}

	impl := getScraper(*c.SceneByName, txnManager, c, globalConfig)
	return impl.scrapeScenesByName(name)
}
// ScrapeSceneByScene runs the configured SceneByFragment scraper against an
// existing scene. It returns nil, nil when no such scraper is configured.
func (c config) ScrapeSceneByScene(scene *models.Scene, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
	if c.SceneByFragment == nil {
		return nil, nil
	}

	impl := getScraper(*c.SceneByFragment, txnManager, c, globalConfig)
	return impl.scrapeSceneByScene(scene)
}
// ScrapeSceneByFragment runs the configured SceneByQueryFragment scraper
// against the given scene input. It returns nil, nil when no such scraper is
// configured.
func (c config) ScrapeSceneByFragment(scene models.ScrapedSceneInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
	if c.SceneByQueryFragment == nil {
		return nil, nil
	}

	impl := getScraper(*c.SceneByQueryFragment, txnManager, c, globalConfig)
	return impl.scrapeSceneByFragment(scene)
}
// ScrapeSceneURL tries each SceneByURL entry whose pattern matches url, in
// configuration order. The first error aborts the search; the first non-nil
// result is returned. It returns (nil, nil) if no entry produced a result.
func (c config) ScrapeSceneURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
	for _, candidate := range c.SceneByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := getScraper(candidate.scraperTypeConfig, txnManager, c, globalConfig)
		scene, err := impl.scrapeSceneByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scene != nil:
			return scene, nil
		}
	}

	return nil, nil
}
// ScrapeGalleryByGallery scrapes metadata for an existing gallery using the
// GalleryByFragment scraper. It returns (nil, nil) when the config has no
// GalleryByFragment scraper.
func (c config) ScrapeGalleryByGallery(gallery *models.Gallery, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) {
	if c.GalleryByFragment == nil {
		return nil, nil
	}

	impl := getScraper(*c.GalleryByFragment, txnManager, c, globalConfig)
	return impl.scrapeGalleryByGallery(gallery)
}
// ScrapeGalleryByFragment scrapes a gallery from the supplied gallery input.
// It currently reuses the GalleryByFragment scraper and returns (nil, nil)
// when that scraper is not configured.
func (c config) ScrapeGalleryByFragment(gallery models.ScrapedGalleryInput, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) {
	if c.GalleryByFragment == nil {
		return nil, nil
	}

	// TODO - this should be galleryByQueryFragment
	impl := getScraper(*c.GalleryByFragment, txnManager, c, globalConfig)
	return impl.scrapeGalleryByFragment(gallery)
}
// ScrapeGalleryURL tries each GalleryByURL entry whose pattern matches url,
// in configuration order. The first error aborts the search; the first
// non-nil result is returned. It returns (nil, nil) if no entry produced a
// result.
func (c config) ScrapeGalleryURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedGallery, error) {
	for _, candidate := range c.GalleryByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := getScraper(candidate.scraperTypeConfig, txnManager, c, globalConfig)
		gallery, err := impl.scrapeGalleryByURL(url)
		switch {
		case err != nil:
			return nil, err
		case gallery != nil:
			return gallery, nil
		}
	}

	return nil, nil
}
// ScrapeMovieURL tries each MovieByURL entry whose pattern matches url, in
// configuration order. The first error aborts the search; the first non-nil
// result is returned. It returns (nil, nil) if no entry produced a result.
func (c config) ScrapeMovieURL(url string, txnManager models.TransactionManager, globalConfig GlobalConfig) (*models.ScrapedMovie, error) {
	for _, candidate := range c.MovieByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := getScraper(candidate.scraperTypeConfig, txnManager, c, globalConfig)
		movie, err := impl.scrapeMovieByURL(url)
		switch {
		case err != nil:
			return nil, err
		case movie != nil:
			return movie, nil
		}
	}

	return nil, nil
}

View file

@ -0,0 +1,283 @@
package scraper
import "github.com/stashapp/stash/pkg/models"
// configSceneScraper provides the scene scraping methods for a scraper that
// is defined by a config. All state lives in the embedded configScraper.
type configSceneScraper struct {
*configScraper
}
// matchesURL reports whether the underlying config matches url as a scene
// URL, as decided by config.matchesSceneURL.
func (c *configSceneScraper) matchesURL(url string) bool {
return c.config.matchesSceneURL(url)
}
// scrapeByName searches for scenes by name using the config's SceneByName
// scraper. It returns (nil, nil) when no SceneByName scraper is configured.
func (c *configSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) {
	if c.config.SceneByName == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*c.config.SceneByName, c.txnManager, c.globalConfig)
	return impl.scrapeScenesByName(name)
}
// scrapeByScene scrapes metadata for an existing scene using the config's
// SceneByFragment scraper. It returns (nil, nil) when no SceneByFragment
// scraper is configured.
func (c *configSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) {
	if c.config.SceneByFragment == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*c.config.SceneByFragment, c.txnManager, c.globalConfig)
	return impl.scrapeSceneByScene(scene)
}
// scrapeByFragment scrapes a scene from the supplied scene input using the
// config's SceneByQueryFragment scraper. It returns (nil, nil) when no
// SceneByQueryFragment scraper is configured.
func (c *configSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
	if c.config.SceneByQueryFragment == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*c.config.SceneByQueryFragment, c.txnManager, c.globalConfig)
	return impl.scrapeSceneByFragment(scene)
}
// scrapeByURL tries each SceneByURL entry of the config whose pattern matches
// url, in configuration order. The first error aborts the search; the first
// non-nil result is returned. It returns (nil, nil) if no entry produced a
// result.
func (c *configSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) {
	for _, candidate := range c.config.SceneByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := c.config.getScraper(candidate.scraperTypeConfig, c.txnManager, c.globalConfig)
		scene, err := impl.scrapeSceneByURL(url)
		switch {
		case err != nil:
			return nil, err
		case scene != nil:
			return scene, nil
		}
	}

	return nil, nil
}
// configPerformerScraper provides the performer scraping methods for a
// scraper that is defined by a config. All state lives in the embedded
// configScraper.
type configPerformerScraper struct {
*configScraper
}
// matchesURL reports whether the underlying config matches url as a performer
// URL, as decided by config.matchesPerformerURL.
func (c *configPerformerScraper) matchesURL(url string) bool {
return c.config.matchesPerformerURL(url)
}
// scrapeByName searches for performers by name using the config's
// PerformerByName scraper. It returns (nil, nil) when no PerformerByName
// scraper is configured.
func (c *configPerformerScraper) scrapeByName(name string) ([]*models.ScrapedPerformer, error) {
	if c.config.PerformerByName == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*c.config.PerformerByName, c.txnManager, c.globalConfig)
	return impl.scrapePerformersByName(name)
}
// scrapeByFragment scrapes a performer from the supplied fragment input.
//
// If the config defines a PerformerByFragment scraper, that scraper handles
// the request. Otherwise, when the input carries a non-empty URL, the call is
// delegated to scrapeByURL. It returns (nil, nil) when neither path applies.
func (c *configPerformerScraper) scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
	if c.config.PerformerByFragment != nil {
		impl := c.config.getScraper(*c.config.PerformerByFragment, c.txnManager, c.globalConfig)
		return impl.scrapePerformerByFragment(scrapedPerformer)
	}

	// no fragment scraper configured: fall back to the URL, if one was given
	performerURL := scrapedPerformer.URL
	if performerURL == nil || *performerURL == "" {
		return nil, nil
	}

	return c.scrapeByURL(*performerURL)
}
// scrapeByURL tries each PerformerByURL entry of the config whose pattern
// matches url, in configuration order. The first error aborts the search; the
// first non-nil result is returned. It returns (nil, nil) if no entry
// produced a result.
func (c *configPerformerScraper) scrapeByURL(url string) (*models.ScrapedPerformer, error) {
	for _, candidate := range c.config.PerformerByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := c.config.getScraper(candidate.scraperTypeConfig, c.txnManager, c.globalConfig)
		performer, err := impl.scrapePerformerByURL(url)
		switch {
		case err != nil:
			return nil, err
		case performer != nil:
			return performer, nil
		}
	}

	return nil, nil
}
// configGalleryScraper provides the gallery scraping methods for a scraper
// that is defined by a config. All state lives in the embedded configScraper.
type configGalleryScraper struct {
*configScraper
}
// matchesURL reports whether the underlying config matches url as a gallery
// URL, as decided by config.matchesGalleryURL.
func (c *configGalleryScraper) matchesURL(url string) bool {
return c.config.matchesGalleryURL(url)
}
// scrapeByGallery scrapes metadata for an existing gallery using the config's
// GalleryByFragment scraper. It returns (nil, nil) when no GalleryByFragment
// scraper is configured.
func (c *configGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
	if c.config.GalleryByFragment == nil {
		return nil, nil
	}

	impl := c.config.getScraper(*c.config.GalleryByFragment, c.txnManager, c.globalConfig)
	return impl.scrapeGalleryByGallery(gallery)
}
// scrapeByFragment scrapes a gallery from the supplied gallery input. It
// currently reuses the config's GalleryByFragment scraper and returns
// (nil, nil) when that scraper is not configured.
func (c *configGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
	if c.config.GalleryByFragment == nil {
		return nil, nil
	}

	// TODO - this should be galleryByQueryFragment
	impl := c.config.getScraper(*c.config.GalleryByFragment, c.txnManager, c.globalConfig)
	return impl.scrapeGalleryByFragment(gallery)
}
// scrapeByURL tries each GalleryByURL entry of the config whose pattern
// matches url, in configuration order. The first error aborts the search; the
// first non-nil result is returned. It returns (nil, nil) if no entry
// produced a result.
func (c *configGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) {
	for _, candidate := range c.config.GalleryByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := c.config.getScraper(candidate.scraperTypeConfig, c.txnManager, c.globalConfig)
		gallery, err := impl.scrapeGalleryByURL(url)
		switch {
		case err != nil:
			return nil, err
		case gallery != nil:
			return gallery, nil
		}
	}

	return nil, nil
}
// configMovieScraper provides the movie scraping methods for a scraper that
// is defined by a config. All state lives in the embedded configScraper.
type configMovieScraper struct {
*configScraper
}
// matchesURL reports whether the underlying config matches url as a movie
// URL, as decided by config.matchesMovieURL.
func (c *configMovieScraper) matchesURL(url string) bool {
return c.config.matchesMovieURL(url)
}
// scrapeByURL tries each MovieByURL entry of the config whose pattern matches
// url, in configuration order. The first error aborts the search; the first
// non-nil result is returned. It returns (nil, nil) if no entry produced a
// result.
func (c *configMovieScraper) scrapeByURL(url string) (*models.ScrapedMovie, error) {
	for _, candidate := range c.config.MovieByURL {
		if !candidate.matchesURL(url) {
			continue
		}

		impl := c.config.getScraper(candidate.scraperTypeConfig, c.txnManager, c.globalConfig)
		movie, err := impl.scrapeMovieByURL(url)
		switch {
		case err != nil:
			return nil, err
		case movie != nil:
			return movie, nil
		}
	}

	return nil, nil
}
// configScraper holds the state shared by the config-backed scraper types:
// the scraper config itself plus the transaction manager and global config
// that are passed to config.getScraper on every scrape call.
type configScraper struct {
config config
txnManager models.TransactionManager
globalConfig GlobalConfig
}
// createScraperFromConfig wraps a scraper config in a scraper value.
//
// The returned scraper carries the config's ID and the spec produced by
// configScraperSpec. Its Performer, Gallery, Movie and Scene fields are only
// populated when the config reports support for that content type; the
// populated fields all share one configScraper holding c, txnManager and
// globalConfig.
func createScraperFromConfig(c config, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
	shared := &configScraper{
		config:       c,
		txnManager:   txnManager,
		globalConfig: globalConfig,
	}

	result := scraper{
		ID:   c.ID,
		Spec: configScraperSpec(c),
	}

	// unsupported content types are left as nil interfaces
	if c.supportsPerformers() {
		result.Performer = &configPerformerScraper{shared}
	}
	if c.supportsGalleries() {
		result.Gallery = &configGalleryScraper{shared}
	}
	if c.supportsMovies() {
		result.Movie = &configMovieScraper{shared}
	}
	if c.supportsScenes() {
		result.Scene = &configSceneScraper{shared}
	}

	return result
}
// configScraperSpec builds the models.Scraper description of a config.
//
// For each content type (performer, scene, gallery, movie) it collects the
// supported scrape types and the URL patterns of any by-URL scrapers. A
// content type's spec is only attached to the result when at least one scrape
// type is supported for it.
func configScraperSpec(c config) *models.Scraper {
	spec := &models.Scraper{
		ID:   c.ID,
		Name: c.Name,
	}

	// performers: by name, by fragment and by URL
	var performerSpec models.ScraperSpec
	if c.PerformerByName != nil {
		performerSpec.SupportedScrapes = append(performerSpec.SupportedScrapes, models.ScrapeTypeName)
	}
	if c.PerformerByFragment != nil {
		performerSpec.SupportedScrapes = append(performerSpec.SupportedScrapes, models.ScrapeTypeFragment)
	}
	if len(c.PerformerByURL) != 0 {
		performerSpec.SupportedScrapes = append(performerSpec.SupportedScrapes, models.ScrapeTypeURL)
		for i := range c.PerformerByURL {
			performerSpec.Urls = append(performerSpec.Urls, c.PerformerByURL[i].URL...)
		}
	}
	if len(performerSpec.SupportedScrapes) != 0 {
		spec.Performer = &performerSpec
	}

	// scenes: by fragment, by name and by URL
	var sceneSpec models.ScraperSpec
	if c.SceneByFragment != nil {
		sceneSpec.SupportedScrapes = append(sceneSpec.SupportedScrapes, models.ScrapeTypeFragment)
	}
	// name scraping is only advertised when the query fragment scraper is
	// configured as well
	if c.SceneByName != nil && c.SceneByQueryFragment != nil {
		sceneSpec.SupportedScrapes = append(sceneSpec.SupportedScrapes, models.ScrapeTypeName)
	}
	if len(c.SceneByURL) != 0 {
		sceneSpec.SupportedScrapes = append(sceneSpec.SupportedScrapes, models.ScrapeTypeURL)
		for i := range c.SceneByURL {
			sceneSpec.Urls = append(sceneSpec.Urls, c.SceneByURL[i].URL...)
		}
	}
	if len(sceneSpec.SupportedScrapes) != 0 {
		spec.Scene = &sceneSpec
	}

	// galleries: by fragment and by URL
	var gallerySpec models.ScraperSpec
	if c.GalleryByFragment != nil {
		gallerySpec.SupportedScrapes = append(gallerySpec.SupportedScrapes, models.ScrapeTypeFragment)
	}
	if len(c.GalleryByURL) != 0 {
		gallerySpec.SupportedScrapes = append(gallerySpec.SupportedScrapes, models.ScrapeTypeURL)
		for i := range c.GalleryByURL {
			gallerySpec.Urls = append(gallerySpec.Urls, c.GalleryByURL[i].URL...)
		}
	}
	if len(gallerySpec.SupportedScrapes) != 0 {
		spec.Gallery = &gallerySpec
	}

	// movies: by URL only
	var movieSpec models.ScraperSpec
	if len(c.MovieByURL) != 0 {
		movieSpec.SupportedScrapes = append(movieSpec.SupportedScrapes, models.ScrapeTypeURL)
		for i := range c.MovieByURL {
			movieSpec.Urls = append(movieSpec.Urls, c.MovieByURL[i].URL...)
		}
	}
	if len(movieSpec.SupportedScrapes) != 0 {
		spec.Movie = &movieSpec
	}

	return spec
}

View file

@ -4,6 +4,7 @@ import (
"strings"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
)
// FreeonesScraperID is the scraper ID for the built-in Freeones scraper
@ -122,13 +123,13 @@ xPathScrapers:
# Last updated April 13, 2021
`
func getFreeonesScraper() config {
func getFreeonesScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
yml := freeonesScraperConfig
scraper, err := loadScraperFromYAML(FreeonesScraperID, strings.NewReader(yml))
c, err := loadConfigFromYAML(FreeonesScraperID, strings.NewReader(yml))
if err != nil {
logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error())
}
return *scraper
return createScraperFromConfig(*c, txnManager, globalConfig)
}

51
pkg/scraper/scraper.go Normal file
View file

@ -0,0 +1,51 @@
package scraper
import "github.com/stashapp/stash/pkg/models"
// urlMatcher is implemented by scrapers that can report whether they handle
// a given URL.
type urlMatcher interface {
matchesURL(url string) bool
}
// performerScraper is the set of performer scraping operations: search by
// name (multiple results), scrape from a performer fragment input, and scrape
// from a URL.
type performerScraper interface {
scrapeByName(name string) ([]*models.ScrapedPerformer, error)
scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
scrapeByURL(url string) (*models.ScrapedPerformer, error)
}
// sceneScraper is the set of scene scraping operations: search by name
// (multiple results), scrape for an existing scene, scrape from a scene
// fragment input, and scrape from a URL.
type sceneScraper interface {
scrapeByName(name string) ([]*models.ScrapedScene, error)
scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error)
scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error)
scrapeByURL(url string) (*models.ScrapedScene, error)
}
// galleryScraper is the set of gallery scraping operations: scrape for an
// existing gallery, scrape from a gallery fragment input, and scrape from a
// URL.
type galleryScraper interface {
scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error)
scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error)
scrapeByURL(url string) (*models.ScrapedGallery, error)
}
// movieScraper is the set of movie scraping operations; movies can only be
// scraped from a URL.
type movieScraper interface {
scrapeByURL(url string) (*models.ScrapedMovie, error)
}
// scraper bundles a scraper's identity and spec with its per-content-type
// implementations.
type scraper struct {
// ID identifies the scraper.
ID string
// Spec is the models.Scraper description of this scraper.
Spec *models.Scraper
// The fields below hold the implementation for each content type.
// NOTE(review): a nil field appears to mean the content type is not
// supported - confirm against the code that constructs scraper values.
Performer performerScraper
Scene sceneScraper
Gallery galleryScraper
Movie movieScraper
}
// matchesURL reports whether maybeURLMatcher implements urlMatcher and, if
// so, whether it matches url. A nil value, or a value that does not implement
// urlMatcher, never matches.
func matchesURL(maybeURLMatcher interface{}, url string) bool {
	// the comma-ok assertion fails without panicking on a nil interface, so
	// no separate nil check is needed
	m, implemented := maybeURLMatcher.(urlMatcher)
	if !implemented {
		return false
	}

	return m.matchesURL(url)
}

View file

@ -10,6 +10,7 @@ import (
"github.com/stashapp/stash/pkg/logger"
stash_config "github.com/stashapp/stash/pkg/manager/config"
"github.com/stashapp/stash/pkg/match"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/utils"
)
@ -32,7 +33,7 @@ func isCDPPathWS(c GlobalConfig) bool {
// Cache stores scraper details.
type Cache struct {
scrapers []config
scrapers []scraper
globalConfig GlobalConfig
txnManager models.TransactionManager
}
@ -44,7 +45,7 @@ type Cache struct {
// Scraper configurations are loaded from yml files in the provided scrapers
// directory and any subdirectories.
func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) {
scrapers, err := loadScrapers(globalConfig.GetScrapersPath())
scrapers, err := loadScrapers(globalConfig, txnManager)
if err != nil {
return nil, err
}
@ -56,8 +57,9 @@ func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (
}, nil
}
func loadScrapers(path string) ([]config, error) {
scrapers := make([]config, 0)
func loadScrapers(globalConfig GlobalConfig, txnManager models.TransactionManager) ([]scraper, error) {
path := globalConfig.GetScrapersPath()
scrapers := make([]scraper, 0)
logger.Debugf("Reading scraper configs from %s", path)
scraperFiles := []string{}
@ -74,14 +76,15 @@ func loadScrapers(path string) ([]config, error) {
}
// add built-in freeones scraper
scrapers = append(scrapers, getFreeonesScraper())
scrapers = append(scrapers, getFreeonesScraper(txnManager, globalConfig), getAutoTagScraper(txnManager, globalConfig))
for _, file := range scraperFiles {
scraper, err := loadScraperFromYAMLFile(file)
c, err := loadConfigFromYAMLFile(file)
if err != nil {
logger.Errorf("Error loading scraper %s: %s", file, err.Error())
} else {
scrapers = append(scrapers, *scraper)
scraper := createScraperFromConfig(*c, txnManager, globalConfig)
scrapers = append(scrapers, scraper)
}
}
@ -92,7 +95,7 @@ func loadScrapers(path string) ([]config, error) {
// In the event of an error during loading, the cache will be left empty.
func (c *Cache) ReloadScrapers() error {
c.scrapers = nil
scrapers, err := loadScrapers(c.globalConfig.GetScrapersPath())
scrapers, err := loadScrapers(c.globalConfig, c.txnManager)
if err != nil {
return err
}
@ -114,8 +117,8 @@ func (c Cache) ListPerformerScrapers() []*models.Scraper {
var ret []*models.Scraper
for _, s := range c.scrapers {
// filter on type
if s.supportsPerformers() {
ret = append(ret, s.toScraper())
if s.Performer != nil {
ret = append(ret, s.Spec)
}
}
@ -128,8 +131,8 @@ func (c Cache) ListSceneScrapers() []*models.Scraper {
var ret []*models.Scraper
for _, s := range c.scrapers {
// filter on type
if s.supportsScenes() {
ret = append(ret, s.toScraper())
if s.Scene != nil {
ret = append(ret, s.Spec)
}
}
@ -142,8 +145,8 @@ func (c Cache) ListGalleryScrapers() []*models.Scraper {
var ret []*models.Scraper
for _, s := range c.scrapers {
// filter on type
if s.supportsGalleries() {
ret = append(ret, s.toScraper())
if s.Gallery != nil {
ret = append(ret, s.Spec)
}
}
@ -156,15 +159,15 @@ func (c Cache) ListMovieScrapers() []*models.Scraper {
var ret []*models.Scraper
for _, s := range c.scrapers {
// filter on type
if s.supportsMovies() {
ret = append(ret, s.toScraper())
if s.Movie != nil {
ret = append(ret, s.Spec)
}
}
return ret
}
func (c Cache) findScraper(scraperID string) *config {
func (c Cache) findScraper(scraperID string) *scraper {
for _, s := range c.scrapers {
if s.ID == scraperID {
return &s
@ -180,8 +183,8 @@ func (c Cache) findScraper(scraperID string) *config {
func (c Cache) ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
return s.ScrapePerformerNames(query, c.txnManager, c.globalConfig)
if s != nil && s.Performer != nil {
return s.Performer.scrapeByName(query)
}
return nil, errors.New("Scraper with ID " + scraperID + " not found")
@ -192,8 +195,8 @@ func (c Cache) ScrapePerformerList(scraperID string, query string) ([]*models.Sc
func (c Cache) ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
ret, err := s.ScrapePerformer(scrapedPerformer, c.txnManager, c.globalConfig)
if s != nil && s.Performer != nil {
ret, err := s.Performer.scrapeByFragment(scrapedPerformer)
if err != nil {
return nil, err
}
@ -216,8 +219,8 @@ func (c Cache) ScrapePerformer(scraperID string, scrapedPerformer models.Scraped
// the URL, then nil is returned.
func (c Cache) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
for _, s := range c.scrapers {
if s.matchesPerformerURL(url) {
ret, err := s.ScrapePerformerURL(url, c.txnManager, c.globalConfig)
if matchesURL(s.Performer, url) {
ret, err := s.Performer.scrapeByURL(url)
if err != nil {
return nil, err
}
@ -289,13 +292,13 @@ func (c Cache) postScrapeScene(ret *models.ScrapedScene) error {
return err
}
if err := MatchScrapedPerformer(pqb, p); err != nil {
if err := match.ScrapedPerformer(pqb, p); err != nil {
return err
}
}
for _, p := range ret.Movies {
err := MatchScrapedMovie(mqb, p)
err := match.ScrapedMovie(mqb, p)
if err != nil {
return err
}
@ -308,7 +311,7 @@ func (c Cache) postScrapeScene(ret *models.ScrapedScene) error {
ret.Tags = tags
if ret.Studio != nil {
err := MatchScrapedStudio(sqb, ret.Studio)
err := match.ScrapedStudio(sqb, ret.Studio)
if err != nil {
return err
}
@ -334,7 +337,7 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
sqb := r.Studio()
for _, p := range ret.Performers {
err := MatchScrapedPerformer(pqb, p)
err := match.ScrapedPerformer(pqb, p)
if err != nil {
return err
}
@ -347,7 +350,7 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
ret.Tags = tags
if ret.Studio != nil {
err := MatchScrapedStudio(sqb, ret.Studio)
err := match.ScrapedStudio(sqb, ret.Studio)
if err != nil {
return err
}
@ -365,14 +368,14 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
func (c Cache) ScrapeScene(scraperID string, sceneID int) (*models.ScrapedScene, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
if s != nil && s.Scene != nil {
// get scene from id
scene, err := getScene(sceneID, c.txnManager)
if err != nil {
return nil, err
}
ret, err := s.ScrapeSceneByScene(scene, c.txnManager, c.globalConfig)
ret, err := s.Scene.scrapeByScene(scene)
if err != nil {
return nil, err
@ -397,8 +400,8 @@ func (c Cache) ScrapeScene(scraperID string, sceneID int) (*models.ScrapedScene,
func (c Cache) ScrapeSceneQuery(scraperID string, query string) ([]*models.ScrapedScene, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
return s.ScrapeSceneQuery(query, c.txnManager, c.globalConfig)
if s != nil && s.Scene != nil {
return s.Scene.scrapeByName(query)
}
return nil, errors.New("Scraper with ID " + scraperID + " not found")
@ -408,8 +411,8 @@ func (c Cache) ScrapeSceneQuery(scraperID string, query string) ([]*models.Scrap
func (c Cache) ScrapeSceneFragment(scraperID string, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
// find scraper with the provided id
s := c.findScraper(scraperID)
if s != nil {
ret, err := s.ScrapeSceneByFragment(scene, c.txnManager, c.globalConfig)
if s != nil && s.Scene != nil {
ret, err := s.Scene.scrapeByFragment(scene)
if err != nil {
return nil, err
@ -433,8 +436,8 @@ func (c Cache) ScrapeSceneFragment(scraperID string, scene models.ScrapedSceneIn
// the URL, then nil is returned.
func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
for _, s := range c.scrapers {
if s.matchesSceneURL(url) {
ret, err := s.ScrapeSceneURL(url, c.txnManager, c.globalConfig)
if matchesURL(s.Scene, url) {
ret, err := s.Scene.scrapeByURL(url)
if err != nil {
return nil, err
@ -455,14 +458,14 @@ func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
// ScrapeGallery uses the scraper with the provided ID to scrape a gallery using existing data.
func (c Cache) ScrapeGallery(scraperID string, galleryID int) (*models.ScrapedGallery, error) {
s := c.findScraper(scraperID)
if s != nil {
if s != nil && s.Gallery != nil {
// get gallery from id
gallery, err := getGallery(galleryID, c.txnManager)
if err != nil {
return nil, err
}
ret, err := s.ScrapeGalleryByGallery(gallery, c.txnManager, c.globalConfig)
ret, err := s.Gallery.scrapeByGallery(gallery)
if err != nil {
return nil, err
@ -484,8 +487,8 @@ func (c Cache) ScrapeGallery(scraperID string, galleryID int) (*models.ScrapedGa
// ScrapeGalleryFragment uses the scraper with the provided ID to scrape a gallery.
func (c Cache) ScrapeGalleryFragment(scraperID string, gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
s := c.findScraper(scraperID)
if s != nil {
ret, err := s.ScrapeGalleryByFragment(gallery, c.txnManager, c.globalConfig)
if s != nil && s.Gallery != nil {
ret, err := s.Gallery.scrapeByFragment(gallery)
if err != nil {
return nil, err
@ -509,8 +512,8 @@ func (c Cache) ScrapeGalleryFragment(scraperID string, gallery models.ScrapedGal
// the URL, then nil is returned.
func (c Cache) ScrapeGalleryURL(url string) (*models.ScrapedGallery, error) {
for _, s := range c.scrapers {
if s.matchesGalleryURL(url) {
ret, err := s.ScrapeGalleryURL(url, c.txnManager, c.globalConfig)
if matchesURL(s.Gallery, url) {
ret, err := s.Gallery.scrapeByURL(url)
if err != nil {
return nil, err
@ -533,15 +536,15 @@ func (c Cache) ScrapeGalleryURL(url string) (*models.ScrapedGallery, error) {
// the URL, then nil is returned.
func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) {
for _, s := range c.scrapers {
if s.matchesMovieURL(url) {
ret, err := s.ScrapeMovieURL(url, c.txnManager, c.globalConfig)
if s.Movie != nil && matchesURL(s.Movie, url) {
ret, err := s.Movie.scrapeByURL(url)
if err != nil {
return nil, err
}
if ret.Studio != nil {
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
return MatchScrapedStudio(r.Studio(), ret.Studio)
return match.ScrapedStudio(r.Studio(), ret.Studio)
}); err != nil {
return nil, err
}
@ -587,7 +590,7 @@ ScrapeTag:
}
}
err := MatchScrapedTag(tqb, t)
err := match.ScrapedTag(tqb, t)
if err != nil {
return nil, err
}

View file

@ -12,8 +12,8 @@ import (
"github.com/Yamashou/gqlgenc/client"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/match"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/scraper"
"github.com/stashapp/stash/pkg/scraper/stashbox/graphql"
"github.com/stashapp/stash/pkg/utils"
)
@ -644,7 +644,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq
RemoteSiteID: &studioID,
}
err := scraper.MatchScrapedStudio(r.Studio(), ss.Studio)
err := match.ScrapedStudio(r.Studio(), ss.Studio)
if err != nil {
return err
}
@ -653,7 +653,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq
for _, p := range s.Performers {
sp := performerFragmentToScrapedScenePerformer(p.Performer)
err := scraper.MatchScrapedPerformer(pqb, sp)
err := match.ScrapedPerformer(pqb, sp)
if err != nil {
return err
}
@ -666,7 +666,7 @@ func sceneFragmentToScrapedScene(txnManager models.TransactionManager, s *graphq
Name: t.Name,
}
err := scraper.MatchScrapedTag(tqb, st)
err := match.ScrapedTag(tqb, st)
if err != nil {
return err
}

View file

@ -874,7 +874,8 @@ xPathScrapers:
globalConfig := mockGlobalConfig{}
performer, err := c.ScrapePerformerURL(ts.URL, nil, globalConfig)
s := createScraperFromConfig(*c, nil, globalConfig)
performer, err := s.Performer.scrapeByURL(ts.URL)
if err != nil {
t.Errorf("Error scraping performer: %s", err.Error())

View file

@ -182,11 +182,15 @@ func (qb *performerQueryBuilder) QueryForAutoTag(words []string) ([]*models.Perf
var whereClauses []string
var args []interface{}
whereClauses = append(whereClauses, "name regexp ?")
args = append(args, "^[\\w][.\\-_ ]")
for _, w := range words {
whereClauses = append(whereClauses, "name like ?")
args = append(args, w+"%")
whereClauses = append(whereClauses, "aliases like ?")
args = append(args, w+"%")
// TODO - commented out until alias matching works both ways
// whereClauses = append(whereClauses, "aliases like ?")
// args = append(args, w+"%")
}
where := strings.Join(whereClauses, " OR ")

View file

@ -133,6 +133,11 @@ func (qb *studioQueryBuilder) QueryForAutoTag(words []string) ([]*models.Studio,
var whereClauses []string
var args []interface{}
// always include names that begin with a single character
singleFirstCharacterRegex := "^[\\w][.\\-_ ]"
whereClauses = append(whereClauses, "studios.name regexp ? OR COALESCE(studio_aliases.alias, '') regexp ?")
args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex)
for _, w := range words {
ww := w + "%"
whereClauses = append(whereClauses, "studios.name like ?")

View file

@ -235,6 +235,11 @@ func (qb *tagQueryBuilder) QueryForAutoTag(words []string) ([]*models.Tag, error
var whereClauses []string
var args []interface{}
// always include names that begin with a single character
singleFirstCharacterRegex := "^[\\w][.\\-_ ]"
whereClauses = append(whereClauses, "tags.name regexp ? OR COALESCE(tag_aliases.alias, '') regexp ?")
args = append(args, singleFirstCharacterRegex, singleFirstCharacterRegex)
for _, w := range words {
ww := w + "%"
whereClauses = append(whereClauses, "tags.name like ?")

View file

@ -1,5 +1,7 @@
### ✨ New Features
* Added built-in `Auto Tag` scene scraper to match performers, studio and tags from filename - using AutoTag logic. ([#1817](https://github.com/stashapp/stash/pull/1817))
* Added interface options to disable creating performers/studios/tags from dropdown selectors. ([#1814](https://github.com/stashapp/stash/pull/1814))
### 🐛 Bug fixes
* Fix auto-tag logic for names which have single-letter words. ([#1817](https://github.com/stashapp/stash/pull/1817))
* Fix huge memory usage spike during clean task. ([#1805](https://github.com/stashapp/stash/pull/1805))