Refactor scraping to include related object fields (#6266)

* Refactor scraper post-processing and process related objects consistently
* Refactor image processing
* Scrape related studio fields consistently
* Don't set image on related objects
This commit is contained in:
WithoutPants 2025-12-02 12:49:44 +11:00 committed by GitHub
parent c6ae43c1d6
commit 84e24eb612
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 362 additions and 337 deletions

View file

@ -16,7 +16,6 @@ import (
"github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/match"
"github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/sliceutil"
"github.com/stashapp/stash/pkg/txn" "github.com/stashapp/stash/pkg/txn"
) )
@ -262,19 +261,23 @@ func (c Cache) ScrapeName(ctx context.Context, id, query string, ty ScrapeConten
return nil, fmt.Errorf("error while name scraping with scraper %s: %w", id, err) return nil, fmt.Errorf("error while name scraping with scraper %s: %w", id, err)
} }
ignoredRegex := c.compileExcludeTagPatterns() pp := postScraper{
Cache: c,
var ignoredTags []string excludeTagRE: c.compileExcludeTagPatterns(),
for i, cc := range content { }
var thisIgnoredTags []string if err := c.repository.WithReadTxn(ctx, func(ctx context.Context) error {
content[i], thisIgnoredTags, err = c.postScrape(ctx, cc, ignoredRegex) for i, cc := range content {
if err != nil { content[i], err = pp.postScrape(ctx, cc)
return nil, fmt.Errorf("error while post-scraping with scraper %s: %w", id, err) if err != nil {
return fmt.Errorf("error while post-scraping with scraper %s: %w", id, err)
}
} }
ignoredTags = sliceutil.AppendUniques(ignoredTags, thisIgnoredTags) return nil
}); err != nil {
return nil, err
} }
LogIgnoredTags(ignoredTags) LogIgnoredTags(pp.ignoredTags)
return content, nil return content, nil
} }

View file

@ -37,88 +37,43 @@ func setPerformerImage(ctx context.Context, client *http.Client, p *models.Scrap
return nil return nil
} }
func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedScene, globalConfig GlobalConfig) error { func setStudioImage(ctx context.Context, client *http.Client, p *models.ScrapedStudio, globalConfig GlobalConfig) error {
// don't try to get the image if it doesn't appear to be a URL // backwards compatibility: we fetch the image if it's a URL and set it to the first image
if s.Image == nil || !strings.HasPrefix(*s.Image, "http") { // Image is deprecated, so only do this if Images is unset
if p.Image == nil || len(p.Images) > 0 {
// nothing to do // nothing to do
return nil return nil
} }
img, err := getImage(ctx, *s.Image, client, globalConfig) // don't try to get the image if it doesn't appear to be a URL
if !strings.HasPrefix(*p.Image, "http") {
p.Images = []string{*p.Image}
return nil
}
img, err := getImage(ctx, *p.Image, client, globalConfig)
if err != nil { if err != nil {
return err return err
} }
s.Image = img p.Image = img
// Image is deprecated. Use images instead
p.Images = []string{*img}
return nil return nil
} }
func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error { func processImageField(ctx context.Context, imageField *string, client *http.Client, globalConfig GlobalConfig) error {
// don't try to get the image if it doesn't appear to be a URL if imageField == nil {
if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
// nothing to do
return nil return nil
} }
img, err := getImage(ctx, *m.FrontImage, client, globalConfig) img, err := getImage(ctx, *imageField, client, globalConfig)
if err != nil { if err != nil {
return err return err
} }
m.FrontImage = img *imageField = *img
return nil
}
func setMovieBackImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
// don't try to get the image if it doesn't appear to be a URL
if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
// nothing to do
return nil
}
img, err := getImage(ctx, *m.BackImage, client, globalConfig)
if err != nil {
return err
}
m.BackImage = img
return nil
}
func setGroupFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedGroup, globalConfig GlobalConfig) error {
// don't try to get the image if it doesn't appear to be a URL
if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
// nothing to do
return nil
}
img, err := getImage(ctx, *m.FrontImage, client, globalConfig)
if err != nil {
return err
}
m.FrontImage = img
return nil
}
func setGroupBackImage(ctx context.Context, client *http.Client, m *models.ScrapedGroup, globalConfig GlobalConfig) error {
// don't try to get the image if it doesn't appear to be a URL
if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
// nothing to do
return nil
}
img, err := getImage(ctx, *m.BackImage, client, globalConfig)
if err != nil {
return err
}
m.BackImage = img
return nil return nil
} }

View file

@ -11,85 +11,91 @@ import (
"github.com/stashapp/stash/pkg/utils" "github.com/stashapp/stash/pkg/utils"
) )
type postScraper struct {
Cache
excludeTagRE []*regexp.Regexp
// ignoredTags is a list of tags that were ignored during post-processing
ignoredTags []string
}
// postScrape handles post-processing of scraped content. If the content // postScrape handles post-processing of scraped content. If the content
// requires post-processing, this function fans out to the given content // requires post-processing, this function fans out to the given content
// type and post-processes it. // type and post-processes it.
func (c Cache) postScrape(ctx context.Context, content ScrapedContent, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { // Assumes called within a read transaction.
func (c *postScraper) postScrape(ctx context.Context, content ScrapedContent) (_ ScrapedContent, err error) {
const related = false
// Analyze the concrete type, call the right post-processing function // Analyze the concrete type, call the right post-processing function
switch v := content.(type) { switch v := content.(type) {
case *models.ScrapedPerformer: case *models.ScrapedPerformer:
if v != nil { if v != nil {
return c.postScrapePerformer(ctx, *v, excludeTagRE) return c.postScrapePerformer(ctx, *v, related)
} }
case models.ScrapedPerformer: case models.ScrapedPerformer:
return c.postScrapePerformer(ctx, v, excludeTagRE) return c.postScrapePerformer(ctx, v, related)
case *models.ScrapedScene: case *models.ScrapedScene:
if v != nil { if v != nil {
return c.postScrapeScene(ctx, *v, excludeTagRE) return c.postScrapeScene(ctx, *v)
} }
case models.ScrapedScene: case models.ScrapedScene:
return c.postScrapeScene(ctx, v, excludeTagRE) return c.postScrapeScene(ctx, v)
case *models.ScrapedGallery: case *models.ScrapedGallery:
if v != nil { if v != nil {
return c.postScrapeGallery(ctx, *v, excludeTagRE) return c.postScrapeGallery(ctx, *v)
} }
case models.ScrapedGallery: case models.ScrapedGallery:
return c.postScrapeGallery(ctx, v, excludeTagRE) return c.postScrapeGallery(ctx, v)
case *models.ScrapedImage: case *models.ScrapedImage:
if v != nil { if v != nil {
return c.postScrapeImage(ctx, *v, excludeTagRE) return c.postScrapeImage(ctx, *v)
} }
case models.ScrapedImage: case models.ScrapedImage:
return c.postScrapeImage(ctx, v, excludeTagRE) return c.postScrapeImage(ctx, v)
case *models.ScrapedMovie: case *models.ScrapedMovie:
if v != nil { if v != nil {
return c.postScrapeMovie(ctx, *v, excludeTagRE) return c.postScrapeMovie(ctx, *v, related)
} }
case models.ScrapedMovie: case models.ScrapedMovie:
return c.postScrapeMovie(ctx, v, excludeTagRE) return c.postScrapeMovie(ctx, v, related)
case *models.ScrapedGroup: case *models.ScrapedGroup:
if v != nil { if v != nil {
return c.postScrapeGroup(ctx, *v, excludeTagRE) return c.postScrapeGroup(ctx, *v, related)
} }
case models.ScrapedGroup: case models.ScrapedGroup:
return c.postScrapeGroup(ctx, v, excludeTagRE) return c.postScrapeGroup(ctx, v, related)
} }
// If nothing matches, pass the content through // If nothing matches, pass the content through
return content, nil, nil return content, nil
} }
// postScrapeSingle handles post-processing of a single scraped content item. func (c *postScraper) filterTags(tags []*models.ScrapedTag) []*models.ScrapedTag {
// This is a convenience function that includes logging the ignored tags, as opposed to logging them in the caller. var ret []*models.ScrapedTag
func (c Cache) postScrapeSingle(ctx context.Context, content ScrapedContent) (ScrapedContent, error) { var thisIgnoredTags []string
ret, ignoredTags, err := c.postScrape(ctx, content, c.compileExcludeTagPatterns()) ret, thisIgnoredTags = FilterTags(c.excludeTagRE, tags)
c.ignoredTags = sliceutil.AppendUniques(c.ignoredTags, thisIgnoredTags)
return ret
}
func (c *postScraper) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer, related bool) (_ ScrapedContent, err error) {
r := c.repository
tqb := r.TagFinder
tags, err := postProcessTags(ctx, tqb, p.Tags)
if err != nil { if err != nil {
return nil, err return nil, err
} }
LogIgnoredTags(ignoredTags) p.Tags = c.filterTags(tags)
return ret, nil
}
func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
r := c.repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
tqb := r.TagFinder
tags, err := postProcessTags(ctx, tqb, p.Tags)
if err != nil {
return err
}
p.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
return nil
}); err != nil {
return nil, nil, err
}
// post-process - set the image if applicable // post-process - set the image if applicable
if err := setPerformerImage(ctx, c.client, &p, c.globalConfig); err != nil { // don't set image for related performers to avoid excessive network calls
logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error()) if !related {
if err := setPerformerImage(ctx, c.client, &p, c.globalConfig); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error())
}
} }
p.Country = resolveCountryName(p.Country) p.Country = resolveCountryName(p.Country)
@ -119,119 +125,224 @@ func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerforme
} }
} }
return p, ignoredTags, nil return p, nil
} }
func (c Cache) postScrapeMovie(ctx context.Context, m models.ScrapedMovie, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { func (c *postScraper) postScrapeMovie(ctx context.Context, m models.ScrapedMovie, related bool) (_ ScrapedContent, err error) {
r := c.repository r := c.repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error { tqb := r.TagFinder
tqb := r.TagFinder tags, err := postProcessTags(ctx, tqb, m.Tags)
tags, err := postProcessTags(ctx, tqb, m.Tags)
if err != nil {
return err
}
m.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
if m.Studio != nil {
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
return err
}
}
return nil
}); err != nil {
return nil, nil, err
}
// populate URL/URLs
// if URLs are provided, only use those
if len(m.URLs) > 0 {
m.URL = &m.URLs[0]
} else {
urls := []string{}
if m.URL != nil {
urls = append(urls, *m.URL)
}
if len(urls) > 0 {
m.URLs = urls
}
}
// post-process - set the image if applicable
if err := setMovieFrontImage(ctx, c.client, &m, c.globalConfig); err != nil {
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
}
if err := setMovieBackImage(ctx, c.client, &m, c.globalConfig); err != nil {
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
}
return m, ignoredTags, nil
}
func (c Cache) postScrapeGroup(ctx context.Context, m models.ScrapedGroup, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
r := c.repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
tqb := r.TagFinder
tags, err := postProcessTags(ctx, tqb, m.Tags)
if err != nil {
return err
}
m.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
if m.Studio != nil {
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
return err
}
}
return nil
}); err != nil {
return nil, nil, err
}
// populate URL/URLs
// if URLs are provided, only use those
if len(m.URLs) > 0 {
m.URL = &m.URLs[0]
} else {
urls := []string{}
if m.URL != nil {
urls = append(urls, *m.URL)
}
if len(urls) > 0 {
m.URLs = urls
}
}
// post-process - set the image if applicable
if err := setGroupFrontImage(ctx, c.client, &m, c.globalConfig); err != nil {
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
}
if err := setGroupBackImage(ctx, c.client, &m, c.globalConfig); err != nil {
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
}
return m, ignoredTags, nil
}
func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPerformer, excludeTagRE []*regexp.Regexp) (ignoredTags []string, err error) {
tqb := c.repository.TagFinder
tags, err := postProcessTags(ctx, tqb, p.Tags)
if err != nil { if err != nil {
return nil, err return nil, err
} }
p.Tags = tags m.Tags = c.filterTags(tags)
p.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
p.Country = resolveCountryName(p.Country) if m.Studio != nil {
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
return nil, err
}
}
return ignoredTags, nil // populate URL/URLs
// if URLs are provided, only use those
if len(m.URLs) > 0 {
m.URL = &m.URLs[0]
} else {
urls := []string{}
if m.URL != nil {
urls = append(urls, *m.URL)
}
if len(urls) > 0 {
m.URLs = urls
}
}
// post-process - set the image if applicable
// don't set images for related movies to avoid excessive network calls
if !related {
if err := processImageField(ctx, m.FrontImage, c.client, c.globalConfig); err != nil {
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
}
if err := processImageField(ctx, m.BackImage, c.client, c.globalConfig); err != nil {
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
}
}
return m, nil
} }
func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { func (c *postScraper) postScrapeGroup(ctx context.Context, m models.ScrapedGroup, related bool) (_ ScrapedContent, err error) {
r := c.repository
tqb := r.TagFinder
tags, err := postProcessTags(ctx, tqb, m.Tags)
if err != nil {
return nil, err
}
m.Tags = c.filterTags(tags)
if m.Studio != nil {
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
return nil, err
}
}
// populate URL/URLs
// if URLs are provided, only use those
if len(m.URLs) > 0 {
m.URL = &m.URLs[0]
} else {
urls := []string{}
if m.URL != nil {
urls = append(urls, *m.URL)
}
if len(urls) > 0 {
m.URLs = urls
}
}
// post-process - set the image if applicable
// don't set images for related groups to avoid excessive network calls
if !related {
if err := processImageField(ctx, m.FrontImage, c.client, c.globalConfig); err != nil {
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
}
if err := processImageField(ctx, m.BackImage, c.client, c.globalConfig); err != nil {
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
}
}
return m, nil
}
// postScrapeRelatedPerformers runs performer post-processing on each non-nil
// entry of items and then matches it against the stored performers.
// Entries are updated in place through their pointers; nil entries are skipped.
// The first error encountered aborts the loop and is returned.
func (c *postScraper) postScrapeRelatedPerformers(ctx context.Context, items []*models.ScrapedPerformer) error {
	// these performers are attached to another scraped object
	const related = true

	for i := range items {
		performer := items[i]
		if performer == nil {
			continue
		}

		processed, err := c.postScrapePerformer(ctx, *performer, related)
		if err != nil {
			return err
		}

		// postScrapePerformer returns the updated performer by value;
		// write it back through the pointer so the caller's slice sees it
		*performer = processed.(models.ScrapedPerformer)

		err = match.ScrapedPerformer(ctx, c.repository.PerformerFinder, performer, "")
		if err != nil {
			return err
		}
	}

	return nil
}
// postScrapeRelatedMovies post-processes a list of movies attached to another
// scraped object and tries to match each one against the stored groups.
// It modifies the movies in place. Nil entries are skipped rather than
// dereferenced, mirroring postScrapeRelatedPerformers.
// The first error encountered aborts the loop and is returned.
func (c *postScraper) postScrapeRelatedMovies(ctx context.Context, items []*models.ScrapedMovie) error {
	for _, p := range items {
		if p == nil {
			// a nil entry would panic on the dereference below
			continue
		}

		const related = true
		sc, err := c.postScrapeMovie(ctx, *p, related)
		if err != nil {
			return err
		}
		// postScrapeMovie returns the updated movie by value; write it back
		// through the pointer so the caller's slice sees the changes
		newP := sc.(models.ScrapedMovie)
		*p = newP

		matchedID, err := match.ScrapedGroup(ctx, c.repository.GroupFinder, p.StoredID, p.Name)
		if err != nil {
			return err
		}

		// only overwrite the stored ID when a match was found
		if matchedID != nil {
			p.StoredID = matchedID
		}
	}

	return nil
}
// postScrapeRelatedGroups post-processes a list of groups attached to another
// scraped object and tries to match each one against the stored groups.
// It modifies the groups in place. Nil entries are skipped rather than
// dereferenced, mirroring postScrapeRelatedPerformers.
// The first error encountered aborts the loop and is returned.
func (c *postScraper) postScrapeRelatedGroups(ctx context.Context, items []*models.ScrapedGroup) error {
	for _, p := range items {
		if p == nil {
			// a nil entry would panic on the dereference below
			continue
		}

		const related = true
		sc, err := c.postScrapeGroup(ctx, *p, related)
		if err != nil {
			return err
		}
		// postScrapeGroup returns the updated group by value; write it back
		// through the pointer so the caller's slice sees the changes
		newP := sc.(models.ScrapedGroup)
		*p = newP

		matchedID, err := match.ScrapedGroup(ctx, c.repository.GroupFinder, p.StoredID, p.Name)
		if err != nil {
			return err
		}

		// only overwrite the stored ID when a match was found
		if matchedID != nil {
			p.StoredID = matchedID
		}
	}

	return nil
}
// postScrapeStudio post-processes a scraped studio: it resolves and filters
// the studio's tags, optionally fetches its image, and keeps the URL and URLs
// fields in sync. The updated studio is returned by value.
// When related is true the image step is skipped.
func (c *postScraper) postScrapeStudio(ctx context.Context, s models.ScrapedStudio, related bool) (_ ScrapedContent, err error) {
	processedTags, err := postProcessTags(ctx, c.repository.TagFinder, s.Tags)
	if err != nil {
		return nil, err
	}
	s.Tags = c.filterTags(processedTags)

	// related studios skip the image fetch to avoid excessive network calls;
	// an image failure is logged and otherwise ignored
	if !related {
		imgErr := setStudioImage(ctx, c.client, &s, c.globalConfig)
		if imgErr != nil {
			logger.Warnf("Could not set image using URL %s: %s", *s.Image, imgErr.Error())
		}
	}

	// URLs takes precedence when provided; otherwise seed it from the single URL
	switch {
	case len(s.URLs) > 0:
		s.URL = &s.URLs[0]
	case s.URL != nil:
		s.URLs = []string{*s.URL}
	}

	return s, nil
}
// postScrapeRelatedStudio post-processes a studio attached to another scraped
// object and then matches it against the stored studios.
// The studio is updated in place; a nil studio is a no-op.
func (c *postScraper) postScrapeRelatedStudio(ctx context.Context, s *models.ScrapedStudio) error {
	// the studio belongs to another scraped object
	const related = true

	if s != nil {
		processed, err := c.postScrapeStudio(ctx, *s, related)
		if err != nil {
			return err
		}

		// postScrapeStudio returns the updated studio by value; write it back
		*s = processed.(models.ScrapedStudio)

		return match.ScrapedStudio(ctx, c.repository.StudioFinder, s, "")
	}

	return nil
}
func (c *postScraper) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (_ ScrapedContent, err error) {
// set the URL/URLs field // set the URL/URLs field
if scene.URL == nil && len(scene.URLs) > 0 { if scene.URL == nil && len(scene.URLs) > 0 {
scene.URL = &scene.URLs[0] scene.URL = &scene.URLs[0]
@ -241,92 +352,53 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, e
} }
r := c.repository r := c.repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error { tqb := r.TagFinder
pqb := r.PerformerFinder
gqb := r.GroupFinder
tqb := r.TagFinder
sqb := r.StudioFinder
for _, p := range scene.Performers { if err = c.postScrapeRelatedPerformers(ctx, scene.Performers); err != nil {
if p == nil { return nil, err
continue }
}
thisIgnoredTags, err := c.postScrapeScenePerformer(ctx, *p, excludeTagRE) if err = c.postScrapeRelatedMovies(ctx, scene.Movies); err != nil {
if err != nil { return nil, err
return err }
}
if err := match.ScrapedPerformer(ctx, pqb, p, ""); err != nil { if err = c.postScrapeRelatedGroups(ctx, scene.Groups); err != nil {
return err return nil, err
} }
ignoredTags = sliceutil.AppendUniques(ignoredTags, thisIgnoredTags) // HACK - if movies was returned but not groups, add the groups from the movies
// if groups was returned but not movies, add the movies from the groups for backward compatibility
if len(scene.Movies) > 0 && len(scene.Groups) == 0 {
for _, m := range scene.Movies {
g := m.ScrapedGroup()
scene.Groups = append(scene.Groups, &g)
} }
} else if len(scene.Groups) > 0 && len(scene.Movies) == 0 {
for _, p := range scene.Movies { for _, g := range scene.Groups {
matchedID, err := match.ScrapedGroup(ctx, gqb, p.StoredID, p.Name) m := g.ScrapedMovie()
if err != nil { scene.Movies = append(scene.Movies, &m)
return err
}
if matchedID != nil {
p.StoredID = matchedID
}
} }
}
for _, p := range scene.Groups { tags, err := postProcessTags(ctx, tqb, scene.Tags)
matchedID, err := match.ScrapedGroup(ctx, gqb, p.StoredID, p.Name) if err != nil {
if err != nil { return nil, err
return err }
} scene.Tags = c.filterTags(tags)
if matchedID != nil { if err := c.postScrapeRelatedStudio(ctx, scene.Studio); err != nil {
p.StoredID = matchedID return nil, err
}
}
// HACK - if movies was returned but not groups, add the groups from the movies
// if groups was returned but not movies, add the movies from the groups for backward compatibility
if len(scene.Movies) > 0 && len(scene.Groups) == 0 {
for _, m := range scene.Movies {
g := m.ScrapedGroup()
scene.Groups = append(scene.Groups, &g)
}
} else if len(scene.Groups) > 0 && len(scene.Movies) == 0 {
for _, g := range scene.Groups {
m := g.ScrapedMovie()
scene.Movies = append(scene.Movies, &m)
}
}
tags, err := postProcessTags(ctx, tqb, scene.Tags)
if err != nil {
return err
}
scene.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
if scene.Studio != nil {
err := match.ScrapedStudio(ctx, sqb, scene.Studio, "")
if err != nil {
return err
}
}
return nil
}); err != nil {
return nil, nil, err
} }
// post-process - set the image if applicable // post-process - set the image if applicable
if err := setSceneImage(ctx, c.client, &scene, c.globalConfig); err != nil { if err := processImageField(ctx, scene.Image, c.client, c.globalConfig); err != nil {
logger.Warnf("Could not set image using URL %s: %v", *scene.Image, err) logger.Warnf("Could not set image using URL %s: %v", *scene.Image, err)
} }
return scene, ignoredTags, nil return scene, nil
} }
func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { func (c *postScraper) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (_ ScrapedContent, err error) {
// set the URL/URLs field // set the URL/URLs field
if g.URL == nil && len(g.URLs) > 0 { if g.URL == nil && len(g.URLs) > 0 {
g.URL = &g.URLs[0] g.URL = &g.URLs[0]
@ -336,70 +408,65 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery, e
} }
r := c.repository r := c.repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error { tqb := r.TagFinder
pqb := r.PerformerFinder
tqb := r.TagFinder
sqb := r.StudioFinder
for _, p := range g.Performers { if err = c.postScrapeRelatedPerformers(ctx, g.Performers); err != nil {
err := match.ScrapedPerformer(ctx, pqb, p, "") return nil, err
if err != nil {
return err
}
}
tags, err := postProcessTags(ctx, tqb, g.Tags)
if err != nil {
return err
}
g.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
if g.Studio != nil {
err := match.ScrapedStudio(ctx, sqb, g.Studio, "")
if err != nil {
return err
}
}
return nil
}); err != nil {
return nil, nil, err
} }
return g, ignoredTags, nil tags, err := postProcessTags(ctx, tqb, g.Tags)
if err != nil {
return nil, err
}
g.Tags = c.filterTags(tags)
if err := c.postScrapeRelatedStudio(ctx, g.Studio); err != nil {
return nil, err
}
return g, nil
} }
func (c Cache) postScrapeImage(ctx context.Context, image models.ScrapedImage, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { func (c *postScraper) postScrapeImage(ctx context.Context, image models.ScrapedImage) (_ ScrapedContent, err error) {
r := c.repository r := c.repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error { tqb := r.TagFinder
pqb := r.PerformerFinder
tqb := r.TagFinder
sqb := r.StudioFinder
for _, p := range image.Performers { if err = c.postScrapeRelatedPerformers(ctx, image.Performers); err != nil {
if err := match.ScrapedPerformer(ctx, pqb, p, ""); err != nil { return nil, err
return err }
}
}
tags, err := postProcessTags(ctx, tqb, image.Tags) tags, err := postProcessTags(ctx, tqb, image.Tags)
if err != nil {
return nil, err
}
image.Tags = c.filterTags(tags)
if err := c.postScrapeRelatedStudio(ctx, image.Studio); err != nil {
return nil, err
}
return image, nil
}
// postScrapeSingle handles post-processing of a single scraped content item.
// This is a convenience function that includes logging the ignored tags, as opposed to logging them in the caller.
func (c Cache) postScrapeSingle(ctx context.Context, content ScrapedContent) (ret ScrapedContent, err error) {
pp := postScraper{
Cache: c,
excludeTagRE: c.compileExcludeTagPatterns(),
}
if err := c.repository.WithReadTxn(ctx, func(ctx context.Context) error {
ret, err = pp.postScrape(ctx, content)
if err != nil { if err != nil {
return err return err
} }
image.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
if image.Studio != nil {
err := match.ScrapedStudio(ctx, sqb, image.Studio, "")
if err != nil {
return err
}
}
return nil return nil
}); err != nil { }); err != nil {
return nil, nil, err return nil, err
} }
return image, ignoredTags, nil LogIgnoredTags(pp.ignoredTags)
return ret, nil
} }