From 84e24eb6124623620749feb5f309095c815864a7 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Tue, 2 Dec 2025 12:49:44 +1100 Subject: [PATCH] Refactor scraping to include related object fields (#6266) * Refactor scraper post-processing and process related objects consistently * Refactor image processing * Scrape related studio fields consistently * Don't set image on related objects --- pkg/scraper/cache.go | 25 +- pkg/scraper/image.go | 81 ++--- pkg/scraper/postprocessing.go | 593 +++++++++++++++++++--------------- 3 files changed, 362 insertions(+), 337 deletions(-) diff --git a/pkg/scraper/cache.go b/pkg/scraper/cache.go index d2c3bd5d9..5cc51ac54 100644 --- a/pkg/scraper/cache.go +++ b/pkg/scraper/cache.go @@ -16,7 +16,6 @@ import ( "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/match" "github.com/stashapp/stash/pkg/models" - "github.com/stashapp/stash/pkg/sliceutil" "github.com/stashapp/stash/pkg/txn" ) @@ -262,19 +261,23 @@ func (c Cache) ScrapeName(ctx context.Context, id, query string, ty ScrapeConten return nil, fmt.Errorf("error while name scraping with scraper %s: %w", id, err) } - ignoredRegex := c.compileExcludeTagPatterns() - - var ignoredTags []string - for i, cc := range content { - var thisIgnoredTags []string - content[i], thisIgnoredTags, err = c.postScrape(ctx, cc, ignoredRegex) - if err != nil { - return nil, fmt.Errorf("error while post-scraping with scraper %s: %w", id, err) + pp := postScraper{ + Cache: c, + excludeTagRE: c.compileExcludeTagPatterns(), + } + if err := c.repository.WithReadTxn(ctx, func(ctx context.Context) error { + for i, cc := range content { + content[i], err = pp.postScrape(ctx, cc) + if err != nil { + return fmt.Errorf("error while post-scraping with scraper %s: %w", id, err) + } } - ignoredTags = sliceutil.AppendUniques(ignoredTags, thisIgnoredTags) + return nil + }); err != nil { + return nil, err } - LogIgnoredTags(ignoredTags) + LogIgnoredTags(pp.ignoredTags) return content, nil } diff --git a/pkg/scraper/image.go b/pkg/scraper/image.go index 93ed7a037..87f114668 100644 --- a/pkg/scraper/image.go +++ b/pkg/scraper/image.go @@ -37,88 +37,43 @@ func setPerformerImage(ctx context.Context, client *http.Client, p *models.Scrap return nil } -func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedScene, globalConfig GlobalConfig) error { - // don't try to get the image if it doesn't appear to be a URL - if s.Image == nil || !strings.HasPrefix(*s.Image, "http") { +func setStudioImage(ctx context.Context, client *http.Client, p *models.ScrapedStudio, globalConfig GlobalConfig) error { + // backwards compatibility: we fetch the image if it's a URL and set it to the first image + // Image is deprecated, so only do this if Images is unset + if p.Image == nil || len(p.Images) > 0 { // nothing to do return nil } - img, err := getImage(ctx, *s.Image, client, globalConfig) + // don't try to get the image if it doesn't appear to be a URL + if !strings.HasPrefix(*p.Image, "http") { + p.Images = []string{*p.Image} + return nil + } + + img, err := getImage(ctx, *p.Image, client, globalConfig) if err != nil { return err } - s.Image = img + p.Image = img + // Image is deprecated. Use images instead + p.Images = []string{*img} return nil } -func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error { - // don't try to get the image if it doesn't appear to be a URL - if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") { - // nothing to do +func processImageField(ctx context.Context, imageField *string, client *http.Client, globalConfig GlobalConfig) error { + if imageField == nil { return nil } - img, err := getImage(ctx, *m.FrontImage, client, globalConfig) + img, err := getImage(ctx, *imageField, client, globalConfig) if err != nil { return err } - m.FrontImage = img - - return nil -} - -func setMovieBackImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error { - // don't try to get the image if it doesn't appear to be a URL - if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") { - // nothing to do - return nil - } - - img, err := getImage(ctx, *m.BackImage, client, globalConfig) - if err != nil { - return err - } - - m.BackImage = img - - return nil -} - -func setGroupFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedGroup, globalConfig GlobalConfig) error { - // don't try to get the image if it doesn't appear to be a URL - if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") { - // nothing to do - return nil - } - - img, err := getImage(ctx, *m.FrontImage, client, globalConfig) - if err != nil { - return err - } - - m.FrontImage = img - - return nil -} - -func setGroupBackImage(ctx context.Context, client *http.Client, m *models.ScrapedGroup, globalConfig GlobalConfig) error { - // don't try to get the image if it doesn't appear to be a URL - if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") { - // nothing to do - return nil - } - - img, err := getImage(ctx, *m.BackImage, client, globalConfig) - if err != nil { - return err - } - - m.BackImage = img - + *imageField = *img return nil } diff --git a/pkg/scraper/postprocessing.go b/pkg/scraper/postprocessing.go index 62aa53c72..c2653743a 100644 --- a/pkg/scraper/postprocessing.go +++ b/pkg/scraper/postprocessing.go @@ -11,85 +11,91 @@ import ( "github.com/stashapp/stash/pkg/utils" ) +type postScraper struct { + Cache + excludeTagRE []*regexp.Regexp + + // ignoredTags is a list of tags that were ignored during post-processing + ignoredTags []string +} + // postScrape handles post-processing of scraped content. If the content // requires post-processing, this function fans out to the given content // type and post-processes it. -func (c Cache) postScrape(ctx context.Context, content ScrapedContent, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { +// Assumes called within a read transaction. +func (c *postScraper) postScrape(ctx context.Context, content ScrapedContent) (_ ScrapedContent, err error) { + const related = false + // Analyze the concrete type, call the right post-processing function switch v := content.(type) { case *models.ScrapedPerformer: if v != nil { - return c.postScrapePerformer(ctx, *v, excludeTagRE) + return c.postScrapePerformer(ctx, *v, related) } case models.ScrapedPerformer: - return c.postScrapePerformer(ctx, v, excludeTagRE) + return c.postScrapePerformer(ctx, v, related) case *models.ScrapedScene: if v != nil { - return c.postScrapeScene(ctx, *v, excludeTagRE) + return c.postScrapeScene(ctx, *v) } case models.ScrapedScene: - return c.postScrapeScene(ctx, v, excludeTagRE) + return c.postScrapeScene(ctx, v) case *models.ScrapedGallery: if v != nil { - return c.postScrapeGallery(ctx, *v, excludeTagRE) + return c.postScrapeGallery(ctx, *v) } case models.ScrapedGallery: - return c.postScrapeGallery(ctx, v, excludeTagRE) + return c.postScrapeGallery(ctx, v) case *models.ScrapedImage: if v != nil { - return c.postScrapeImage(ctx, *v, excludeTagRE) + return c.postScrapeImage(ctx, *v) } case models.ScrapedImage: - return c.postScrapeImage(ctx, v, excludeTagRE) + return c.postScrapeImage(ctx, v) case *models.ScrapedMovie: if v != nil { - return c.postScrapeMovie(ctx, *v, excludeTagRE) + return c.postScrapeMovie(ctx, *v, related) } case models.ScrapedMovie: - return c.postScrapeMovie(ctx, v, excludeTagRE) + return c.postScrapeMovie(ctx, v, related) case *models.ScrapedGroup: if v != nil { - return c.postScrapeGroup(ctx, *v, excludeTagRE) + return c.postScrapeGroup(ctx, *v, related) } case models.ScrapedGroup: - return c.postScrapeGroup(ctx, v, excludeTagRE) + return c.postScrapeGroup(ctx, v, related) } // If nothing matches, pass the content through - return content, nil, nil + return content, nil } -// postScrapeSingle handles post-processing of a single scraped content item. -// This is a convenience function that includes logging the ignored tags, as opposed to logging them in the caller. -func (c Cache) postScrapeSingle(ctx context.Context, content ScrapedContent) (ScrapedContent, error) { - ret, ignoredTags, err := c.postScrape(ctx, content, c.compileExcludeTagPatterns()) +func (c *postScraper) filterTags(tags []*models.ScrapedTag) []*models.ScrapedTag { + var ret []*models.ScrapedTag + var thisIgnoredTags []string + ret, thisIgnoredTags = FilterTags(c.excludeTagRE, tags) + c.ignoredTags = sliceutil.AppendUniques(c.ignoredTags, thisIgnoredTags) + + return ret +} + +func (c *postScraper) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer, related bool) (_ ScrapedContent, err error) { + r := c.repository + tqb := r.TagFinder + + tags, err := postProcessTags(ctx, tqb, p.Tags) if err != nil { return nil, err } - LogIgnoredTags(ignoredTags) - return ret, nil -} - -func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { - r := c.repository - if err := r.WithReadTxn(ctx, func(ctx context.Context) error { - tqb := r.TagFinder - - tags, err := postProcessTags(ctx, tqb, p.Tags) - if err != nil { - return err - } - p.Tags, ignoredTags = FilterTags(excludeTagRE, tags) - - return nil - }); err != nil { - return nil, nil, err - } + p.Tags = c.filterTags(tags) // post-process - set the image if applicable - if err := setPerformerImage(ctx, c.client, &p, c.globalConfig); err != nil { - logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error()) + // don't set image for related performers to avoid excessive network calls + if !related { + if err := setPerformerImage(ctx, c.client, &p, c.globalConfig); err != nil { + logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error()) + } } p.Country = resolveCountryName(p.Country) @@ -119,119 +125,224 @@ func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerforme } } - return p, ignoredTags, nil + return p, nil } -func (c Cache) postScrapeMovie(ctx context.Context, m models.ScrapedMovie, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { +func (c *postScraper) postScrapeMovie(ctx context.Context, m models.ScrapedMovie, related bool) (_ ScrapedContent, err error) { r := c.repository - if err := r.WithReadTxn(ctx, func(ctx context.Context) error { - tqb := r.TagFinder - tags, err := postProcessTags(ctx, tqb, m.Tags) - if err != nil { - return err - } - m.Tags, ignoredTags = FilterTags(excludeTagRE, tags) - - if m.Studio != nil { - if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil { - return err - } - } - - return nil - }); err != nil { - return nil, nil, err - } - - // populate URL/URLs - // if URLs are provided, only use those - if len(m.URLs) > 0 { - m.URL = &m.URLs[0] - } else { - urls := []string{} - if m.URL != nil { - urls = append(urls, *m.URL) - } - - if len(urls) > 0 { - m.URLs = urls - } - } - - // post-process - set the image if applicable - if err := setMovieFrontImage(ctx, c.client, &m, c.globalConfig); err != nil { - logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err) - } - if err := setMovieBackImage(ctx, c.client, &m, c.globalConfig); err != nil { - logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err) - } - - return m, ignoredTags, nil -} - -func (c Cache) postScrapeGroup(ctx context.Context, m models.ScrapedGroup, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { - r := c.repository - if err := r.WithReadTxn(ctx, func(ctx context.Context) error { - tqb := r.TagFinder - tags, err := postProcessTags(ctx, tqb, m.Tags) - if err != nil { - return err - } - m.Tags, ignoredTags = FilterTags(excludeTagRE, tags) - - if m.Studio != nil { - if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil { - return err - } - } - - return nil - }); err != nil { - return nil, nil, err - } - - // populate URL/URLs - // if URLs are provided, only use those - if len(m.URLs) > 0 { - m.URL = &m.URLs[0] - } else { - urls := []string{} - if m.URL != nil { - urls = append(urls, *m.URL) - } - - if len(urls) > 0 { - m.URLs = urls - } - } - - // post-process - set the image if applicable - if err := setGroupFrontImage(ctx, c.client, &m, c.globalConfig); err != nil { - logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err) - } - if err := setGroupBackImage(ctx, c.client, &m, c.globalConfig); err != nil { - logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err) - } - - return m, ignoredTags, nil -} - -func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPerformer, excludeTagRE []*regexp.Regexp) (ignoredTags []string, err error) { - tqb := c.repository.TagFinder - - tags, err := postProcessTags(ctx, tqb, p.Tags) + tqb := r.TagFinder + tags, err := postProcessTags(ctx, tqb, m.Tags) if err != nil { return nil, err } - p.Tags = tags - p.Tags, ignoredTags = FilterTags(excludeTagRE, tags) + m.Tags = c.filterTags(tags) - p.Country = resolveCountryName(p.Country) + if m.Studio != nil { + if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil { + return nil, err + } + } - return ignoredTags, nil + // populate URL/URLs + // if URLs are provided, only use those + if len(m.URLs) > 0 { + m.URL = &m.URLs[0] + } else { + urls := []string{} + if m.URL != nil { + urls = append(urls, *m.URL) + } + + if len(urls) > 0 { + m.URLs = urls + } + } + + // post-process - set the image if applicable + // don't set images for related movies to avoid excessive network calls + if !related { + if err := processImageField(ctx, m.FrontImage, c.client, c.globalConfig); err != nil { + logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err) + } + if err := processImageField(ctx, m.BackImage, c.client, c.globalConfig); err != nil { + logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err) + } + } + + return m, nil } -func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { +func (c *postScraper) postScrapeGroup(ctx context.Context, m models.ScrapedGroup, related bool) (_ ScrapedContent, err error) { + r := c.repository + tqb := r.TagFinder + tags, err := postProcessTags(ctx, tqb, m.Tags) + if err != nil { + return nil, err + } + m.Tags = c.filterTags(tags) + + if m.Studio != nil { + if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil { + return nil, err + } + } + + // populate URL/URLs + // if URLs are provided, only use those + if len(m.URLs) > 0 { + m.URL = &m.URLs[0] + } else { + urls := []string{} + if m.URL != nil { + urls = append(urls, *m.URL) + } + + if len(urls) > 0 { + m.URLs = urls + } + } + + // post-process - set the image if applicable + // don't set images for related groups to avoid excessive network calls + if !related { + if err := processImageField(ctx, m.FrontImage, c.client, c.globalConfig); err != nil { + logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err) + } + if err := processImageField(ctx, m.BackImage, c.client, c.globalConfig); err != nil { + logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err) + } + } + + return m, nil +} + +// postScrapeRelatedPerformers post-processes a list of performers. +// It modifies the performers in place. +func (c *postScraper) postScrapeRelatedPerformers(ctx context.Context, items []*models.ScrapedPerformer) error { + for _, p := range items { + if p == nil { + continue + } + + const related = true + sc, err := c.postScrapePerformer(ctx, *p, related) + if err != nil { + return err + } + newP := sc.(models.ScrapedPerformer) + *p = newP + + if err := match.ScrapedPerformer(ctx, c.repository.PerformerFinder, p, ""); err != nil { + return err + } + } + return nil +} + +func (c *postScraper) postScrapeRelatedMovies(ctx context.Context, items []*models.ScrapedMovie) error { + for _, p := range items { + const related = true + sc, err := c.postScrapeMovie(ctx, *p, related) + if err != nil { + return err + } + newP := sc.(models.ScrapedMovie) + *p = newP + + matchedID, err := match.ScrapedGroup(ctx, c.repository.GroupFinder, p.StoredID, p.Name) + if err != nil { + return err + } + + if matchedID != nil { + p.StoredID = matchedID + } + } + + return nil +} + +func (c *postScraper) postScrapeRelatedGroups(ctx context.Context, items []*models.ScrapedGroup) error { + for _, p := range items { + const related = true + sc, err := c.postScrapeGroup(ctx, *p, related) + if err != nil { + return err + } + newP := sc.(models.ScrapedGroup) + *p = newP + + matchedID, err := match.ScrapedGroup(ctx, c.repository.GroupFinder, p.StoredID, p.Name) + if err != nil { + return err + } + + if matchedID != nil { + p.StoredID = matchedID + } + } + + return nil +} + +func (c *postScraper) postScrapeStudio(ctx context.Context, s models.ScrapedStudio, related bool) (_ ScrapedContent, err error) { + r := c.repository + tqb := r.TagFinder + + tags, err := postProcessTags(ctx, tqb, s.Tags) + if err != nil { + return nil, err + } + + s.Tags = c.filterTags(tags) + + // post-process - set the image if applicable + // don't set image for related studios to avoid excessive network calls + if !related { + if err := setStudioImage(ctx, c.client, &s, c.globalConfig); err != nil { + logger.Warnf("Could not set image using URL %s: %s", *s.Image, err.Error()) + } + } + + // populate URL/URLs + // if URLs are provided, only use those + if len(s.URLs) > 0 { + s.URL = &s.URLs[0] + } else { + urls := []string{} + if s.URL != nil { + urls = append(urls, *s.URL) + } + + if len(urls) > 0 { + s.URLs = urls + } + } + + return s, nil +} + +func (c *postScraper) postScrapeRelatedStudio(ctx context.Context, s *models.ScrapedStudio) error { + if s == nil { + return nil + } + + const related = true + sc, err := c.postScrapeStudio(ctx, *s, related) + if err != nil { + return err + } + newS := sc.(models.ScrapedStudio) + *s = newS + + if err = match.ScrapedStudio(ctx, c.repository.StudioFinder, s, ""); err != nil { + return err + } + + return nil +} + +func (c *postScraper) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (_ ScrapedContent, err error) { // set the URL/URLs field if scene.URL == nil && len(scene.URLs) > 0 { scene.URL = &scene.URLs[0] @@ -241,92 +352,53 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, e } r := c.repository - if err := r.WithReadTxn(ctx, func(ctx context.Context) error { - pqb := r.PerformerFinder - gqb := r.GroupFinder - tqb := r.TagFinder - sqb := r.StudioFinder + tqb := r.TagFinder - for _, p := range scene.Performers { - if p == nil { - continue - } + if err = c.postScrapeRelatedPerformers(ctx, scene.Performers); err != nil { + return nil, err + } - thisIgnoredTags, err := c.postScrapeScenePerformer(ctx, *p, excludeTagRE) - if err != nil { - return err - } + if err = c.postScrapeRelatedMovies(ctx, scene.Movies); err != nil { + return nil, err + } - if err := match.ScrapedPerformer(ctx, pqb, p, ""); err != nil { - return err - } + if err = c.postScrapeRelatedGroups(ctx, scene.Groups); err != nil { + return nil, err + } - ignoredTags = sliceutil.AppendUniques(ignoredTags, thisIgnoredTags) + // HACK - if movies was returned but not groups, add the groups from the movies + // if groups was returned but not movies, add the movies from the groups for backward compatibility + if len(scene.Movies) > 0 && len(scene.Groups) == 0 { + for _, m := range scene.Movies { + g := m.ScrapedGroup() + scene.Groups = append(scene.Groups, &g) } - - for _, p := range scene.Movies { - matchedID, err := match.ScrapedGroup(ctx, gqb, p.StoredID, p.Name) - if err != nil { - return err - } - - if matchedID != nil { - p.StoredID = matchedID - } + } else if len(scene.Groups) > 0 && len(scene.Movies) == 0 { + for _, g := range scene.Groups { + m := g.ScrapedMovie() + scene.Movies = append(scene.Movies, &m) } + } - for _, p := range scene.Groups { - matchedID, err := match.ScrapedGroup(ctx, gqb, p.StoredID, p.Name) - if err != nil { - return err - } + tags, err := postProcessTags(ctx, tqb, scene.Tags) + if err != nil { + return nil, err + } + scene.Tags = c.filterTags(tags) - if matchedID != nil { - p.StoredID = matchedID - } - } - - // HACK - if movies was returned but not groups, add the groups from the movies - // if groups was returned but not movies, add the movies from the groups for backward compatibility - if len(scene.Movies) > 0 && len(scene.Groups) == 0 { - for _, m := range scene.Movies { - g := m.ScrapedGroup() - scene.Groups = append(scene.Groups, &g) - } - } else if len(scene.Groups) > 0 && len(scene.Movies) == 0 { - for _, g := range scene.Groups { - m := g.ScrapedMovie() - scene.Movies = append(scene.Movies, &m) - } - } - - tags, err := postProcessTags(ctx, tqb, scene.Tags) - if err != nil { - return err - } - scene.Tags, ignoredTags = FilterTags(excludeTagRE, tags) - - if scene.Studio != nil { - err := match.ScrapedStudio(ctx, sqb, scene.Studio, "") - if err != nil { - return err - } - } - - return nil - }); err != nil { - return nil, nil, err + if err := c.postScrapeRelatedStudio(ctx, scene.Studio); err != nil { + return nil, err } // post-process - set the image if applicable - if err := setSceneImage(ctx, c.client, &scene, c.globalConfig); err != nil { + if err := processImageField(ctx, scene.Image, c.client, c.globalConfig); err != nil { logger.Warnf("Could not set image using URL %s: %v", *scene.Image, err) } - return scene, ignoredTags, nil + return scene, nil } -func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { +func (c *postScraper) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (_ ScrapedContent, err error) { // set the URL/URLs field if g.URL == nil && len(g.URLs) > 0 { g.URL = &g.URLs[0] @@ -336,70 +408,65 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery, e } r := c.repository - if err := r.WithReadTxn(ctx, func(ctx context.Context) error { - pqb := r.PerformerFinder - tqb := r.TagFinder - sqb := r.StudioFinder + tqb := r.TagFinder - for _, p := range g.Performers { - err := match.ScrapedPerformer(ctx, pqb, p, "") - if err != nil { - return err - } - } - - tags, err := postProcessTags(ctx, tqb, g.Tags) - if err != nil { - return err - } - g.Tags, ignoredTags = FilterTags(excludeTagRE, tags) - - if g.Studio != nil { - err := match.ScrapedStudio(ctx, sqb, g.Studio, "") - if err != nil { - return err - } - } - - return nil - }); err != nil { - return nil, nil, err + if err = c.postScrapeRelatedPerformers(ctx, g.Performers); err != nil { + return nil, err } - return g, ignoredTags, nil + tags, err := postProcessTags(ctx, tqb, g.Tags) + if err != nil { + return nil, err + } + g.Tags = c.filterTags(tags) + + if err := c.postScrapeRelatedStudio(ctx, g.Studio); err != nil { + return nil, err + } + + return g, nil } -func (c Cache) postScrapeImage(ctx context.Context, image models.ScrapedImage, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) { +func (c *postScraper) postScrapeImage(ctx context.Context, image models.ScrapedImage) (_ ScrapedContent, err error) { r := c.repository - if err := r.WithReadTxn(ctx, func(ctx context.Context) error { - pqb := r.PerformerFinder - tqb := r.TagFinder - sqb := r.StudioFinder + tqb := r.TagFinder - for _, p := range image.Performers { - if err := match.ScrapedPerformer(ctx, pqb, p, ""); err != nil { - return err - } - } + if err = c.postScrapeRelatedPerformers(ctx, image.Performers); err != nil { + return nil, err + } - tags, err := postProcessTags(ctx, tqb, image.Tags) + tags, err := postProcessTags(ctx, tqb, image.Tags) + if err != nil { + return nil, err + } + + image.Tags = c.filterTags(tags) + + if err := c.postScrapeRelatedStudio(ctx, image.Studio); err != nil { + return nil, err + } + + return image, nil +} + +// postScrapeSingle handles post-processing of a single scraped content item. +// This is a convenience function that includes logging the ignored tags, as opposed to logging them in the caller. +func (c Cache) postScrapeSingle(ctx context.Context, content ScrapedContent) (ret ScrapedContent, err error) { + pp := postScraper{ + Cache: c, + excludeTagRE: c.compileExcludeTagPatterns(), + } + + if err := c.repository.WithReadTxn(ctx, func(ctx context.Context) error { + ret, err = pp.postScrape(ctx, content) if err != nil { return err } - - image.Tags, ignoredTags = FilterTags(excludeTagRE, tags) - - if image.Studio != nil { - err := match.ScrapedStudio(ctx, sqb, image.Studio, "") - if err != nil { - return err - } - } - return nil }); err != nil { - return nil, nil, err + return nil, err } - return image, ignoredTags, nil + LogIgnoredTags(pp.ignoredTags) + return ret, nil }