mirror of
https://github.com/stashapp/stash.git
synced 2025-12-06 08:26:00 +01:00
Refactor scraping to include related object fields (#6266)
* Refactor scraper post-processing and process related objects consistently
* Refactor image processing
* Scrape related studio fields consistently
* Don't set image on related objects
This commit is contained in:
parent
c6ae43c1d6
commit
84e24eb612
3 changed files with 362 additions and 337 deletions
|
|
@ -16,7 +16,6 @@ import (
|
||||||
"github.com/stashapp/stash/pkg/logger"
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
"github.com/stashapp/stash/pkg/match"
|
"github.com/stashapp/stash/pkg/match"
|
||||||
"github.com/stashapp/stash/pkg/models"
|
"github.com/stashapp/stash/pkg/models"
|
||||||
"github.com/stashapp/stash/pkg/sliceutil"
|
|
||||||
"github.com/stashapp/stash/pkg/txn"
|
"github.com/stashapp/stash/pkg/txn"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -262,19 +261,23 @@ func (c Cache) ScrapeName(ctx context.Context, id, query string, ty ScrapeConten
|
||||||
return nil, fmt.Errorf("error while name scraping with scraper %s: %w", id, err)
|
return nil, fmt.Errorf("error while name scraping with scraper %s: %w", id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ignoredRegex := c.compileExcludeTagPatterns()
|
pp := postScraper{
|
||||||
|
Cache: c,
|
||||||
var ignoredTags []string
|
excludeTagRE: c.compileExcludeTagPatterns(),
|
||||||
|
}
|
||||||
|
if err := c.repository.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||||
for i, cc := range content {
|
for i, cc := range content {
|
||||||
var thisIgnoredTags []string
|
content[i], err = pp.postScrape(ctx, cc)
|
||||||
content[i], thisIgnoredTags, err = c.postScrape(ctx, cc, ignoredRegex)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error while post-scraping with scraper %s: %w", id, err)
|
return fmt.Errorf("error while post-scraping with scraper %s: %w", id, err)
|
||||||
}
|
}
|
||||||
ignoredTags = sliceutil.AppendUniques(ignoredTags, thisIgnoredTags)
|
}
|
||||||
|
return nil
|
||||||
|
}); err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
LogIgnoredTags(ignoredTags)
|
LogIgnoredTags(pp.ignoredTags)
|
||||||
|
|
||||||
return content, nil
|
return content, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,88 +37,43 @@ func setPerformerImage(ctx context.Context, client *http.Client, p *models.Scrap
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedScene, globalConfig GlobalConfig) error {
|
func setStudioImage(ctx context.Context, client *http.Client, p *models.ScrapedStudio, globalConfig GlobalConfig) error {
|
||||||
// don't try to get the image if it doesn't appear to be a URL
|
// backwards compatibility: we fetch the image if it's a URL and set it to the first image
|
||||||
if s.Image == nil || !strings.HasPrefix(*s.Image, "http") {
|
// Image is deprecated, so only do this if Images is unset
|
||||||
|
if p.Image == nil || len(p.Images) > 0 {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
img, err := getImage(ctx, *s.Image, client, globalConfig)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
s.Image = img
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
|
|
||||||
// don't try to get the image if it doesn't appear to be a URL
|
// don't try to get the image if it doesn't appear to be a URL
|
||||||
if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
|
if !strings.HasPrefix(*p.Image, "http") {
|
||||||
// nothing to do
|
p.Images = []string{*p.Image}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
img, err := getImage(ctx, *m.FrontImage, client, globalConfig)
|
img, err := getImage(ctx, *p.Image, client, globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
m.FrontImage = img
|
p.Image = img
|
||||||
|
// Image is deprecated. Use images instead
|
||||||
|
p.Images = []string{*img}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func setMovieBackImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
|
func processImageField(ctx context.Context, imageField *string, client *http.Client, globalConfig GlobalConfig) error {
|
||||||
// don't try to get the image if it doesn't appear to be a URL
|
if imageField == nil {
|
||||||
if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
|
|
||||||
// nothing to do
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
img, err := getImage(ctx, *m.BackImage, client, globalConfig)
|
img, err := getImage(ctx, *imageField, client, globalConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
m.BackImage = img
|
*imageField = *img
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func setGroupFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedGroup, globalConfig GlobalConfig) error {
|
|
||||||
// don't try to get the image if it doesn't appear to be a URL
|
|
||||||
if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
|
|
||||||
// nothing to do
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
img, err := getImage(ctx, *m.FrontImage, client, globalConfig)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
m.FrontImage = img
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func setGroupBackImage(ctx context.Context, client *http.Client, m *models.ScrapedGroup, globalConfig GlobalConfig) error {
|
|
||||||
// don't try to get the image if it doesn't appear to be a URL
|
|
||||||
if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
|
|
||||||
// nothing to do
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
img, err := getImage(ctx, *m.BackImage, client, globalConfig)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
m.BackImage = img
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,86 +11,92 @@ import (
|
||||||
"github.com/stashapp/stash/pkg/utils"
|
"github.com/stashapp/stash/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type postScraper struct {
|
||||||
|
Cache
|
||||||
|
excludeTagRE []*regexp.Regexp
|
||||||
|
|
||||||
|
// ignoredTags is a list of tags that were ignored during post-processing
|
||||||
|
ignoredTags []string
|
||||||
|
}
|
||||||
|
|
||||||
// postScrape handles post-processing of scraped content. If the content
|
// postScrape handles post-processing of scraped content. If the content
|
||||||
// requires post-processing, this function fans out to the given content
|
// requires post-processing, this function fans out to the given content
|
||||||
// type and post-processes it.
|
// type and post-processes it.
|
||||||
func (c Cache) postScrape(ctx context.Context, content ScrapedContent, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
// Assumes called within a read transaction.
|
||||||
|
func (c *postScraper) postScrape(ctx context.Context, content ScrapedContent) (_ ScrapedContent, err error) {
|
||||||
|
const related = false
|
||||||
|
|
||||||
// Analyze the concrete type, call the right post-processing function
|
// Analyze the concrete type, call the right post-processing function
|
||||||
switch v := content.(type) {
|
switch v := content.(type) {
|
||||||
case *models.ScrapedPerformer:
|
case *models.ScrapedPerformer:
|
||||||
if v != nil {
|
if v != nil {
|
||||||
return c.postScrapePerformer(ctx, *v, excludeTagRE)
|
return c.postScrapePerformer(ctx, *v, related)
|
||||||
}
|
}
|
||||||
case models.ScrapedPerformer:
|
case models.ScrapedPerformer:
|
||||||
return c.postScrapePerformer(ctx, v, excludeTagRE)
|
return c.postScrapePerformer(ctx, v, related)
|
||||||
case *models.ScrapedScene:
|
case *models.ScrapedScene:
|
||||||
if v != nil {
|
if v != nil {
|
||||||
return c.postScrapeScene(ctx, *v, excludeTagRE)
|
return c.postScrapeScene(ctx, *v)
|
||||||
}
|
}
|
||||||
case models.ScrapedScene:
|
case models.ScrapedScene:
|
||||||
return c.postScrapeScene(ctx, v, excludeTagRE)
|
return c.postScrapeScene(ctx, v)
|
||||||
case *models.ScrapedGallery:
|
case *models.ScrapedGallery:
|
||||||
if v != nil {
|
if v != nil {
|
||||||
return c.postScrapeGallery(ctx, *v, excludeTagRE)
|
return c.postScrapeGallery(ctx, *v)
|
||||||
}
|
}
|
||||||
case models.ScrapedGallery:
|
case models.ScrapedGallery:
|
||||||
return c.postScrapeGallery(ctx, v, excludeTagRE)
|
return c.postScrapeGallery(ctx, v)
|
||||||
case *models.ScrapedImage:
|
case *models.ScrapedImage:
|
||||||
if v != nil {
|
if v != nil {
|
||||||
return c.postScrapeImage(ctx, *v, excludeTagRE)
|
return c.postScrapeImage(ctx, *v)
|
||||||
}
|
}
|
||||||
case models.ScrapedImage:
|
case models.ScrapedImage:
|
||||||
return c.postScrapeImage(ctx, v, excludeTagRE)
|
return c.postScrapeImage(ctx, v)
|
||||||
case *models.ScrapedMovie:
|
case *models.ScrapedMovie:
|
||||||
if v != nil {
|
if v != nil {
|
||||||
return c.postScrapeMovie(ctx, *v, excludeTagRE)
|
return c.postScrapeMovie(ctx, *v, related)
|
||||||
}
|
}
|
||||||
case models.ScrapedMovie:
|
case models.ScrapedMovie:
|
||||||
return c.postScrapeMovie(ctx, v, excludeTagRE)
|
return c.postScrapeMovie(ctx, v, related)
|
||||||
case *models.ScrapedGroup:
|
case *models.ScrapedGroup:
|
||||||
if v != nil {
|
if v != nil {
|
||||||
return c.postScrapeGroup(ctx, *v, excludeTagRE)
|
return c.postScrapeGroup(ctx, *v, related)
|
||||||
}
|
}
|
||||||
case models.ScrapedGroup:
|
case models.ScrapedGroup:
|
||||||
return c.postScrapeGroup(ctx, v, excludeTagRE)
|
return c.postScrapeGroup(ctx, v, related)
|
||||||
}
|
}
|
||||||
|
|
||||||
// If nothing matches, pass the content through
|
// If nothing matches, pass the content through
|
||||||
return content, nil, nil
|
return content, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// postScrapeSingle handles post-processing of a single scraped content item.
|
func (c *postScraper) filterTags(tags []*models.ScrapedTag) []*models.ScrapedTag {
|
||||||
// This is a convenience function that includes logging the ignored tags, as opposed to logging them in the caller.
|
var ret []*models.ScrapedTag
|
||||||
func (c Cache) postScrapeSingle(ctx context.Context, content ScrapedContent) (ScrapedContent, error) {
|
var thisIgnoredTags []string
|
||||||
ret, ignoredTags, err := c.postScrape(ctx, content, c.compileExcludeTagPatterns())
|
ret, thisIgnoredTags = FilterTags(c.excludeTagRE, tags)
|
||||||
if err != nil {
|
c.ignoredTags = sliceutil.AppendUniques(c.ignoredTags, thisIgnoredTags)
|
||||||
return nil, err
|
|
||||||
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
LogIgnoredTags(ignoredTags)
|
func (c *postScraper) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer, related bool) (_ ScrapedContent, err error) {
|
||||||
return ret, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
|
||||||
r := c.repository
|
r := c.repository
|
||||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
|
||||||
tqb := r.TagFinder
|
tqb := r.TagFinder
|
||||||
|
|
||||||
tags, err := postProcessTags(ctx, tqb, p.Tags)
|
tags, err := postProcessTags(ctx, tqb, p.Tags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
p.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
|
||||||
|
|
||||||
return nil
|
p.Tags = c.filterTags(tags)
|
||||||
}); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// post-process - set the image if applicable
|
// post-process - set the image if applicable
|
||||||
|
// don't set image for related performers to avoid excessive network calls
|
||||||
|
if !related {
|
||||||
if err := setPerformerImage(ctx, c.client, &p, c.globalConfig); err != nil {
|
if err := setPerformerImage(ctx, c.client, &p, c.globalConfig); err != nil {
|
||||||
logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error())
|
logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
p.Country = resolveCountryName(p.Country)
|
p.Country = resolveCountryName(p.Country)
|
||||||
|
|
||||||
|
|
@ -119,119 +125,224 @@ func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerforme
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return p, ignoredTags, nil
|
return p, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c Cache) postScrapeMovie(ctx context.Context, m models.ScrapedMovie, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
func (c *postScraper) postScrapeMovie(ctx context.Context, m models.ScrapedMovie, related bool) (_ ScrapedContent, err error) {
|
||||||
r := c.repository
|
r := c.repository
|
||||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
|
||||||
tqb := r.TagFinder
|
tqb := r.TagFinder
|
||||||
tags, err := postProcessTags(ctx, tqb, m.Tags)
|
tags, err := postProcessTags(ctx, tqb, m.Tags)
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
|
||||||
|
|
||||||
if m.Studio != nil {
|
|
||||||
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// populate URL/URLs
|
|
||||||
// if URLs are provided, only use those
|
|
||||||
if len(m.URLs) > 0 {
|
|
||||||
m.URL = &m.URLs[0]
|
|
||||||
} else {
|
|
||||||
urls := []string{}
|
|
||||||
if m.URL != nil {
|
|
||||||
urls = append(urls, *m.URL)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(urls) > 0 {
|
|
||||||
m.URLs = urls
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// post-process - set the image if applicable
|
|
||||||
if err := setMovieFrontImage(ctx, c.client, &m, c.globalConfig); err != nil {
|
|
||||||
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
|
|
||||||
}
|
|
||||||
if err := setMovieBackImage(ctx, c.client, &m, c.globalConfig); err != nil {
|
|
||||||
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return m, ignoredTags, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c Cache) postScrapeGroup(ctx context.Context, m models.ScrapedGroup, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
|
||||||
r := c.repository
|
|
||||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
|
||||||
tqb := r.TagFinder
|
|
||||||
tags, err := postProcessTags(ctx, tqb, m.Tags)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
|
||||||
|
|
||||||
if m.Studio != nil {
|
|
||||||
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// populate URL/URLs
|
|
||||||
// if URLs are provided, only use those
|
|
||||||
if len(m.URLs) > 0 {
|
|
||||||
m.URL = &m.URLs[0]
|
|
||||||
} else {
|
|
||||||
urls := []string{}
|
|
||||||
if m.URL != nil {
|
|
||||||
urls = append(urls, *m.URL)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(urls) > 0 {
|
|
||||||
m.URLs = urls
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// post-process - set the image if applicable
|
|
||||||
if err := setGroupFrontImage(ctx, c.client, &m, c.globalConfig); err != nil {
|
|
||||||
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
|
|
||||||
}
|
|
||||||
if err := setGroupBackImage(ctx, c.client, &m, c.globalConfig); err != nil {
|
|
||||||
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return m, ignoredTags, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPerformer, excludeTagRE []*regexp.Regexp) (ignoredTags []string, err error) {
|
|
||||||
tqb := c.repository.TagFinder
|
|
||||||
|
|
||||||
tags, err := postProcessTags(ctx, tqb, p.Tags)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
p.Tags = tags
|
m.Tags = c.filterTags(tags)
|
||||||
p.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
|
||||||
|
|
||||||
p.Country = resolveCountryName(p.Country)
|
if m.Studio != nil {
|
||||||
|
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
|
||||||
return ignoredTags, nil
|
return nil, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
// populate URL/URLs
|
||||||
|
// if URLs are provided, only use those
|
||||||
|
if len(m.URLs) > 0 {
|
||||||
|
m.URL = &m.URLs[0]
|
||||||
|
} else {
|
||||||
|
urls := []string{}
|
||||||
|
if m.URL != nil {
|
||||||
|
urls = append(urls, *m.URL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(urls) > 0 {
|
||||||
|
m.URLs = urls
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// post-process - set the image if applicable
|
||||||
|
// don't set images for related movies to avoid excessive network calls
|
||||||
|
if !related {
|
||||||
|
if err := processImageField(ctx, m.FrontImage, c.client, c.globalConfig); err != nil {
|
||||||
|
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
|
||||||
|
}
|
||||||
|
if err := processImageField(ctx, m.BackImage, c.client, c.globalConfig); err != nil {
|
||||||
|
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *postScraper) postScrapeGroup(ctx context.Context, m models.ScrapedGroup, related bool) (_ ScrapedContent, err error) {
|
||||||
|
r := c.repository
|
||||||
|
tqb := r.TagFinder
|
||||||
|
tags, err := postProcessTags(ctx, tqb, m.Tags)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
m.Tags = c.filterTags(tags)
|
||||||
|
|
||||||
|
if m.Studio != nil {
|
||||||
|
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// populate URL/URLs
|
||||||
|
// if URLs are provided, only use those
|
||||||
|
if len(m.URLs) > 0 {
|
||||||
|
m.URL = &m.URLs[0]
|
||||||
|
} else {
|
||||||
|
urls := []string{}
|
||||||
|
if m.URL != nil {
|
||||||
|
urls = append(urls, *m.URL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(urls) > 0 {
|
||||||
|
m.URLs = urls
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// post-process - set the image if applicable
|
||||||
|
// don't set images for related groups to avoid excessive network calls
|
||||||
|
if !related {
|
||||||
|
if err := processImageField(ctx, m.FrontImage, c.client, c.globalConfig); err != nil {
|
||||||
|
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
|
||||||
|
}
|
||||||
|
if err := processImageField(ctx, m.BackImage, c.client, c.globalConfig); err != nil {
|
||||||
|
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// postScrapeRelatedPerformers post-processes a list of performers.
|
||||||
|
// It modifies the performers in place.
|
||||||
|
func (c *postScraper) postScrapeRelatedPerformers(ctx context.Context, items []*models.ScrapedPerformer) error {
|
||||||
|
for _, p := range items {
|
||||||
|
if p == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
const related = true
|
||||||
|
sc, err := c.postScrapePerformer(ctx, *p, related)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newP := sc.(models.ScrapedPerformer)
|
||||||
|
*p = newP
|
||||||
|
|
||||||
|
if err := match.ScrapedPerformer(ctx, c.repository.PerformerFinder, p, ""); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *postScraper) postScrapeRelatedMovies(ctx context.Context, items []*models.ScrapedMovie) error {
|
||||||
|
for _, p := range items {
|
||||||
|
const related = true
|
||||||
|
sc, err := c.postScrapeMovie(ctx, *p, related)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newP := sc.(models.ScrapedMovie)
|
||||||
|
*p = newP
|
||||||
|
|
||||||
|
matchedID, err := match.ScrapedGroup(ctx, c.repository.GroupFinder, p.StoredID, p.Name)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if matchedID != nil {
|
||||||
|
p.StoredID = matchedID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *postScraper) postScrapeRelatedGroups(ctx context.Context, items []*models.ScrapedGroup) error {
|
||||||
|
for _, p := range items {
|
||||||
|
const related = true
|
||||||
|
sc, err := c.postScrapeGroup(ctx, *p, related)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newP := sc.(models.ScrapedGroup)
|
||||||
|
*p = newP
|
||||||
|
|
||||||
|
matchedID, err := match.ScrapedGroup(ctx, c.repository.GroupFinder, p.StoredID, p.Name)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if matchedID != nil {
|
||||||
|
p.StoredID = matchedID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *postScraper) postScrapeStudio(ctx context.Context, s models.ScrapedStudio, related bool) (_ ScrapedContent, err error) {
|
||||||
|
r := c.repository
|
||||||
|
tqb := r.TagFinder
|
||||||
|
|
||||||
|
tags, err := postProcessTags(ctx, tqb, s.Tags)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.Tags = c.filterTags(tags)
|
||||||
|
|
||||||
|
// post-process - set the image if applicable
|
||||||
|
// don't set image for related studios to avoid excessive network calls
|
||||||
|
if !related {
|
||||||
|
if err := setStudioImage(ctx, c.client, &s, c.globalConfig); err != nil {
|
||||||
|
logger.Warnf("Could not set image using URL %s: %s", *s.Image, err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// populate URL/URLs
|
||||||
|
// if URLs are provided, only use those
|
||||||
|
if len(s.URLs) > 0 {
|
||||||
|
s.URL = &s.URLs[0]
|
||||||
|
} else {
|
||||||
|
urls := []string{}
|
||||||
|
if s.URL != nil {
|
||||||
|
urls = append(urls, *s.URL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(urls) > 0 {
|
||||||
|
s.URLs = urls
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *postScraper) postScrapeRelatedStudio(ctx context.Context, s *models.ScrapedStudio) error {
|
||||||
|
if s == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
const related = true
|
||||||
|
sc, err := c.postScrapeStudio(ctx, *s, related)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newS := sc.(models.ScrapedStudio)
|
||||||
|
*s = newS
|
||||||
|
|
||||||
|
if err = match.ScrapedStudio(ctx, c.repository.StudioFinder, s, ""); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *postScraper) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (_ ScrapedContent, err error) {
|
||||||
// set the URL/URLs field
|
// set the URL/URLs field
|
||||||
if scene.URL == nil && len(scene.URLs) > 0 {
|
if scene.URL == nil && len(scene.URLs) > 0 {
|
||||||
scene.URL = &scene.URLs[0]
|
scene.URL = &scene.URLs[0]
|
||||||
|
|
@ -241,49 +352,18 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, e
|
||||||
}
|
}
|
||||||
|
|
||||||
r := c.repository
|
r := c.repository
|
||||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
|
||||||
pqb := r.PerformerFinder
|
|
||||||
gqb := r.GroupFinder
|
|
||||||
tqb := r.TagFinder
|
tqb := r.TagFinder
|
||||||
sqb := r.StudioFinder
|
|
||||||
|
|
||||||
for _, p := range scene.Performers {
|
if err = c.postScrapeRelatedPerformers(ctx, scene.Performers); err != nil {
|
||||||
if p == nil {
|
return nil, err
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
thisIgnoredTags, err := c.postScrapeScenePerformer(ctx, *p, excludeTagRE)
|
if err = c.postScrapeRelatedMovies(ctx, scene.Movies); err != nil {
|
||||||
if err != nil {
|
return nil, err
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := match.ScrapedPerformer(ctx, pqb, p, ""); err != nil {
|
if err = c.postScrapeRelatedGroups(ctx, scene.Groups); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
|
||||||
|
|
||||||
ignoredTags = sliceutil.AppendUniques(ignoredTags, thisIgnoredTags)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, p := range scene.Movies {
|
|
||||||
matchedID, err := match.ScrapedGroup(ctx, gqb, p.StoredID, p.Name)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if matchedID != nil {
|
|
||||||
p.StoredID = matchedID
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, p := range scene.Groups {
|
|
||||||
matchedID, err := match.ScrapedGroup(ctx, gqb, p.StoredID, p.Name)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if matchedID != nil {
|
|
||||||
p.StoredID = matchedID
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// HACK - if movies was returned but not groups, add the groups from the movies
|
// HACK - if movies was returned but not groups, add the groups from the movies
|
||||||
|
|
@ -302,31 +382,23 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, e
|
||||||
|
|
||||||
tags, err := postProcessTags(ctx, tqb, scene.Tags)
|
tags, err := postProcessTags(ctx, tqb, scene.Tags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
scene.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
scene.Tags = c.filterTags(tags)
|
||||||
|
|
||||||
if scene.Studio != nil {
|
if err := c.postScrapeRelatedStudio(ctx, scene.Studio); err != nil {
|
||||||
err := match.ScrapedStudio(ctx, sqb, scene.Studio, "")
|
return nil, err
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// post-process - set the image if applicable
|
// post-process - set the image if applicable
|
||||||
if err := setSceneImage(ctx, c.client, &scene, c.globalConfig); err != nil {
|
if err := processImageField(ctx, scene.Image, c.client, c.globalConfig); err != nil {
|
||||||
logger.Warnf("Could not set image using URL %s: %v", *scene.Image, err)
|
logger.Warnf("Could not set image using URL %s: %v", *scene.Image, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return scene, ignoredTags, nil
|
return scene, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
func (c *postScraper) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (_ ScrapedContent, err error) {
|
||||||
// set the URL/URLs field
|
// set the URL/URLs field
|
||||||
if g.URL == nil && len(g.URLs) > 0 {
|
if g.URL == nil && len(g.URLs) > 0 {
|
||||||
g.URL = &g.URLs[0]
|
g.URL = &g.URLs[0]
|
||||||
|
|
@ -336,70 +408,65 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery, e
|
||||||
}
|
}
|
||||||
|
|
||||||
r := c.repository
|
r := c.repository
|
||||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
|
||||||
pqb := r.PerformerFinder
|
|
||||||
tqb := r.TagFinder
|
tqb := r.TagFinder
|
||||||
sqb := r.StudioFinder
|
|
||||||
|
|
||||||
for _, p := range g.Performers {
|
if err = c.postScrapeRelatedPerformers(ctx, g.Performers); err != nil {
|
||||||
err := match.ScrapedPerformer(ctx, pqb, p, "")
|
return nil, err
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tags, err := postProcessTags(ctx, tqb, g.Tags)
|
tags, err := postProcessTags(ctx, tqb, g.Tags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
g.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
g.Tags = c.filterTags(tags)
|
||||||
|
|
||||||
if g.Studio != nil {
|
if err := c.postScrapeRelatedStudio(ctx, g.Studio); err != nil {
|
||||||
err := match.ScrapedStudio(ctx, sqb, g.Studio, "")
|
return nil, err
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return g, nil
|
||||||
}); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return g, ignoredTags, nil
|
func (c *postScraper) postScrapeImage(ctx context.Context, image models.ScrapedImage) (_ ScrapedContent, err error) {
|
||||||
}
|
|
||||||
|
|
||||||
func (c Cache) postScrapeImage(ctx context.Context, image models.ScrapedImage, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
|
||||||
r := c.repository
|
r := c.repository
|
||||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
|
||||||
pqb := r.PerformerFinder
|
|
||||||
tqb := r.TagFinder
|
tqb := r.TagFinder
|
||||||
sqb := r.StudioFinder
|
|
||||||
|
|
||||||
for _, p := range image.Performers {
|
if err = c.postScrapeRelatedPerformers(ctx, image.Performers); err != nil {
|
||||||
if err := match.ScrapedPerformer(ctx, pqb, p, ""); err != nil {
|
return nil, err
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tags, err := postProcessTags(ctx, tqb, image.Tags)
|
tags, err := postProcessTags(ctx, tqb, image.Tags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
image.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
image.Tags = c.filterTags(tags)
|
||||||
|
|
||||||
if image.Studio != nil {
|
if err := c.postScrapeRelatedStudio(ctx, image.Studio); err != nil {
|
||||||
err := match.ScrapedStudio(ctx, sqb, image.Studio, "")
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return image, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// postScrapeSingle handles post-processing of a single scraped content item.
|
||||||
|
// This is a convenience function that includes logging the ignored tags, as opposed to logging them in the caller.
|
||||||
|
func (c Cache) postScrapeSingle(ctx context.Context, content ScrapedContent) (ret ScrapedContent, err error) {
|
||||||
|
pp := postScraper{
|
||||||
|
Cache: c,
|
||||||
|
excludeTagRE: c.compileExcludeTagPatterns(),
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := c.repository.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||||
|
ret, err = pp.postScrape(ctx, content)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
return nil, nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return image, ignoredTags, nil
|
LogIgnoredTags(pp.ignoredTags)
|
||||||
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue