mirror of
https://github.com/stashapp/stash.git
synced 2025-12-06 08:26:00 +01:00
Refactor scraping to include related object fields (#6266)
* Refactor scraper post-processing and process related objects consistently
* Refactor image processing
* Scrape related studio fields consistently
* Don't set image on related objects
This commit is contained in:
parent
c6ae43c1d6
commit
84e24eb612
3 changed files with 362 additions and 337 deletions
|
|
@ -16,7 +16,6 @@ import (
|
|||
"github.com/stashapp/stash/pkg/logger"
|
||||
"github.com/stashapp/stash/pkg/match"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"github.com/stashapp/stash/pkg/sliceutil"
|
||||
"github.com/stashapp/stash/pkg/txn"
|
||||
)
|
||||
|
||||
|
|
@ -262,19 +261,23 @@ func (c Cache) ScrapeName(ctx context.Context, id, query string, ty ScrapeConten
|
|||
return nil, fmt.Errorf("error while name scraping with scraper %s: %w", id, err)
|
||||
}
|
||||
|
||||
ignoredRegex := c.compileExcludeTagPatterns()
|
||||
|
||||
var ignoredTags []string
|
||||
pp := postScraper{
|
||||
Cache: c,
|
||||
excludeTagRE: c.compileExcludeTagPatterns(),
|
||||
}
|
||||
if err := c.repository.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||
for i, cc := range content {
|
||||
var thisIgnoredTags []string
|
||||
content[i], thisIgnoredTags, err = c.postScrape(ctx, cc, ignoredRegex)
|
||||
content[i], err = pp.postScrape(ctx, cc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while post-scraping with scraper %s: %w", id, err)
|
||||
return fmt.Errorf("error while post-scraping with scraper %s: %w", id, err)
|
||||
}
|
||||
ignoredTags = sliceutil.AppendUniques(ignoredTags, thisIgnoredTags)
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
LogIgnoredTags(ignoredTags)
|
||||
LogIgnoredTags(pp.ignoredTags)
|
||||
|
||||
return content, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,88 +37,43 @@ func setPerformerImage(ctx context.Context, client *http.Client, p *models.Scrap
|
|||
return nil
|
||||
}
|
||||
|
||||
func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedScene, globalConfig GlobalConfig) error {
|
||||
// don't try to get the image if it doesn't appear to be a URL
|
||||
if s.Image == nil || !strings.HasPrefix(*s.Image, "http") {
|
||||
func setStudioImage(ctx context.Context, client *http.Client, p *models.ScrapedStudio, globalConfig GlobalConfig) error {
|
||||
// backwards compatibility: we fetch the image if it's a URL and set it to the first image
|
||||
// Image is deprecated, so only do this if Images is unset
|
||||
if p.Image == nil || len(p.Images) > 0 {
|
||||
// nothing to do
|
||||
return nil
|
||||
}
|
||||
|
||||
img, err := getImage(ctx, *s.Image, client, globalConfig)
|
||||
// don't try to get the image if it doesn't appear to be a URL
|
||||
if !strings.HasPrefix(*p.Image, "http") {
|
||||
p.Images = []string{*p.Image}
|
||||
return nil
|
||||
}
|
||||
|
||||
img, err := getImage(ctx, *p.Image, client, globalConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.Image = img
|
||||
p.Image = img
|
||||
// Image is deprecated. Use images instead
|
||||
p.Images = []string{*img}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
|
||||
// don't try to get the image if it doesn't appear to be a URL
|
||||
if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
|
||||
// nothing to do
|
||||
func processImageField(ctx context.Context, imageField *string, client *http.Client, globalConfig GlobalConfig) error {
|
||||
if imageField == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
img, err := getImage(ctx, *m.FrontImage, client, globalConfig)
|
||||
img, err := getImage(ctx, *imageField, client, globalConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.FrontImage = img
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func setMovieBackImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
|
||||
// don't try to get the image if it doesn't appear to be a URL
|
||||
if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
|
||||
// nothing to do
|
||||
return nil
|
||||
}
|
||||
|
||||
img, err := getImage(ctx, *m.BackImage, client, globalConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.BackImage = img
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func setGroupFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedGroup, globalConfig GlobalConfig) error {
|
||||
// don't try to get the image if it doesn't appear to be a URL
|
||||
if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
|
||||
// nothing to do
|
||||
return nil
|
||||
}
|
||||
|
||||
img, err := getImage(ctx, *m.FrontImage, client, globalConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.FrontImage = img
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func setGroupBackImage(ctx context.Context, client *http.Client, m *models.ScrapedGroup, globalConfig GlobalConfig) error {
|
||||
// don't try to get the image if it doesn't appear to be a URL
|
||||
if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
|
||||
// nothing to do
|
||||
return nil
|
||||
}
|
||||
|
||||
img, err := getImage(ctx, *m.BackImage, client, globalConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.BackImage = img
|
||||
|
||||
*imageField = *img
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,86 +11,92 @@ import (
|
|||
"github.com/stashapp/stash/pkg/utils"
|
||||
)
|
||||
|
||||
type postScraper struct {
|
||||
Cache
|
||||
excludeTagRE []*regexp.Regexp
|
||||
|
||||
// ignoredTags is a list of tags that were ignored during post-processing
|
||||
ignoredTags []string
|
||||
}
|
||||
|
||||
// postScrape handles post-processing of scraped content. If the content
|
||||
// requires post-processing, this function fans out to the given content
|
||||
// type and post-processes it.
|
||||
func (c Cache) postScrape(ctx context.Context, content ScrapedContent, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
||||
// Assumes called within a read transaction.
|
||||
func (c *postScraper) postScrape(ctx context.Context, content ScrapedContent) (_ ScrapedContent, err error) {
|
||||
const related = false
|
||||
|
||||
// Analyze the concrete type, call the right post-processing function
|
||||
switch v := content.(type) {
|
||||
case *models.ScrapedPerformer:
|
||||
if v != nil {
|
||||
return c.postScrapePerformer(ctx, *v, excludeTagRE)
|
||||
return c.postScrapePerformer(ctx, *v, related)
|
||||
}
|
||||
case models.ScrapedPerformer:
|
||||
return c.postScrapePerformer(ctx, v, excludeTagRE)
|
||||
return c.postScrapePerformer(ctx, v, related)
|
||||
case *models.ScrapedScene:
|
||||
if v != nil {
|
||||
return c.postScrapeScene(ctx, *v, excludeTagRE)
|
||||
return c.postScrapeScene(ctx, *v)
|
||||
}
|
||||
case models.ScrapedScene:
|
||||
return c.postScrapeScene(ctx, v, excludeTagRE)
|
||||
return c.postScrapeScene(ctx, v)
|
||||
case *models.ScrapedGallery:
|
||||
if v != nil {
|
||||
return c.postScrapeGallery(ctx, *v, excludeTagRE)
|
||||
return c.postScrapeGallery(ctx, *v)
|
||||
}
|
||||
case models.ScrapedGallery:
|
||||
return c.postScrapeGallery(ctx, v, excludeTagRE)
|
||||
return c.postScrapeGallery(ctx, v)
|
||||
case *models.ScrapedImage:
|
||||
if v != nil {
|
||||
return c.postScrapeImage(ctx, *v, excludeTagRE)
|
||||
return c.postScrapeImage(ctx, *v)
|
||||
}
|
||||
case models.ScrapedImage:
|
||||
return c.postScrapeImage(ctx, v, excludeTagRE)
|
||||
return c.postScrapeImage(ctx, v)
|
||||
case *models.ScrapedMovie:
|
||||
if v != nil {
|
||||
return c.postScrapeMovie(ctx, *v, excludeTagRE)
|
||||
return c.postScrapeMovie(ctx, *v, related)
|
||||
}
|
||||
case models.ScrapedMovie:
|
||||
return c.postScrapeMovie(ctx, v, excludeTagRE)
|
||||
return c.postScrapeMovie(ctx, v, related)
|
||||
case *models.ScrapedGroup:
|
||||
if v != nil {
|
||||
return c.postScrapeGroup(ctx, *v, excludeTagRE)
|
||||
return c.postScrapeGroup(ctx, *v, related)
|
||||
}
|
||||
case models.ScrapedGroup:
|
||||
return c.postScrapeGroup(ctx, v, excludeTagRE)
|
||||
return c.postScrapeGroup(ctx, v, related)
|
||||
}
|
||||
|
||||
// If nothing matches, pass the content through
|
||||
return content, nil, nil
|
||||
return content, nil
|
||||
}
|
||||
|
||||
// postScrapeSingle handles post-processing of a single scraped content item.
|
||||
// This is a convenience function that includes logging the ignored tags, as opposed to logging them in the caller.
|
||||
func (c Cache) postScrapeSingle(ctx context.Context, content ScrapedContent) (ScrapedContent, error) {
|
||||
ret, ignoredTags, err := c.postScrape(ctx, content, c.compileExcludeTagPatterns())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
func (c *postScraper) filterTags(tags []*models.ScrapedTag) []*models.ScrapedTag {
|
||||
var ret []*models.ScrapedTag
|
||||
var thisIgnoredTags []string
|
||||
ret, thisIgnoredTags = FilterTags(c.excludeTagRE, tags)
|
||||
c.ignoredTags = sliceutil.AppendUniques(c.ignoredTags, thisIgnoredTags)
|
||||
|
||||
LogIgnoredTags(ignoredTags)
|
||||
return ret, nil
|
||||
return ret
|
||||
}
|
||||
|
||||
func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
||||
func (c *postScraper) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer, related bool) (_ ScrapedContent, err error) {
|
||||
r := c.repository
|
||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||
tqb := r.TagFinder
|
||||
|
||||
tags, err := postProcessTags(ctx, tqb, p.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
p.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
p.Tags = c.filterTags(tags)
|
||||
|
||||
// post-process - set the image if applicable
|
||||
// don't set image for related performers to avoid excessive network calls
|
||||
if !related {
|
||||
if err := setPerformerImage(ctx, c.client, &p, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
p.Country = resolveCountryName(p.Country)
|
||||
|
||||
|
|
@ -119,119 +125,224 @@ func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerforme
|
|||
}
|
||||
}
|
||||
|
||||
return p, ignoredTags, nil
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeMovie(ctx context.Context, m models.ScrapedMovie, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
||||
func (c *postScraper) postScrapeMovie(ctx context.Context, m models.ScrapedMovie, related bool) (_ ScrapedContent, err error) {
|
||||
r := c.repository
|
||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||
tqb := r.TagFinder
|
||||
tags, err := postProcessTags(ctx, tqb, m.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
||||
|
||||
if m.Studio != nil {
|
||||
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// populate URL/URLs
|
||||
// if URLs are provided, only use those
|
||||
if len(m.URLs) > 0 {
|
||||
m.URL = &m.URLs[0]
|
||||
} else {
|
||||
urls := []string{}
|
||||
if m.URL != nil {
|
||||
urls = append(urls, *m.URL)
|
||||
}
|
||||
|
||||
if len(urls) > 0 {
|
||||
m.URLs = urls
|
||||
}
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setMovieFrontImage(ctx, c.client, &m, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
|
||||
}
|
||||
if err := setMovieBackImage(ctx, c.client, &m, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
|
||||
}
|
||||
|
||||
return m, ignoredTags, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeGroup(ctx context.Context, m models.ScrapedGroup, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
||||
r := c.repository
|
||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||
tqb := r.TagFinder
|
||||
tags, err := postProcessTags(ctx, tqb, m.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
||||
|
||||
if m.Studio != nil {
|
||||
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// populate URL/URLs
|
||||
// if URLs are provided, only use those
|
||||
if len(m.URLs) > 0 {
|
||||
m.URL = &m.URLs[0]
|
||||
} else {
|
||||
urls := []string{}
|
||||
if m.URL != nil {
|
||||
urls = append(urls, *m.URL)
|
||||
}
|
||||
|
||||
if len(urls) > 0 {
|
||||
m.URLs = urls
|
||||
}
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setGroupFrontImage(ctx, c.client, &m, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
|
||||
}
|
||||
if err := setGroupBackImage(ctx, c.client, &m, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
|
||||
}
|
||||
|
||||
return m, ignoredTags, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPerformer, excludeTagRE []*regexp.Regexp) (ignoredTags []string, err error) {
|
||||
tqb := c.repository.TagFinder
|
||||
|
||||
tags, err := postProcessTags(ctx, tqb, p.Tags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p.Tags = tags
|
||||
p.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
||||
m.Tags = c.filterTags(tags)
|
||||
|
||||
p.Country = resolveCountryName(p.Country)
|
||||
if m.Studio != nil {
|
||||
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return ignoredTags, nil
|
||||
// populate URL/URLs
|
||||
// if URLs are provided, only use those
|
||||
if len(m.URLs) > 0 {
|
||||
m.URL = &m.URLs[0]
|
||||
} else {
|
||||
urls := []string{}
|
||||
if m.URL != nil {
|
||||
urls = append(urls, *m.URL)
|
||||
}
|
||||
|
||||
if len(urls) > 0 {
|
||||
m.URLs = urls
|
||||
}
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
// don't set images for related movies to avoid excessive network calls
|
||||
if !related {
|
||||
if err := processImageField(ctx, m.FrontImage, c.client, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
|
||||
}
|
||||
if err := processImageField(ctx, m.BackImage, c.client, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
|
||||
}
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
||||
func (c *postScraper) postScrapeGroup(ctx context.Context, m models.ScrapedGroup, related bool) (_ ScrapedContent, err error) {
|
||||
r := c.repository
|
||||
tqb := r.TagFinder
|
||||
tags, err := postProcessTags(ctx, tqb, m.Tags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.Tags = c.filterTags(tags)
|
||||
|
||||
if m.Studio != nil {
|
||||
if err := match.ScrapedStudio(ctx, r.StudioFinder, m.Studio, ""); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// populate URL/URLs
|
||||
// if URLs are provided, only use those
|
||||
if len(m.URLs) > 0 {
|
||||
m.URL = &m.URLs[0]
|
||||
} else {
|
||||
urls := []string{}
|
||||
if m.URL != nil {
|
||||
urls = append(urls, *m.URL)
|
||||
}
|
||||
|
||||
if len(urls) > 0 {
|
||||
m.URLs = urls
|
||||
}
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
// don't set images for related groups to avoid excessive network calls
|
||||
if !related {
|
||||
if err := processImageField(ctx, m.FrontImage, c.client, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
|
||||
}
|
||||
if err := processImageField(ctx, m.BackImage, c.client, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
|
||||
}
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// postScrapeRelatedPerformers post-processes a list of performers.
|
||||
// It modifies the performers in place.
|
||||
func (c *postScraper) postScrapeRelatedPerformers(ctx context.Context, items []*models.ScrapedPerformer) error {
|
||||
for _, p := range items {
|
||||
if p == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
const related = true
|
||||
sc, err := c.postScrapePerformer(ctx, *p, related)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newP := sc.(models.ScrapedPerformer)
|
||||
*p = newP
|
||||
|
||||
if err := match.ScrapedPerformer(ctx, c.repository.PerformerFinder, p, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *postScraper) postScrapeRelatedMovies(ctx context.Context, items []*models.ScrapedMovie) error {
|
||||
for _, p := range items {
|
||||
const related = true
|
||||
sc, err := c.postScrapeMovie(ctx, *p, related)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newP := sc.(models.ScrapedMovie)
|
||||
*p = newP
|
||||
|
||||
matchedID, err := match.ScrapedGroup(ctx, c.repository.GroupFinder, p.StoredID, p.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if matchedID != nil {
|
||||
p.StoredID = matchedID
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *postScraper) postScrapeRelatedGroups(ctx context.Context, items []*models.ScrapedGroup) error {
|
||||
for _, p := range items {
|
||||
const related = true
|
||||
sc, err := c.postScrapeGroup(ctx, *p, related)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newP := sc.(models.ScrapedGroup)
|
||||
*p = newP
|
||||
|
||||
matchedID, err := match.ScrapedGroup(ctx, c.repository.GroupFinder, p.StoredID, p.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if matchedID != nil {
|
||||
p.StoredID = matchedID
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *postScraper) postScrapeStudio(ctx context.Context, s models.ScrapedStudio, related bool) (_ ScrapedContent, err error) {
|
||||
r := c.repository
|
||||
tqb := r.TagFinder
|
||||
|
||||
tags, err := postProcessTags(ctx, tqb, s.Tags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.Tags = c.filterTags(tags)
|
||||
|
||||
// post-process - set the image if applicable
|
||||
// don't set image for related studios to avoid excessive network calls
|
||||
if !related {
|
||||
if err := setStudioImage(ctx, c.client, &s, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set image using URL %s: %s", *s.Image, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// populate URL/URLs
|
||||
// if URLs are provided, only use those
|
||||
if len(s.URLs) > 0 {
|
||||
s.URL = &s.URLs[0]
|
||||
} else {
|
||||
urls := []string{}
|
||||
if s.URL != nil {
|
||||
urls = append(urls, *s.URL)
|
||||
}
|
||||
|
||||
if len(urls) > 0 {
|
||||
s.URLs = urls
|
||||
}
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (c *postScraper) postScrapeRelatedStudio(ctx context.Context, s *models.ScrapedStudio) error {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
const related = true
|
||||
sc, err := c.postScrapeStudio(ctx, *s, related)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newS := sc.(models.ScrapedStudio)
|
||||
*s = newS
|
||||
|
||||
if err = match.ScrapedStudio(ctx, c.repository.StudioFinder, s, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *postScraper) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (_ ScrapedContent, err error) {
|
||||
// set the URL/URLs field
|
||||
if scene.URL == nil && len(scene.URLs) > 0 {
|
||||
scene.URL = &scene.URLs[0]
|
||||
|
|
@ -241,49 +352,18 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, e
|
|||
}
|
||||
|
||||
r := c.repository
|
||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||
pqb := r.PerformerFinder
|
||||
gqb := r.GroupFinder
|
||||
tqb := r.TagFinder
|
||||
sqb := r.StudioFinder
|
||||
|
||||
for _, p := range scene.Performers {
|
||||
if p == nil {
|
||||
continue
|
||||
if err = c.postScrapeRelatedPerformers(ctx, scene.Performers); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
thisIgnoredTags, err := c.postScrapeScenePerformer(ctx, *p, excludeTagRE)
|
||||
if err != nil {
|
||||
return err
|
||||
if err = c.postScrapeRelatedMovies(ctx, scene.Movies); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := match.ScrapedPerformer(ctx, pqb, p, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ignoredTags = sliceutil.AppendUniques(ignoredTags, thisIgnoredTags)
|
||||
}
|
||||
|
||||
for _, p := range scene.Movies {
|
||||
matchedID, err := match.ScrapedGroup(ctx, gqb, p.StoredID, p.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if matchedID != nil {
|
||||
p.StoredID = matchedID
|
||||
}
|
||||
}
|
||||
|
||||
for _, p := range scene.Groups {
|
||||
matchedID, err := match.ScrapedGroup(ctx, gqb, p.StoredID, p.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if matchedID != nil {
|
||||
p.StoredID = matchedID
|
||||
}
|
||||
if err = c.postScrapeRelatedGroups(ctx, scene.Groups); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// HACK - if movies was returned but not groups, add the groups from the movies
|
||||
|
|
@ -302,31 +382,23 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene, e
|
|||
|
||||
tags, err := postProcessTags(ctx, tqb, scene.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
scene.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
||||
scene.Tags = c.filterTags(tags)
|
||||
|
||||
if scene.Studio != nil {
|
||||
err := match.ScrapedStudio(ctx, sqb, scene.Studio, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, nil, err
|
||||
if err := c.postScrapeRelatedStudio(ctx, scene.Studio); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setSceneImage(ctx, c.client, &scene, c.globalConfig); err != nil {
|
||||
if err := processImageField(ctx, scene.Image, c.client, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set image using URL %s: %v", *scene.Image, err)
|
||||
}
|
||||
|
||||
return scene, ignoredTags, nil
|
||||
return scene, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
||||
func (c *postScraper) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (_ ScrapedContent, err error) {
|
||||
// set the URL/URLs field
|
||||
if g.URL == nil && len(g.URLs) > 0 {
|
||||
g.URL = &g.URLs[0]
|
||||
|
|
@ -336,70 +408,65 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery, e
|
|||
}
|
||||
|
||||
r := c.repository
|
||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||
pqb := r.PerformerFinder
|
||||
tqb := r.TagFinder
|
||||
sqb := r.StudioFinder
|
||||
|
||||
for _, p := range g.Performers {
|
||||
err := match.ScrapedPerformer(ctx, pqb, p, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err = c.postScrapeRelatedPerformers(ctx, g.Performers); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := postProcessTags(ctx, tqb, g.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
g.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
||||
g.Tags = c.filterTags(tags)
|
||||
|
||||
if g.Studio != nil {
|
||||
err := match.ScrapedStudio(ctx, sqb, g.Studio, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.postScrapeRelatedStudio(ctx, g.Studio); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return g, ignoredTags, nil
|
||||
return g, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeImage(ctx context.Context, image models.ScrapedImage, excludeTagRE []*regexp.Regexp) (_ ScrapedContent, ignoredTags []string, err error) {
|
||||
func (c *postScraper) postScrapeImage(ctx context.Context, image models.ScrapedImage) (_ ScrapedContent, err error) {
|
||||
r := c.repository
|
||||
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||
pqb := r.PerformerFinder
|
||||
tqb := r.TagFinder
|
||||
sqb := r.StudioFinder
|
||||
|
||||
for _, p := range image.Performers {
|
||||
if err := match.ScrapedPerformer(ctx, pqb, p, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
if err = c.postScrapeRelatedPerformers(ctx, image.Performers); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := postProcessTags(ctx, tqb, image.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
image.Tags, ignoredTags = FilterTags(excludeTagRE, tags)
|
||||
image.Tags = c.filterTags(tags)
|
||||
|
||||
if image.Studio != nil {
|
||||
err := match.ScrapedStudio(ctx, sqb, image.Studio, "")
|
||||
if err := c.postScrapeRelatedStudio(ctx, image.Studio); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return image, nil
|
||||
}
|
||||
|
||||
// postScrapeSingle handles post-processing of a single scraped content item.
|
||||
// This is a convenience function that includes logging the ignored tags, as opposed to logging them in the caller.
|
||||
func (c Cache) postScrapeSingle(ctx context.Context, content ScrapedContent) (ret ScrapedContent, err error) {
|
||||
pp := postScraper{
|
||||
Cache: c,
|
||||
excludeTagRE: c.compileExcludeTagPatterns(),
|
||||
}
|
||||
|
||||
if err := c.repository.WithReadTxn(ctx, func(ctx context.Context) error {
|
||||
ret, err = pp.postScrape(ctx, content)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return image, ignoredTags, nil
|
||||
LogIgnoredTags(pp.ignoredTags)
|
||||
return ret, nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue