Apply scraped tag exclusions to galleries and performers (#4872)

This commit is contained in:
WithoutPants 2024-05-24 08:06:23 +10:00 committed by GitHub
parent 3dd218e1ba
commit 28b5fbfd4d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -54,9 +54,8 @@ func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string,
return ret, nil return ret, nil
} }
// filterSceneTags removes tags matching excluded tag patterns from the provided scraped scenes func compileRegexps(patterns []string) []*regexp.Regexp {
func filterSceneTags(scenes []*scraper.ScrapedScene) { excludePatterns := patterns
excludePatterns := manager.GetInstance().Config.GetScraperExcludeTagPatterns()
var excludeRegexps []*regexp.Regexp var excludeRegexps []*regexp.Regexp
for _, excludePattern := range excludePatterns { for _, excludePattern := range excludePatterns {
@ -68,15 +67,16 @@ func filterSceneTags(scenes []*scraper.ScrapedScene) {
} }
} }
if len(excludeRegexps) == 0 { return excludeRegexps
return
} }
var ignoredTags []string // filterTags removes tags matching excluded tag patterns from the provided scraped tags
func filterTags(excludeRegexps []*regexp.Regexp, tags []*models.ScrapedTag) (newTags []*models.ScrapedTag, ignoredTags []string) {
if len(excludeRegexps) == 0 {
return tags, nil
}
for _, s := range scenes { for _, t := range tags {
var newTags []*models.ScrapedTag
for _, t := range s.Tags {
ignore := false ignore := false
for _, reg := range excludeRegexps { for _, reg := range excludeRegexps {
if reg.MatchString(strings.ToLower(t.Name)) { if reg.MatchString(strings.ToLower(t.Name)) {
@ -91,7 +91,53 @@ func filterSceneTags(scenes []*scraper.ScrapedScene) {
} }
} }
s.Tags = newTags return
}
// filterSceneTags rewrites the Tags of each provided scraped scene with the
// result of filterTags, using regexps compiled from the configured scraper
// exclude-tag patterns. The ignored tag names reported for every scene are
// merged via sliceutil.AppendUniques and, if any were ignored, written to the
// debug log in a single message.
func filterSceneTags(scenes []*scraper.ScrapedScene) {
	patterns := manager.GetInstance().Config.GetScraperExcludeTagPatterns()
	regexps := compileRegexps(patterns)

	var dropped []string
	for i := range scenes {
		scene := scenes[i]
		kept, removed := filterTags(regexps, scene.Tags)
		scene.Tags = kept
		dropped = sliceutil.AppendUniques(dropped, removed)
	}

	// Nothing was filtered out, so there is nothing to report.
	if len(dropped) == 0 {
		return
	}
	logger.Debugf("Scraping ignored tags: %s", strings.Join(dropped, ", "))
}
// filterGalleryTags rewrites the Tags of each provided scraped gallery with
// the result of filterTags, using regexps compiled from the configured scraper
// exclude-tag patterns. The ignored tag names reported for every gallery are
// merged via sliceutil.AppendUniques and, if any were ignored, written to the
// debug log in a single message.
func filterGalleryTags(g []*scraper.ScrapedGallery) {
	patterns := manager.GetInstance().Config.GetScraperExcludeTagPatterns()
	regexps := compileRegexps(patterns)

	var dropped []string
	for i := range g {
		gallery := g[i]
		kept, removed := filterTags(regexps, gallery.Tags)
		gallery.Tags = kept
		dropped = sliceutil.AppendUniques(dropped, removed)
	}

	// Nothing was filtered out, so there is nothing to report.
	if len(dropped) == 0 {
		return
	}
	logger.Debugf("Scraping ignored tags: %s", strings.Join(dropped, ", "))
}
// filterPerformerTags removes tags matching excluded tag patterns from the provided scraped performers
func filterPerformerTags(p []*models.ScrapedPerformer) {
excludeRegexps := compileRegexps(manager.GetInstance().Config.GetScraperExcludeTagPatterns())
var ignoredTags []string
for _, s := range p {
var ignored []string
s.Tags, ignored = filterTags(excludeRegexps, s.Tags)
ignoredTags = sliceutil.AppendUniques(ignoredTags, ignored)
} }
if len(ignoredTags) > 0 { if len(ignoredTags) > 0 {
@ -123,7 +169,16 @@ func (r *queryResolver) ScrapeGalleryURL(ctx context.Context, url string) (*scra
return nil, err return nil, err
} }
return marshalScrapedGallery(content) ret, err := marshalScrapedGallery(content)
if err != nil {
return nil, err
}
if ret != nil {
filterGalleryTags([]*scraper.ScrapedGallery{ret})
}
return ret, nil
} }
func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
@ -264,39 +319,46 @@ func (r *queryResolver) ScrapeSingleStudio(ctx context.Context, source scraper.S
} }
func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source scraper.Source, input ScrapeSinglePerformerInput) ([]*models.ScrapedPerformer, error) { func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source scraper.Source, input ScrapeSinglePerformerInput) ([]*models.ScrapedPerformer, error) {
if source.ScraperID != nil { var ret []*models.ScrapedPerformer
if input.PerformerInput != nil { switch {
case source.ScraperID != nil:
switch {
case input.PerformerInput != nil:
performer, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Performer: input.PerformerInput}) performer, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Performer: input.PerformerInput})
if err != nil { if err != nil {
return nil, err return nil, err
} }
return marshalScrapedPerformers([]scraper.ScrapedContent{performer}) ret, err = marshalScrapedPerformers([]scraper.ScrapedContent{performer})
if err != nil {
return nil, err
} }
case input.Query != nil:
if input.Query != nil {
content, err := r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, scraper.ScrapeContentTypePerformer) content, err := r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, scraper.ScrapeContentTypePerformer)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return marshalScrapedPerformers(content) ret, err = marshalScrapedPerformers(content)
if err != nil {
return nil, err
} }
default:
return nil, ErrNotImplemented return nil, ErrNotImplemented
}
// FIXME - we're relying on a deprecated field and not processing the endpoint input // FIXME - we're relying on a deprecated field and not processing the endpoint input
} else if source.StashBoxIndex != nil { case source.StashBoxIndex != nil:
client, err := r.getStashBoxClient(*source.StashBoxIndex) client, err := r.getStashBoxClient(*source.StashBoxIndex)
if err != nil { if err != nil {
return nil, err return nil, err
} }
var ret []*stashbox.StashBoxPerformerQueryResult var res []*stashbox.StashBoxPerformerQueryResult
switch { switch {
case input.PerformerID != nil: case input.PerformerID != nil:
ret, err = client.FindStashBoxPerformersByNames(ctx, []string{*input.PerformerID}) res, err = client.FindStashBoxPerformersByNames(ctx, []string{*input.PerformerID})
case input.Query != nil: case input.Query != nil:
ret, err = client.QueryStashBoxPerformer(ctx, *input.Query) res, err = client.QueryStashBoxPerformer(ctx, *input.Query)
default: default:
return nil, ErrNotImplemented return nil, ErrNotImplemented
} }
@ -305,16 +367,18 @@ func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source scrape
return nil, err return nil, err
} }
if len(ret) > 0 { if len(res) > 0 {
return ret[0].Results, nil ret = res[0].Results
} }
default:
return nil, nil
}
return nil, errors.New("scraper_id or stash_box_index must be set") return nil, errors.New("scraper_id or stash_box_index must be set")
} }
filterPerformerTags(ret)
return ret, nil
}
func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source scraper.Source, input ScrapeMultiPerformersInput) ([][]*models.ScrapedPerformer, error) { func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source scraper.Source, input ScrapeMultiPerformersInput) ([][]*models.ScrapedPerformer, error) {
if source.ScraperID != nil { if source.ScraperID != nil {
return nil, ErrNotImplemented return nil, ErrNotImplemented
@ -331,6 +395,8 @@ func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source scrape
} }
func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper.Source, input ScrapeSingleGalleryInput) ([]*scraper.ScrapedGallery, error) { func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper.Source, input ScrapeSingleGalleryInput) ([]*scraper.ScrapedGallery, error) {
var ret []*scraper.ScrapedGallery
if source.StashBoxIndex != nil { if source.StashBoxIndex != nil {
return nil, ErrNotSupported return nil, ErrNotSupported
} }
@ -351,16 +417,25 @@ func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper.
if err != nil { if err != nil {
return nil, err return nil, err
} }
return marshalScrapedGalleries([]scraper.ScrapedContent{c}) ret, err = marshalScrapedGalleries([]scraper.ScrapedContent{c})
if err != nil {
return nil, err
}
case input.GalleryInput != nil: case input.GalleryInput != nil:
c, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Gallery: input.GalleryInput}) c, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Gallery: input.GalleryInput})
if err != nil { if err != nil {
return nil, err return nil, err
} }
return marshalScrapedGalleries([]scraper.ScrapedContent{c}) ret, err = marshalScrapedGalleries([]scraper.ScrapedContent{c})
if err != nil {
return nil, err
}
default: default:
return nil, ErrNotImplemented return nil, ErrNotImplemented
} }
filterGalleryTags(ret)
return ret, nil
} }
func (r *queryResolver) ScrapeSingleMovie(ctx context.Context, source scraper.Source, input ScrapeSingleMovieInput) ([]*models.ScrapedMovie, error) { func (r *queryResolver) ScrapeSingleMovie(ctx context.Context, source scraper.Source, input ScrapeSingleMovieInput) ([]*models.ScrapedMovie, error) {