From 28b5fbfd4d2a5899dc362b0965d43e139e7a8eed Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Fri, 24 May 2024 08:06:23 +1000 Subject: [PATCH] Apply scraped tag exclusions galleries and performers (#4872) --- internal/api/resolver_query_scraper.go | 155 ++++++++++++++++++------- 1 file changed, 115 insertions(+), 40 deletions(-) diff --git a/internal/api/resolver_query_scraper.go b/internal/api/resolver_query_scraper.go index a2974d0d8..5f27db3de 100644 --- a/internal/api/resolver_query_scraper.go +++ b/internal/api/resolver_query_scraper.go @@ -54,9 +54,8 @@ func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string, return ret, nil } -// filterSceneTags removes tags matching excluded tag patterns from the provided scraped scenes -func filterSceneTags(scenes []*scraper.ScrapedScene) { - excludePatterns := manager.GetInstance().Config.GetScraperExcludeTagPatterns() +func compileRegexps(patterns []string) []*regexp.Regexp { + excludePatterns := patterns var excludeRegexps []*regexp.Regexp for _, excludePattern := range excludePatterns { @@ -68,30 +67,77 @@ func filterSceneTags(scenes []*scraper.ScrapedScene) { } } + return excludeRegexps +} + +// filterTags removes tags matching excluded tag patterns from the provided tags +func filterTags(excludeRegexps []*regexp.Regexp, tags []*models.ScrapedTag) (newTags []*models.ScrapedTag, ignoredTags []string) { if len(excludeRegexps) == 0 { - return + return tags, nil } + for _, t := range tags { + ignore := false + for _, reg := range excludeRegexps { + if reg.MatchString(strings.ToLower(t.Name)) { + ignore = true + ignoredTags = sliceutil.AppendUnique(ignoredTags, t.Name) + break + } + } + + if !ignore { + newTags = append(newTags, t) + } + } + + return +} + +// filterSceneTags removes tags matching excluded tag patterns from the provided scraped scenes +func filterSceneTags(scenes []*scraper.ScrapedScene) { + excludeRegexps := 
compileRegexps(manager.GetInstance().Config.GetScraperExcludeTagPatterns()) + var ignoredTags []string for _, s := range scenes { - var newTags []*models.ScrapedTag - for _, t := range s.Tags { - ignore := false - for _, reg := range excludeRegexps { - if reg.MatchString(strings.ToLower(t.Name)) { - ignore = true - ignoredTags = sliceutil.AppendUnique(ignoredTags, t.Name) - break - } - } + var ignored []string + s.Tags, ignored = filterTags(excludeRegexps, s.Tags) + ignoredTags = sliceutil.AppendUniques(ignoredTags, ignored) + } - if !ignore { - newTags = append(newTags, t) - } - } + if len(ignoredTags) > 0 { + logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", ")) + } +} - s.Tags = newTags +// filterGalleryTags removes tags matching excluded tag patterns from the provided scraped galleries +func filterGalleryTags(g []*scraper.ScrapedGallery) { + excludeRegexps := compileRegexps(manager.GetInstance().Config.GetScraperExcludeTagPatterns()) + + var ignoredTags []string + + for _, s := range g { + var ignored []string + s.Tags, ignored = filterTags(excludeRegexps, s.Tags) + ignoredTags = sliceutil.AppendUniques(ignoredTags, ignored) + } + + if len(ignoredTags) > 0 { + logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", ")) + } +} + +// filterPerformerTags removes tags matching excluded tag patterns from the provided scraped performers +func filterPerformerTags(p []*models.ScrapedPerformer) { + excludeRegexps := compileRegexps(manager.GetInstance().Config.GetScraperExcludeTagPatterns()) + + var ignoredTags []string + + for _, s := range p { + var ignored []string + s.Tags, ignored = filterTags(excludeRegexps, s.Tags) + ignoredTags = sliceutil.AppendUniques(ignoredTags, ignored) } if len(ignoredTags) > 0 { @@ -123,7 +169,16 @@ func (r *queryResolver) ScrapeGalleryURL(ctx context.Context, url string) (*scra return nil, err } - return marshalScrapedGallery(content) + ret, err := marshalScrapedGallery(content) + if err != nil { + 
return nil, err + } + + if ret != nil { + filterGalleryTags([]*scraper.ScrapedGallery{ret}) + } + + return ret, nil } func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { @@ -264,39 +319,46 @@ func (r *queryResolver) ScrapeSingleStudio(ctx context.Context, source scraper.S } func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source scraper.Source, input ScrapeSinglePerformerInput) ([]*models.ScrapedPerformer, error) { - if source.ScraperID != nil { - if input.PerformerInput != nil { + var ret []*models.ScrapedPerformer + switch { + case source.ScraperID != nil: + switch { + case input.PerformerInput != nil: performer, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Performer: input.PerformerInput}) if err != nil { return nil, err } - return marshalScrapedPerformers([]scraper.ScrapedContent{performer}) - } - - if input.Query != nil { + ret, err = marshalScrapedPerformers([]scraper.ScrapedContent{performer}) + if err != nil { + return nil, err + } + case input.Query != nil: content, err := r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, scraper.ScrapeContentTypePerformer) if err != nil { return nil, err } - return marshalScrapedPerformers(content) + ret, err = marshalScrapedPerformers(content) + if err != nil { + return nil, err + } + default: + return nil, ErrNotImplemented } - - return nil, ErrNotImplemented // FIXME - we're relying on a deprecated field and not processing the endpoint input - } else if source.StashBoxIndex != nil { + case source.StashBoxIndex != nil: client, err := r.getStashBoxClient(*source.StashBoxIndex) if err != nil { return nil, err } - var ret []*stashbox.StashBoxPerformerQueryResult + var res []*stashbox.StashBoxPerformerQueryResult switch { case input.PerformerID != nil: - ret, err = client.FindStashBoxPerformersByNames(ctx, []string{*input.PerformerID}) + res, err = client.FindStashBoxPerformersByNames(ctx, 
[]string{*input.PerformerID}) case input.Query != nil: - ret, err = client.QueryStashBoxPerformer(ctx, *input.Query) + res, err = client.QueryStashBoxPerformer(ctx, *input.Query) default: return nil, ErrNotImplemented } @@ -305,14 +367,16 @@ func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source scrape return nil, err } - if len(ret) > 0 { - return ret[0].Results, nil + if len(res) > 0 { + ret = res[0].Results } - - return nil, nil + default: + return nil, errors.New("scraper_id or stash_box_index must be set") } - return nil, errors.New("scraper_id or stash_box_index must be set") + filterPerformerTags(ret) + + return ret, nil } func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source scraper.Source, input ScrapeMultiPerformersInput) ([][]*models.ScrapedPerformer, error) { @@ -331,6 +395,8 @@ func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source scrape } func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper.Source, input ScrapeSingleGalleryInput) ([]*scraper.ScrapedGallery, error) { + var ret []*scraper.ScrapedGallery + if source.StashBoxIndex != nil { return nil, ErrNotSupported } @@ -351,16 +417,25 @@ func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper. 
if err != nil { return nil, err } - return marshalScrapedGalleries([]scraper.ScrapedContent{c}) + ret, err = marshalScrapedGalleries([]scraper.ScrapedContent{c}) + if err != nil { + return nil, err + } case input.GalleryInput != nil: c, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Gallery: input.GalleryInput}) if err != nil { return nil, err } - return marshalScrapedGalleries([]scraper.ScrapedContent{c}) + ret, err = marshalScrapedGalleries([]scraper.ScrapedContent{c}) + if err != nil { + return nil, err + } default: return nil, ErrNotImplemented } + + filterGalleryTags(ret) + return ret, nil } func (r *queryResolver) ScrapeSingleMovie(ctx context.Context, source scraper.Source, input ScrapeSingleMovieInput) ([]*models.ScrapedMovie, error) {