mirror of
https://github.com/stashapp/stash.git
synced 2025-12-08 01:13:09 +01:00
Move tag exclusion to scrape query resolver (#2391)
This commit is contained in:
parent
e4ad42caf0
commit
dd0fa48345
4 changed files with 104 additions and 46 deletions
|
|
@ -4,12 +4,17 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/stashapp/stash/internal/manager"
|
||||||
"github.com/stashapp/stash/internal/manager/config"
|
"github.com/stashapp/stash/internal/manager/config"
|
||||||
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
"github.com/stashapp/stash/pkg/models"
|
"github.com/stashapp/stash/pkg/models"
|
||||||
"github.com/stashapp/stash/pkg/scraper"
|
"github.com/stashapp/stash/pkg/scraper"
|
||||||
"github.com/stashapp/stash/pkg/scraper/stashbox"
|
"github.com/stashapp/stash/pkg/scraper/stashbox"
|
||||||
|
"github.com/stashapp/stash/pkg/sliceutil/stringslice"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (r *queryResolver) ScrapeURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
|
func (r *queryResolver) ScrapeURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
|
||||||
|
|
@ -99,7 +104,13 @@ func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string,
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return marshalScrapedScenes(content)
|
ret, err := marshalScrapedScenes(content)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
filterSceneTags(ret)
|
||||||
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
|
func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
|
||||||
|
|
@ -113,7 +124,59 @@ func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return marshalScrapedScene(content)
|
ret, err := marshalScrapedScene(content)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
filterSceneTags([]*models.ScrapedScene{ret})
|
||||||
|
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// filterSceneTags removes tags matching excluded tag patterns from the provided scraped scenes
|
||||||
|
func filterSceneTags(scenes []*models.ScrapedScene) {
|
||||||
|
excludePatterns := manager.GetInstance().Config.GetScraperExcludeTagPatterns()
|
||||||
|
var excludeRegexps []*regexp.Regexp
|
||||||
|
|
||||||
|
for _, excludePattern := range excludePatterns {
|
||||||
|
reg, err := regexp.Compile(strings.ToLower(excludePattern))
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("Invalid tag exclusion pattern: %v", err)
|
||||||
|
} else {
|
||||||
|
excludeRegexps = append(excludeRegexps, reg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(excludeRegexps) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var ignoredTags []string
|
||||||
|
|
||||||
|
for _, s := range scenes {
|
||||||
|
var newTags []*models.ScrapedTag
|
||||||
|
for _, t := range s.Tags {
|
||||||
|
ignore := false
|
||||||
|
for _, reg := range excludeRegexps {
|
||||||
|
if reg.MatchString(strings.ToLower(t.Name)) {
|
||||||
|
ignore = true
|
||||||
|
ignoredTags = stringslice.StrAppendUnique(ignoredTags, t.Name)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !ignore {
|
||||||
|
newTags = append(newTags, t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s.Tags = newTags
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(ignoredTags) > 0 {
|
||||||
|
logger.Debugf("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
|
func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
|
||||||
|
|
@ -122,7 +185,14 @@ func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return marshalScrapedScene(content)
|
ret, err := marshalScrapedScene(content)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
filterSceneTags([]*models.ScrapedScene{ret})
|
||||||
|
|
||||||
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) ScrapeGallery(ctx context.Context, scraperID string, gallery models.GalleryUpdateInput) (*models.ScrapedGallery, error) {
|
func (r *queryResolver) ScrapeGallery(ctx context.Context, scraperID string, gallery models.GalleryUpdateInput) (*models.ScrapedGallery, error) {
|
||||||
|
|
@ -208,10 +278,13 @@ func (r *queryResolver) getStashBoxClient(index int) (*stashbox.Client, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleSceneInput) ([]*models.ScrapedScene, error) {
|
func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleSceneInput) ([]*models.ScrapedScene, error) {
|
||||||
if source.ScraperID != nil {
|
var ret []*models.ScrapedScene
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case source.ScraperID != nil:
|
||||||
|
var err error
|
||||||
var c models.ScrapedContent
|
var c models.ScrapedContent
|
||||||
var content []models.ScrapedContent
|
var content []models.ScrapedContent
|
||||||
var err error
|
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case input.SceneID != nil:
|
case input.SceneID != nil:
|
||||||
|
|
@ -239,23 +312,35 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return marshalScrapedScenes(content)
|
ret, err = marshalScrapedScenes(content)
|
||||||
} else if source.StashBoxIndex != nil {
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
case source.StashBoxIndex != nil:
|
||||||
client, err := r.getStashBoxClient(*source.StashBoxIndex)
|
client, err := r.getStashBoxClient(*source.StashBoxIndex)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if input.SceneID != nil {
|
switch {
|
||||||
return client.FindStashBoxScenesByFingerprintsFlat(ctx, []string{*input.SceneID})
|
case input.SceneID != nil:
|
||||||
} else if input.Query != nil {
|
ret, err = client.FindStashBoxScenesByFingerprintsFlat(ctx, []string{*input.SceneID})
|
||||||
return client.QueryStashBoxScene(ctx, *input.Query)
|
case input.Query != nil:
|
||||||
|
ret, err = client.QueryStashBoxScene(ctx, *input.Query)
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("%w: scene_id or query must be set", ErrInput)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("%w: scene_id or query must be set", ErrInput)
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput)
|
filterSceneTags(ret)
|
||||||
|
|
||||||
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeMultiScenesInput) ([][]*models.ScrapedScene, error) {
|
func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeMultiScenesInput) ([][]*models.ScrapedScene, error) {
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,6 @@ type GlobalConfig interface {
|
||||||
GetScrapersPath() string
|
GetScrapersPath() string
|
||||||
GetScraperCDPPath() string
|
GetScraperCDPPath() string
|
||||||
GetScraperCertCheck() bool
|
GetScraperCertCheck() bool
|
||||||
GetScraperExcludeTagPatterns() []string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func isCDPPathHTTP(c GlobalConfig) bool {
|
func isCDPPathHTTP(c GlobalConfig) bool {
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,6 @@ package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"regexp"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/stashapp/stash/pkg/logger"
|
"github.com/stashapp/stash/pkg/logger"
|
||||||
"github.com/stashapp/stash/pkg/match"
|
"github.com/stashapp/stash/pkg/match"
|
||||||
|
|
@ -50,7 +48,7 @@ func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerforme
|
||||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||||
tqb := r.Tag()
|
tqb := r.Tag()
|
||||||
|
|
||||||
tags, err := postProcessTags(c.globalConfig, tqb, p.Tags)
|
tags, err := postProcessTags(tqb, p.Tags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
@ -93,7 +91,7 @@ func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPer
|
||||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||||
tqb := r.Tag()
|
tqb := r.Tag()
|
||||||
|
|
||||||
tags, err := postProcessTags(c.globalConfig, tqb, p.Tags)
|
tags, err := postProcessTags(tqb, p.Tags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
@ -135,7 +133,7 @@ func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tags, err := postProcessTags(c.globalConfig, tqb, scene.Tags)
|
tags, err := postProcessTags(tqb, scene.Tags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
@ -174,7 +172,7 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tags, err := postProcessTags(c.globalConfig, tqb, g.Tags)
|
tags, err := postProcessTags(tqb, g.Tags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
@ -195,31 +193,10 @@ func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (
|
||||||
return g, nil
|
return g, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func postProcessTags(globalConfig GlobalConfig, tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
|
func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
|
||||||
var ret []*models.ScrapedTag
|
var ret []*models.ScrapedTag
|
||||||
|
|
||||||
excludePatterns := globalConfig.GetScraperExcludeTagPatterns()
|
|
||||||
var excludeRegexps []*regexp.Regexp
|
|
||||||
|
|
||||||
for _, excludePattern := range excludePatterns {
|
|
||||||
reg, err := regexp.Compile(strings.ToLower(excludePattern))
|
|
||||||
if err != nil {
|
|
||||||
logger.Errorf("Invalid tag exclusion pattern :%v", err)
|
|
||||||
} else {
|
|
||||||
excludeRegexps = append(excludeRegexps, reg)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var ignoredTags []string
|
|
||||||
ScrapeTag:
|
|
||||||
for _, t := range scrapedTags {
|
for _, t := range scrapedTags {
|
||||||
for _, reg := range excludeRegexps {
|
|
||||||
if reg.MatchString(strings.ToLower(t.Name)) {
|
|
||||||
ignoredTags = append(ignoredTags, t.Name)
|
|
||||||
continue ScrapeTag
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
err := match.ScrapedTag(tqb, t)
|
err := match.ScrapedTag(tqb, t)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
@ -227,9 +204,5 @@ ScrapeTag:
|
||||||
ret = append(ret, t)
|
ret = append(ret, t)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(ignoredTags) > 0 {
|
|
||||||
logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret, nil
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
* Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))
|
* Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))
|
||||||
|
|
||||||
### 🐛 Bug fixes
|
### 🐛 Bug fixes
|
||||||
|
* Perform tag pattern exclusion on stash-box sources. ([#2391](https://github.com/stashapp/stash/pull/2391))
|
||||||
* Don't generate jpg thumbnails for animated webp files. ([#2388](https://github.com/stashapp/stash/pull/2388))
|
* Don't generate jpg thumbnails for animated webp files. ([#2388](https://github.com/stashapp/stash/pull/2388))
|
||||||
* Removed warnings and incorrect error message in json scrapers. ([#2375](https://github.com/stashapp/stash/pull/2375))
|
* Removed warnings and incorrect error message in json scrapers. ([#2375](https://github.com/stashapp/stash/pull/2375))
|
||||||
* Ensure identify continues using other scrapers if a scrape returns no results. ([#2375](https://github.com/stashapp/stash/pull/2375))
|
* Ensure identify continues using other scrapers if a scrape returns no results. ([#2375](https://github.com/stashapp/stash/pull/2375))
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue