diff --git a/pkg/api/resolver_mutation_stash_box.go b/pkg/api/resolver_mutation_stash_box.go index d05212deb..9c489e8de 100644 --- a/pkg/api/resolver_mutation_stash_box.go +++ b/pkg/api/resolver_mutation_stash_box.go @@ -20,7 +20,7 @@ func (r *mutationResolver) SubmitStashBoxFingerprints(ctx context.Context, input client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager) - return client.SubmitStashBoxFingerprints(input.SceneIds, boxes[input.StashBoxIndex].Endpoint) + return client.SubmitStashBoxFingerprints(ctx, input.SceneIds, boxes[input.StashBoxIndex].Endpoint) } func (r *mutationResolver) StashBoxBatchPerformerTag(ctx context.Context, input models.StashBoxBatchPerformerTagInput) (string, error) { diff --git a/pkg/api/resolver_query_scraper.go b/pkg/api/resolver_query_scraper.go index ed65ac44d..85d051d72 100644 --- a/pkg/api/resolver_query_scraper.go +++ b/pkg/api/resolver_query_scraper.go @@ -167,7 +167,7 @@ func (r *queryResolver) QueryStashBoxScene(ctx context.Context, input models.Sta client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager) if len(input.SceneIds) > 0 { - return client.FindStashBoxScenesByFingerprintsFlat(input.SceneIds) + return client.FindStashBoxScenesByFingerprintsFlat(ctx, input.SceneIds) } if input.Q != nil { @@ -187,11 +187,11 @@ func (r *queryResolver) QueryStashBoxPerformer(ctx context.Context, input models client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager) if len(input.PerformerIds) > 0 { - return client.FindStashBoxPerformersByNames(input.PerformerIds) + return client.FindStashBoxPerformersByNames(ctx, input.PerformerIds) } if input.Q != nil { - return client.QueryStashBoxPerformer(*input.Q) + return client.QueryStashBoxPerformer(ctx, *input.Q) } return nil, nil @@ -243,7 +243,7 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr } if input.SceneID != nil { - return client.FindStashBoxScenesByFingerprintsFlat([]string{*input.SceneID}) + return client.FindStashBoxScenesByFingerprintsFlat(ctx, []string{*input.SceneID}) } else if input.Query != nil { return client.QueryStashBoxScene(ctx, *input.Query) } @@ -263,7 +263,7 @@ func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.Scr return nil, err } - return client.FindStashBoxScenesByFingerprints(input.SceneIds) + return client.FindStashBoxScenesByFingerprints(ctx, input.SceneIds) } return nil, errors.New("scraper_id or stash_box_index must be set") @@ -299,9 +299,9 @@ func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source models var ret []*models.StashBoxPerformerQueryResult switch { case input.PerformerID != nil: - ret, err = client.FindStashBoxPerformersByNames([]string{*input.PerformerID}) + ret, err = client.FindStashBoxPerformersByNames(ctx, []string{*input.PerformerID}) case input.Query != nil: - ret, err = client.QueryStashBoxPerformer(*input.Query) + ret, err = client.QueryStashBoxPerformer(ctx, *input.Query) default: return nil, ErrNotImplemented } @@ -329,7 +329,7 @@ func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source models return nil, err } - return client.FindStashBoxPerformersByPerformerNames(input.PerformerIds) + return client.FindStashBoxPerformersByPerformerNames(ctx, input.PerformerIds) } return nil, errors.New("scraper_id or stash_box_index must be set") diff --git a/pkg/api/scraped_content.go b/pkg/api/scraped_content.go index 4a63cfb60..5374efd75 100644 --- a/pkg/api/scraped_content.go +++ b/pkg/api/scraped_content.go @@ -16,9 +16,12 @@ func marshalScrapedScenes(content []models.ScrapedContent) ([]*models.ScrapedSce continue } - if s, ok := c.(*models.ScrapedScene); ok { + switch s := c.(type) { + case *models.ScrapedScene: ret = append(ret, s) - } else { + case models.ScrapedScene: + ret = append(ret, &s) + default: return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedScene", models.ErrConversion) } } @@ -36,9 +39,12 @@ func marshalScrapedPerformers(content []models.ScrapedContent) ([]*models.Scrape continue } - if p, ok := c.(*models.ScrapedPerformer); ok { + switch p := c.(type) { + case *models.ScrapedPerformer: ret = append(ret, p) - } else { + case models.ScrapedPerformer: + ret = append(ret, &p) + default: return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedPerformer", models.ErrConversion) } } @@ -56,9 +62,12 @@ func marshalScrapedGalleries(content []models.ScrapedContent) ([]*models.Scraped continue } - if g, ok := c.(*models.ScrapedGallery); ok { + switch g := c.(type) { + case *models.ScrapedGallery: ret = append(ret, g) - } else { + case models.ScrapedGallery: + ret = append(ret, &g) + default: return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedGallery", models.ErrConversion) } } @@ -76,9 +85,12 @@ func marshalScrapedMovies(content []models.ScrapedContent) ([]*models.ScrapedMov continue } - if m, ok := c.(*models.ScrapedMovie); ok { + switch m := c.(type) { + case *models.ScrapedMovie: ret = append(ret, m) - } else { + case models.ScrapedMovie: + ret = append(ret, &m) + default: return nil, fmt.Errorf("%w: cannot turn ScrapedConetnt into ScrapedMovie", models.ErrConversion) } } diff --git a/pkg/manager/task_identify.go b/pkg/manager/task_identify.go index e7e16df76..6aaea48ef 100644 --- a/pkg/manager/task_identify.go +++ b/pkg/manager/task_identify.go @@ -211,8 +211,8 @@ type stashboxSource struct { endpoint string } -func (s stashboxSource) ScrapeScene(_ context.Context, sceneID int) (*models.ScrapedScene, error) { - results, err := s.FindStashBoxScenesByFingerprintsFlat([]string{strconv.Itoa(sceneID)}) +func (s stashboxSource) ScrapeScene(ctx context.Context, sceneID int) (*models.ScrapedScene, error) { + results, err := s.FindStashBoxScenesByFingerprintsFlat(ctx, []string{strconv.Itoa(sceneID)}) if err != nil { return nil, fmt.Errorf("error querying stash-box using scene ID %d: %w", sceneID, err) } diff --git a/pkg/manager/task_stash_box_tag.go b/pkg/manager/task_stash_box_tag.go index 6da960381..dbfa28073 100644 --- a/pkg/manager/task_stash_box_tag.go +++ b/pkg/manager/task_stash_box_tag.go @@ -44,7 +44,7 @@ func (t *StashBoxPerformerTagTask) stashBoxPerformerTag(ctx context.Context) { if t.refresh { var performerID string - txnErr := t.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + txnErr := t.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { stashids, _ := r.Performer().GetStashIDs(t.performer.ID) for _, id := range stashids { if id.Endpoint == t.box.Endpoint { @@ -57,7 +57,7 @@ func (t *StashBoxPerformerTagTask) stashBoxPerformerTag(ctx context.Context) { logger.Warnf("error while executing read transaction: %v", err) } if performerID != "" { - performer, err = client.FindStashBoxPerformerByID(performerID) + performer, err = client.FindStashBoxPerformerByID(ctx, performerID) } } else { var name string @@ -66,7 +66,7 @@ func (t *StashBoxPerformerTagTask) stashBoxPerformerTag(ctx context.Context) { } else { name = t.performer.Name.String } - performer, err = client.FindStashBoxPerformerByName(name) + performer, err = client.FindStashBoxPerformerByName(ctx, name) } if err != nil { diff --git a/pkg/scraper/action.go b/pkg/scraper/action.go index c49be9e2c..3f80cee29 100644 --- a/pkg/scraper/action.go +++ b/pkg/scraper/action.go @@ -25,20 +25,12 @@ func (e scraperAction) IsValid() bool { } type scraperActionImpl interface { - scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) - scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) - scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) + scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) + scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) + scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) - scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) - scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) - scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) - scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) - scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) - scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) - - scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) } func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl { diff --git a/pkg/scraper/cache.go b/pkg/scraper/cache.go index 7578317b9..4af806819 100644 --- a/pkg/scraper/cache.go +++ b/pkg/scraper/cache.go @@ -268,7 +268,7 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models return nil, fmt.Errorf("%w: cannot use scraper %s as a scene scraper", ErrNotSupported, scraperID) } - scene, err := getScene(id, c.txnManager) + scene, err := getScene(ctx, id, c.txnManager) if err != nil { return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err) } @@ -283,7 +283,7 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models return nil, fmt.Errorf("%w: cannot use scraper %s as a gallery scraper", ErrNotSupported, scraperID) } - gallery, err := getGallery(id, c.txnManager) + gallery, err := getGallery(ctx, id, c.txnManager) if err != nil { return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err) } diff --git a/pkg/scraper/group.go b/pkg/scraper/group.go index 583f4dd87..7a3620118 100644 --- a/pkg/scraper/group.go +++ b/pkg/scraper/group.go @@ -42,20 +42,6 @@ func (g group) fragmentScraper(input Input) *scraperTypeConfig { return nil } -// scrapeFragmentInput analyzes the input and calls an appropriate scraperActionImpl -func scrapeFragmentInput(ctx context.Context, input Input, s scraperActionImpl) (models.ScrapedContent, error) { - switch { - case input.Performer != nil: - return s.scrapePerformerByFragment(*input.Performer) - case input.Gallery != nil: - return s.scrapeGalleryByFragment(*input.Gallery) - case input.Scene != nil: - return s.scrapeSceneByFragment(ctx, *input.Scene) - } - - return nil, ErrNotSupported -} - func (g group) viaFragment(ctx context.Context, client *http.Client, input Input) (models.ScrapedContent, error) { stc := g.fragmentScraper(input) if stc == nil { @@ -70,7 +56,7 @@ func (g group) viaFragment(ctx context.Context, client *http.Client, input Input } s := g.config.getScraper(*stc, client, g.txnManager, g.globalConf) - return scrapeFragmentInput(ctx, input, s) + return s.scrapeByFragment(ctx, input) } func (g group) viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) { @@ -106,27 +92,12 @@ func loadUrlCandidates(c config, ty models.ScrapeContentType) []*scrapeByURLConf panic("loadUrlCandidates: unreachable") } -func scrapeByUrl(ctx context.Context, url string, s scraperActionImpl, ty models.ScrapeContentType) (models.ScrapedContent, error) { - switch ty { - case models.ScrapeContentTypePerformer: - return s.scrapePerformerByURL(ctx, url) - case models.ScrapeContentTypeScene: - return s.scrapeSceneByURL(ctx, url) - case models.ScrapeContentTypeMovie: - return s.scrapeMovieByURL(ctx, url) - case models.ScrapeContentTypeGallery: - return s.scrapeGalleryByURL(ctx, url) - } - - panic("scrapeByUrl: unreachable") -} - func (g group) viaURL(ctx context.Context, client *http.Client, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) { candidates := loadUrlCandidates(g.config, ty) for _, scraper := range candidates { if scraper.matchesURL(url) { s := g.config.getScraper(scraper.scraperTypeConfig, client, g.txnManager, g.globalConf) - ret, err := scrapeByUrl(ctx, url, s, ty) + ret, err := s.scrapeByURL(ctx, url, ty) if err != nil { return nil, err } @@ -148,30 +119,14 @@ func (g group) viaName(ctx context.Context, client *http.Client, name string, ty } s := g.config.getScraper(*g.config.PerformerByName, client, g.txnManager, g.globalConf) - performers, err := s.scrapePerformersByName(ctx, name) - if err != nil { - return nil, err - } - content := make([]models.ScrapedContent, len(performers)) - for i := range performers { - content[i] = performers[i] - } - return content, nil + return s.scrapeByName(ctx, name, ty) case models.ScrapeContentTypeScene: if g.config.SceneByName == nil { break } s := g.config.getScraper(*g.config.SceneByName, client, g.txnManager, g.globalConf) - scenes, err := s.scrapeScenesByName(ctx, name) - if err != nil { - return nil, err - } - content := make([]models.ScrapedContent, len(scenes)) - for i := range scenes { - content[i] = scenes[i] - } - return content, nil + return s.scrapeByName(ctx, name, ty) } return nil, fmt.Errorf("%w: cannot load %v by name", ErrNotSupported, ty) diff --git a/pkg/scraper/image.go b/pkg/scraper/image.go index 3954cdbaf..51e22d7f8 100644 --- a/pkg/scraper/image.go +++ b/pkg/scraper/image.go @@ -11,8 +11,8 @@ import ( "github.com/stashapp/stash/pkg/utils" ) -func setPerformerImage(ctx context.Context, client *http.Client, p *models.ScrapedPerformer, globalConfig GlobalConfig) error { - if p == nil || p.Image == nil || !strings.HasPrefix(*p.Image, "http") { +func setPerformerImage(ctx context.Context, client *http.Client, p models.ScrapedPerformer, globalConfig GlobalConfig) error { + if p.Image == nil || !strings.HasPrefix(*p.Image, "http") { // nothing to do return nil } @@ -29,9 +29,9 @@ func setPerformerImage(ctx context.Context, client *http.Client, p *models.Scrap return nil } -func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedScene, globalConfig GlobalConfig) error { +func setSceneImage(ctx context.Context, client *http.Client, s models.ScrapedScene, globalConfig GlobalConfig) error { // don't try to get the image if it doesn't appear to be a URL - if s == nil || s.Image == nil || !strings.HasPrefix(*s.Image, "http") { + if s.Image == nil || !strings.HasPrefix(*s.Image, "http") { // nothing to do return nil } @@ -46,9 +46,9 @@ func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedSc return nil } -func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error { +func setMovieFrontImage(ctx context.Context, client *http.Client, m models.ScrapedMovie, globalConfig GlobalConfig) error { // don't try to get the image if it doesn't appear to be a URL - if m == nil || m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") { + if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") { // nothing to do return nil } @@ -63,9 +63,9 @@ func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.Scra return nil } -func setMovieBackImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error { +func setMovieBackImage(ctx context.Context, client *http.Client, m models.ScrapedMovie, globalConfig GlobalConfig) error { // don't try to get the image if it doesn't appear to be a URL - if m == nil || m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") { + if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") { // nothing to do return nil } diff --git a/pkg/scraper/json.go b/pkg/scraper/json.go index d64227d84..62b165f1b 100644 --- a/pkg/scraper/json.go +++ b/pkg/scraper/json.go @@ -75,84 +75,33 @@ func (s *jsonScraper) loadURL(ctx context.Context, url string) (string, error) { return docStr, err } -func (s *jsonScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { - u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries +func (s *jsonScraper) scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) { + u := replaceURL(url, s.scraper) // allow a URL Replace for url-queries doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } q := s.getJsonQuery(doc) - return scraper.scrapePerformer(q) -} - -func (s *jsonScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) { - u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries - doc, scraper, err := s.scrapeURL(ctx, u) - if err != nil { - return nil, err + switch ty { + case models.ScrapeContentTypePerformer: + return scraper.scrapePerformer(ctx, q) + case models.ScrapeContentTypeScene: + return scraper.scrapeScene(ctx, q) + case models.ScrapeContentTypeGallery: + return scraper.scrapeGallery(ctx, q) + case models.ScrapeContentTypeMovie: + return scraper.scrapeMovie(ctx, q) } - q := s.getJsonQuery(doc) - return scraper.scrapeScene(q) + return nil, ErrNotSupported } -func (s *jsonScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { - u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries - doc, scraper, err := s.scrapeURL(ctx, u) - if err != nil { - return nil, err - } - - q := s.getJsonQuery(doc) - return scraper.scrapeGallery(q) -} - -func (s *jsonScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { - u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries - doc, scraper, err := s.scrapeURL(ctx, u) - if err != nil { - return nil, err - } - - q := s.getJsonQuery(doc) - return scraper.scrapeMovie(q) -} - -func (s *jsonScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) { +func (s *jsonScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) { scraper := s.getJsonScraper() if scraper == nil { - return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config") - } - - const placeholder = "{}" - - // replace the placeholder string with the URL-escaped name - escapedName := url.QueryEscape(name) - - url := s.scraper.QueryURL - url = strings.ReplaceAll(url, placeholder, escapedName) - - doc, err := s.loadURL(context.TODO(), url) - - if err != nil { - return nil, err - } - - q := s.getJsonQuery(doc) - return scraper.scrapePerformers(q) -} - -func (s *jsonScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { - return nil, errors.New("scrapePerformerByFragment not supported for json scraper") -} - -func (s *jsonScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) { - scraper := s.getJsonScraper() - - if scraper == nil { - return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config") + return nil, fmt.Errorf("%w: name %v", ErrNotFound, s.scraper.Scraper) } const placeholder = "{}" @@ -170,7 +119,34 @@ func (s *jsonScraper) scrapeScenesByName(ctx context.Context, name string) ([]*m } q := s.getJsonQuery(doc) - return scraper.scrapeScenes(q) + + var content []models.ScrapedContent + switch ty { + case models.ScrapeContentTypePerformer: + performers, err := scraper.scrapePerformers(ctx, q) + if err != nil { + return nil, err + } + + for _, p := range performers { + content = append(content, p) + } + + return content, nil + case models.ScrapeContentTypeScene: + scenes, err := scraper.scrapeScenes(ctx, q) + if err != nil { + return nil, err + } + + for _, s := range scenes { + content = append(content, s) + } + + return content, nil + } + + return nil, ErrNotSupported } func (s *jsonScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) { @@ -194,10 +170,21 @@ func (s *jsonScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scen } q := s.getJsonQuery(doc) - return scraper.scrapeScene(q) + return scraper.scrapeScene(ctx, q) } -func (s *jsonScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { +func (s *jsonScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) { + switch { + case input.Gallery != nil: + return nil, fmt.Errorf("%w: cannot use a json scraper as a gallery fragment scraper", ErrNotSupported) + case input.Performer != nil: + return nil, fmt.Errorf("%w: cannot use a json scraper as a performer fragment scraper", ErrNotSupported) + case input.Scene == nil: + return nil, fmt.Errorf("%w: scene input is nil", ErrNotSupported) + } + + scene := *input.Scene + // construct the URL queryURL := queryURLParametersFromScrapedScene(scene) if s.scraper.QueryURLReplacements != nil { @@ -218,7 +205,7 @@ func (s *jsonScraper) scrapeSceneByFragment(ctx context.Context, scene models.Sc } q := s.getJsonQuery(doc) - return scraper.scrapeScene(q) + return scraper.scrapeScene(ctx, q) } func (s *jsonScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) { @@ -242,11 +229,7 @@ func (s *jsonScraper) scrapeGalleryByGallery(ctx context.Context, gallery *model } q := s.getJsonQuery(doc) - return scraper.scrapeGallery(q) -} - -func (s *jsonScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { - return nil, errors.New("scrapeGalleryByFragment not supported for json scraper") + return scraper.scrapeGallery(ctx, q) } func (s *jsonScraper) getJsonQuery(doc string) *jsonQuery { @@ -281,8 +264,8 @@ func (q *jsonQuery) runQuery(selector string) ([]string, error) { return ret, nil } -func (q *jsonQuery) subScrape(value string) mappedQuery { - doc, err := q.scraper.loadURL(context.TODO(), value) +func (q *jsonQuery) subScrape(ctx context.Context, value string) mappedQuery { + doc, err := q.scraper.loadURL(ctx, value) if err != nil { logger.Warnf("Error getting URL '%s' for sub-scraper: %s", value, err.Error()) diff --git a/pkg/scraper/json_test.go b/pkg/scraper/json_test.go index 271d83235..d5430dccf 100644 --- a/pkg/scraper/json_test.go +++ b/pkg/scraper/json_test.go @@ -1,6 +1,7 @@ package scraper import ( + "context" "testing" "gopkg.in/yaml.v2" @@ -81,7 +82,7 @@ jsonScrapers: doc: json, } - scrapedPerformer, err := performerScraper.scrapePerformer(q) + scrapedPerformer, err := performerScraper.scrapePerformer(context.Background(), q) if err != nil { t.Fatalf("Error scraping performer: %s", err.Error()) } diff --git a/pkg/scraper/mapped.go b/pkg/scraper/mapped.go index 6a366ccdb..bee951c9f 100644 --- a/pkg/scraper/mapped.go +++ b/pkg/scraper/mapped.go @@ -1,6 +1,7 @@ package scraper import ( + "context" "errors" "fmt" "math" @@ -18,7 +19,7 @@ import ( type mappedQuery interface { runQuery(selector string) ([]string, error) - subScrape(value string) mappedQuery + subScrape(ctx context.Context, value string) mappedQuery } type commonMappedConfig map[string]string @@ -38,7 +39,7 @@ func (s mappedConfig) applyCommon(c commonMappedConfig, src string) string { return ret } -func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedResults { +func (s mappedConfig) process(ctx context.Context, q mappedQuery, common commonMappedConfig) mappedResults { var ret mappedResults for k, attrConfig := range s { @@ -57,7 +58,7 @@ func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedRe } if len(found) > 0 { - result := s.postProcess(q, attrConfig, found) + result := s.postProcess(ctx, q, attrConfig, found) for i, text := range result { ret = ret.setKey(i, k, text) } @@ -68,12 +69,12 @@ func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedRe return ret } -func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrConfig, found []string) []string { +func (s mappedConfig) postProcess(ctx context.Context, q mappedQuery, attrConfig mappedScraperAttrConfig, found []string) []string { // check if we're concatenating the results into a single result var ret []string if attrConfig.hasConcat() { result := attrConfig.concatenateResults(found) - result = attrConfig.postProcess(result, q) + result = attrConfig.postProcess(ctx, result, q) if attrConfig.hasSplit() { results := attrConfig.splitString(result) results = attrConfig.cleanResults(results) @@ -83,7 +84,7 @@ func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrCon ret = []string{result} } else { for _, text := range found { - text = attrConfig.postProcess(text, q) + text = attrConfig.postProcess(ctx, text, q) if attrConfig.hasSplit() { return attrConfig.splitString(text) } @@ -359,12 +360,12 @@ func (c mappedRegexConfigs) apply(value string) string { } type postProcessAction interface { - Apply(value string, q mappedQuery) string + Apply(ctx context.Context, value string, q mappedQuery) string } type postProcessParseDate string -func (p *postProcessParseDate) Apply(value string, q mappedQuery) string { +func (p *postProcessParseDate) Apply(ctx context.Context, value string, q mappedQuery) string { parseDate := string(*p) const internalDateFormat = "2006-01-02" @@ -396,7 +397,7 @@ func (p *postProcessParseDate) Apply(value string, q mappedQuery) string { type postProcessSubtractDays bool -func (p *postProcessSubtractDays) Apply(value string, q mappedQuery) string { +func (p *postProcessSubtractDays) Apply(ctx context.Context, value string, q mappedQuery) string { const internalDateFormat = "2006-01-02" i, err := strconv.Atoi(value) @@ -412,18 +413,18 @@ func (p *postProcessSubtractDays) Apply(value string, q mappedQuery) string { type postProcessReplace mappedRegexConfigs -func (c *postProcessReplace) Apply(value string, q mappedQuery) string { +func (c *postProcessReplace) Apply(ctx context.Context, value string, q mappedQuery) string { replace := mappedRegexConfigs(*c) return replace.apply(value) } type postProcessSubScraper mappedScraperAttrConfig -func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string { +func (p *postProcessSubScraper) Apply(ctx context.Context, value string, q mappedQuery) string { subScrapeConfig := mappedScraperAttrConfig(*p) logger.Debugf("Sub-scraping for: %s", value) - ss := q.subScrape(value) + ss := q.subScrape(ctx, value) if ss != nil { found, err := ss.runQuery(subScrapeConfig.Selector) @@ -440,7 +441,7 @@ func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string { result = found[0] } - result = subScrapeConfig.postProcess(result, ss) + result = subScrapeConfig.postProcess(ctx, result, ss) return result } } @@ -450,7 +451,7 @@ func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string { type postProcessMap map[string]string -func (p *postProcessMap) Apply(value string, q mappedQuery) string { +func (p *postProcessMap) Apply(ctx context.Context, value string, q mappedQuery) string { // return the mapped value if present m := *p mapped, ok := m[value] @@ -464,7 +465,7 @@ func (p *postProcessMap) Apply(value string, q mappedQuery) string { type postProcessFeetToCm bool -func (p *postProcessFeetToCm) Apply(value string, q mappedQuery) string { +func (p *postProcessFeetToCm) Apply(ctx context.Context, value string, q mappedQuery) string { const foot_in_cm = 30.48 const inch_in_cm = 2.54 @@ -488,7 +489,7 @@ func (p *postProcessFeetToCm) Apply(value string, q mappedQuery) string { type postProcessLbToKg bool -func (p *postProcessLbToKg) Apply(value string, q mappedQuery) string { +func (p *postProcessLbToKg) Apply(ctx context.Context, value string, q mappedQuery) string { const lb_in_kg = 0.45359237 w, err := strconv.ParseFloat(value, 64) if err == nil { @@ -690,9 +691,9 @@ func (c mappedScraperAttrConfig) splitString(value string) []string { return res } -func (c mappedScraperAttrConfig) postProcess(value string, q mappedQuery) string { +func (c mappedScraperAttrConfig) postProcess(ctx context.Context, value string, q mappedQuery) string { for _, action := range c.postProcessActions { - value = action.Apply(value, q) + value = action.Apply(ctx, value, q) } return value @@ -748,7 +749,7 @@ func (r mappedResults) setKey(index int, key string, value string) mappedResults return r } -func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer, error) { +func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*models.ScrapedPerformer, error) { var ret models.ScrapedPerformer performerMap := s.Performer @@ -758,14 +759,14 @@ func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer, performerTagsMap := performerMap.Tags - results := performerMap.process(q, s.Common) + results := performerMap.process(ctx, q, s.Common) if len(results) > 0 { results[0].apply(&ret) // now apply the tags if performerTagsMap != nil { logger.Debug(`Processing performer tags:`) - tagResults := performerTagsMap.process(q, s.Common) + tagResults := performerTagsMap.process(ctx, q, s.Common) for _, p := range tagResults { tag := &models.ScrapedTag{} @@ -778,7 +779,7 @@ func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer, return &ret, nil } -func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerformer, error) { +func (s mappedScraper) scrapePerformers(ctx context.Context, q mappedQuery) ([]*models.ScrapedPerformer, error) { var ret []*models.ScrapedPerformer performerMap := s.Performer @@ -786,7 +787,7 @@ func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerform return nil, nil } - results := performerMap.process(q, s.Common) + results := performerMap.process(ctx, q, s.Common) for _, r := range results { var p models.ScrapedPerformer r.apply(&p) @@ -796,7 +797,7 @@ func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerform return ret, nil } -func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.ScrapedScene { +func (s mappedScraper) processScene(ctx context.Context, q mappedQuery, r mappedResult) *models.ScrapedScene { var ret models.ScrapedScene sceneScraperConfig := s.Scene @@ -813,13 +814,13 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap // process performer tags once var performerTagResults mappedResults if scenePerformerTagsMap != nil { - performerTagResults = scenePerformerTagsMap.process(q, s.Common) + performerTagResults = scenePerformerTagsMap.process(ctx, q, s.Common) } // now apply the performers and tags if scenePerformersMap.mappedConfig != nil { logger.Debug(`Processing scene performers:`) - performerResults := scenePerformersMap.process(q, s.Common) + performerResults := scenePerformersMap.process(ctx, q, s.Common) for _, p := range performerResults { performer := &models.ScrapedPerformer{} @@ -837,7 +838,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap if sceneTagsMap != nil { logger.Debug(`Processing scene tags:`) - tagResults := sceneTagsMap.process(q, s.Common) + tagResults := sceneTagsMap.process(ctx, q, s.Common) for _, p := range tagResults { tag := &models.ScrapedTag{} @@ -848,7 +849,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap if sceneStudioMap != nil { logger.Debug(`Processing scene studio:`) - studioResults := sceneStudioMap.process(q, s.Common) + studioResults := sceneStudioMap.process(ctx, q, s.Common) if len(studioResults) > 0 { studio := &models.ScrapedStudio{} @@ -859,7 +860,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap if sceneMoviesMap != nil { logger.Debug(`Processing scene movies:`) - movieResults := sceneMoviesMap.process(q, s.Common) + movieResults := sceneMoviesMap.process(ctx, q, s.Common) for _, p := range movieResults { movie := &models.ScrapedMovie{} @@ -871,7 +872,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap return &ret } -func (s mappedScraper) scrapeScenes(q mappedQuery) ([]*models.ScrapedScene, error) { +func (s mappedScraper) scrapeScenes(ctx context.Context, q mappedQuery) ([]*models.ScrapedScene, error) { var ret []*models.ScrapedScene sceneScraperConfig := s.Scene @@ -881,16 +882,16 @@ func (s mappedScraper) scrapeScenes(q mappedQuery) ([]*models.ScrapedScene, erro } logger.Debug(`Processing scenes:`) - results := sceneMap.process(q, s.Common) + results := sceneMap.process(ctx, q, s.Common) for _, r := range results { logger.Debug(`Processing scene:`) - ret = append(ret, s.processScene(q, r)) + ret = append(ret, s.processScene(ctx, q, r)) } return ret, nil } -func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error) { +func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.ScrapedScene, error) { var ret models.ScrapedScene sceneScraperConfig := s.Scene @@ -900,16 +901,16 @@ func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error) } logger.Debug(`Processing scene:`) - results := sceneMap.process(q, s.Common) + results := sceneMap.process(ctx, q, s.Common) if len(results) > 0 { - ss := s.processScene(q, results[0]) + ss := s.processScene(ctx, q, results[0]) ret = *ss } return &ret, nil } -func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, error) { +func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*models.ScrapedGallery, error) { var ret models.ScrapedGallery galleryScraperConfig := s.Gallery @@ -923,14 +924,14 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err galleryStudioMap := galleryScraperConfig.Studio logger.Debug(`Processing gallery:`) - results := galleryMap.process(q, s.Common) + results := galleryMap.process(ctx, q, s.Common) if len(results) > 0 { results[0].apply(&ret) // now apply the performers and tags if galleryPerformersMap != nil { logger.Debug(`Processing gallery performers:`) - performerResults := galleryPerformersMap.process(q, s.Common) + performerResults := galleryPerformersMap.process(ctx, q, s.Common) for _, p := range performerResults { performer := &models.ScrapedPerformer{} @@ -941,7 +942,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err if galleryTagsMap != nil { logger.Debug(`Processing gallery tags:`) - tagResults := galleryTagsMap.process(q, s.Common) + tagResults := galleryTagsMap.process(ctx, q, s.Common) for _, p := range tagResults { tag := &models.ScrapedTag{} @@ -952,7 +953,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err if galleryStudioMap != nil { logger.Debug(`Processing gallery studio:`) - studioResults := galleryStudioMap.process(q, s.Common) + studioResults := galleryStudioMap.process(ctx, q, s.Common) if len(studioResults) > 0 { studio := &models.ScrapedStudio{} @@ -965,7 +966,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err return &ret, nil } -func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error) { +func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.ScrapedMovie, error) { var ret models.ScrapedMovie movieScraperConfig := s.Movie @@ -976,13 +977,13 @@ func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error) movieStudioMap := movieScraperConfig.Studio - results := movieMap.process(q, s.Common) + results := movieMap.process(ctx, q, s.Common) if len(results) > 0 { results[0].apply(&ret) if movieStudioMap != nil { logger.Debug(`Processing movie studio:`) - studioResults := movieStudioMap.process(q, s.Common) + studioResults := movieStudioMap.process(ctx, q, s.Common) if len(studioResults) > 0 { studio := &models.ScrapedStudio{} diff --git a/pkg/scraper/mapped_test.go b/pkg/scraper/mapped_test.go index 193847ca4..3213a824e 100644 --- a/pkg/scraper/mapped_test.go +++ b/pkg/scraper/mapped_test.go @@ -1,6 +1,7 @@ package scraper import ( + "context" "testing" "github.com/stretchr/testify/assert" @@ -15,7 +16,7 @@ performerByURL: xPathScrapers: performerScraper: performer: - Name: + Name: selector: //div/a/@href postProcess: - parseDate: Jan 2, 2006 @@ -55,6 +56,6 @@ func TestFeetToCM(t *testing.T) { q := &xpathQuery{} for _, test := range feetToCMTests { - assert.Equal(t, test.out, pp.Apply(test.in, q)) + assert.Equal(t, test.out, pp.Apply(context.Background(), test.in, q)) } } diff --git a/pkg/scraper/postprocessing.go b/pkg/scraper/postprocessing.go index d1542e5e8..323ada752 100644 --- a/pkg/scraper/postprocessing.go +++ b/pkg/scraper/postprocessing.go @@ -18,36 +18,44 @@ func (c Cache) postScrape(ctx context.Context, content models.ScrapedContent) (m // Analyze the concrete type, call the right post-processing function switch v := content.(type) { case *models.ScrapedPerformer: - return c.postScrapePerformer(ctx, v) + if v != nil { + return c.postScrapePerformer(ctx, *v) + } case models.ScrapedPerformer: - return c.postScrapePerformer(ctx, &v) + return c.postScrapePerformer(ctx, v) case *models.ScrapedScene: - return c.postScrapeScene(ctx, v) + if v != nil { + return c.postScrapeScene(ctx, *v) + } case models.ScrapedScene: - return c.postScrapeScene(ctx, &v) + return c.postScrapeScene(ctx, v) case *models.ScrapedGallery: - return c.postScrapeGallery(ctx, v) + if v != nil { + return c.postScrapeGallery(ctx, *v) + } case models.ScrapedGallery: - return c.postScrapeGallery(ctx, &v) + return c.postScrapeGallery(ctx, v) case *models.ScrapedMovie: - return c.postScrapeMovie(ctx, v) + if v != nil { + return c.postScrapeMovie(ctx, *v) + } case models.ScrapedMovie: - return c.postScrapeMovie(ctx, &v) + return c.postScrapeMovie(ctx, v) } // If nothing matches, pass the content through return content, nil } -func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerformer) (models.ScrapedContent, error) { +func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer) (models.ScrapedContent, error) { if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { tqb := r.Tag() - tags, err := postProcessTags(tqb, ret.Tags) + tags, err := postProcessTags(tqb, p.Tags) if err != nil { return err } - ret.Tags = tags + p.Tags = tags return nil }); err != nil { @@ -55,42 +63,42 @@ func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerfo } // post-process - set the image if applicable - if err := setPerformerImage(ctx, c.client, ret, c.globalConfig); err != nil { - logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error()) + if err := setPerformerImage(ctx, c.client, p, c.globalConfig); err != nil { + logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error()) } - return ret, nil + return p, nil } -func (c Cache) postScrapeMovie(ctx context.Context, ret *models.ScrapedMovie) (models.ScrapedContent, error) { - if ret.Studio != nil { +func (c Cache) postScrapeMovie(ctx context.Context, m models.ScrapedMovie) (models.ScrapedContent, error) { + if m.Studio != nil { if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { - return match.ScrapedStudio(r.Studio(), ret.Studio, nil) + return match.ScrapedStudio(r.Studio(), m.Studio, nil) }); err != nil { return nil, err } } // post-process - set the image if applicable - if err := setMovieFrontImage(ctx, c.client, ret, c.globalConfig); err != nil { - logger.Warnf("could not set front image using URL %s: %v", *ret.FrontImage, err) + if err := setMovieFrontImage(ctx, c.client, m, c.globalConfig); err != nil { + logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err) } - if err := setMovieBackImage(ctx, c.client, ret, c.globalConfig); err != nil { - logger.Warnf("could not set back image using URL %s: %v", *ret.BackImage, err) + if err := setMovieBackImage(ctx, c.client, m, c.globalConfig); err != nil { + logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err) } - return ret, nil + return m, nil } -func (c Cache) postScrapeScenePerformer(ctx context.Context, ret *models.ScrapedPerformer) error { +func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPerformer) error { if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { tqb := r.Tag() - tags, err := postProcessTags(tqb, ret.Tags) + tags, err := postProcessTags(tqb, p.Tags) if err != nil { return err } - ret.Tags = tags + p.Tags = tags return nil }); err != nil { @@ -100,15 +108,19 @@ func (c Cache) postScrapeScenePerformer(ctx context.Context, ret *models.Scraped return nil } -func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) (models.ScrapedContent, error) { +func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (models.ScrapedContent, error) { if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { pqb := r.Performer() mqb := r.Movie() tqb := r.Tag() sqb := r.Studio() - for _, p := range ret.Performers { - if err := c.postScrapeScenePerformer(ctx, p); err != nil { + for _, p := range scene.Performers { + if p == nil { + continue + } + + if err := c.postScrapeScenePerformer(ctx, *p); err != nil { return err } @@ -117,21 +129,21 @@ func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) (m } } - for _, p := range ret.Movies { + for _, p := range scene.Movies { err := match.ScrapedMovie(mqb, p) if err != nil { return err } } - tags, err := postProcessTags(tqb, ret.Tags) + tags, err := postProcessTags(tqb, scene.Tags) if err != nil { return err } - ret.Tags = tags + scene.Tags = tags - if ret.Studio != nil { - err := match.ScrapedStudio(sqb, ret.Studio, nil) + if scene.Studio != nil { + err := match.ScrapedStudio(sqb, scene.Studio, nil) if err != nil { return err } @@ -143,34 +155,34 @@ func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) (m } // post-process - set the image if applicable - if err := setSceneImage(ctx, c.client, ret, c.globalConfig); err != nil { - logger.Warnf("Could not set image using URL %s: %v", *ret.Image, err) + if err := setSceneImage(ctx, c.client, scene, c.globalConfig); err != nil { + logger.Warnf("Could not set image using URL %s: %v", *scene.Image, err) } - return ret, nil + return scene, nil } -func (c Cache) postScrapeGallery(ctx context.Context, ret *models.ScrapedGallery) (models.ScrapedContent, error) { +func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (models.ScrapedContent, error) { if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { pqb := r.Performer() tqb := r.Tag() sqb := r.Studio() - for _, p := range ret.Performers { + for _, p := range g.Performers { err := match.ScrapedPerformer(pqb, p, nil) if err != nil { return err } } - tags, err := postProcessTags(tqb, ret.Tags) + tags, err := postProcessTags(tqb, g.Tags) if err != nil { return err } - ret.Tags = tags + g.Tags = tags - if ret.Studio != nil { - err := match.ScrapedStudio(sqb, ret.Studio, nil) + if g.Studio != nil { + err := match.ScrapedStudio(sqb, g.Studio, nil) if err != nil { return err } @@ -181,7 +193,7 @@ func (c Cache) postScrapeGallery(ctx context.Context, ret *models.ScrapedGallery return nil, err } - return ret, nil + return g, nil } func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) { diff --git a/pkg/scraper/script.go b/pkg/scraper/script.go index 59ab29f9f..cb6900ace 100644 --- a/pkg/scraper/script.go +++ b/pkg/scraper/script.go @@ -14,6 +14,8 @@ import ( "github.com/stashapp/stash/pkg/models" ) +var ErrScraperScript = errors.New("scraper script error") + type scriptScraper struct { scraper scraperTypeConfig config config @@ -74,62 +76,119 @@ func (s *scriptScraper) runScraperScript(inString string, out interface{}) error logger.Debugf("Scraper script <%s> started", strings.Join(cmd.Args, " ")) // TODO - add a timeout here - decodeErr := json.NewDecoder(stdout).Decode(out) - if decodeErr != nil { - logger.Error("could not unmarshal json: " + decodeErr.Error()) - return errors.New("could not unmarshal json: " + decodeErr.Error()) + // Make a copy of stdout here. This allows us to decode it twice. + var sb strings.Builder + tr := io.TeeReader(stdout, &sb) + + // First, perform a decode where unknown fields are disallowed. + d := json.NewDecoder(tr) + d.DisallowUnknownFields() + strictErr := d.Decode(out) + + if strictErr != nil { + // The decode failed for some reason, use the built string + // and allow unknown fields in the decode. + s := sb.String() + lenientErr := json.NewDecoder(strings.NewReader(s)).Decode(out) + if lenientErr != nil { + // The error is genuine, so return it + logger.Errorf("could not unmarshal json from script output: %v", lenientErr) + return fmt.Errorf("could not unmarshal json from script output: %w", lenientErr) + } + + // Lenient decode succeeded, print a warning, but use the decode + logger.Warnf("reading script result: %v", strictErr) } err = cmd.Wait() logger.Debugf("Scraper script finished") if err != nil { - return errors.New("error running scraper script") + return fmt.Errorf("%w: %v", ErrScraperScript, err) } return nil } -func (s *scriptScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) { - inString := `{"name": "` + name + `"}` +func (s *scriptScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) { + input := `{"name": "` + name + `"}` - var performers []models.ScrapedPerformer - - err := s.runScraperScript(inString, &performers) - - // convert to pointers - var ret []*models.ScrapedPerformer - if err == nil { - for i := 0; i < len(performers); i++ { - ret = append(ret, &performers[i]) + var ret []models.ScrapedContent + var err error + switch ty { + case models.ScrapeContentTypePerformer: + var performers []models.ScrapedPerformer + err = s.runScraperScript(input, &performers) + if err == nil { + for _, p := range performers { + v := p + ret = append(ret, &v) + } } + case models.ScrapeContentTypeScene: + var scenes []models.ScrapedScene + err = s.runScraperScript(input, &scenes) + if err == nil { + for _, s := range scenes { + v := s + ret = append(ret, &v) + } + } + default: + return nil, ErrNotSupported } return ret, err } -func (s *scriptScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { - inString, err := json.Marshal(scrapedPerformer) +func (s *scriptScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) { + var inString []byte + var err error + var ty models.ScrapeContentType + switch { + case input.Performer != nil: + inString, err = json.Marshal(*input.Performer) + ty = models.ScrapeContentTypePerformer + case input.Gallery != nil: + inString, err = json.Marshal(*input.Gallery) + ty = models.ScrapeContentTypeGallery + case input.Scene != nil: + inString, err = json.Marshal(*input.Scene) + ty = models.ScrapeContentTypeScene + } if err != nil { return nil, err } - var ret models.ScrapedPerformer - - err = s.runScraperScript(string(inString), &ret) - - return &ret, err + return s.scrape(ctx, string(inString), ty) } -func (s *scriptScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { - inString := `{"url": "` + url + `"}` +func (s *scriptScraper) scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) { + return s.scrape(ctx, `{"url": "`+url+`"}`, ty) +} - var ret models.ScrapedPerformer +func (s *scriptScraper) scrape(ctx context.Context, input string, ty models.ScrapeContentType) (models.ScrapedContent, error) { + switch ty { + case models.ScrapeContentTypePerformer: + var performer models.ScrapedPerformer + err := s.runScraperScript(input, &performer) + return &performer, err + case models.ScrapeContentTypeGallery: + var gallery models.ScrapedGallery + err := s.runScraperScript(input, &gallery) + return &gallery, err + case models.ScrapeContentTypeScene: + var scene models.ScrapedScene + err := s.runScraperScript(input, &scene) + return &scene, err + case models.ScrapeContentTypeMovie: + var movie models.ScrapedMovie + err := s.runScraperScript(input, &movie) + return &movie, err + } - err := s.runScraperScript(string(inString), &ret) - - return &ret, err + return nil, ErrNotSupported } func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) { @@ -146,38 +205,6 @@ func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sc return &ret, err } -func (s *scriptScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) { - inString := `{"name": "` + name + `"}` - - var scenes []models.ScrapedScene - - err := s.runScraperScript(inString, &scenes) - - // convert to pointers - var ret []*models.ScrapedScene - if err == nil { - for i := 0; i < len(scenes); i++ { - ret = append(ret, &scenes[i]) - } - } - - return ret, err -} - -func (s *scriptScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { - inString, err := json.Marshal(scene) - - if err != nil { - return nil, err - } - - var ret models.ScrapedScene - - err = s.runScraperScript(string(inString), &ret) - - return &ret, err -} - func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) { inString, err := json.Marshal(galleryToUpdateInput(gallery)) @@ -192,50 +219,6 @@ func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mod return &ret, err } -func (s *scriptScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { - inString, err := json.Marshal(gallery) - - if err != nil { - return nil, err - } - - var ret models.ScrapedGallery - - err = s.runScraperScript(string(inString), &ret) - - return &ret, err -} - -func (s *scriptScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) { - inString := `{"url": "` + url + `"}` - - var ret models.ScrapedScene - - err := s.runScraperScript(string(inString), &ret) - - return &ret, err -} - -func (s *scriptScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { - inString := `{"url": "` + url + `"}` - - var ret models.ScrapedGallery - - err := s.runScraperScript(string(inString), &ret) - - return &ret, err -} - -func (s *scriptScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { - inString := `{"url": "` + url + `"}` - - var ret models.ScrapedMovie - - err := s.runScraperScript(string(inString), &ret) - - return &ret, err -} - func findPythonExecutable() (string, error) { _, err := exec.LookPath("python3") diff --git a/pkg/scraper/stash.go b/pkg/scraper/stash.go index be7502c3b..8193f2a67 100644 --- a/pkg/scraper/stash.go +++ b/pkg/scraper/stash.go @@ -3,7 +3,7 @@ package scraper import ( "context" "database/sql" - "errors" + "fmt" "net/http" "strconv" @@ -54,37 +54,6 @@ type stashFindPerformerNamesResultType struct { Performers []*stashFindPerformerNamePerformer `graphql:"performers"` } -func (s *stashScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) { - client := s.getStashClient() - - var q struct { - FindPerformers stashFindPerformerNamesResultType `graphql:"findPerformers(filter: $f)"` - } - - page := 1 - perPage := 10 - - vars := map[string]interface{}{ - "f": models.FindFilterType{ - Q: &name, - Page: &page, - PerPage: &perPage, - }, - } - - err := client.Query(ctx, &q, vars) - if err != nil { - return nil, err - } - - var ret []*models.ScrapedPerformer - for _, p := range q.FindPerformers.Performers { - ret = append(ret, p.toPerformer()) - } - - return ret, nil -} - // need a separate for scraped stash performers - does not include remote_site_id or image type scrapedTagStash struct { Name string `graphql:"name" json:"name"` @@ -114,7 +83,17 @@ type scrapedPerformerStash struct { Weight *string `graphql:"weight" json:"weight"` } -func (s *stashScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { +func (s *stashScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) { + if input.Gallery != nil || input.Scene != nil { + return nil, fmt.Errorf("%w: using stash scraper as a fragment scraper", ErrNotSupported) + } + + if input.Performer == nil { + return nil, fmt.Errorf("%w: the given performer is nil", ErrNotSupported) + } + + scrapedPerformer := input.Performer + client := s.getStashClient() var q struct { @@ -128,7 +107,7 @@ func (s *stashScraper) scrapePerformerByFragment(scrapedPerformer models.Scraped "f": performerID, } - err := client.Query(context.TODO(), &q, vars) + err := client.Query(ctx, &q, vars) if err != nil { return nil, err } @@ -141,7 +120,7 @@ func (s *stashScraper) scrapePerformerByFragment(scrapedPerformer models.Scraped } // get the performer image directly - ret.Image, err = getStashPerformerImage(context.TODO(), s.config.StashServer.URL, performerID, s.client, s.globalConfig) + ret.Image, err = getStashPerformerImage(ctx, s.config.StashServer.URL, performerID, s.client, s.globalConfig) if err != nil { return nil, err } @@ -159,7 +138,7 @@ type stashFindSceneNamesResultType struct { Scenes []*scrapedSceneStash `graphql:"scenes"` } -func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash) (*models.ScrapedScene, error) { +func (s *stashScraper) scrapedStashSceneToScrapedScene(ctx context.Context, scene *scrapedSceneStash) (*models.ScrapedScene, error) { ret := models.ScrapedScene{} err := copier.Copy(&ret, scene) if err != nil { @@ -167,7 +146,7 @@ func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash) } // get the performer image directly - ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, scene.ID, s.client, s.globalConfig) + ret.Image, err = getStashSceneImage(ctx, s.config.StashServer.URL, scene.ID, s.client, s.globalConfig) if err != nil { return nil, err } @@ -175,13 +154,9 @@ func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash) return &ret, nil } -func (s *stashScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) { +func (s *stashScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) { client := s.getStashClient() - var q struct { - FindScenes stashFindSceneNamesResultType `graphql:"findScenes(filter: $f)"` - } - page := 1 perPage := 10 @@ -193,21 +168,45 @@ func (s *stashScraper) scrapeScenesByName(ctx context.Context, name string) ([]* }, } - err := client.Query(ctx, &q, vars) - if err != nil { - return nil, err - } + var ret []models.ScrapedContent + switch ty { + case models.ScrapeContentTypeScene: + var q struct { + FindScenes stashFindSceneNamesResultType `graphql:"findScenes(filter: $f)"` + } - var ret []*models.ScrapedScene - for _, scene := range q.FindScenes.Scenes { - converted, err := s.scrapedStashSceneToScrapedScene(scene) + err := client.Query(ctx, &q, vars) if err != nil { return nil, err } - ret = append(ret, converted) + + for _, scene := range q.FindScenes.Scenes { + converted, err := s.scrapedStashSceneToScrapedScene(ctx, scene) + if err != nil { + return nil, err + } + ret = append(ret, converted) + } + + return ret, nil + case models.ScrapeContentTypePerformer: + var q struct { + FindPerformers stashFindPerformerNamesResultType `graphql:"findPerformers(filter: $f)"` + } + + err := client.Query(ctx, &q, vars) + if err != nil { + return nil, err + } + + for _, p := range q.FindPerformers.Performers { + ret = append(ret, p.toPerformer()) + } + + return ret, nil } - return ret, nil + return nil, ErrNotSupported } type scrapedSceneStash struct { @@ -248,13 +247,13 @@ func (s *stashScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sce } // need to copy back to a scraped scene - ret, err := s.scrapedStashSceneToScrapedScene(q.FindScene) + ret, err := s.scrapedStashSceneToScrapedScene(ctx, q.FindScene) if err != nil { return nil, err } // get the performer image directly - ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, q.FindScene.ID, s.client, s.globalConfig) + ret.Image, err = getStashSceneImage(ctx, s.config.StashServer.URL, q.FindScene.ID, s.client, s.globalConfig) if err != nil { return nil, err } @@ -262,10 +261,6 @@ func (s *stashScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sce return ret, nil } -func (s *stashScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { - return nil, errors.New("scrapeSceneByFragment not supported for stash scraper") -} - type scrapedGalleryStash struct { ID string `graphql:"id" json:"id"` Title *string `graphql:"title" json:"title"` @@ -309,29 +304,13 @@ func (s *stashScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mode return &ret, nil } -func (s *stashScraper) scrapeGalleryByFragment(scene models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { - return nil, errors.New("scrapeGalleryByFragment not supported for stash scraper") +func (s *stashScraper) scrapeByURL(_ context.Context, _ string, _ models.ScrapeContentType) (models.ScrapedContent, error) { + return nil, ErrNotSupported } -func (s *stashScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { - return nil, errors.New("scrapePerformerByURL not supported for stash scraper") -} - -func (s *stashScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) { - return nil, errors.New("scrapeSceneByURL not supported for stash scraper") -} - -func (s *stashScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { - return nil, errors.New("scrapeGalleryByURL not supported for stash scraper") -} - -func (s *stashScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { - return nil, errors.New("scrapeMovieByURL not supported for stash scraper") -} - -func getScene(sceneID int, txnManager models.TransactionManager) (*models.Scene, error) { +func getScene(ctx context.Context, sceneID int, txnManager models.TransactionManager) (*models.Scene, error) { var ret *models.Scene - if err := txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + if err := txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { var err error ret, err = r.Scene().Find(sceneID) return err @@ -367,9 +346,9 @@ func sceneToUpdateInput(scene *models.Scene) models.SceneUpdateInput { } } -func getGallery(galleryID int, txnManager models.TransactionManager) (*models.Gallery, error) { +func getGallery(ctx context.Context, galleryID int, txnManager models.TransactionManager) (*models.Gallery, error) { var ret *models.Gallery - if err := txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + if err := txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { var err error ret, err = r.Gallery().Find(galleryID) return err diff --git a/pkg/scraper/stashbox/stash_box.go b/pkg/scraper/stashbox/stash_box.go index cd4638809..812b45aae 100644 --- a/pkg/scraper/stashbox/stash_box.go +++ b/pkg/scraper/stashbox/stash_box.go @@ -56,7 +56,7 @@ func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*mod var ret []*models.ScrapedScene for _, s := range sceneFragments { - ss, err := c.sceneFragmentToScrapedScene(context.TODO(), s) + ss, err := c.sceneFragmentToScrapedScene(ctx, s) if err != nil { return nil, err } @@ -69,9 +69,7 @@ func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*mod // FindStashBoxScenesByFingerprints queries stash-box for scenes using every // scene's MD5/OSHASH checksum, or PHash, and returns results in the same order // as the input slice. -func (c Client) FindStashBoxScenesByFingerprints(sceneIDs []string) ([][]*models.ScrapedScene, error) { - ctx := context.TODO() - +func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs []string) ([][]*models.ScrapedScene, error) { ids, err := utils.StringSliceToIntSlice(sceneIDs) if err != nil { return nil, err @@ -150,9 +148,7 @@ func (c Client) FindStashBoxScenesByFingerprints(sceneIDs []string) ([][]*models // FindStashBoxScenesByFingerprintsFlat queries stash-box for scenes using every // scene's MD5/OSHASH checksum, or PHash, and returns results a flat slice. -func (c Client) FindStashBoxScenesByFingerprintsFlat(sceneIDs []string) ([]*models.ScrapedScene, error) { - ctx := context.TODO() - +func (c Client) FindStashBoxScenesByFingerprintsFlat(ctx context.Context, sceneIDs []string) ([]*models.ScrapedScene, error) { ids, err := utils.StringSliceToIntSlice(sceneIDs) if err != nil { return nil, err @@ -230,7 +226,7 @@ func (c Client) findStashBoxScenesByFingerprints(ctx context.Context, fingerprin return ret, nil } -func (c Client) SubmitStashBoxFingerprints(sceneIDs []string, endpoint string) (bool, error) { +func (c Client) SubmitStashBoxFingerprints(ctx context.Context, sceneIDs []string, endpoint string) (bool, error) { ids, err := utils.StringSliceToIntSlice(sceneIDs) if err != nil { return false, err @@ -238,7 +234,7 @@ func (c Client) SubmitStashBoxFingerprints(sceneIDs []string, endpoint string) ( var fingerprints []graphql.FingerprintSubmission - if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { qb := r.Scene() for _, sceneID := range ids { @@ -307,12 +303,12 @@ func (c Client) SubmitStashBoxFingerprints(sceneIDs []string, endpoint string) ( return false, err } - return c.submitStashBoxFingerprints(fingerprints) + return c.submitStashBoxFingerprints(ctx, fingerprints) } -func (c Client) submitStashBoxFingerprints(fingerprints []graphql.FingerprintSubmission) (bool, error) { +func (c Client) submitStashBoxFingerprints(ctx context.Context, fingerprints []graphql.FingerprintSubmission) (bool, error) { for _, fingerprint := range fingerprints { - _, err := c.client.SubmitFingerprint(context.TODO(), fingerprint) + _, err := c.client.SubmitFingerprint(ctx, fingerprint) if err != nil { return false, err } @@ -322,8 +318,8 @@ func (c Client) submitStashBoxFingerprints(fingerprints []graphql.FingerprintSub } // QueryStashBoxPerformer queries stash-box for performers using a query string. -func (c Client) QueryStashBoxPerformer(queryStr string) ([]*models.StashBoxPerformerQueryResult, error) { - performers, err := c.queryStashBoxPerformer(queryStr) +func (c Client) QueryStashBoxPerformer(ctx context.Context, queryStr string) ([]*models.StashBoxPerformerQueryResult, error) { + performers, err := c.queryStashBoxPerformer(ctx, queryStr) res := []*models.StashBoxPerformerQueryResult{ { @@ -342,8 +338,8 @@ func (c Client) QueryStashBoxPerformer(queryStr string) ([]*models.StashBoxPerfo return res, err } -func (c Client) queryStashBoxPerformer(queryStr string) ([]*models.ScrapedPerformer, error) { - performers, err := c.client.SearchPerformer(context.TODO(), queryStr) +func (c Client) queryStashBoxPerformer(ctx context.Context, queryStr string) ([]*models.ScrapedPerformer, error) { + performers, err := c.client.SearchPerformer(ctx, queryStr) if err != nil { return nil, err } @@ -360,7 +356,7 @@ func (c Client) queryStashBoxPerformer(queryStr string) ([]*models.ScrapedPerfor } // FindStashBoxPerformersByNames queries stash-box for performers by name -func (c Client) FindStashBoxPerformersByNames(performerIDs []string) ([]*models.StashBoxPerformerQueryResult, error) { +func (c Client) FindStashBoxPerformersByNames(ctx context.Context, performerIDs []string) ([]*models.StashBoxPerformerQueryResult, error) { ids, err := utils.StringSliceToIntSlice(performerIDs) if err != nil { return nil, err @@ -368,7 +364,7 @@ func (c Client) FindStashBoxPerformersByNames(performerIDs []string) ([]*models. var performers []*models.Performer - if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { qb := r.Performer() for _, performerID := range ids { @@ -391,10 +387,10 @@ func (c Client) FindStashBoxPerformersByNames(performerIDs []string) ([]*models. return nil, err } - return c.findStashBoxPerformersByNames(performers) + return c.findStashBoxPerformersByNames(ctx, performers) } -func (c Client) FindStashBoxPerformersByPerformerNames(performerIDs []string) ([][]*models.ScrapedPerformer, error) { +func (c Client) FindStashBoxPerformersByPerformerNames(ctx context.Context, performerIDs []string) ([][]*models.ScrapedPerformer, error) { ids, err := utils.StringSliceToIntSlice(performerIDs) if err != nil { return nil, err @@ -402,7 +398,7 @@ func (c Client) FindStashBoxPerformersByPerformerNames(performerIDs []string) ([ var performers []*models.Performer - if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { qb := r.Performer() for _, performerID := range ids { @@ -425,7 +421,7 @@ func (c Client) FindStashBoxPerformersByPerformerNames(performerIDs []string) ([ return nil, err } - results, err := c.findStashBoxPerformersByNames(performers) + results, err := c.findStashBoxPerformersByNames(ctx, performers) if err != nil { return nil, err } @@ -438,11 +434,11 @@ func (c Client) FindStashBoxPerformersByPerformerNames(performerIDs []string) ([ return ret, nil } -func (c Client) findStashBoxPerformersByNames(performers []*models.Performer) ([]*models.StashBoxPerformerQueryResult, error) { +func (c Client) findStashBoxPerformersByNames(ctx context.Context, performers []*models.Performer) ([]*models.StashBoxPerformerQueryResult, error) { var ret []*models.StashBoxPerformerQueryResult for _, performer := range performers { if performer.Name.Valid { - performerResults, err := c.queryStashBoxPerformer(performer.Name.String) + performerResults, err := c.queryStashBoxPerformer(ctx, performer.Name.String) if err != nil { return nil, err } @@ -705,8 +701,8 @@ func (c Client) sceneFragmentToScrapedScene(ctx context.Context, s *graphql.Scen return ss, nil } -func (c Client) FindStashBoxPerformerByID(id string) (*models.ScrapedPerformer, error) { - performer, err := c.client.FindPerformerByID(context.TODO(), id) +func (c Client) FindStashBoxPerformerByID(ctx context.Context, id string) (*models.ScrapedPerformer, error) { + performer, err := c.client.FindPerformerByID(ctx, id) if err != nil { return nil, err } @@ -715,8 +711,8 @@ func (c Client) FindStashBoxPerformerByID(id string) (*models.ScrapedPerformer, return ret, nil } -func (c Client) FindStashBoxPerformerByName(name string) (*models.ScrapedPerformer, error) { - performers, err := c.client.SearchPerformer(context.TODO(), name) +func (c Client) FindStashBoxPerformerByName(ctx context.Context, name string) (*models.ScrapedPerformer, error) { + performers, err := c.client.SearchPerformer(ctx, name) if err != nil { return nil, err } diff --git a/pkg/scraper/url.go b/pkg/scraper/url.go index b07722d3f..10a160f40 100644 --- a/pkg/scraper/url.go +++ b/pkg/scraper/url.go @@ -99,8 +99,6 @@ func urlFromCDP(ctx context.Context, url string, driverOptions scraperDriverOpti sleepDuration = time.Duration(driverOptions.Sleep) * time.Second } - act := context.TODO() - // if scraperCDPPath is a remote address, then allocate accordingly cdpPath := globalConfig.GetScraperCDPPath() if cdpPath != "" { @@ -118,7 +116,7 @@ func urlFromCDP(ctx context.Context, url string, driverOptions scraperDriverOpti } } - act, cancelAct = chromedp.NewRemoteAllocator(act, remote) + ctx, cancelAct = chromedp.NewRemoteAllocator(ctx, remote) } else { // use a temporary user directory for chrome dir, err := os.MkdirTemp("", "stash-chromedp") @@ -131,13 +129,13 @@ func urlFromCDP(ctx context.Context, url string, driverOptions scraperDriverOpti chromedp.UserDataDir(dir), chromedp.ExecPath(cdpPath), ) - act, cancelAct = chromedp.NewExecAllocator(act, opts...) + ctx, cancelAct = chromedp.NewExecAllocator(ctx, opts...) } defer cancelAct() } - ctx, cancel := chromedp.NewContext(act) + ctx, cancel := chromedp.NewContext(ctx) defer cancel() // add a fixed timeout for the http request diff --git a/pkg/scraper/xpath.go b/pkg/scraper/xpath.go index ce4ba60ce..289adec72 100644 --- a/pkg/scraper/xpath.go +++ b/pkg/scraper/xpath.go @@ -56,7 +56,7 @@ func (s *xpathScraper) scrapeURL(ctx context.Context, url string) (*html.Node, * return doc, scraper, nil } -func (s *xpathScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { +func (s *xpathScraper) scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { @@ -64,47 +64,25 @@ func (s *xpathScraper) scrapePerformerByURL(ctx context.Context, url string) (*m } q := s.getXPathQuery(doc) - return scraper.scrapePerformer(q) -} - -func (s *xpathScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) { - u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries - doc, scraper, err := s.scrapeURL(ctx, u) - if err != nil { - return nil, err + switch ty { + case models.ScrapeContentTypePerformer: + return scraper.scrapePerformer(ctx, q) + case models.ScrapeContentTypeScene: + return scraper.scrapeScene(ctx, q) + case models.ScrapeContentTypeGallery: + return scraper.scrapeGallery(ctx, q) + case models.ScrapeContentTypeMovie: + return scraper.scrapeMovie(ctx, q) } - q := s.getXPathQuery(doc) - return scraper.scrapeScene(q) + return nil, ErrNotSupported } -func (s *xpathScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { - u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries - doc, scraper, err := s.scrapeURL(ctx, u) - if err != nil { - return nil, err - } - - q := s.getXPathQuery(doc) - return scraper.scrapeGallery(q) -} - -func (s *xpathScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { - u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries - doc, scraper, err := s.scrapeURL(ctx, u) - if err != nil { - return nil, err - } - - q := s.getXPathQuery(doc) - return scraper.scrapeMovie(q) -} - -func (s *xpathScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) { +func (s *xpathScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) { scraper := s.getXpathScraper() if scraper == nil { - return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config") + return nil, fmt.Errorf("%w: name %v", ErrNotFound, s.scraper.Scraper) } const placeholder = "{}" @@ -122,36 +100,32 @@ func (s *xpathScraper) scrapePerformersByName(ctx context.Context, name string) } q := s.getXPathQuery(doc) - return scraper.scrapePerformers(q) -} -func (s *xpathScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { - return nil, errors.New("scrapePerformerByFragment not supported for xpath scraper") -} + var content []models.ScrapedContent + switch ty { + case models.ScrapeContentTypePerformer: + performers, err := scraper.scrapePerformers(ctx, q) + if err != nil { + return nil, err + } + for _, p := range performers { + content = append(content, p) + } -func (s *xpathScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) { - scraper := s.getXpathScraper() + return content, nil + case models.ScrapeContentTypeScene: + scenes, err := scraper.scrapeScenes(ctx, q) + if err != nil { + return nil, err + } + for _, s := range scenes { + content = append(content, s) + } - if scraper == nil { - return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config") + return content, nil } - const placeholder = "{}" - - // replace the placeholder string with the URL-escaped name - escapedName := url.QueryEscape(name) - - url := s.scraper.QueryURL - url = strings.ReplaceAll(url, placeholder, escapedName) - - doc, err := s.loadURL(ctx, url) - - if err != nil { - return nil, err - } - - q := s.getXPathQuery(doc) - return scraper.scrapeScenes(q) + return nil, ErrNotSupported } func (s *xpathScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) { @@ -175,10 +149,21 @@ func (s *xpathScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sce } q := s.getXPathQuery(doc) - return scraper.scrapeScene(q) + return scraper.scrapeScene(ctx, q) } -func (s *xpathScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { +func (s *xpathScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) { + switch { + case input.Gallery != nil: + return nil, fmt.Errorf("%w: cannot use an xpath scraper as a gallery fragment scraper", ErrNotSupported) + case input.Performer != nil: + return nil, fmt.Errorf("%w: cannot use an xpath scraper as a performer fragment scraper", ErrNotSupported) + case input.Scene == nil: + return nil, fmt.Errorf("%w: scene input is nil", ErrNotSupported) + } + + scene := *input.Scene + // construct the URL queryURL := queryURLParametersFromScrapedScene(scene) if s.scraper.QueryURLReplacements != nil { @@ -199,7 +184,7 @@ func (s *xpathScraper) scrapeSceneByFragment(ctx context.Context, scene models.S } q := s.getXPathQuery(doc) - return scraper.scrapeScene(q) + return scraper.scrapeScene(ctx, q) } func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) { @@ -223,11 +208,7 @@ func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mode } q := s.getXPathQuery(doc) - return scraper.scrapeGallery(q) -} - -func (s *xpathScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { - return nil, errors.New("scrapeGalleryByFragment not supported for xpath scraper") + return scraper.scrapeGallery(ctx, q) } func (s *xpathScraper) loadURL(ctx context.Context, url string) (*html.Node, error) { @@ -301,8 +282,8 @@ func (q *xpathQuery) nodeText(n *html.Node) string { return ret } -func (q *xpathQuery) subScrape(value string) mappedQuery { - doc, err := q.scraper.loadURL(context.TODO(), value) +func (q *xpathQuery) subScrape(ctx context.Context, value string) mappedQuery { + doc, err := q.scraper.loadURL(ctx, value) if err != nil { logger.Warnf("Error getting URL '%s' for sub-scraper: %s", value, err.Error()) diff --git a/pkg/scraper/xpath_test.go b/pkg/scraper/xpath_test.go index 9393ea3c7..315f2bc8f 100644 --- a/pkg/scraper/xpath_test.go +++ b/pkg/scraper/xpath_test.go @@ -313,7 +313,7 @@ func TestScrapePerformerXPath(t *testing.T) { doc: doc, } - performer, err := scraper.scrapePerformer(q) + performer, err := scraper.scrapePerformer(context.Background(), q) if err != nil { t.Errorf("Error scraping performer: %s", err.Error()) @@ -408,7 +408,7 @@ func TestConcatXPath(t *testing.T) { doc: doc, } - performer, err := scraper.scrapePerformer(q) + performer, err := scraper.scrapePerformer(context.Background(), q) if err != nil { t.Errorf("Error scraping performer: %s", err.Error()) @@ -682,7 +682,7 @@ func TestApplySceneXPathConfig(t *testing.T) { q := &xpathQuery{ doc: doc, } - scene, err := scraper.scrapeScene(q) + scene, err := scraper.scrapeScene(context.Background(), q) if err != nil { t.Errorf("Error scraping scene: %s", err.Error()) @@ -805,7 +805,7 @@ func TestLoadInvalidXPath(t *testing.T) { doc: doc, } - config.process(q, nil) + config.process(context.Background(), q, nil) } type mockGlobalConfig struct{}