Scraper refactor middle (#2043)

* Push scrapeByURL into scrapers

Replace ScrapePerformerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.

* Fold name scraping into one call

Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.

* Do not export loop refs.

* Simplify fragment scraping

Generalize fragment scrapers into ScrapeByFragment. This reduces the
fragment code flows to a simpler path, which should be easier
to handle in the future.

* Eliminate more context.TODO()

In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.

* Pass the context for the stashbox scraper

This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.

* Pass the context into subscrapers

Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.

* Warn on unknown fields from scripts

A common mistake for new script writers is that they return fields
not known to stash. For instance, the name "description" is used rather
than "details".

Decode with unknown fields disallowed. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fail the stricter unknown-fields detection.

* Nil-check before running the postprocessing chain

Fixes panics when scraping returns nil values.

* Lift nil-ness in post-postprocessing

If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.

* Allow conversion routines to handle values

If we have a non-pointer type in the interface, we should also convert
it into ScrapedContent. Otherwise we get errors on deprecated
functions.
This commit is contained in:
SmallCoccinelle 2021-11-26 01:20:06 +01:00 committed by GitHub
parent 19e69f5310
commit 4089fcf1e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 442 additions and 548 deletions

View file

@ -20,7 +20,7 @@ func (r *mutationResolver) SubmitStashBoxFingerprints(ctx context.Context, input
client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager)
return client.SubmitStashBoxFingerprints(input.SceneIds, boxes[input.StashBoxIndex].Endpoint)
return client.SubmitStashBoxFingerprints(ctx, input.SceneIds, boxes[input.StashBoxIndex].Endpoint)
}
func (r *mutationResolver) StashBoxBatchPerformerTag(ctx context.Context, input models.StashBoxBatchPerformerTagInput) (string, error) {

View file

@ -167,7 +167,7 @@ func (r *queryResolver) QueryStashBoxScene(ctx context.Context, input models.Sta
client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager)
if len(input.SceneIds) > 0 {
return client.FindStashBoxScenesByFingerprintsFlat(input.SceneIds)
return client.FindStashBoxScenesByFingerprintsFlat(ctx, input.SceneIds)
}
if input.Q != nil {
@ -187,11 +187,11 @@ func (r *queryResolver) QueryStashBoxPerformer(ctx context.Context, input models
client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager)
if len(input.PerformerIds) > 0 {
return client.FindStashBoxPerformersByNames(input.PerformerIds)
return client.FindStashBoxPerformersByNames(ctx, input.PerformerIds)
}
if input.Q != nil {
return client.QueryStashBoxPerformer(*input.Q)
return client.QueryStashBoxPerformer(ctx, *input.Q)
}
return nil, nil
@ -243,7 +243,7 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
}
if input.SceneID != nil {
return client.FindStashBoxScenesByFingerprintsFlat([]string{*input.SceneID})
return client.FindStashBoxScenesByFingerprintsFlat(ctx, []string{*input.SceneID})
} else if input.Query != nil {
return client.QueryStashBoxScene(ctx, *input.Query)
}
@ -263,7 +263,7 @@ func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.Scr
return nil, err
}
return client.FindStashBoxScenesByFingerprints(input.SceneIds)
return client.FindStashBoxScenesByFingerprints(ctx, input.SceneIds)
}
return nil, errors.New("scraper_id or stash_box_index must be set")
@ -299,9 +299,9 @@ func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source models
var ret []*models.StashBoxPerformerQueryResult
switch {
case input.PerformerID != nil:
ret, err = client.FindStashBoxPerformersByNames([]string{*input.PerformerID})
ret, err = client.FindStashBoxPerformersByNames(ctx, []string{*input.PerformerID})
case input.Query != nil:
ret, err = client.QueryStashBoxPerformer(*input.Query)
ret, err = client.QueryStashBoxPerformer(ctx, *input.Query)
default:
return nil, ErrNotImplemented
}
@ -329,7 +329,7 @@ func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source models
return nil, err
}
return client.FindStashBoxPerformersByPerformerNames(input.PerformerIds)
return client.FindStashBoxPerformersByPerformerNames(ctx, input.PerformerIds)
}
return nil, errors.New("scraper_id or stash_box_index must be set")

View file

@ -16,9 +16,12 @@ func marshalScrapedScenes(content []models.ScrapedContent) ([]*models.ScrapedSce
continue
}
if s, ok := c.(*models.ScrapedScene); ok {
switch s := c.(type) {
case *models.ScrapedScene:
ret = append(ret, s)
} else {
case models.ScrapedScene:
ret = append(ret, &s)
default:
return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedScene", models.ErrConversion)
}
}
@ -36,9 +39,12 @@ func marshalScrapedPerformers(content []models.ScrapedContent) ([]*models.Scrape
continue
}
if p, ok := c.(*models.ScrapedPerformer); ok {
switch p := c.(type) {
case *models.ScrapedPerformer:
ret = append(ret, p)
} else {
case models.ScrapedPerformer:
ret = append(ret, &p)
default:
return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedPerformer", models.ErrConversion)
}
}
@ -56,9 +62,12 @@ func marshalScrapedGalleries(content []models.ScrapedContent) ([]*models.Scraped
continue
}
if g, ok := c.(*models.ScrapedGallery); ok {
switch g := c.(type) {
case *models.ScrapedGallery:
ret = append(ret, g)
} else {
case models.ScrapedGallery:
ret = append(ret, &g)
default:
return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedGallery", models.ErrConversion)
}
}
@ -76,9 +85,12 @@ func marshalScrapedMovies(content []models.ScrapedContent) ([]*models.ScrapedMov
continue
}
if m, ok := c.(*models.ScrapedMovie); ok {
switch m := c.(type) {
case *models.ScrapedMovie:
ret = append(ret, m)
} else {
case models.ScrapedMovie:
ret = append(ret, &m)
default:
return nil, fmt.Errorf("%w: cannot turn ScrapedConetnt into ScrapedMovie", models.ErrConversion)
}
}

View file

@ -211,8 +211,8 @@ type stashboxSource struct {
endpoint string
}
func (s stashboxSource) ScrapeScene(_ context.Context, sceneID int) (*models.ScrapedScene, error) {
results, err := s.FindStashBoxScenesByFingerprintsFlat([]string{strconv.Itoa(sceneID)})
func (s stashboxSource) ScrapeScene(ctx context.Context, sceneID int) (*models.ScrapedScene, error) {
results, err := s.FindStashBoxScenesByFingerprintsFlat(ctx, []string{strconv.Itoa(sceneID)})
if err != nil {
return nil, fmt.Errorf("error querying stash-box using scene ID %d: %w", sceneID, err)
}

View file

@ -44,7 +44,7 @@ func (t *StashBoxPerformerTagTask) stashBoxPerformerTag(ctx context.Context) {
if t.refresh {
var performerID string
txnErr := t.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
txnErr := t.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
stashids, _ := r.Performer().GetStashIDs(t.performer.ID)
for _, id := range stashids {
if id.Endpoint == t.box.Endpoint {
@ -57,7 +57,7 @@ func (t *StashBoxPerformerTagTask) stashBoxPerformerTag(ctx context.Context) {
logger.Warnf("error while executing read transaction: %v", err)
}
if performerID != "" {
performer, err = client.FindStashBoxPerformerByID(performerID)
performer, err = client.FindStashBoxPerformerByID(ctx, performerID)
}
} else {
var name string
@ -66,7 +66,7 @@ func (t *StashBoxPerformerTagTask) stashBoxPerformerTag(ctx context.Context) {
} else {
name = t.performer.Name.String
}
performer, err = client.FindStashBoxPerformerByName(name)
performer, err = client.FindStashBoxPerformerByName(ctx, name)
}
if err != nil {

View file

@ -25,20 +25,12 @@ func (e scraperAction) IsValid() bool {
}
type scraperActionImpl interface {
scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error)
scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error)
scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error)
scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error)
scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error)
scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error)
scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error)
scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error)
scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error)
scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error)
scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error)
scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error)
scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error)
}
func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl {

View file

@ -268,7 +268,7 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
return nil, fmt.Errorf("%w: cannot use scraper %s as a scene scraper", ErrNotSupported, scraperID)
}
scene, err := getScene(id, c.txnManager)
scene, err := getScene(ctx, id, c.txnManager)
if err != nil {
return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err)
}
@ -283,7 +283,7 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
return nil, fmt.Errorf("%w: cannot use scraper %s as a gallery scraper", ErrNotSupported, scraperID)
}
gallery, err := getGallery(id, c.txnManager)
gallery, err := getGallery(ctx, id, c.txnManager)
if err != nil {
return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err)
}

View file

@ -42,20 +42,6 @@ func (g group) fragmentScraper(input Input) *scraperTypeConfig {
return nil
}
// scrapeFragmentInput analyzes the input and calls an appropriate scraperActionImpl
func scrapeFragmentInput(ctx context.Context, input Input, s scraperActionImpl) (models.ScrapedContent, error) {
switch {
case input.Performer != nil:
return s.scrapePerformerByFragment(*input.Performer)
case input.Gallery != nil:
return s.scrapeGalleryByFragment(*input.Gallery)
case input.Scene != nil:
return s.scrapeSceneByFragment(ctx, *input.Scene)
}
return nil, ErrNotSupported
}
func (g group) viaFragment(ctx context.Context, client *http.Client, input Input) (models.ScrapedContent, error) {
stc := g.fragmentScraper(input)
if stc == nil {
@ -70,7 +56,7 @@ func (g group) viaFragment(ctx context.Context, client *http.Client, input Input
}
s := g.config.getScraper(*stc, client, g.txnManager, g.globalConf)
return scrapeFragmentInput(ctx, input, s)
return s.scrapeByFragment(ctx, input)
}
func (g group) viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) {
@ -106,27 +92,12 @@ func loadUrlCandidates(c config, ty models.ScrapeContentType) []*scrapeByURLConf
panic("loadUrlCandidates: unreachable")
}
func scrapeByUrl(ctx context.Context, url string, s scraperActionImpl, ty models.ScrapeContentType) (models.ScrapedContent, error) {
switch ty {
case models.ScrapeContentTypePerformer:
return s.scrapePerformerByURL(ctx, url)
case models.ScrapeContentTypeScene:
return s.scrapeSceneByURL(ctx, url)
case models.ScrapeContentTypeMovie:
return s.scrapeMovieByURL(ctx, url)
case models.ScrapeContentTypeGallery:
return s.scrapeGalleryByURL(ctx, url)
}
panic("scrapeByUrl: unreachable")
}
func (g group) viaURL(ctx context.Context, client *http.Client, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
candidates := loadUrlCandidates(g.config, ty)
for _, scraper := range candidates {
if scraper.matchesURL(url) {
s := g.config.getScraper(scraper.scraperTypeConfig, client, g.txnManager, g.globalConf)
ret, err := scrapeByUrl(ctx, url, s, ty)
ret, err := s.scrapeByURL(ctx, url, ty)
if err != nil {
return nil, err
}
@ -148,30 +119,14 @@ func (g group) viaName(ctx context.Context, client *http.Client, name string, ty
}
s := g.config.getScraper(*g.config.PerformerByName, client, g.txnManager, g.globalConf)
performers, err := s.scrapePerformersByName(ctx, name)
if err != nil {
return nil, err
}
content := make([]models.ScrapedContent, len(performers))
for i := range performers {
content[i] = performers[i]
}
return content, nil
return s.scrapeByName(ctx, name, ty)
case models.ScrapeContentTypeScene:
if g.config.SceneByName == nil {
break
}
s := g.config.getScraper(*g.config.SceneByName, client, g.txnManager, g.globalConf)
scenes, err := s.scrapeScenesByName(ctx, name)
if err != nil {
return nil, err
}
content := make([]models.ScrapedContent, len(scenes))
for i := range scenes {
content[i] = scenes[i]
}
return content, nil
return s.scrapeByName(ctx, name, ty)
}
return nil, fmt.Errorf("%w: cannot load %v by name", ErrNotSupported, ty)

View file

@ -11,8 +11,8 @@ import (
"github.com/stashapp/stash/pkg/utils"
)
func setPerformerImage(ctx context.Context, client *http.Client, p *models.ScrapedPerformer, globalConfig GlobalConfig) error {
if p == nil || p.Image == nil || !strings.HasPrefix(*p.Image, "http") {
func setPerformerImage(ctx context.Context, client *http.Client, p models.ScrapedPerformer, globalConfig GlobalConfig) error {
if p.Image == nil || !strings.HasPrefix(*p.Image, "http") {
// nothing to do
return nil
}
@ -29,9 +29,9 @@ func setPerformerImage(ctx context.Context, client *http.Client, p *models.Scrap
return nil
}
func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedScene, globalConfig GlobalConfig) error {
func setSceneImage(ctx context.Context, client *http.Client, s models.ScrapedScene, globalConfig GlobalConfig) error {
// don't try to get the image if it doesn't appear to be a URL
if s == nil || s.Image == nil || !strings.HasPrefix(*s.Image, "http") {
if s.Image == nil || !strings.HasPrefix(*s.Image, "http") {
// nothing to do
return nil
}
@ -46,9 +46,9 @@ func setSceneImage(ctx context.Context, client *http.Client, s *models.ScrapedSc
return nil
}
func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
func setMovieFrontImage(ctx context.Context, client *http.Client, m models.ScrapedMovie, globalConfig GlobalConfig) error {
// don't try to get the image if it doesn't appear to be a URL
if m == nil || m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
if m.FrontImage == nil || !strings.HasPrefix(*m.FrontImage, "http") {
// nothing to do
return nil
}
@ -63,9 +63,9 @@ func setMovieFrontImage(ctx context.Context, client *http.Client, m *models.Scra
return nil
}
func setMovieBackImage(ctx context.Context, client *http.Client, m *models.ScrapedMovie, globalConfig GlobalConfig) error {
func setMovieBackImage(ctx context.Context, client *http.Client, m models.ScrapedMovie, globalConfig GlobalConfig) error {
// don't try to get the image if it doesn't appear to be a URL
if m == nil || m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
if m.BackImage == nil || !strings.HasPrefix(*m.BackImage, "http") {
// nothing to do
return nil
}

View file

@ -75,84 +75,33 @@ func (s *jsonScraper) loadURL(ctx context.Context, url string) (string, error) {
return docStr, err
}
func (s *jsonScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries
func (s *jsonScraper) scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for url-queries
doc, scraper, err := s.scrapeURL(ctx, u)
if err != nil {
return nil, err
}
q := s.getJsonQuery(doc)
return scraper.scrapePerformer(q)
}
func (s *jsonScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries
doc, scraper, err := s.scrapeURL(ctx, u)
if err != nil {
return nil, err
switch ty {
case models.ScrapeContentTypePerformer:
return scraper.scrapePerformer(ctx, q)
case models.ScrapeContentTypeScene:
return scraper.scrapeScene(ctx, q)
case models.ScrapeContentTypeGallery:
return scraper.scrapeGallery(ctx, q)
case models.ScrapeContentTypeMovie:
return scraper.scrapeMovie(ctx, q)
}
q := s.getJsonQuery(doc)
return scraper.scrapeScene(q)
return nil, ErrNotSupported
}
func (s *jsonScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries
doc, scraper, err := s.scrapeURL(ctx, u)
if err != nil {
return nil, err
}
q := s.getJsonQuery(doc)
return scraper.scrapeGallery(q)
}
func (s *jsonScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries
doc, scraper, err := s.scrapeURL(ctx, u)
if err != nil {
return nil, err
}
q := s.getJsonQuery(doc)
return scraper.scrapeMovie(q)
}
func (s *jsonScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) {
func (s *jsonScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) {
scraper := s.getJsonScraper()
if scraper == nil {
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
}
const placeholder = "{}"
// replace the placeholder string with the URL-escaped name
escapedName := url.QueryEscape(name)
url := s.scraper.QueryURL
url = strings.ReplaceAll(url, placeholder, escapedName)
doc, err := s.loadURL(context.TODO(), url)
if err != nil {
return nil, err
}
q := s.getJsonQuery(doc)
return scraper.scrapePerformers(q)
}
func (s *jsonScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
return nil, errors.New("scrapePerformerByFragment not supported for json scraper")
}
func (s *jsonScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) {
scraper := s.getJsonScraper()
if scraper == nil {
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
return nil, fmt.Errorf("%w: name %v", ErrNotFound, s.scraper.Scraper)
}
const placeholder = "{}"
@ -170,7 +119,34 @@ func (s *jsonScraper) scrapeScenesByName(ctx context.Context, name string) ([]*m
}
q := s.getJsonQuery(doc)
return scraper.scrapeScenes(q)
var content []models.ScrapedContent
switch ty {
case models.ScrapeContentTypePerformer:
performers, err := scraper.scrapePerformers(ctx, q)
if err != nil {
return nil, err
}
for _, p := range performers {
content = append(content, p)
}
return content, nil
case models.ScrapeContentTypeScene:
scenes, err := scraper.scrapeScenes(ctx, q)
if err != nil {
return nil, err
}
for _, s := range scenes {
content = append(content, s)
}
return content, nil
}
return nil, ErrNotSupported
}
func (s *jsonScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
@ -194,10 +170,21 @@ func (s *jsonScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scen
}
q := s.getJsonQuery(doc)
return scraper.scrapeScene(q)
return scraper.scrapeScene(ctx, q)
}
func (s *jsonScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
func (s *jsonScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) {
switch {
case input.Gallery != nil:
return nil, fmt.Errorf("%w: cannot use a json scraper as a gallery fragment scraper", ErrNotSupported)
case input.Performer != nil:
return nil, fmt.Errorf("%w: cannot use a json scraper as a performer fragment scraper", ErrNotSupported)
case input.Scene == nil:
return nil, fmt.Errorf("%w: scene input is nil", ErrNotSupported)
}
scene := *input.Scene
// construct the URL
queryURL := queryURLParametersFromScrapedScene(scene)
if s.scraper.QueryURLReplacements != nil {
@ -218,7 +205,7 @@ func (s *jsonScraper) scrapeSceneByFragment(ctx context.Context, scene models.Sc
}
q := s.getJsonQuery(doc)
return scraper.scrapeScene(q)
return scraper.scrapeScene(ctx, q)
}
func (s *jsonScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
@ -242,11 +229,7 @@ func (s *jsonScraper) scrapeGalleryByGallery(ctx context.Context, gallery *model
}
q := s.getJsonQuery(doc)
return scraper.scrapeGallery(q)
}
func (s *jsonScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
return nil, errors.New("scrapeGalleryByFragment not supported for json scraper")
return scraper.scrapeGallery(ctx, q)
}
func (s *jsonScraper) getJsonQuery(doc string) *jsonQuery {
@ -281,8 +264,8 @@ func (q *jsonQuery) runQuery(selector string) ([]string, error) {
return ret, nil
}
func (q *jsonQuery) subScrape(value string) mappedQuery {
doc, err := q.scraper.loadURL(context.TODO(), value)
func (q *jsonQuery) subScrape(ctx context.Context, value string) mappedQuery {
doc, err := q.scraper.loadURL(ctx, value)
if err != nil {
logger.Warnf("Error getting URL '%s' for sub-scraper: %s", value, err.Error())

View file

@ -1,6 +1,7 @@
package scraper
import (
"context"
"testing"
"gopkg.in/yaml.v2"
@ -81,7 +82,7 @@ jsonScrapers:
doc: json,
}
scrapedPerformer, err := performerScraper.scrapePerformer(q)
scrapedPerformer, err := performerScraper.scrapePerformer(context.Background(), q)
if err != nil {
t.Fatalf("Error scraping performer: %s", err.Error())
}

View file

@ -1,6 +1,7 @@
package scraper
import (
"context"
"errors"
"fmt"
"math"
@ -18,7 +19,7 @@ import (
type mappedQuery interface {
runQuery(selector string) ([]string, error)
subScrape(value string) mappedQuery
subScrape(ctx context.Context, value string) mappedQuery
}
type commonMappedConfig map[string]string
@ -38,7 +39,7 @@ func (s mappedConfig) applyCommon(c commonMappedConfig, src string) string {
return ret
}
func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedResults {
func (s mappedConfig) process(ctx context.Context, q mappedQuery, common commonMappedConfig) mappedResults {
var ret mappedResults
for k, attrConfig := range s {
@ -57,7 +58,7 @@ func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedRe
}
if len(found) > 0 {
result := s.postProcess(q, attrConfig, found)
result := s.postProcess(ctx, q, attrConfig, found)
for i, text := range result {
ret = ret.setKey(i, k, text)
}
@ -68,12 +69,12 @@ func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedRe
return ret
}
func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrConfig, found []string) []string {
func (s mappedConfig) postProcess(ctx context.Context, q mappedQuery, attrConfig mappedScraperAttrConfig, found []string) []string {
// check if we're concatenating the results into a single result
var ret []string
if attrConfig.hasConcat() {
result := attrConfig.concatenateResults(found)
result = attrConfig.postProcess(result, q)
result = attrConfig.postProcess(ctx, result, q)
if attrConfig.hasSplit() {
results := attrConfig.splitString(result)
results = attrConfig.cleanResults(results)
@ -83,7 +84,7 @@ func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrCon
ret = []string{result}
} else {
for _, text := range found {
text = attrConfig.postProcess(text, q)
text = attrConfig.postProcess(ctx, text, q)
if attrConfig.hasSplit() {
return attrConfig.splitString(text)
}
@ -359,12 +360,12 @@ func (c mappedRegexConfigs) apply(value string) string {
}
type postProcessAction interface {
Apply(value string, q mappedQuery) string
Apply(ctx context.Context, value string, q mappedQuery) string
}
type postProcessParseDate string
func (p *postProcessParseDate) Apply(value string, q mappedQuery) string {
func (p *postProcessParseDate) Apply(ctx context.Context, value string, q mappedQuery) string {
parseDate := string(*p)
const internalDateFormat = "2006-01-02"
@ -396,7 +397,7 @@ func (p *postProcessParseDate) Apply(value string, q mappedQuery) string {
type postProcessSubtractDays bool
func (p *postProcessSubtractDays) Apply(value string, q mappedQuery) string {
func (p *postProcessSubtractDays) Apply(ctx context.Context, value string, q mappedQuery) string {
const internalDateFormat = "2006-01-02"
i, err := strconv.Atoi(value)
@ -412,18 +413,18 @@ func (p *postProcessSubtractDays) Apply(value string, q mappedQuery) string {
type postProcessReplace mappedRegexConfigs
func (c *postProcessReplace) Apply(value string, q mappedQuery) string {
func (c *postProcessReplace) Apply(ctx context.Context, value string, q mappedQuery) string {
replace := mappedRegexConfigs(*c)
return replace.apply(value)
}
type postProcessSubScraper mappedScraperAttrConfig
func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string {
func (p *postProcessSubScraper) Apply(ctx context.Context, value string, q mappedQuery) string {
subScrapeConfig := mappedScraperAttrConfig(*p)
logger.Debugf("Sub-scraping for: %s", value)
ss := q.subScrape(value)
ss := q.subScrape(ctx, value)
if ss != nil {
found, err := ss.runQuery(subScrapeConfig.Selector)
@ -440,7 +441,7 @@ func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string {
result = found[0]
}
result = subScrapeConfig.postProcess(result, ss)
result = subScrapeConfig.postProcess(ctx, result, ss)
return result
}
}
@ -450,7 +451,7 @@ func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string {
type postProcessMap map[string]string
func (p *postProcessMap) Apply(value string, q mappedQuery) string {
func (p *postProcessMap) Apply(ctx context.Context, value string, q mappedQuery) string {
// return the mapped value if present
m := *p
mapped, ok := m[value]
@ -464,7 +465,7 @@ func (p *postProcessMap) Apply(value string, q mappedQuery) string {
type postProcessFeetToCm bool
func (p *postProcessFeetToCm) Apply(value string, q mappedQuery) string {
func (p *postProcessFeetToCm) Apply(ctx context.Context, value string, q mappedQuery) string {
const foot_in_cm = 30.48
const inch_in_cm = 2.54
@ -488,7 +489,7 @@ func (p *postProcessFeetToCm) Apply(value string, q mappedQuery) string {
type postProcessLbToKg bool
func (p *postProcessLbToKg) Apply(value string, q mappedQuery) string {
func (p *postProcessLbToKg) Apply(ctx context.Context, value string, q mappedQuery) string {
const lb_in_kg = 0.45359237
w, err := strconv.ParseFloat(value, 64)
if err == nil {
@ -690,9 +691,9 @@ func (c mappedScraperAttrConfig) splitString(value string) []string {
return res
}
func (c mappedScraperAttrConfig) postProcess(value string, q mappedQuery) string {
func (c mappedScraperAttrConfig) postProcess(ctx context.Context, value string, q mappedQuery) string {
for _, action := range c.postProcessActions {
value = action.Apply(value, q)
value = action.Apply(ctx, value, q)
}
return value
@ -748,7 +749,7 @@ func (r mappedResults) setKey(index int, key string, value string) mappedResults
return r
}
func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer, error) {
func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*models.ScrapedPerformer, error) {
var ret models.ScrapedPerformer
performerMap := s.Performer
@ -758,14 +759,14 @@ func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer,
performerTagsMap := performerMap.Tags
results := performerMap.process(q, s.Common)
results := performerMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
// now apply the tags
if performerTagsMap != nil {
logger.Debug(`Processing performer tags:`)
tagResults := performerTagsMap.process(q, s.Common)
tagResults := performerTagsMap.process(ctx, q, s.Common)
for _, p := range tagResults {
tag := &models.ScrapedTag{}
@ -778,7 +779,7 @@ func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer,
return &ret, nil
}
func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerformer, error) {
func (s mappedScraper) scrapePerformers(ctx context.Context, q mappedQuery) ([]*models.ScrapedPerformer, error) {
var ret []*models.ScrapedPerformer
performerMap := s.Performer
@ -786,7 +787,7 @@ func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerform
return nil, nil
}
results := performerMap.process(q, s.Common)
results := performerMap.process(ctx, q, s.Common)
for _, r := range results {
var p models.ScrapedPerformer
r.apply(&p)
@ -796,7 +797,7 @@ func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerform
return ret, nil
}
func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.ScrapedScene {
func (s mappedScraper) processScene(ctx context.Context, q mappedQuery, r mappedResult) *models.ScrapedScene {
var ret models.ScrapedScene
sceneScraperConfig := s.Scene
@ -813,13 +814,13 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
// process performer tags once
var performerTagResults mappedResults
if scenePerformerTagsMap != nil {
performerTagResults = scenePerformerTagsMap.process(q, s.Common)
performerTagResults = scenePerformerTagsMap.process(ctx, q, s.Common)
}
// now apply the performers and tags
if scenePerformersMap.mappedConfig != nil {
logger.Debug(`Processing scene performers:`)
performerResults := scenePerformersMap.process(q, s.Common)
performerResults := scenePerformersMap.process(ctx, q, s.Common)
for _, p := range performerResults {
performer := &models.ScrapedPerformer{}
@ -837,7 +838,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
if sceneTagsMap != nil {
logger.Debug(`Processing scene tags:`)
tagResults := sceneTagsMap.process(q, s.Common)
tagResults := sceneTagsMap.process(ctx, q, s.Common)
for _, p := range tagResults {
tag := &models.ScrapedTag{}
@ -848,7 +849,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
if sceneStudioMap != nil {
logger.Debug(`Processing scene studio:`)
studioResults := sceneStudioMap.process(q, s.Common)
studioResults := sceneStudioMap.process(ctx, q, s.Common)
if len(studioResults) > 0 {
studio := &models.ScrapedStudio{}
@ -859,7 +860,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
if sceneMoviesMap != nil {
logger.Debug(`Processing scene movies:`)
movieResults := sceneMoviesMap.process(q, s.Common)
movieResults := sceneMoviesMap.process(ctx, q, s.Common)
for _, p := range movieResults {
movie := &models.ScrapedMovie{}
@ -871,7 +872,7 @@ func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.Scrap
return &ret
}
func (s mappedScraper) scrapeScenes(q mappedQuery) ([]*models.ScrapedScene, error) {
func (s mappedScraper) scrapeScenes(ctx context.Context, q mappedQuery) ([]*models.ScrapedScene, error) {
var ret []*models.ScrapedScene
sceneScraperConfig := s.Scene
@ -881,16 +882,16 @@ func (s mappedScraper) scrapeScenes(q mappedQuery) ([]*models.ScrapedScene, erro
}
logger.Debug(`Processing scenes:`)
results := sceneMap.process(q, s.Common)
results := sceneMap.process(ctx, q, s.Common)
for _, r := range results {
logger.Debug(`Processing scene:`)
ret = append(ret, s.processScene(q, r))
ret = append(ret, s.processScene(ctx, q, r))
}
return ret, nil
}
func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error) {
func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.ScrapedScene, error) {
var ret models.ScrapedScene
sceneScraperConfig := s.Scene
@ -900,16 +901,16 @@ func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error)
}
logger.Debug(`Processing scene:`)
results := sceneMap.process(q, s.Common)
results := sceneMap.process(ctx, q, s.Common)
if len(results) > 0 {
ss := s.processScene(q, results[0])
ss := s.processScene(ctx, q, results[0])
ret = *ss
}
return &ret, nil
}
func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, error) {
func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*models.ScrapedGallery, error) {
var ret models.ScrapedGallery
galleryScraperConfig := s.Gallery
@ -923,14 +924,14 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err
galleryStudioMap := galleryScraperConfig.Studio
logger.Debug(`Processing gallery:`)
results := galleryMap.process(q, s.Common)
results := galleryMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
// now apply the performers and tags
if galleryPerformersMap != nil {
logger.Debug(`Processing gallery performers:`)
performerResults := galleryPerformersMap.process(q, s.Common)
performerResults := galleryPerformersMap.process(ctx, q, s.Common)
for _, p := range performerResults {
performer := &models.ScrapedPerformer{}
@ -941,7 +942,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err
if galleryTagsMap != nil {
logger.Debug(`Processing gallery tags:`)
tagResults := galleryTagsMap.process(q, s.Common)
tagResults := galleryTagsMap.process(ctx, q, s.Common)
for _, p := range tagResults {
tag := &models.ScrapedTag{}
@ -952,7 +953,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err
if galleryStudioMap != nil {
logger.Debug(`Processing gallery studio:`)
studioResults := galleryStudioMap.process(q, s.Common)
studioResults := galleryStudioMap.process(ctx, q, s.Common)
if len(studioResults) > 0 {
studio := &models.ScrapedStudio{}
@ -965,7 +966,7 @@ func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, err
return &ret, nil
}
func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error) {
func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.ScrapedMovie, error) {
var ret models.ScrapedMovie
movieScraperConfig := s.Movie
@ -976,13 +977,13 @@ func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error)
movieStudioMap := movieScraperConfig.Studio
results := movieMap.process(q, s.Common)
results := movieMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
if movieStudioMap != nil {
logger.Debug(`Processing movie studio:`)
studioResults := movieStudioMap.process(q, s.Common)
studioResults := movieStudioMap.process(ctx, q, s.Common)
if len(studioResults) > 0 {
studio := &models.ScrapedStudio{}

View file

@ -1,6 +1,7 @@
package scraper
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
@ -15,7 +16,7 @@ performerByURL:
xPathScrapers:
performerScraper:
performer:
Name:
Name:
selector: //div/a/@href
postProcess:
- parseDate: Jan 2, 2006
@ -55,6 +56,6 @@ func TestFeetToCM(t *testing.T) {
q := &xpathQuery{}
for _, test := range feetToCMTests {
assert.Equal(t, test.out, pp.Apply(test.in, q))
assert.Equal(t, test.out, pp.Apply(context.Background(), test.in, q))
}
}

View file

@ -18,36 +18,44 @@ func (c Cache) postScrape(ctx context.Context, content models.ScrapedContent) (m
// Analyze the concrete type, call the right post-processing function
switch v := content.(type) {
case *models.ScrapedPerformer:
return c.postScrapePerformer(ctx, v)
if v != nil {
return c.postScrapePerformer(ctx, *v)
}
case models.ScrapedPerformer:
return c.postScrapePerformer(ctx, &v)
return c.postScrapePerformer(ctx, v)
case *models.ScrapedScene:
return c.postScrapeScene(ctx, v)
if v != nil {
return c.postScrapeScene(ctx, *v)
}
case models.ScrapedScene:
return c.postScrapeScene(ctx, &v)
return c.postScrapeScene(ctx, v)
case *models.ScrapedGallery:
return c.postScrapeGallery(ctx, v)
if v != nil {
return c.postScrapeGallery(ctx, *v)
}
case models.ScrapedGallery:
return c.postScrapeGallery(ctx, &v)
return c.postScrapeGallery(ctx, v)
case *models.ScrapedMovie:
return c.postScrapeMovie(ctx, v)
if v != nil {
return c.postScrapeMovie(ctx, *v)
}
case models.ScrapedMovie:
return c.postScrapeMovie(ctx, &v)
return c.postScrapeMovie(ctx, v)
}
// If nothing matches, pass the content through
return content, nil
}
func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerformer) (models.ScrapedContent, error) {
func (c Cache) postScrapePerformer(ctx context.Context, p models.ScrapedPerformer) (models.ScrapedContent, error) {
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
tqb := r.Tag()
tags, err := postProcessTags(tqb, ret.Tags)
tags, err := postProcessTags(tqb, p.Tags)
if err != nil {
return err
}
ret.Tags = tags
p.Tags = tags
return nil
}); err != nil {
@ -55,42 +63,42 @@ func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerfo
}
// post-process - set the image if applicable
if err := setPerformerImage(ctx, c.client, ret, c.globalConfig); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
if err := setPerformerImage(ctx, c.client, p, c.globalConfig); err != nil {
logger.Warnf("Could not set image using URL %s: %s", *p.Image, err.Error())
}
return ret, nil
return p, nil
}
func (c Cache) postScrapeMovie(ctx context.Context, ret *models.ScrapedMovie) (models.ScrapedContent, error) {
if ret.Studio != nil {
func (c Cache) postScrapeMovie(ctx context.Context, m models.ScrapedMovie) (models.ScrapedContent, error) {
if m.Studio != nil {
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
return match.ScrapedStudio(r.Studio(), ret.Studio, nil)
return match.ScrapedStudio(r.Studio(), m.Studio, nil)
}); err != nil {
return nil, err
}
}
// post-process - set the image if applicable
if err := setMovieFrontImage(ctx, c.client, ret, c.globalConfig); err != nil {
logger.Warnf("could not set front image using URL %s: %v", *ret.FrontImage, err)
if err := setMovieFrontImage(ctx, c.client, m, c.globalConfig); err != nil {
logger.Warnf("could not set front image using URL %s: %v", *m.FrontImage, err)
}
if err := setMovieBackImage(ctx, c.client, ret, c.globalConfig); err != nil {
logger.Warnf("could not set back image using URL %s: %v", *ret.BackImage, err)
if err := setMovieBackImage(ctx, c.client, m, c.globalConfig); err != nil {
logger.Warnf("could not set back image using URL %s: %v", *m.BackImage, err)
}
return ret, nil
return m, nil
}
func (c Cache) postScrapeScenePerformer(ctx context.Context, ret *models.ScrapedPerformer) error {
func (c Cache) postScrapeScenePerformer(ctx context.Context, p models.ScrapedPerformer) error {
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
tqb := r.Tag()
tags, err := postProcessTags(tqb, ret.Tags)
tags, err := postProcessTags(tqb, p.Tags)
if err != nil {
return err
}
ret.Tags = tags
p.Tags = tags
return nil
}); err != nil {
@ -100,15 +108,19 @@ func (c Cache) postScrapeScenePerformer(ctx context.Context, ret *models.Scraped
return nil
}
func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) (models.ScrapedContent, error) {
func (c Cache) postScrapeScene(ctx context.Context, scene models.ScrapedScene) (models.ScrapedContent, error) {
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
pqb := r.Performer()
mqb := r.Movie()
tqb := r.Tag()
sqb := r.Studio()
for _, p := range ret.Performers {
if err := c.postScrapeScenePerformer(ctx, p); err != nil {
for _, p := range scene.Performers {
if p == nil {
continue
}
if err := c.postScrapeScenePerformer(ctx, *p); err != nil {
return err
}
@ -117,21 +129,21 @@ func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) (m
}
}
for _, p := range ret.Movies {
for _, p := range scene.Movies {
err := match.ScrapedMovie(mqb, p)
if err != nil {
return err
}
}
tags, err := postProcessTags(tqb, ret.Tags)
tags, err := postProcessTags(tqb, scene.Tags)
if err != nil {
return err
}
ret.Tags = tags
scene.Tags = tags
if ret.Studio != nil {
err := match.ScrapedStudio(sqb, ret.Studio, nil)
if scene.Studio != nil {
err := match.ScrapedStudio(sqb, scene.Studio, nil)
if err != nil {
return err
}
@ -143,34 +155,34 @@ func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) (m
}
// post-process - set the image if applicable
if err := setSceneImage(ctx, c.client, ret, c.globalConfig); err != nil {
logger.Warnf("Could not set image using URL %s: %v", *ret.Image, err)
if err := setSceneImage(ctx, c.client, scene, c.globalConfig); err != nil {
logger.Warnf("Could not set image using URL %s: %v", *scene.Image, err)
}
return ret, nil
return scene, nil
}
func (c Cache) postScrapeGallery(ctx context.Context, ret *models.ScrapedGallery) (models.ScrapedContent, error) {
func (c Cache) postScrapeGallery(ctx context.Context, g models.ScrapedGallery) (models.ScrapedContent, error) {
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
pqb := r.Performer()
tqb := r.Tag()
sqb := r.Studio()
for _, p := range ret.Performers {
for _, p := range g.Performers {
err := match.ScrapedPerformer(pqb, p, nil)
if err != nil {
return err
}
}
tags, err := postProcessTags(tqb, ret.Tags)
tags, err := postProcessTags(tqb, g.Tags)
if err != nil {
return err
}
ret.Tags = tags
g.Tags = tags
if ret.Studio != nil {
err := match.ScrapedStudio(sqb, ret.Studio, nil)
if g.Studio != nil {
err := match.ScrapedStudio(sqb, g.Studio, nil)
if err != nil {
return err
}
@ -181,7 +193,7 @@ func (c Cache) postScrapeGallery(ctx context.Context, ret *models.ScrapedGallery
return nil, err
}
return ret, nil
return g, nil
}
func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {

View file

@ -14,6 +14,8 @@ import (
"github.com/stashapp/stash/pkg/models"
)
// ErrScraperScript is the sentinel error that runScraperScript wraps when
// waiting for the scraper script process to finish reports a failure.
var ErrScraperScript = errors.New("scraper script error")
type scriptScraper struct {
scraper scraperTypeConfig
config config
@ -74,62 +76,119 @@ func (s *scriptScraper) runScraperScript(inString string, out interface{}) error
logger.Debugf("Scraper script <%s> started", strings.Join(cmd.Args, " "))
// TODO - add a timeout here
decodeErr := json.NewDecoder(stdout).Decode(out)
if decodeErr != nil {
logger.Error("could not unmarshal json: " + decodeErr.Error())
return errors.New("could not unmarshal json: " + decodeErr.Error())
// Make a copy of stdout here. This allows us to decode it twice.
var sb strings.Builder
tr := io.TeeReader(stdout, &sb)
// First, perform a decode where unknown fields are disallowed.
d := json.NewDecoder(tr)
d.DisallowUnknownFields()
strictErr := d.Decode(out)
if strictErr != nil {
// The decode failed for some reason, use the built string
// and allow unknown fields in the decode.
s := sb.String()
lenientErr := json.NewDecoder(strings.NewReader(s)).Decode(out)
if lenientErr != nil {
// The error is genuine, so return it
logger.Errorf("could not unmarshal json from script output: %v", lenientErr)
return fmt.Errorf("could not unmarshal json from script output: %w", lenientErr)
}
// Lenient decode succeeded, print a warning, but use the decode
logger.Warnf("reading script result: %v", strictErr)
}
err = cmd.Wait()
logger.Debugf("Scraper script finished")
if err != nil {
return errors.New("error running scraper script")
return fmt.Errorf("%w: %v", ErrScraperScript, err)
}
return nil
}
func (s *scriptScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) {
inString := `{"name": "` + name + `"}`
func (s *scriptScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) {
input := `{"name": "` + name + `"}`
var performers []models.ScrapedPerformer
err := s.runScraperScript(inString, &performers)
// convert to pointers
var ret []*models.ScrapedPerformer
if err == nil {
for i := 0; i < len(performers); i++ {
ret = append(ret, &performers[i])
var ret []models.ScrapedContent
var err error
switch ty {
case models.ScrapeContentTypePerformer:
var performers []models.ScrapedPerformer
err = s.runScraperScript(input, &performers)
if err == nil {
for _, p := range performers {
v := p
ret = append(ret, &v)
}
}
case models.ScrapeContentTypeScene:
var scenes []models.ScrapedScene
err = s.runScraperScript(input, &scenes)
if err == nil {
for _, s := range scenes {
v := s
ret = append(ret, &v)
}
}
default:
return nil, ErrNotSupported
}
return ret, err
}
func (s *scriptScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
inString, err := json.Marshal(scrapedPerformer)
// scrapeByFragment serialises whichever fragment is set on input to JSON and
// hands it to scrape together with the matching content type.
//
// The fragments are checked in the order performer, gallery, scene; the first
// non-nil one wins. If none is set, ErrNotSupported is returned. A JSON
// marshalling failure is returned as-is.
func (s *scriptScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) {
	var inString []byte
	var err error
	var ty models.ScrapeContentType
	switch {
	case input.Performer != nil:
		inString, err = json.Marshal(*input.Performer)
		ty = models.ScrapeContentTypePerformer
	case input.Gallery != nil:
		inString, err = json.Marshal(*input.Gallery)
		ty = models.ScrapeContentTypeGallery
	case input.Scene != nil:
		inString, err = json.Marshal(*input.Scene)
		ty = models.ScrapeContentTypeScene
	default:
		// No fragment supplied: there is nothing to send to the script.
		return nil, ErrNotSupported
	}

	if err != nil {
		return nil, err
	}

	// Dispatch on the detected type. The result must not be decoded
	// unconditionally as a performer, since gallery and scene fragments
	// produce different result structs.
	return s.scrape(ctx, string(inString), ty)
}
func (s *scriptScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
inString := `{"url": "` + url + `"}`
// scrapeByURL asks the scraper script to scrape url and decodes the result as
// content of type ty by delegating to scrape with a {"url": "<url>"} query.
func (s *scriptScraper) scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
	// Encode the value with encoding/json instead of pasting it between
	// quotes: a quote or backslash in url would otherwise produce invalid
	// JSON. HTML escaping is disabled so characters such as '&' in query
	// strings are sent unchanged.
	var quoted strings.Builder
	enc := json.NewEncoder(&quoted)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(url); err != nil {
		return nil, err
	}
	// Encode appends a newline; drop it so the object stays on one line.
	return s.scrape(ctx, `{"url": `+strings.TrimSuffix(quoted.String(), "\n")+`}`, ty)
}
var ret models.ScrapedPerformer
// scrape passes input to runScraperScript and decodes the script output into
// the scraped-content struct that corresponds to ty.
//
// Performer, gallery, scene and movie are handled; every other content type
// returns ErrNotSupported without running the script. For the handled types
// the returned content is a non-nil pointer even when runScraperScript
// reports an error, so callers must check the error before using it.
func (s *scriptScraper) scrape(ctx context.Context, input string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
	switch ty {
	case models.ScrapeContentTypePerformer:
		var performer models.ScrapedPerformer
		err := s.runScraperScript(input, &performer)
		return &performer, err
	case models.ScrapeContentTypeGallery:
		var gallery models.ScrapedGallery
		err := s.runScraperScript(input, &gallery)
		return &gallery, err
	case models.ScrapeContentTypeScene:
		var scene models.ScrapedScene
		err := s.runScraperScript(input, &scene)
		return &scene, err
	case models.ScrapeContentTypeMovie:
		var movie models.ScrapedMovie
		err := s.runScraperScript(input, &movie)
		return &movie, err
	}

	// Reached only for content types without a case above.
	return nil, ErrNotSupported
}
func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
@ -146,38 +205,6 @@ func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sc
return &ret, err
}
// scrapeScenesByName runs the scraper script with a {"name": "<name>"} query
// and returns pointers to the scenes it produced.
//
// When runScraperScript fails, the returned slice is nil and its error is
// passed through.
func (s *scriptScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) {
	// Encode the value with encoding/json instead of pasting it between
	// quotes: a quote, backslash or control character in name would otherwise
	// produce invalid JSON. HTML escaping is disabled so that ordinary input
	// is sent byte-for-byte as before.
	var quoted strings.Builder
	enc := json.NewEncoder(&quoted)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(name); err != nil {
		return nil, err
	}
	// Encode appends a newline; drop it so the object stays on one line.
	inString := `{"name": ` + strings.TrimSuffix(quoted.String(), "\n") + `}`

	var scenes []models.ScrapedScene
	err := s.runScraperScript(inString, &scenes)

	// convert to pointers
	var ret []*models.ScrapedScene
	if err == nil {
		for i := range scenes {
			ret = append(ret, &scenes[i])
		}
	}

	return ret, err
}
// scrapeSceneByFragment marshals the scene fragment to JSON, feeds it to the
// scraper script and decodes the script output into a ScrapedScene.
//
// A marshalling failure yields a nil scene. Once the script has been run the
// returned pointer is always non-nil, so the error must be checked first.
func (s *scriptScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
	payload, marshalErr := json.Marshal(scene)
	if marshalErr != nil {
		return nil, marshalErr
	}

	scraped := new(models.ScrapedScene)
	runErr := s.runScraperScript(string(payload), scraped)
	return scraped, runErr
}
func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
inString, err := json.Marshal(galleryToUpdateInput(gallery))
@ -192,50 +219,6 @@ func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mod
return &ret, err
}
// scrapeGalleryByFragment marshals the gallery fragment to JSON, feeds it to
// the scraper script and decodes the script output into a ScrapedGallery.
//
// A marshalling failure yields a nil gallery. Once the script has been run
// the returned pointer is always non-nil, so the error must be checked first.
func (s *scriptScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
	payload, marshalErr := json.Marshal(gallery)
	if marshalErr != nil {
		return nil, marshalErr
	}

	scraped := new(models.ScrapedGallery)
	runErr := s.runScraperScript(string(payload), scraped)
	return scraped, runErr
}
// scrapeSceneByURL asks the scraper script to scrape the scene at url.
//
// The script receives a JSON object of the form {"url": "<url>"} and its
// output is decoded into a ScrapedScene. The returned pointer is non-nil even
// when runScraperScript fails, so callers must check the error first.
func (s *scriptScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
	// Encode the value with encoding/json instead of pasting it between
	// quotes: a quote or backslash in url would otherwise produce invalid
	// JSON. HTML escaping is disabled so characters such as '&' in query
	// strings are sent unchanged.
	var quoted strings.Builder
	enc := json.NewEncoder(&quoted)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(url); err != nil {
		return nil, err
	}
	// Encode appends a newline; drop it so the object stays on one line.
	inString := `{"url": ` + strings.TrimSuffix(quoted.String(), "\n") + `}`

	var ret models.ScrapedScene
	err := s.runScraperScript(inString, &ret)
	return &ret, err
}
// scrapeGalleryByURL asks the scraper script to scrape the gallery at url.
//
// The script receives a JSON object of the form {"url": "<url>"} and its
// output is decoded into a ScrapedGallery. The returned pointer is non-nil
// even when runScraperScript fails, so callers must check the error first.
func (s *scriptScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
	// Encode the value with encoding/json instead of pasting it between
	// quotes: a quote or backslash in url would otherwise produce invalid
	// JSON. HTML escaping is disabled so characters such as '&' in query
	// strings are sent unchanged.
	var quoted strings.Builder
	enc := json.NewEncoder(&quoted)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(url); err != nil {
		return nil, err
	}
	// Encode appends a newline; drop it so the object stays on one line.
	inString := `{"url": ` + strings.TrimSuffix(quoted.String(), "\n") + `}`

	var ret models.ScrapedGallery
	err := s.runScraperScript(inString, &ret)
	return &ret, err
}
// scrapeMovieByURL asks the scraper script to scrape the movie at url.
//
// The script receives a JSON object of the form {"url": "<url>"} and its
// output is decoded into a ScrapedMovie. The returned pointer is non-nil even
// when runScraperScript fails, so callers must check the error first.
func (s *scriptScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
	// Encode the value with encoding/json instead of pasting it between
	// quotes: a quote or backslash in url would otherwise produce invalid
	// JSON. HTML escaping is disabled so characters such as '&' in query
	// strings are sent unchanged.
	var quoted strings.Builder
	enc := json.NewEncoder(&quoted)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(url); err != nil {
		return nil, err
	}
	// Encode appends a newline; drop it so the object stays on one line.
	inString := `{"url": ` + strings.TrimSuffix(quoted.String(), "\n") + `}`

	var ret models.ScrapedMovie
	err := s.runScraperScript(inString, &ret)
	return &ret, err
}
func findPythonExecutable() (string, error) {
_, err := exec.LookPath("python3")

View file

@ -3,7 +3,7 @@ package scraper
import (
"context"
"database/sql"
"errors"
"fmt"
"net/http"
"strconv"
@ -54,37 +54,6 @@ type stashFindPerformerNamesResultType struct {
Performers []*stashFindPerformerNamePerformer `graphql:"performers"`
}
// scrapePerformersByName queries the remote stash instance for performers
// matching name and converts each hit with toPerformer.
//
// Only the first page of at most ten results is requested. A query failure is
// returned unchanged with a nil slice.
func (s *stashScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) {
	stash := s.getStashClient()

	var (
		firstPage = 1
		pageSize  = 10
	)
	filter := models.FindFilterType{
		Q:       &name,
		Page:    &firstPage,
		PerPage: &pageSize,
	}

	var result struct {
		FindPerformers stashFindPerformerNamesResultType `graphql:"findPerformers(filter: $f)"`
	}
	if err := stash.Query(ctx, &result, map[string]interface{}{"f": filter}); err != nil {
		return nil, err
	}

	var scraped []*models.ScrapedPerformer
	for _, found := range result.FindPerformers.Performers {
		scraped = append(scraped, found.toPerformer())
	}
	return scraped, nil
}
// a separate type is needed for scraped stash performers - it does not include remote_site_id or image
type scrapedTagStash struct {
Name string `graphql:"name" json:"name"`
@ -114,7 +83,17 @@ type scrapedPerformerStash struct {
Weight *string `graphql:"weight" json:"weight"`
}
func (s *stashScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
func (s *stashScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) {
if input.Gallery != nil || input.Scene != nil {
return nil, fmt.Errorf("%w: using stash scraper as a fragment scraper", ErrNotSupported)
}
if input.Performer == nil {
return nil, fmt.Errorf("%w: the given performer is nil", ErrNotSupported)
}
scrapedPerformer := input.Performer
client := s.getStashClient()
var q struct {
@ -128,7 +107,7 @@ func (s *stashScraper) scrapePerformerByFragment(scrapedPerformer models.Scraped
"f": performerID,
}
err := client.Query(context.TODO(), &q, vars)
err := client.Query(ctx, &q, vars)
if err != nil {
return nil, err
}
@ -141,7 +120,7 @@ func (s *stashScraper) scrapePerformerByFragment(scrapedPerformer models.Scraped
}
// get the performer image directly
ret.Image, err = getStashPerformerImage(context.TODO(), s.config.StashServer.URL, performerID, s.client, s.globalConfig)
ret.Image, err = getStashPerformerImage(ctx, s.config.StashServer.URL, performerID, s.client, s.globalConfig)
if err != nil {
return nil, err
}
@ -159,7 +138,7 @@ type stashFindSceneNamesResultType struct {
Scenes []*scrapedSceneStash `graphql:"scenes"`
}
func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash) (*models.ScrapedScene, error) {
func (s *stashScraper) scrapedStashSceneToScrapedScene(ctx context.Context, scene *scrapedSceneStash) (*models.ScrapedScene, error) {
ret := models.ScrapedScene{}
err := copier.Copy(&ret, scene)
if err != nil {
@ -167,7 +146,7 @@ func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash)
}
// get the scene image directly
ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, scene.ID, s.client, s.globalConfig)
ret.Image, err = getStashSceneImage(ctx, s.config.StashServer.URL, scene.ID, s.client, s.globalConfig)
if err != nil {
return nil, err
}
@ -175,13 +154,9 @@ func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash)
return &ret, nil
}
func (s *stashScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) {
func (s *stashScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) {
client := s.getStashClient()
var q struct {
FindScenes stashFindSceneNamesResultType `graphql:"findScenes(filter: $f)"`
}
page := 1
perPage := 10
@ -193,21 +168,45 @@ func (s *stashScraper) scrapeScenesByName(ctx context.Context, name string) ([]*
},
}
err := client.Query(ctx, &q, vars)
if err != nil {
return nil, err
}
var ret []models.ScrapedContent
switch ty {
case models.ScrapeContentTypeScene:
var q struct {
FindScenes stashFindSceneNamesResultType `graphql:"findScenes(filter: $f)"`
}
var ret []*models.ScrapedScene
for _, scene := range q.FindScenes.Scenes {
converted, err := s.scrapedStashSceneToScrapedScene(scene)
err := client.Query(ctx, &q, vars)
if err != nil {
return nil, err
}
ret = append(ret, converted)
for _, scene := range q.FindScenes.Scenes {
converted, err := s.scrapedStashSceneToScrapedScene(ctx, scene)
if err != nil {
return nil, err
}
ret = append(ret, converted)
}
return ret, nil
case models.ScrapeContentTypePerformer:
var q struct {
FindPerformers stashFindPerformerNamesResultType `graphql:"findPerformers(filter: $f)"`
}
err := client.Query(ctx, &q, vars)
if err != nil {
return nil, err
}
for _, p := range q.FindPerformers.Performers {
ret = append(ret, p.toPerformer())
}
return ret, nil
}
return ret, nil
return nil, ErrNotSupported
}
type scrapedSceneStash struct {
@ -248,13 +247,13 @@ func (s *stashScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sce
}
// need to copy back to a scraped scene
ret, err := s.scrapedStashSceneToScrapedScene(q.FindScene)
ret, err := s.scrapedStashSceneToScrapedScene(ctx, q.FindScene)
if err != nil {
return nil, err
}
// get the scene image directly
ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, q.FindScene.ID, s.client, s.globalConfig)
ret.Image, err = getStashSceneImage(ctx, s.config.StashServer.URL, q.FindScene.ID, s.client, s.globalConfig)
if err != nil {
return nil, err
}
@ -262,10 +261,6 @@ func (s *stashScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sce
return ret, nil
}
// scrapeSceneByFragment is not implemented for the stash scraper; it always
// returns a nil scene and an error saying so.
func (s *stashScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
return nil, errors.New("scrapeSceneByFragment not supported for stash scraper")
}
type scrapedGalleryStash struct {
ID string `graphql:"id" json:"id"`
Title *string `graphql:"title" json:"title"`
@ -309,29 +304,13 @@ func (s *stashScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mode
return &ret, nil
}
func (s *stashScraper) scrapeGalleryByFragment(scene models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
return nil, errors.New("scrapeGalleryByFragment not supported for stash scraper")
// scrapeByURL is not implemented for the stash scraper: all arguments are
// ignored and ErrNotSupported is returned for every content type.
func (s *stashScraper) scrapeByURL(_ context.Context, _ string, _ models.ScrapeContentType) (models.ScrapedContent, error) {
return nil, ErrNotSupported
}
// scrapePerformerByURL is not implemented for the stash scraper; it always
// returns a nil performer and an error saying so.
func (s *stashScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
return nil, errors.New("scrapePerformerByURL not supported for stash scraper")
}
// scrapeSceneByURL is not implemented for the stash scraper; it always
// returns a nil scene and an error saying so.
func (s *stashScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
return nil, errors.New("scrapeSceneByURL not supported for stash scraper")
}
// scrapeGalleryByURL is not implemented for the stash scraper; it always
// returns a nil gallery and an error saying so.
func (s *stashScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
return nil, errors.New("scrapeGalleryByURL not supported for stash scraper")
}
// scrapeMovieByURL is not implemented for the stash scraper; it always
// returns a nil movie and an error saying so.
func (s *stashScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
return nil, errors.New("scrapeMovieByURL not supported for stash scraper")
}
func getScene(sceneID int, txnManager models.TransactionManager) (*models.Scene, error) {
func getScene(ctx context.Context, sceneID int, txnManager models.TransactionManager) (*models.Scene, error) {
var ret *models.Scene
if err := txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
if err := txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
var err error
ret, err = r.Scene().Find(sceneID)
return err
@ -367,9 +346,9 @@ func sceneToUpdateInput(scene *models.Scene) models.SceneUpdateInput {
}
}
func getGallery(galleryID int, txnManager models.TransactionManager) (*models.Gallery, error) {
func getGallery(ctx context.Context, galleryID int, txnManager models.TransactionManager) (*models.Gallery, error) {
var ret *models.Gallery
if err := txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
if err := txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
var err error
ret, err = r.Gallery().Find(galleryID)
return err

View file

@ -56,7 +56,7 @@ func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*mod
var ret []*models.ScrapedScene
for _, s := range sceneFragments {
ss, err := c.sceneFragmentToScrapedScene(context.TODO(), s)
ss, err := c.sceneFragmentToScrapedScene(ctx, s)
if err != nil {
return nil, err
}
@ -69,9 +69,7 @@ func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*mod
// FindStashBoxScenesByFingerprints queries stash-box for scenes using every
// scene's MD5/OSHASH checksum, or PHash, and returns results in the same order
// as the input slice.
func (c Client) FindStashBoxScenesByFingerprints(sceneIDs []string) ([][]*models.ScrapedScene, error) {
ctx := context.TODO()
func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs []string) ([][]*models.ScrapedScene, error) {
ids, err := utils.StringSliceToIntSlice(sceneIDs)
if err != nil {
return nil, err
@ -150,9 +148,7 @@ func (c Client) FindStashBoxScenesByFingerprints(sceneIDs []string) ([][]*models
// FindStashBoxScenesByFingerprintsFlat queries stash-box for scenes using every
// scene's MD5/OSHASH checksum, or PHash, and returns results as a flat slice.
func (c Client) FindStashBoxScenesByFingerprintsFlat(sceneIDs []string) ([]*models.ScrapedScene, error) {
ctx := context.TODO()
func (c Client) FindStashBoxScenesByFingerprintsFlat(ctx context.Context, sceneIDs []string) ([]*models.ScrapedScene, error) {
ids, err := utils.StringSliceToIntSlice(sceneIDs)
if err != nil {
return nil, err
@ -230,7 +226,7 @@ func (c Client) findStashBoxScenesByFingerprints(ctx context.Context, fingerprin
return ret, nil
}
func (c Client) SubmitStashBoxFingerprints(sceneIDs []string, endpoint string) (bool, error) {
func (c Client) SubmitStashBoxFingerprints(ctx context.Context, sceneIDs []string, endpoint string) (bool, error) {
ids, err := utils.StringSliceToIntSlice(sceneIDs)
if err != nil {
return false, err
@ -238,7 +234,7 @@ func (c Client) SubmitStashBoxFingerprints(sceneIDs []string, endpoint string) (
var fingerprints []graphql.FingerprintSubmission
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
qb := r.Scene()
for _, sceneID := range ids {
@ -307,12 +303,12 @@ func (c Client) SubmitStashBoxFingerprints(sceneIDs []string, endpoint string) (
return false, err
}
return c.submitStashBoxFingerprints(fingerprints)
return c.submitStashBoxFingerprints(ctx, fingerprints)
}
func (c Client) submitStashBoxFingerprints(fingerprints []graphql.FingerprintSubmission) (bool, error) {
func (c Client) submitStashBoxFingerprints(ctx context.Context, fingerprints []graphql.FingerprintSubmission) (bool, error) {
for _, fingerprint := range fingerprints {
_, err := c.client.SubmitFingerprint(context.TODO(), fingerprint)
_, err := c.client.SubmitFingerprint(ctx, fingerprint)
if err != nil {
return false, err
}
@ -322,8 +318,8 @@ func (c Client) submitStashBoxFingerprints(fingerprints []graphql.FingerprintSub
}
// QueryStashBoxPerformer queries stash-box for performers using a query string.
func (c Client) QueryStashBoxPerformer(queryStr string) ([]*models.StashBoxPerformerQueryResult, error) {
performers, err := c.queryStashBoxPerformer(queryStr)
func (c Client) QueryStashBoxPerformer(ctx context.Context, queryStr string) ([]*models.StashBoxPerformerQueryResult, error) {
performers, err := c.queryStashBoxPerformer(ctx, queryStr)
res := []*models.StashBoxPerformerQueryResult{
{
@ -342,8 +338,8 @@ func (c Client) QueryStashBoxPerformer(queryStr string) ([]*models.StashBoxPerfo
return res, err
}
func (c Client) queryStashBoxPerformer(queryStr string) ([]*models.ScrapedPerformer, error) {
performers, err := c.client.SearchPerformer(context.TODO(), queryStr)
func (c Client) queryStashBoxPerformer(ctx context.Context, queryStr string) ([]*models.ScrapedPerformer, error) {
performers, err := c.client.SearchPerformer(ctx, queryStr)
if err != nil {
return nil, err
}
@ -360,7 +356,7 @@ func (c Client) queryStashBoxPerformer(queryStr string) ([]*models.ScrapedPerfor
}
// FindStashBoxPerformersByNames queries stash-box for performers by name
func (c Client) FindStashBoxPerformersByNames(performerIDs []string) ([]*models.StashBoxPerformerQueryResult, error) {
func (c Client) FindStashBoxPerformersByNames(ctx context.Context, performerIDs []string) ([]*models.StashBoxPerformerQueryResult, error) {
ids, err := utils.StringSliceToIntSlice(performerIDs)
if err != nil {
return nil, err
@ -368,7 +364,7 @@ func (c Client) FindStashBoxPerformersByNames(performerIDs []string) ([]*models.
var performers []*models.Performer
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
qb := r.Performer()
for _, performerID := range ids {
@ -391,10 +387,10 @@ func (c Client) FindStashBoxPerformersByNames(performerIDs []string) ([]*models.
return nil, err
}
return c.findStashBoxPerformersByNames(performers)
return c.findStashBoxPerformersByNames(ctx, performers)
}
func (c Client) FindStashBoxPerformersByPerformerNames(performerIDs []string) ([][]*models.ScrapedPerformer, error) {
func (c Client) FindStashBoxPerformersByPerformerNames(ctx context.Context, performerIDs []string) ([][]*models.ScrapedPerformer, error) {
ids, err := utils.StringSliceToIntSlice(performerIDs)
if err != nil {
return nil, err
@ -402,7 +398,7 @@ func (c Client) FindStashBoxPerformersByPerformerNames(performerIDs []string) ([
var performers []*models.Performer
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
qb := r.Performer()
for _, performerID := range ids {
@ -425,7 +421,7 @@ func (c Client) FindStashBoxPerformersByPerformerNames(performerIDs []string) ([
return nil, err
}
results, err := c.findStashBoxPerformersByNames(performers)
results, err := c.findStashBoxPerformersByNames(ctx, performers)
if err != nil {
return nil, err
}
@ -438,11 +434,11 @@ func (c Client) FindStashBoxPerformersByPerformerNames(performerIDs []string) ([
return ret, nil
}
func (c Client) findStashBoxPerformersByNames(performers []*models.Performer) ([]*models.StashBoxPerformerQueryResult, error) {
func (c Client) findStashBoxPerformersByNames(ctx context.Context, performers []*models.Performer) ([]*models.StashBoxPerformerQueryResult, error) {
var ret []*models.StashBoxPerformerQueryResult
for _, performer := range performers {
if performer.Name.Valid {
performerResults, err := c.queryStashBoxPerformer(performer.Name.String)
performerResults, err := c.queryStashBoxPerformer(ctx, performer.Name.String)
if err != nil {
return nil, err
}
@ -705,8 +701,8 @@ func (c Client) sceneFragmentToScrapedScene(ctx context.Context, s *graphql.Scen
return ss, nil
}
func (c Client) FindStashBoxPerformerByID(id string) (*models.ScrapedPerformer, error) {
performer, err := c.client.FindPerformerByID(context.TODO(), id)
func (c Client) FindStashBoxPerformerByID(ctx context.Context, id string) (*models.ScrapedPerformer, error) {
performer, err := c.client.FindPerformerByID(ctx, id)
if err != nil {
return nil, err
}
@ -715,8 +711,8 @@ func (c Client) FindStashBoxPerformerByID(id string) (*models.ScrapedPerformer,
return ret, nil
}
func (c Client) FindStashBoxPerformerByName(name string) (*models.ScrapedPerformer, error) {
performers, err := c.client.SearchPerformer(context.TODO(), name)
func (c Client) FindStashBoxPerformerByName(ctx context.Context, name string) (*models.ScrapedPerformer, error) {
performers, err := c.client.SearchPerformer(ctx, name)
if err != nil {
return nil, err
}

View file

@ -99,8 +99,6 @@ func urlFromCDP(ctx context.Context, url string, driverOptions scraperDriverOpti
sleepDuration = time.Duration(driverOptions.Sleep) * time.Second
}
act := context.TODO()
// if scraperCDPPath is a remote address, then allocate accordingly
cdpPath := globalConfig.GetScraperCDPPath()
if cdpPath != "" {
@ -118,7 +116,7 @@ func urlFromCDP(ctx context.Context, url string, driverOptions scraperDriverOpti
}
}
act, cancelAct = chromedp.NewRemoteAllocator(act, remote)
ctx, cancelAct = chromedp.NewRemoteAllocator(ctx, remote)
} else {
// use a temporary user directory for chrome
dir, err := os.MkdirTemp("", "stash-chromedp")
@ -131,13 +129,13 @@ func urlFromCDP(ctx context.Context, url string, driverOptions scraperDriverOpti
chromedp.UserDataDir(dir),
chromedp.ExecPath(cdpPath),
)
act, cancelAct = chromedp.NewExecAllocator(act, opts...)
ctx, cancelAct = chromedp.NewExecAllocator(ctx, opts...)
}
defer cancelAct()
}
ctx, cancel := chromedp.NewContext(act)
ctx, cancel := chromedp.NewContext(ctx)
defer cancel()
// add a fixed timeout for the http request

View file

@ -56,7 +56,7 @@ func (s *xpathScraper) scrapeURL(ctx context.Context, url string) (*html.Node, *
return doc, scraper, nil
}
func (s *xpathScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
func (s *xpathScraper) scrapeByURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries
doc, scraper, err := s.scrapeURL(ctx, u)
if err != nil {
@ -64,47 +64,25 @@ func (s *xpathScraper) scrapePerformerByURL(ctx context.Context, url string) (*m
}
q := s.getXPathQuery(doc)
return scraper.scrapePerformer(q)
}
func (s *xpathScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries
doc, scraper, err := s.scrapeURL(ctx, u)
if err != nil {
return nil, err
switch ty {
case models.ScrapeContentTypePerformer:
return scraper.scrapePerformer(ctx, q)
case models.ScrapeContentTypeScene:
return scraper.scrapeScene(ctx, q)
case models.ScrapeContentTypeGallery:
return scraper.scrapeGallery(ctx, q)
case models.ScrapeContentTypeMovie:
return scraper.scrapeMovie(ctx, q)
}
q := s.getXPathQuery(doc)
return scraper.scrapeScene(q)
return nil, ErrNotSupported
}
func (s *xpathScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries
doc, scraper, err := s.scrapeURL(ctx, u)
if err != nil {
return nil, err
}
q := s.getXPathQuery(doc)
return scraper.scrapeGallery(q)
}
func (s *xpathScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries
doc, scraper, err := s.scrapeURL(ctx, u)
if err != nil {
return nil, err
}
q := s.getXPathQuery(doc)
return scraper.scrapeMovie(q)
}
func (s *xpathScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) {
func (s *xpathScraper) scrapeByName(ctx context.Context, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) {
scraper := s.getXpathScraper()
if scraper == nil {
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
return nil, fmt.Errorf("%w: name %v", ErrNotFound, s.scraper.Scraper)
}
const placeholder = "{}"
@ -122,36 +100,32 @@ func (s *xpathScraper) scrapePerformersByName(ctx context.Context, name string)
}
q := s.getXPathQuery(doc)
return scraper.scrapePerformers(q)
}
func (s *xpathScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
return nil, errors.New("scrapePerformerByFragment not supported for xpath scraper")
}
var content []models.ScrapedContent
switch ty {
case models.ScrapeContentTypePerformer:
performers, err := scraper.scrapePerformers(ctx, q)
if err != nil {
return nil, err
}
for _, p := range performers {
content = append(content, p)
}
func (s *xpathScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) {
scraper := s.getXpathScraper()
return content, nil
case models.ScrapeContentTypeScene:
scenes, err := scraper.scrapeScenes(ctx, q)
if err != nil {
return nil, err
}
for _, s := range scenes {
content = append(content, s)
}
if scraper == nil {
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
return content, nil
}
const placeholder = "{}"
// replace the placeholder string with the URL-escaped name
escapedName := url.QueryEscape(name)
url := s.scraper.QueryURL
url = strings.ReplaceAll(url, placeholder, escapedName)
doc, err := s.loadURL(ctx, url)
if err != nil {
return nil, err
}
q := s.getXPathQuery(doc)
return scraper.scrapeScenes(q)
return nil, ErrNotSupported
}
func (s *xpathScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
@ -175,10 +149,21 @@ func (s *xpathScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sce
}
q := s.getXPathQuery(doc)
return scraper.scrapeScene(q)
return scraper.scrapeScene(ctx, q)
}
func (s *xpathScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
func (s *xpathScraper) scrapeByFragment(ctx context.Context, input Input) (models.ScrapedContent, error) {
switch {
case input.Gallery != nil:
return nil, fmt.Errorf("%w: cannot use an xpath scraper as a gallery fragment scraper", ErrNotSupported)
case input.Performer != nil:
return nil, fmt.Errorf("%w: cannot use an xpath scraper as a performer fragment scraper", ErrNotSupported)
case input.Scene == nil:
return nil, fmt.Errorf("%w: scene input is nil", ErrNotSupported)
}
scene := *input.Scene
// construct the URL
queryURL := queryURLParametersFromScrapedScene(scene)
if s.scraper.QueryURLReplacements != nil {
@ -199,7 +184,7 @@ func (s *xpathScraper) scrapeSceneByFragment(ctx context.Context, scene models.S
}
q := s.getXPathQuery(doc)
return scraper.scrapeScene(q)
return scraper.scrapeScene(ctx, q)
}
func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
@ -223,11 +208,7 @@ func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mode
}
q := s.getXPathQuery(doc)
return scraper.scrapeGallery(q)
}
func (s *xpathScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
return nil, errors.New("scrapeGalleryByFragment not supported for xpath scraper")
return scraper.scrapeGallery(ctx, q)
}
func (s *xpathScraper) loadURL(ctx context.Context, url string) (*html.Node, error) {
@ -301,8 +282,8 @@ func (q *xpathQuery) nodeText(n *html.Node) string {
return ret
}
func (q *xpathQuery) subScrape(value string) mappedQuery {
doc, err := q.scraper.loadURL(context.TODO(), value)
func (q *xpathQuery) subScrape(ctx context.Context, value string) mappedQuery {
doc, err := q.scraper.loadURL(ctx, value)
if err != nil {
logger.Warnf("Error getting URL '%s' for sub-scraper: %s", value, err.Error())

View file

@ -313,7 +313,7 @@ func TestScrapePerformerXPath(t *testing.T) {
doc: doc,
}
performer, err := scraper.scrapePerformer(q)
performer, err := scraper.scrapePerformer(context.Background(), q)
if err != nil {
t.Errorf("Error scraping performer: %s", err.Error())
@ -408,7 +408,7 @@ func TestConcatXPath(t *testing.T) {
doc: doc,
}
performer, err := scraper.scrapePerformer(q)
performer, err := scraper.scrapePerformer(context.Background(), q)
if err != nil {
t.Errorf("Error scraping performer: %s", err.Error())
@ -682,7 +682,7 @@ func TestApplySceneXPathConfig(t *testing.T) {
q := &xpathQuery{
doc: doc,
}
scene, err := scraper.scrapeScene(q)
scene, err := scraper.scrapeScene(context.Background(), q)
if err != nil {
t.Errorf("Error scraping scene: %s", err.Error())
@ -805,7 +805,7 @@ func TestLoadInvalidXPath(t *testing.T) {
doc: doc,
}
config.process(q, nil)
config.process(context.Background(), q, nil)
}
type mockGlobalConfig struct{}