Fix identify and script scraper bugs (#2375)

* Continue identify if source fails
* Handle empty result set correctly
* Parse null values from scraper script correctly
* Omit warning when json selector value missing
* Return nil when scraped item not found
* Fix graphql validation errors
This commit is contained in:
WithoutPants 2022-03-15 09:42:22 +11:00 committed by GitHub
parent e4d6d3b085
commit 9e3d56b22f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 86 additions and 37 deletions

View file

@ -221,10 +221,14 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
return nil, fmt.Errorf("%w: sceneID is not an integer: '%s'", ErrInput, *input.SceneID) return nil, fmt.Errorf("%w: sceneID is not an integer: '%s'", ErrInput, *input.SceneID)
} }
c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, sceneID, models.ScrapeContentTypeScene) c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, sceneID, models.ScrapeContentTypeScene)
content = []models.ScrapedContent{c} if c != nil {
content = []models.ScrapedContent{c}
}
case input.SceneInput != nil: case input.SceneInput != nil:
c, err = r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Scene: input.SceneInput}) c, err = r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Scene: input.SceneInput})
content = []models.ScrapedContent{c} if c != nil {
content = []models.ScrapedContent{c}
}
case input.Query != nil: case input.Query != nil:
content, err = r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypeScene) content, err = r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypeScene)
default: default:

View file

@ -12,7 +12,7 @@ func marshalScrapedScenes(content []models.ScrapedContent) ([]*models.ScrapedSce
var ret []*models.ScrapedScene var ret []*models.ScrapedScene
for _, c := range content { for _, c := range content {
if c == nil { if c == nil {
ret = append(ret, nil) // graphql schema requires scenes to be non-nil
continue continue
} }
@ -35,7 +35,7 @@ func marshalScrapedPerformers(content []models.ScrapedContent) ([]*models.Scrape
var ret []*models.ScrapedPerformer var ret []*models.ScrapedPerformer
for _, c := range content { for _, c := range content {
if c == nil { if c == nil {
ret = append(ret, nil) // graphql schema requires performers to be non-nil
continue continue
} }
@ -58,7 +58,7 @@ func marshalScrapedGalleries(content []models.ScrapedContent) ([]*models.Scraped
var ret []*models.ScrapedGallery var ret []*models.ScrapedGallery
for _, c := range content { for _, c := range content {
if c == nil { if c == nil {
ret = append(ret, nil) // graphql schema requires galleries to be non-nil
continue continue
} }
@ -81,7 +81,7 @@ func marshalScrapedMovies(content []models.ScrapedContent) ([]*models.ScrapedMov
var ret []*models.ScrapedMovie var ret []*models.ScrapedMovie
for _, c := range content { for _, c := range content {
if c == nil { if c == nil {
ret = append(ret, nil) // graphql schema requires movies to be non-nil
continue continue
} }

View file

@ -63,7 +63,8 @@ func (t *SceneIdentifier) scrapeScene(ctx context.Context, scene *models.Scene)
// scrape using the source // scrape using the source
scraped, err := source.Scraper.ScrapeScene(ctx, scene.ID) scraped, err := source.Scraper.ScrapeScene(ctx, scene.ID)
if err != nil { if err != nil {
return nil, fmt.Errorf("error scraping from %v: %v", source.Scraper, err) logger.Errorf("error scraping from %v: %v", source.Scraper, err)
continue
} }
// if results were found then return // if results were found then return

View file

@ -85,12 +85,12 @@ func TestSceneIdentifier_Identify(t *testing.T) {
{ {
"error scraping", "error scraping",
errID1, errID1,
true, false,
}, },
{ {
"error scraping from second", "error scraping from second",
errID2, errID2,
true, false,
}, },
{ {
"found in first scraper", "found in first scraper",

View file

@ -239,6 +239,11 @@ func (s scraperSource) ScrapeScene(ctx context.Context, sceneID int) (*models.Sc
return nil, err return nil, err
} }
// don't try to convert nil return value
if content == nil {
return nil, nil
}
if scene, ok := content.(models.ScrapedScene); ok { if scene, ok := content.(models.ScrapedScene); ok {
return &scene, nil return &scene, nil
} }

View file

@ -273,10 +273,16 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err) return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err)
} }
ret, err = ss.viaScene(ctx, c.client, scene) // don't assign nil concrete pointer to ret interface, otherwise nil
// detection is harder
scraped, err := ss.viaScene(ctx, c.client, scene)
if err != nil { if err != nil {
return nil, fmt.Errorf("scraper %s: %w", scraperID, err) return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
} }
if scraped != nil {
ret = scraped
}
case models.ScrapeContentTypeGallery: case models.ScrapeContentTypeGallery:
gs, ok := s.(galleryScraper) gs, ok := s.(galleryScraper)
if !ok { if !ok {
@ -288,10 +294,16 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err) return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err)
} }
ret, err = gs.viaGallery(ctx, c.client, gallery) // don't assign nil concrete pointer to ret interface, otherwise nil
// detection is harder
scraped, err := gs.viaGallery(ctx, c.client, gallery)
if err != nil { if err != nil {
return nil, fmt.Errorf("scraper %s: %w", scraperID, err) return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
} }
if scraped != nil {
ret = scraped
}
} }
return c.postScrape(ctx, ret) return c.postScrape(ctx, ret)

View file

@ -258,7 +258,10 @@ func (q *jsonQuery) runQuery(selector string) ([]string, error) {
value := gjson.Get(q.doc, selector) value := gjson.Get(q.doc, selector)
if !value.Exists() { if !value.Exists() {
return nil, fmt.Errorf("could not find json path '%s' in json object", selector) // many possible reasons why the selector may not be in the json object
// and not all are errors.
// Just return nil
return nil, nil
} }
var ret []string var ret []string

View file

@ -97,4 +97,22 @@ jsonScrapers:
verifyField(t, "Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and thats sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the ... arrow_drop_down Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and thats sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the VR Porn movies trust us. Ankles behind her neck and feet over her back so she can kiss her toes, turned, twisted and gyrating, she can fuck any which way she wants (and that ass!), will surely make you fall in love with this hot Virtual Reality Porn slut, as she is one of the finest of them all. Talking about perfection, maybe its all the acrobatic work that keeps it in such gorgeous shape? Who cares really, because you just want to take a big bite out of it and never let go. But its not all about the body. Mias also got a great smile, which might not sound kinky, but believe us, it is a smile that will heat up your innards and drop your pants. Is it her golden skin, her innocent pink lips or that heart-shaped face? There is just too much good stuff going on with Mia Malkova, which is maybe why these past few years have heaped awards upon awards on this Southern California native. Mia came to VR Bangers for her first VR Porn video, so you know shes only going for top-notch scenes with top-game performers, men, and women. 
Better hit up that yoga studio if you ever dream of being able to bang a flexible and talented chick like lady Malkova. arrow_drop_up", scrapedPerformer.Details, "Details") verifyField(t, "Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and thats sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the ... arrow_drop_down Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and thats sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the VR Porn movies trust us. Ankles behind her neck and feet over her back so she can kiss her toes, turned, twisted and gyrating, she can fuck any which way she wants (and that ass!), will surely make you fall in love with this hot Virtual Reality Porn slut, as she is one of the finest of them all. Talking about perfection, maybe its all the acrobatic work that keeps it in such gorgeous shape? Who cares really, because you just want to take a big bite out of it and never let go. But its not all about the body. Mias also got a great smile, which might not sound kinky, but believe us, it is a smile that will heat up your innards and drop your pants. Is it her golden skin, her innocent pink lips or that heart-shaped face? There is just too much good stuff going on with Mia Malkova, which is maybe why these past few years have heaped awards upon awards on this Southern California native. 
Mia came to VR Bangers for her first VR Porn video, so you know shes only going for top-notch scenes with top-game performers, men, and women. Better hit up that yoga studio if you ever dream of being able to bang a flexible and talented chick like lady Malkova. arrow_drop_up", scrapedPerformer.Details, "Details")
verifyField(t, "Blonde", scrapedPerformer.HairColor, "HairColor") verifyField(t, "Blonde", scrapedPerformer.HairColor, "HairColor")
verifyField(t, "57", scrapedPerformer.Weight, "Weight") verifyField(t, "57", scrapedPerformer.Weight, "Weight")
notFoundJson := `
{
"data": null
}`
q = &jsonQuery{
doc: notFoundJson,
}
scrapedPerformer, err = performerScraper.scrapePerformer(context.Background(), q)
if err != nil {
t.Fatalf("Error scraping performer: %s", err.Error())
}
if scrapedPerformer != nil {
t.Errorf("expected nil scraped performer when not found, got %v", scrapedPerformer)
}
} }

View file

@ -761,7 +761,7 @@ func (r mappedResults) setKey(index int, key string, value string) mappedResults
} }
func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*models.ScrapedPerformer, error) { func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*models.ScrapedPerformer, error) {
var ret models.ScrapedPerformer var ret *models.ScrapedPerformer
performerMap := s.Performer performerMap := s.Performer
if performerMap == nil { if performerMap == nil {
@ -772,7 +772,8 @@ func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*mod
results := performerMap.process(ctx, q, s.Common) results := performerMap.process(ctx, q, s.Common)
if len(results) > 0 { if len(results) > 0 {
results[0].apply(&ret) ret = &models.ScrapedPerformer{}
results[0].apply(ret)
// now apply the tags // now apply the tags
if performerTagsMap != nil { if performerTagsMap != nil {
@ -787,7 +788,7 @@ func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*mod
} }
} }
return &ret, nil return ret, nil
} }
func (s mappedScraper) scrapePerformers(ctx context.Context, q mappedQuery) ([]*models.ScrapedPerformer, error) { func (s mappedScraper) scrapePerformers(ctx context.Context, q mappedQuery) ([]*models.ScrapedPerformer, error) {
@ -903,7 +904,7 @@ func (s mappedScraper) scrapeScenes(ctx context.Context, q mappedQuery) ([]*mode
} }
func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.ScrapedScene, error) { func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.ScrapedScene, error) {
var ret models.ScrapedScene var ret *models.ScrapedScene
sceneScraperConfig := s.Scene sceneScraperConfig := s.Scene
sceneMap := sceneScraperConfig.mappedConfig sceneMap := sceneScraperConfig.mappedConfig
@ -914,15 +915,14 @@ func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.
logger.Debug(`Processing scene:`) logger.Debug(`Processing scene:`)
results := sceneMap.process(ctx, q, s.Common) results := sceneMap.process(ctx, q, s.Common)
if len(results) > 0 { if len(results) > 0 {
ss := s.processScene(ctx, q, results[0]) ret = s.processScene(ctx, q, results[0])
ret = *ss
} }
return &ret, nil return ret, nil
} }
func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*models.ScrapedGallery, error) { func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*models.ScrapedGallery, error) {
var ret models.ScrapedGallery var ret *models.ScrapedGallery
galleryScraperConfig := s.Gallery galleryScraperConfig := s.Gallery
galleryMap := galleryScraperConfig.mappedConfig galleryMap := galleryScraperConfig.mappedConfig
@ -937,7 +937,9 @@ func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*model
logger.Debug(`Processing gallery:`) logger.Debug(`Processing gallery:`)
results := galleryMap.process(ctx, q, s.Common) results := galleryMap.process(ctx, q, s.Common)
if len(results) > 0 { if len(results) > 0 {
results[0].apply(&ret) ret = &models.ScrapedGallery{}
results[0].apply(ret)
// now apply the performers and tags // now apply the performers and tags
if galleryPerformersMap != nil { if galleryPerformersMap != nil {
@ -974,11 +976,11 @@ func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*model
} }
} }
return &ret, nil return ret, nil
} }
func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.ScrapedMovie, error) { func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.ScrapedMovie, error) {
var ret models.ScrapedMovie var ret *models.ScrapedMovie
movieScraperConfig := s.Movie movieScraperConfig := s.Movie
movieMap := movieScraperConfig.mappedConfig movieMap := movieScraperConfig.mappedConfig
@ -990,7 +992,8 @@ func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.
results := movieMap.process(ctx, q, s.Common) results := movieMap.process(ctx, q, s.Common)
if len(results) > 0 { if len(results) > 0 {
results[0].apply(&ret) ret = &models.ScrapedMovie{}
results[0].apply(ret)
if movieStudioMap != nil { if movieStudioMap != nil {
logger.Debug(`Processing movie studio:`) logger.Debug(`Processing movie studio:`)
@ -1004,5 +1007,5 @@ func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.
} }
} }
return &ret, nil return ret, nil
} }

View file

@ -173,21 +173,21 @@ func (s *scriptScraper) scrapeByURL(ctx context.Context, url string, ty models.S
func (s *scriptScraper) scrape(ctx context.Context, input string, ty models.ScrapeContentType) (models.ScrapedContent, error) { func (s *scriptScraper) scrape(ctx context.Context, input string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
switch ty { switch ty {
case models.ScrapeContentTypePerformer: case models.ScrapeContentTypePerformer:
var performer models.ScrapedPerformer var performer *models.ScrapedPerformer
err := s.runScraperScript(input, &performer) err := s.runScraperScript(input, &performer)
return &performer, err return performer, err
case models.ScrapeContentTypeGallery: case models.ScrapeContentTypeGallery:
var gallery models.ScrapedGallery var gallery *models.ScrapedGallery
err := s.runScraperScript(input, &gallery) err := s.runScraperScript(input, &gallery)
return &gallery, err return gallery, err
case models.ScrapeContentTypeScene: case models.ScrapeContentTypeScene:
var scene models.ScrapedScene var scene *models.ScrapedScene
err := s.runScraperScript(input, &scene) err := s.runScraperScript(input, &scene)
return &scene, err return scene, err
case models.ScrapeContentTypeMovie: case models.ScrapeContentTypeMovie:
var movie models.ScrapedMovie var movie *models.ScrapedMovie
err := s.runScraperScript(input, &movie) err := s.runScraperScript(input, &movie)
return &movie, err return movie, err
} }
return nil, ErrNotSupported return nil, ErrNotSupported
@ -200,11 +200,11 @@ func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sc
return nil, err return nil, err
} }
var ret models.ScrapedScene var ret *models.ScrapedScene
err = s.runScraperScript(string(inString), &ret) err = s.runScraperScript(string(inString), &ret)
return &ret, err return ret, err
} }
func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) { func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
@ -214,11 +214,11 @@ func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mod
return nil, err return nil, err
} }
var ret models.ScrapedGallery var ret *models.ScrapedGallery
err = s.runScraperScript(string(inString), &ret) err = s.runScraperScript(string(inString), &ret)
return &ret, err return ret, err
} }
func findPythonExecutable() (string, error) { func findPythonExecutable() (string, error) {

View file

@ -3,4 +3,7 @@
* Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368)) * Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))
### 🐛 Bug fixes ### 🐛 Bug fixes
* Removed warnings and an incorrect error message in JSON scrapers. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Ensure identify continues using other scrapers if a scrape returns no results. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Continue trying to identify a scene if a scraper fails. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Fix auto-tag not using case-insensitive matching. ([#2378](https://github.com/stashapp/stash/pull/2378)) * Fix auto-tag not using case-insensitive matching. ([#2378](https://github.com/stashapp/stash/pull/2378))