Fix identify and script scraper bugs (#2375)

* Continue identify if source fails
* Handle empty result set correctly
* Parse null values from scraper script correctly
* Omit warning when JSON selector value is missing
* Return nil when scraped item not found
* Fix GraphQL validation errors
This commit is contained in:
WithoutPants 2022-03-15 09:42:22 +11:00 committed by GitHub
parent e4d6d3b085
commit 9e3d56b22f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 86 additions and 37 deletions

View file

@ -221,10 +221,14 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
return nil, fmt.Errorf("%w: sceneID is not an integer: '%s'", ErrInput, *input.SceneID)
}
c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, sceneID, models.ScrapeContentTypeScene)
if c != nil {
content = []models.ScrapedContent{c}
}
case input.SceneInput != nil:
c, err = r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Scene: input.SceneInput})
if c != nil {
content = []models.ScrapedContent{c}
}
case input.Query != nil:
content, err = r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypeScene)
default:

View file

@ -12,7 +12,7 @@ func marshalScrapedScenes(content []models.ScrapedContent) ([]*models.ScrapedSce
var ret []*models.ScrapedScene
for _, c := range content {
if c == nil {
ret = append(ret, nil)
// graphql schema requires scenes to be non-nil
continue
}
@ -35,7 +35,7 @@ func marshalScrapedPerformers(content []models.ScrapedContent) ([]*models.Scrape
var ret []*models.ScrapedPerformer
for _, c := range content {
if c == nil {
ret = append(ret, nil)
// graphql schema requires performers to be non-nil
continue
}
@ -58,7 +58,7 @@ func marshalScrapedGalleries(content []models.ScrapedContent) ([]*models.Scraped
var ret []*models.ScrapedGallery
for _, c := range content {
if c == nil {
ret = append(ret, nil)
// graphql schema requires galleries to be non-nil
continue
}
@ -81,7 +81,7 @@ func marshalScrapedMovies(content []models.ScrapedContent) ([]*models.ScrapedMov
var ret []*models.ScrapedMovie
for _, c := range content {
if c == nil {
ret = append(ret, nil)
// graphql schema requires movies to be non-nil
continue
}

View file

@ -63,7 +63,8 @@ func (t *SceneIdentifier) scrapeScene(ctx context.Context, scene *models.Scene)
// scrape using the source
scraped, err := source.Scraper.ScrapeScene(ctx, scene.ID)
if err != nil {
return nil, fmt.Errorf("error scraping from %v: %v", source.Scraper, err)
logger.Errorf("error scraping from %v: %v", source.Scraper, err)
continue
}
// if results were found then return

View file

@ -85,12 +85,12 @@ func TestSceneIdentifier_Identify(t *testing.T) {
{
"error scraping",
errID1,
true,
false,
},
{
"error scraping from second",
errID2,
true,
false,
},
{
"found in first scraper",

View file

@ -239,6 +239,11 @@ func (s scraperSource) ScrapeScene(ctx context.Context, sceneID int) (*models.Sc
return nil, err
}
// don't try to convert nil return value
if content == nil {
return nil, nil
}
if scene, ok := content.(models.ScrapedScene); ok {
return &scene, nil
}

View file

@ -273,10 +273,16 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err)
}
ret, err = ss.viaScene(ctx, c.client, scene)
// don't assign nil concrete pointer to ret interface, otherwise nil
// detection is harder
scraped, err := ss.viaScene(ctx, c.client, scene)
if err != nil {
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
}
if scraped != nil {
ret = scraped
}
case models.ScrapeContentTypeGallery:
gs, ok := s.(galleryScraper)
if !ok {
@ -288,10 +294,16 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err)
}
ret, err = gs.viaGallery(ctx, c.client, gallery)
// don't assign nil concrete pointer to ret interface, otherwise nil
// detection is harder
scraped, err := gs.viaGallery(ctx, c.client, gallery)
if err != nil {
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
}
if scraped != nil {
ret = scraped
}
}
return c.postScrape(ctx, ret)

View file

@ -258,7 +258,10 @@ func (q *jsonQuery) runQuery(selector string) ([]string, error) {
value := gjson.Get(q.doc, selector)
if !value.Exists() {
return nil, fmt.Errorf("could not find json path '%s' in json object", selector)
// many possible reasons why the selector may not be in the json object
// and not all are errors.
// Just return nil
return nil, nil
}
var ret []string

View file

@ -97,4 +97,22 @@ jsonScrapers:
verifyField(t, "Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and thats sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the ... arrow_drop_down Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and thats sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the VR Porn movies trust us. Ankles behind her neck and feet over her back so she can kiss her toes, turned, twisted and gyrating, she can fuck any which way she wants (and that ass!), will surely make you fall in love with this hot Virtual Reality Porn slut, as she is one of the finest of them all. Talking about perfection, maybe its all the acrobatic work that keeps it in such gorgeous shape? Who cares really, because you just want to take a big bite out of it and never let go. But its not all about the body. Mias also got a great smile, which might not sound kinky, but believe us, it is a smile that will heat up your innards and drop your pants. Is it her golden skin, her innocent pink lips or that heart-shaped face? There is just too much good stuff going on with Mia Malkova, which is maybe why these past few years have heaped awards upon awards on this Southern California native. Mia came to VR Bangers for her first VR Porn video, so you know shes only going for top-notch scenes with top-game performers, men, and women. 
Better hit up that yoga studio if you ever dream of being able to bang a flexible and talented chick like lady Malkova. arrow_drop_up", scrapedPerformer.Details, "Details")
verifyField(t, "Blonde", scrapedPerformer.HairColor, "HairColor")
verifyField(t, "57", scrapedPerformer.Weight, "Weight")
notFoundJson := `
{
"data": null
}`
q = &jsonQuery{
doc: notFoundJson,
}
scrapedPerformer, err = performerScraper.scrapePerformer(context.Background(), q)
if err != nil {
t.Fatalf("Error scraping performer: %s", err.Error())
}
if scrapedPerformer != nil {
t.Errorf("expected nil scraped performer when not found, got %v", scrapedPerformer)
}
}

View file

@ -761,7 +761,7 @@ func (r mappedResults) setKey(index int, key string, value string) mappedResults
}
func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*models.ScrapedPerformer, error) {
var ret models.ScrapedPerformer
var ret *models.ScrapedPerformer
performerMap := s.Performer
if performerMap == nil {
@ -772,7 +772,8 @@ func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*mod
results := performerMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
ret = &models.ScrapedPerformer{}
results[0].apply(ret)
// now apply the tags
if performerTagsMap != nil {
@ -787,7 +788,7 @@ func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*mod
}
}
return &ret, nil
return ret, nil
}
func (s mappedScraper) scrapePerformers(ctx context.Context, q mappedQuery) ([]*models.ScrapedPerformer, error) {
@ -903,7 +904,7 @@ func (s mappedScraper) scrapeScenes(ctx context.Context, q mappedQuery) ([]*mode
}
func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.ScrapedScene, error) {
var ret models.ScrapedScene
var ret *models.ScrapedScene
sceneScraperConfig := s.Scene
sceneMap := sceneScraperConfig.mappedConfig
@ -914,15 +915,14 @@ func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.
logger.Debug(`Processing scene:`)
results := sceneMap.process(ctx, q, s.Common)
if len(results) > 0 {
ss := s.processScene(ctx, q, results[0])
ret = *ss
ret = s.processScene(ctx, q, results[0])
}
return &ret, nil
return ret, nil
}
func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*models.ScrapedGallery, error) {
var ret models.ScrapedGallery
var ret *models.ScrapedGallery
galleryScraperConfig := s.Gallery
galleryMap := galleryScraperConfig.mappedConfig
@ -937,7 +937,9 @@ func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*model
logger.Debug(`Processing gallery:`)
results := galleryMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
ret = &models.ScrapedGallery{}
results[0].apply(ret)
// now apply the performers and tags
if galleryPerformersMap != nil {
@ -974,11 +976,11 @@ func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*model
}
}
return &ret, nil
return ret, nil
}
func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.ScrapedMovie, error) {
var ret models.ScrapedMovie
var ret *models.ScrapedMovie
movieScraperConfig := s.Movie
movieMap := movieScraperConfig.mappedConfig
@ -990,7 +992,8 @@ func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.
results := movieMap.process(ctx, q, s.Common)
if len(results) > 0 {
results[0].apply(&ret)
ret = &models.ScrapedMovie{}
results[0].apply(ret)
if movieStudioMap != nil {
logger.Debug(`Processing movie studio:`)
@ -1004,5 +1007,5 @@ func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.
}
}
return &ret, nil
return ret, nil
}

View file

@ -173,21 +173,21 @@ func (s *scriptScraper) scrapeByURL(ctx context.Context, url string, ty models.S
func (s *scriptScraper) scrape(ctx context.Context, input string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
switch ty {
case models.ScrapeContentTypePerformer:
var performer models.ScrapedPerformer
var performer *models.ScrapedPerformer
err := s.runScraperScript(input, &performer)
return &performer, err
return performer, err
case models.ScrapeContentTypeGallery:
var gallery models.ScrapedGallery
var gallery *models.ScrapedGallery
err := s.runScraperScript(input, &gallery)
return &gallery, err
return gallery, err
case models.ScrapeContentTypeScene:
var scene models.ScrapedScene
var scene *models.ScrapedScene
err := s.runScraperScript(input, &scene)
return &scene, err
return scene, err
case models.ScrapeContentTypeMovie:
var movie models.ScrapedMovie
var movie *models.ScrapedMovie
err := s.runScraperScript(input, &movie)
return &movie, err
return movie, err
}
return nil, ErrNotSupported
@ -200,11 +200,11 @@ func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sc
return nil, err
}
var ret models.ScrapedScene
var ret *models.ScrapedScene
err = s.runScraperScript(string(inString), &ret)
return &ret, err
return ret, err
}
func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
@ -214,11 +214,11 @@ func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mod
return nil, err
}
var ret models.ScrapedGallery
var ret *models.ScrapedGallery
err = s.runScraperScript(string(inString), &ret)
return &ret, err
return ret, err
}
func findPythonExecutable() (string, error) {

View file

@ -3,4 +3,7 @@
* Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))
### 🐛 Bug fixes
* Removed warnings and incorrect error message in JSON scrapers. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Ensure identify continues using other scrapers if a scrape returns no results. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Continue trying to identify scene if scraper fails. ([#2375](https://github.com/stashapp/stash/pull/2375))
* Fix auto-tag not using case-insensitive matching. ([#2378](https://github.com/stashapp/stash/pull/2378))