mirror of
https://github.com/stashapp/stash.git
synced 2025-12-06 08:26:00 +01:00
Fix identify and script scraper bugs (#2375)
* Continue identify if source fails
* Handle empty result set correctly
* Parse null values from scraper script correctly
* Omit warning when json selector value missing
* Return nil when scraped item not found
* Fix graphql validation errors
This commit is contained in:
parent
e4d6d3b085
commit
9e3d56b22f
11 changed files with 86 additions and 37 deletions
|
|
@ -221,10 +221,14 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
|
|||
return nil, fmt.Errorf("%w: sceneID is not an integer: '%s'", ErrInput, *input.SceneID)
|
||||
}
|
||||
c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, sceneID, models.ScrapeContentTypeScene)
|
||||
content = []models.ScrapedContent{c}
|
||||
if c != nil {
|
||||
content = []models.ScrapedContent{c}
|
||||
}
|
||||
case input.SceneInput != nil:
|
||||
c, err = r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Scene: input.SceneInput})
|
||||
content = []models.ScrapedContent{c}
|
||||
if c != nil {
|
||||
content = []models.ScrapedContent{c}
|
||||
}
|
||||
case input.Query != nil:
|
||||
content, err = r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypeScene)
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ func marshalScrapedScenes(content []models.ScrapedContent) ([]*models.ScrapedSce
|
|||
var ret []*models.ScrapedScene
|
||||
for _, c := range content {
|
||||
if c == nil {
|
||||
ret = append(ret, nil)
|
||||
// graphql schema requires scenes to be non-nil
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
@ -35,7 +35,7 @@ func marshalScrapedPerformers(content []models.ScrapedContent) ([]*models.Scrape
|
|||
var ret []*models.ScrapedPerformer
|
||||
for _, c := range content {
|
||||
if c == nil {
|
||||
ret = append(ret, nil)
|
||||
// graphql schema requires performers to be non-nil
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ func marshalScrapedGalleries(content []models.ScrapedContent) ([]*models.Scraped
|
|||
var ret []*models.ScrapedGallery
|
||||
for _, c := range content {
|
||||
if c == nil {
|
||||
ret = append(ret, nil)
|
||||
// graphql schema requires galleries to be non-nil
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ func marshalScrapedMovies(content []models.ScrapedContent) ([]*models.ScrapedMov
|
|||
var ret []*models.ScrapedMovie
|
||||
for _, c := range content {
|
||||
if c == nil {
|
||||
ret = append(ret, nil)
|
||||
// graphql schema requires movies to be non-nil
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,8 @@ func (t *SceneIdentifier) scrapeScene(ctx context.Context, scene *models.Scene)
|
|||
// scrape using the source
|
||||
scraped, err := source.Scraper.ScrapeScene(ctx, scene.ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error scraping from %v: %v", source.Scraper, err)
|
||||
logger.Errorf("error scraping from %v: %v", source.Scraper, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// if results were found then return
|
||||
|
|
|
|||
|
|
@ -85,12 +85,12 @@ func TestSceneIdentifier_Identify(t *testing.T) {
|
|||
{
|
||||
"error scraping",
|
||||
errID1,
|
||||
true,
|
||||
false,
|
||||
},
|
||||
{
|
||||
"error scraping from second",
|
||||
errID2,
|
||||
true,
|
||||
false,
|
||||
},
|
||||
{
|
||||
"found in first scraper",
|
||||
|
|
|
|||
|
|
@ -239,6 +239,11 @@ func (s scraperSource) ScrapeScene(ctx context.Context, sceneID int) (*models.Sc
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// don't try to convert nil return value
|
||||
if content == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if scene, ok := content.(models.ScrapedScene); ok {
|
||||
return &scene, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -273,10 +273,16 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
|
|||
return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err)
|
||||
}
|
||||
|
||||
ret, err = ss.viaScene(ctx, c.client, scene)
|
||||
// don't assign nil concrete pointer to ret interface, otherwise nil
|
||||
// detection is harder
|
||||
scraped, err := ss.viaScene(ctx, c.client, scene)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
|
||||
}
|
||||
|
||||
if scraped != nil {
|
||||
ret = scraped
|
||||
}
|
||||
case models.ScrapeContentTypeGallery:
|
||||
gs, ok := s.(galleryScraper)
|
||||
if !ok {
|
||||
|
|
@ -288,10 +294,16 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models
|
|||
return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err)
|
||||
}
|
||||
|
||||
ret, err = gs.viaGallery(ctx, c.client, gallery)
|
||||
// don't assign nil concrete pointer to ret interface, otherwise nil
|
||||
// detection is harder
|
||||
scraped, err := gs.viaGallery(ctx, c.client, gallery)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
|
||||
}
|
||||
|
||||
if scraped != nil {
|
||||
ret = scraped
|
||||
}
|
||||
}
|
||||
|
||||
return c.postScrape(ctx, ret)
|
||||
|
|
|
|||
|
|
@ -258,7 +258,10 @@ func (q *jsonQuery) runQuery(selector string) ([]string, error) {
|
|||
value := gjson.Get(q.doc, selector)
|
||||
|
||||
if !value.Exists() {
|
||||
return nil, fmt.Errorf("could not find json path '%s' in json object", selector)
|
||||
// many possible reasons why the selector may not be in the json object
|
||||
// and not all are errors.
|
||||
// Just return nil
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var ret []string
|
||||
|
|
|
|||
|
|
@ -97,4 +97,22 @@ jsonScrapers:
|
|||
verifyField(t, "Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the ... arrow_drop_down Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the VR Porn movies – trust us. Ankles behind her neck and feet over her back so she can kiss her toes, turned, twisted and gyrating, she can fuck any which way she wants (and that ass!), will surely make you fall in love with this hot Virtual Reality Porn slut, as she is one of the finest of them all. Talking about perfection, maybe it’s all the acrobatic work that keeps it in such gorgeous shape? Who cares really, because you just want to take a big bite out of it and never let go. But it’s not all about the body. Mia’s also got a great smile, which might not sound kinky, but believe us, it is a smile that will heat up your innards and drop your pants. Is it her golden skin, her innocent pink lips or that heart-shaped face? There is just too much good stuff going on with Mia Malkova, which is maybe why these past few years have heaped awards upon awards on this Southern California native. Mia came to VR Bangers for her first VR Porn video, so you know she’s only going for top-notch scenes with top-game performers, men, and women. 
Better hit up that yoga studio if you ever dream of being able to bang a flexible and talented chick like lady Malkova. arrow_drop_up", scrapedPerformer.Details, "Details")
|
||||
verifyField(t, "Blonde", scrapedPerformer.HairColor, "HairColor")
|
||||
verifyField(t, "57", scrapedPerformer.Weight, "Weight")
|
||||
|
||||
notFoundJson := `
|
||||
{
|
||||
"data": null
|
||||
}`
|
||||
|
||||
q = &jsonQuery{
|
||||
doc: notFoundJson,
|
||||
}
|
||||
|
||||
scrapedPerformer, err = performerScraper.scrapePerformer(context.Background(), q)
|
||||
if err != nil {
|
||||
t.Fatalf("Error scraping performer: %s", err.Error())
|
||||
}
|
||||
|
||||
if scrapedPerformer != nil {
|
||||
t.Errorf("expected nil scraped performer when not found, got %v", scrapedPerformer)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -761,7 +761,7 @@ func (r mappedResults) setKey(index int, key string, value string) mappedResults
|
|||
}
|
||||
|
||||
func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*models.ScrapedPerformer, error) {
|
||||
var ret models.ScrapedPerformer
|
||||
var ret *models.ScrapedPerformer
|
||||
|
||||
performerMap := s.Performer
|
||||
if performerMap == nil {
|
||||
|
|
@ -772,7 +772,8 @@ func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*mod
|
|||
|
||||
results := performerMap.process(ctx, q, s.Common)
|
||||
if len(results) > 0 {
|
||||
results[0].apply(&ret)
|
||||
ret = &models.ScrapedPerformer{}
|
||||
results[0].apply(ret)
|
||||
|
||||
// now apply the tags
|
||||
if performerTagsMap != nil {
|
||||
|
|
@ -787,7 +788,7 @@ func (s mappedScraper) scrapePerformer(ctx context.Context, q mappedQuery) (*mod
|
|||
}
|
||||
}
|
||||
|
||||
return &ret, nil
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapePerformers(ctx context.Context, q mappedQuery) ([]*models.ScrapedPerformer, error) {
|
||||
|
|
@ -903,7 +904,7 @@ func (s mappedScraper) scrapeScenes(ctx context.Context, q mappedQuery) ([]*mode
|
|||
}
|
||||
|
||||
func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.ScrapedScene, error) {
|
||||
var ret models.ScrapedScene
|
||||
var ret *models.ScrapedScene
|
||||
|
||||
sceneScraperConfig := s.Scene
|
||||
sceneMap := sceneScraperConfig.mappedConfig
|
||||
|
|
@ -914,15 +915,14 @@ func (s mappedScraper) scrapeScene(ctx context.Context, q mappedQuery) (*models.
|
|||
logger.Debug(`Processing scene:`)
|
||||
results := sceneMap.process(ctx, q, s.Common)
|
||||
if len(results) > 0 {
|
||||
ss := s.processScene(ctx, q, results[0])
|
||||
ret = *ss
|
||||
ret = s.processScene(ctx, q, results[0])
|
||||
}
|
||||
|
||||
return &ret, nil
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*models.ScrapedGallery, error) {
|
||||
var ret models.ScrapedGallery
|
||||
var ret *models.ScrapedGallery
|
||||
|
||||
galleryScraperConfig := s.Gallery
|
||||
galleryMap := galleryScraperConfig.mappedConfig
|
||||
|
|
@ -937,7 +937,9 @@ func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*model
|
|||
logger.Debug(`Processing gallery:`)
|
||||
results := galleryMap.process(ctx, q, s.Common)
|
||||
if len(results) > 0 {
|
||||
results[0].apply(&ret)
|
||||
ret = &models.ScrapedGallery{}
|
||||
|
||||
results[0].apply(ret)
|
||||
|
||||
// now apply the performers and tags
|
||||
if galleryPerformersMap != nil {
|
||||
|
|
@ -974,11 +976,11 @@ func (s mappedScraper) scrapeGallery(ctx context.Context, q mappedQuery) (*model
|
|||
}
|
||||
}
|
||||
|
||||
return &ret, nil
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.ScrapedMovie, error) {
|
||||
var ret models.ScrapedMovie
|
||||
var ret *models.ScrapedMovie
|
||||
|
||||
movieScraperConfig := s.Movie
|
||||
movieMap := movieScraperConfig.mappedConfig
|
||||
|
|
@ -990,7 +992,8 @@ func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.
|
|||
|
||||
results := movieMap.process(ctx, q, s.Common)
|
||||
if len(results) > 0 {
|
||||
results[0].apply(&ret)
|
||||
ret = &models.ScrapedMovie{}
|
||||
results[0].apply(ret)
|
||||
|
||||
if movieStudioMap != nil {
|
||||
logger.Debug(`Processing movie studio:`)
|
||||
|
|
@ -1004,5 +1007,5 @@ func (s mappedScraper) scrapeMovie(ctx context.Context, q mappedQuery) (*models.
|
|||
}
|
||||
}
|
||||
|
||||
return &ret, nil
|
||||
return ret, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -173,21 +173,21 @@ func (s *scriptScraper) scrapeByURL(ctx context.Context, url string, ty models.S
|
|||
func (s *scriptScraper) scrape(ctx context.Context, input string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
|
||||
switch ty {
|
||||
case models.ScrapeContentTypePerformer:
|
||||
var performer models.ScrapedPerformer
|
||||
var performer *models.ScrapedPerformer
|
||||
err := s.runScraperScript(input, &performer)
|
||||
return &performer, err
|
||||
return performer, err
|
||||
case models.ScrapeContentTypeGallery:
|
||||
var gallery models.ScrapedGallery
|
||||
var gallery *models.ScrapedGallery
|
||||
err := s.runScraperScript(input, &gallery)
|
||||
return &gallery, err
|
||||
return gallery, err
|
||||
case models.ScrapeContentTypeScene:
|
||||
var scene models.ScrapedScene
|
||||
var scene *models.ScrapedScene
|
||||
err := s.runScraperScript(input, &scene)
|
||||
return &scene, err
|
||||
return scene, err
|
||||
case models.ScrapeContentTypeMovie:
|
||||
var movie models.ScrapedMovie
|
||||
var movie *models.ScrapedMovie
|
||||
err := s.runScraperScript(input, &movie)
|
||||
return &movie, err
|
||||
return movie, err
|
||||
}
|
||||
|
||||
return nil, ErrNotSupported
|
||||
|
|
@ -200,11 +200,11 @@ func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sc
|
|||
return nil, err
|
||||
}
|
||||
|
||||
var ret models.ScrapedScene
|
||||
var ret *models.ScrapedScene
|
||||
|
||||
err = s.runScraperScript(string(inString), &ret)
|
||||
|
||||
return &ret, err
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
|
|
@ -214,11 +214,11 @@ func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mod
|
|||
return nil, err
|
||||
}
|
||||
|
||||
var ret models.ScrapedGallery
|
||||
var ret *models.ScrapedGallery
|
||||
|
||||
err = s.runScraperScript(string(inString), &ret)
|
||||
|
||||
return &ret, err
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func findPythonExecutable() (string, error) {
|
||||
|
|
|
|||
|
|
@ -3,4 +3,7 @@
|
|||
* Improved autotag performance. ([#2368](https://github.com/stashapp/stash/pull/2368))
|
||||
|
||||
### 🐛 Bug fixes
|
||||
* Removed warnings and incorrect error message in json scrapers. ([#2375](https://github.com/stashapp/stash/pull/2375))
|
||||
* Ensure identify continues using other scrapers if a scrape returns no results. ([#2375](https://github.com/stashapp/stash/pull/2375))
|
||||
* Continue trying to identify scene if scraper fails. ([#2375](https://github.com/stashapp/stash/pull/2375))
|
||||
* Fix auto-tag not using case-insensitive matching. ([#2378](https://github.com/stashapp/stash/pull/2378))
|
||||
Loading…
Reference in a new issue