mirror of
https://github.com/stashapp/stash.git
synced 2026-04-30 19:03:23 +02:00
Refactor scraper package (#6495)
* Remove reflection from mapped value processing * AI generated unit tests * Move mappedConfig to separate file * Rename group to configScraper * Separate mapped post-processing code into separate file * Update test after group rename * Check map entry when returning scraper * Refactor config into definition * Support single string for string slice translation * Rename config.go to definition.go * Rename configScraper to definedScraper * Rename config_scraper.go to defined_scraper.go
This commit is contained in:
parent
ed0fb53ae0
commit
88eb46380c
20 changed files with 2475 additions and 1324 deletions
|
|
@ -24,9 +24,85 @@ func (e scraperAction) IsValid() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
type scraperActionImpl interface {
|
||||
type urlScraperActionImpl interface {
|
||||
scrapeByURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error)
|
||||
}
|
||||
|
||||
func (c Definition) getURLScraper(def ByURLDefinition, client *http.Client, globalConfig GlobalConfig) urlScraperActionImpl {
|
||||
switch def.Action {
|
||||
case scraperActionScript:
|
||||
return &scriptURLScraper{
|
||||
scriptScraper: scriptScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
},
|
||||
definition: def,
|
||||
}
|
||||
case scraperActionStash:
|
||||
return newStashScraper(client, c, globalConfig)
|
||||
case scraperActionXPath:
|
||||
return &xpathURLScraper{
|
||||
xpathScraper: xpathScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
client: client,
|
||||
},
|
||||
definition: def,
|
||||
}
|
||||
case scraperActionJson:
|
||||
return &jsonURLScraper{
|
||||
jsonScraper: jsonScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
client: client,
|
||||
},
|
||||
definition: def,
|
||||
}
|
||||
}
|
||||
|
||||
panic("unknown scraper action: " + def.Action)
|
||||
}
|
||||
|
||||
type nameScraperActionImpl interface {
|
||||
scrapeByName(ctx context.Context, name string, ty ScrapeContentType) ([]ScrapedContent, error)
|
||||
}
|
||||
|
||||
func (c Definition) getNameScraper(def ByNameDefinition, client *http.Client, globalConfig GlobalConfig) nameScraperActionImpl {
|
||||
switch def.Action {
|
||||
case scraperActionScript:
|
||||
return &scriptNameScraper{
|
||||
scriptScraper: scriptScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
},
|
||||
definition: def,
|
||||
}
|
||||
case scraperActionStash:
|
||||
return newStashScraper(client, c, globalConfig)
|
||||
case scraperActionXPath:
|
||||
return &xpathNameScraper{
|
||||
xpathScraper: xpathScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
client: client,
|
||||
},
|
||||
definition: def,
|
||||
}
|
||||
case scraperActionJson:
|
||||
return &jsonNameScraper{
|
||||
jsonScraper: jsonScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
client: client,
|
||||
},
|
||||
definition: def,
|
||||
}
|
||||
}
|
||||
|
||||
panic("unknown scraper action: " + def.Action)
|
||||
}
|
||||
|
||||
type fragmentScraperActionImpl interface {
|
||||
scrapeByFragment(ctx context.Context, input Input) (ScrapedContent, error)
|
||||
|
||||
scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error)
|
||||
|
|
@ -34,17 +110,37 @@ type scraperActionImpl interface {
|
|||
scrapeImageByImage(ctx context.Context, image *models.Image) (*models.ScrapedImage, error)
|
||||
}
|
||||
|
||||
func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, globalConfig GlobalConfig) scraperActionImpl {
|
||||
switch scraper.Action {
|
||||
func (c Definition) getFragmentScraper(actionDef ByFragmentDefinition, client *http.Client, globalConfig GlobalConfig) fragmentScraperActionImpl {
|
||||
switch actionDef.Action {
|
||||
case scraperActionScript:
|
||||
return newScriptScraper(scraper, c, globalConfig)
|
||||
return &scriptFragmentScraper{
|
||||
scriptScraper: scriptScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
},
|
||||
definition: actionDef,
|
||||
}
|
||||
case scraperActionStash:
|
||||
return newStashScraper(scraper, client, c, globalConfig)
|
||||
return newStashScraper(client, c, globalConfig)
|
||||
case scraperActionXPath:
|
||||
return newXpathScraper(scraper, client, c, globalConfig)
|
||||
return &xpathFragmentScraper{
|
||||
xpathScraper: xpathScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
client: client,
|
||||
},
|
||||
definition: actionDef,
|
||||
}
|
||||
case scraperActionJson:
|
||||
return newJsonScraper(scraper, client, c, globalConfig)
|
||||
return &jsonFragmentScraper{
|
||||
jsonScraper: jsonScraper{
|
||||
definition: c,
|
||||
globalConfig: globalConfig,
|
||||
client: client,
|
||||
},
|
||||
definition: actionDef,
|
||||
}
|
||||
}
|
||||
|
||||
panic("unknown scraper action: " + scraper.Action)
|
||||
panic("unknown scraper action: " + actionDef.Action)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -182,7 +182,7 @@ func (c *Cache) ReloadScrapers() {
|
|||
if err != nil {
|
||||
logger.Errorf("Error loading scraper %s: %v", fp, err)
|
||||
} else {
|
||||
scraper := newGroupScraper(*conf, c.globalConfig)
|
||||
scraper := scraperFromDefinition(*conf, c.globalConfig)
|
||||
scrapers[scraper.spec().ID] = scraper
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ import (
|
|||
)
|
||||
|
||||
// jar constructs a cookie jar from a configuration
|
||||
func (c config) jar() (*cookiejar.Jar, error) {
|
||||
func (c Definition) jar() (*cookiejar.Jar, error) {
|
||||
opts := c.DriverOptions
|
||||
jar, err := cookiejar.New(&cookiejar.Options{
|
||||
PublicSuffixList: publicsuffix.List,
|
||||
|
|
@ -77,7 +77,7 @@ func randomSequence(n int) string {
|
|||
}
|
||||
|
||||
// printCookies prints all cookies from the given cookie jar
|
||||
func printCookies(jar *cookiejar.Jar, scraperConfig config, msg string) {
|
||||
func printCookies(jar *cookiejar.Jar, scraperConfig Definition, msg string) {
|
||||
driverOptions := scraperConfig.DriverOptions
|
||||
if driverOptions != nil && !driverOptions.UseCDP {
|
||||
var foundURLs []*url.URL
|
||||
|
|
|
|||
|
|
@ -8,25 +8,26 @@ import (
|
|||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
type group struct {
|
||||
config config
|
||||
// definedScraper implements the scraper interface using a Definition object.
|
||||
type definedScraper struct {
|
||||
config Definition
|
||||
|
||||
globalConf GlobalConfig
|
||||
}
|
||||
|
||||
func newGroupScraper(c config, globalConfig GlobalConfig) scraper {
|
||||
return group{
|
||||
func scraperFromDefinition(c Definition, globalConfig GlobalConfig) definedScraper {
|
||||
return definedScraper{
|
||||
config: c,
|
||||
globalConf: globalConfig,
|
||||
}
|
||||
}
|
||||
|
||||
func (g group) spec() Scraper {
|
||||
func (g definedScraper) spec() Scraper {
|
||||
return g.config.spec()
|
||||
}
|
||||
|
||||
// fragmentScraper finds an appropriate fragment scraper based on input.
|
||||
func (g group) fragmentScraper(input Input) *scraperTypeConfig {
|
||||
func (g definedScraper) fragmentScraper(input Input) *ByFragmentDefinition {
|
||||
switch {
|
||||
case input.Performer != nil:
|
||||
return g.config.PerformerByFragment
|
||||
|
|
@ -43,7 +44,7 @@ func (g group) fragmentScraper(input Input) *scraperTypeConfig {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (g group) viaFragment(ctx context.Context, client *http.Client, input Input) (ScrapedContent, error) {
|
||||
func (g definedScraper) viaFragment(ctx context.Context, client *http.Client, input Input) (ScrapedContent, error) {
|
||||
stc := g.fragmentScraper(input)
|
||||
if stc == nil {
|
||||
// If there's no performer fragment scraper in the group, we try to use
|
||||
|
|
@ -56,38 +57,38 @@ func (g group) viaFragment(ctx context.Context, client *http.Client, input Input
|
|||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*stc, client, g.globalConf)
|
||||
s := g.config.getFragmentScraper(*stc, client, g.globalConf)
|
||||
return s.scrapeByFragment(ctx, input)
|
||||
}
|
||||
|
||||
func (g group) viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
func (g definedScraper) viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
if g.config.SceneByFragment == nil {
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.SceneByFragment, client, g.globalConf)
|
||||
s := g.config.getFragmentScraper(*g.config.SceneByFragment, client, g.globalConf)
|
||||
return s.scrapeSceneByScene(ctx, scene)
|
||||
}
|
||||
|
||||
func (g group) viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (g definedScraper) viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
if g.config.GalleryByFragment == nil {
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.GalleryByFragment, client, g.globalConf)
|
||||
s := g.config.getFragmentScraper(*g.config.GalleryByFragment, client, g.globalConf)
|
||||
return s.scrapeGalleryByGallery(ctx, gallery)
|
||||
}
|
||||
|
||||
func (g group) viaImage(ctx context.Context, client *http.Client, gallery *models.Image) (*models.ScrapedImage, error) {
|
||||
func (g definedScraper) viaImage(ctx context.Context, client *http.Client, gallery *models.Image) (*models.ScrapedImage, error) {
|
||||
if g.config.ImageByFragment == nil {
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.ImageByFragment, client, g.globalConf)
|
||||
s := g.config.getFragmentScraper(*g.config.ImageByFragment, client, g.globalConf)
|
||||
return s.scrapeImageByImage(ctx, gallery)
|
||||
}
|
||||
|
||||
func loadUrlCandidates(c config, ty ScrapeContentType) []*scrapeByURLConfig {
|
||||
func loadUrlCandidates(c Definition, ty ScrapeContentType) []*ByURLDefinition {
|
||||
switch ty {
|
||||
case ScrapeContentTypePerformer:
|
||||
return c.PerformerByURL
|
||||
|
|
@ -104,12 +105,13 @@ func loadUrlCandidates(c config, ty ScrapeContentType) []*scrapeByURLConfig {
|
|||
panic("loadUrlCandidates: unreachable")
|
||||
}
|
||||
|
||||
func (g group) viaURL(ctx context.Context, client *http.Client, url string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
func (g definedScraper) viaURL(ctx context.Context, client *http.Client, url string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
candidates := loadUrlCandidates(g.config, ty)
|
||||
for _, scraper := range candidates {
|
||||
if scraper.matchesURL(url) {
|
||||
s := g.config.getScraper(scraper.scraperTypeConfig, client, g.globalConf)
|
||||
ret, err := s.scrapeByURL(ctx, url, ty)
|
||||
u := replaceURL(url, *scraper) // allow a URL Replace for url-queries
|
||||
s := g.config.getURLScraper(*scraper, client, g.globalConf)
|
||||
ret, err := s.scrapeByURL(ctx, u, ty)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
@ -123,31 +125,31 @@ func (g group) viaURL(ctx context.Context, client *http.Client, url string, ty S
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
func (g group) viaName(ctx context.Context, client *http.Client, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
|
||||
func (g definedScraper) viaName(ctx context.Context, client *http.Client, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
|
||||
switch ty {
|
||||
case ScrapeContentTypePerformer:
|
||||
if g.config.PerformerByName == nil {
|
||||
break
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.PerformerByName, client, g.globalConf)
|
||||
s := g.config.getNameScraper(*g.config.PerformerByName, client, g.globalConf)
|
||||
return s.scrapeByName(ctx, name, ty)
|
||||
case ScrapeContentTypeScene:
|
||||
if g.config.SceneByName == nil {
|
||||
break
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.SceneByName, client, g.globalConf)
|
||||
s := g.config.getNameScraper(*g.config.SceneByName, client, g.globalConf)
|
||||
return s.scrapeByName(ctx, name, ty)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("%w: cannot load %v by name", ErrNotSupported, ty)
|
||||
}
|
||||
|
||||
func (g group) supports(ty ScrapeContentType) bool {
|
||||
func (g definedScraper) supports(ty ScrapeContentType) bool {
|
||||
return g.config.supports(ty)
|
||||
}
|
||||
|
||||
func (g group) supportsURL(url string, ty ScrapeContentType) bool {
|
||||
func (g definedScraper) supportsURL(url string, ty ScrapeContentType) bool {
|
||||
return g.config.matchesURL(url, ty)
|
||||
}
|
||||
|
|
@ -11,7 +11,8 @@ import (
|
|||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
type config struct {
|
||||
// Definition represents a scraper definition (typically) loaded from a YAML configuration file.
|
||||
type Definition struct {
|
||||
ID string
|
||||
path string
|
||||
|
||||
|
|
@ -19,43 +20,43 @@ type config struct {
|
|||
Name string `yaml:"name"`
|
||||
|
||||
// Configuration for querying performers by name
|
||||
PerformerByName *scraperTypeConfig `yaml:"performerByName"`
|
||||
PerformerByName *ByNameDefinition `yaml:"performerByName"`
|
||||
|
||||
// Configuration for querying performers by a Performer fragment
|
||||
PerformerByFragment *scraperTypeConfig `yaml:"performerByFragment"`
|
||||
PerformerByFragment *ByFragmentDefinition `yaml:"performerByFragment"`
|
||||
|
||||
// Configuration for querying a performer by a URL
|
||||
PerformerByURL []*scrapeByURLConfig `yaml:"performerByURL"`
|
||||
PerformerByURL []*ByURLDefinition `yaml:"performerByURL"`
|
||||
|
||||
// Configuration for querying scenes by a Scene fragment
|
||||
SceneByFragment *scraperTypeConfig `yaml:"sceneByFragment"`
|
||||
SceneByFragment *ByFragmentDefinition `yaml:"sceneByFragment"`
|
||||
|
||||
// Configuration for querying gallery by a Gallery fragment
|
||||
GalleryByFragment *scraperTypeConfig `yaml:"galleryByFragment"`
|
||||
GalleryByFragment *ByFragmentDefinition `yaml:"galleryByFragment"`
|
||||
|
||||
// Configuration for querying scenes by name
|
||||
SceneByName *scraperTypeConfig `yaml:"sceneByName"`
|
||||
SceneByName *ByNameDefinition `yaml:"sceneByName"`
|
||||
|
||||
// Configuration for querying scenes by query fragment
|
||||
SceneByQueryFragment *scraperTypeConfig `yaml:"sceneByQueryFragment"`
|
||||
SceneByQueryFragment *ByFragmentDefinition `yaml:"sceneByQueryFragment"`
|
||||
|
||||
// Configuration for querying a scene by a URL
|
||||
SceneByURL []*scrapeByURLConfig `yaml:"sceneByURL"`
|
||||
SceneByURL []*ByURLDefinition `yaml:"sceneByURL"`
|
||||
|
||||
// Configuration for querying a gallery by a URL
|
||||
GalleryByURL []*scrapeByURLConfig `yaml:"galleryByURL"`
|
||||
GalleryByURL []*ByURLDefinition `yaml:"galleryByURL"`
|
||||
|
||||
// Configuration for querying an image by a URL
|
||||
ImageByURL []*scrapeByURLConfig `yaml:"imageByURL"`
|
||||
ImageByURL []*ByURLDefinition `yaml:"imageByURL"`
|
||||
|
||||
// Configuration for querying image by an Image fragment
|
||||
ImageByFragment *scraperTypeConfig `yaml:"imageByFragment"`
|
||||
ImageByFragment *ByFragmentDefinition `yaml:"imageByFragment"`
|
||||
|
||||
// Configuration for querying a movie by a URL - deprecated, use GroupByURL
|
||||
MovieByURL []*scrapeByURLConfig `yaml:"movieByURL"`
|
||||
MovieByURL []*ByURLDefinition `yaml:"movieByURL"`
|
||||
|
||||
// Configuration for querying a group by a URL
|
||||
GroupByURL []*scrapeByURLConfig `yaml:"groupByURL"`
|
||||
GroupByURL []*ByURLDefinition `yaml:"groupByURL"`
|
||||
|
||||
// Scraper debugging options
|
||||
DebugOptions *scraperDebugOptions `yaml:"debug"`
|
||||
|
|
@ -73,7 +74,7 @@ type config struct {
|
|||
DriverOptions *scraperDriverOptions `yaml:"driver"`
|
||||
}
|
||||
|
||||
func (c config) validate() error {
|
||||
func (c Definition) validate() error {
|
||||
if strings.TrimSpace(c.Name) == "" {
|
||||
return errors.New("name must not be empty")
|
||||
}
|
||||
|
|
@ -126,17 +127,13 @@ type stashServer struct {
|
|||
ApiKey string `yaml:"apiKey"`
|
||||
}
|
||||
|
||||
type scraperTypeConfig struct {
|
||||
type ActionDefinition struct {
|
||||
Action scraperAction `yaml:"action"`
|
||||
Script []string `yaml:"script,flow"`
|
||||
Scraper string `yaml:"scraper"`
|
||||
|
||||
// for xpath name scraper only
|
||||
QueryURL string `yaml:"queryURL"`
|
||||
QueryURLReplacements queryURLReplacements `yaml:"queryURLReplace"`
|
||||
}
|
||||
|
||||
func (c scraperTypeConfig) validate() error {
|
||||
func (c ActionDefinition) validate() error {
|
||||
if !c.Action.IsValid() {
|
||||
return fmt.Errorf("%s is not a valid scraper action", c.Action)
|
||||
}
|
||||
|
|
@ -148,20 +145,22 @@ func (c scraperTypeConfig) validate() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
type scrapeByURLConfig struct {
|
||||
scraperTypeConfig `yaml:",inline"`
|
||||
URL []string `yaml:"url,flow"`
|
||||
type ByURLDefinition struct {
|
||||
ActionDefinition `yaml:",inline"`
|
||||
URL []string `yaml:"url,flow"`
|
||||
QueryURL string `yaml:"queryURL"`
|
||||
QueryURLReplacements queryURLReplacements `yaml:"queryURLReplace"`
|
||||
}
|
||||
|
||||
func (c scrapeByURLConfig) validate() error {
|
||||
func (c ByURLDefinition) validate() error {
|
||||
if len(c.URL) == 0 {
|
||||
return errors.New("url is mandatory for scrape by url scrapers")
|
||||
}
|
||||
|
||||
return c.scraperTypeConfig.validate()
|
||||
return c.ActionDefinition.validate()
|
||||
}
|
||||
|
||||
func (c scrapeByURLConfig) matchesURL(url string) bool {
|
||||
func (c ByURLDefinition) matchesURL(url string) bool {
|
||||
for _, thisURL := range c.URL {
|
||||
if strings.Contains(url, thisURL) {
|
||||
return true
|
||||
|
|
@ -171,6 +170,18 @@ func (c scrapeByURLConfig) matchesURL(url string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
type ByFragmentDefinition struct {
|
||||
ActionDefinition `yaml:",inline"`
|
||||
|
||||
QueryURL string `yaml:"queryURL"`
|
||||
QueryURLReplacements queryURLReplacements `yaml:"queryURLReplace"`
|
||||
}
|
||||
|
||||
type ByNameDefinition struct {
|
||||
ActionDefinition `yaml:",inline"`
|
||||
QueryURL string `yaml:"queryURL"`
|
||||
}
|
||||
|
||||
type scraperDebugOptions struct {
|
||||
PrintHTML bool `yaml:"printHTML"`
|
||||
}
|
||||
|
|
@ -206,8 +217,8 @@ type scraperDriverOptions struct {
|
|||
Headers []*header `yaml:"headers"`
|
||||
}
|
||||
|
||||
func loadConfigFromYAML(id string, reader io.Reader) (*config, error) {
|
||||
ret := &config{}
|
||||
func loadConfigFromYAML(id string, reader io.Reader) (*Definition, error) {
|
||||
ret := &Definition{}
|
||||
|
||||
parser := yaml.NewDecoder(reader)
|
||||
parser.SetStrict(true)
|
||||
|
|
@ -225,7 +236,7 @@ func loadConfigFromYAML(id string, reader io.Reader) (*config, error) {
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
func loadConfigFromYAMLFile(path string) (*config, error) {
|
||||
func loadConfigFromYAMLFile(path string) (*Definition, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
@ -246,7 +257,7 @@ func loadConfigFromYAMLFile(path string) (*config, error) {
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
func (c config) spec() Scraper {
|
||||
func (c Definition) spec() Scraper {
|
||||
ret := Scraper{
|
||||
ID: c.ID,
|
||||
Name: c.Name,
|
||||
|
|
@ -334,7 +345,7 @@ func (c config) spec() Scraper {
|
|||
return ret
|
||||
}
|
||||
|
||||
func (c config) supports(ty ScrapeContentType) bool {
|
||||
func (c Definition) supports(ty ScrapeContentType) bool {
|
||||
switch ty {
|
||||
case ScrapeContentTypePerformer:
|
||||
return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0
|
||||
|
|
@ -351,7 +362,7 @@ func (c config) supports(ty ScrapeContentType) bool {
|
|||
panic("Unhandled ScrapeContentType")
|
||||
}
|
||||
|
||||
func (c config) matchesURL(url string, ty ScrapeContentType) bool {
|
||||
func (c Definition) matchesURL(url string, ty ScrapeContentType) bool {
|
||||
switch ty {
|
||||
case ScrapeContentTypePerformer:
|
||||
for _, scraper := range c.PerformerByURL {
|
||||
|
|
@ -139,5 +139,5 @@ func getFreeonesScraper(globalConfig GlobalConfig) scraper {
|
|||
logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error())
|
||||
}
|
||||
|
||||
return newGroupScraper(*c, globalConfig)
|
||||
return scraperFromDefinition(*c, globalConfig)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,43 +15,22 @@ import (
|
|||
)
|
||||
|
||||
type jsonScraper struct {
|
||||
scraper scraperTypeConfig
|
||||
config config
|
||||
definition Definition
|
||||
globalConfig GlobalConfig
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
func newJsonScraper(scraper scraperTypeConfig, client *http.Client, config config, globalConfig GlobalConfig) *jsonScraper {
|
||||
return &jsonScraper{
|
||||
scraper: scraper,
|
||||
config: config,
|
||||
client: client,
|
||||
globalConfig: globalConfig,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *jsonScraper) getJsonScraper() *mappedScraper {
|
||||
return s.config.JsonScrapers[s.scraper.Scraper]
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeURL(ctx context.Context, url string) (string, *mappedScraper, error) {
|
||||
scraper := s.getJsonScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return "", nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
func (s *jsonScraper) getJsonScraper(name string) (*mappedScraper, error) {
|
||||
ret, ok := s.definition.JsonScrapers[name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("json scraper with name %s not found in config", name)
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
return doc, scraper, nil
|
||||
return &ret, nil
|
||||
}
|
||||
|
||||
func (s *jsonScraper) loadURL(ctx context.Context, url string) (string, error) {
|
||||
r, err := loadURL(ctx, url, s.client, s.config, s.globalConfig)
|
||||
r, err := loadURL(ctx, url, s.client, s.definition, s.globalConfig)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
@ -66,21 +45,30 @@ func (s *jsonScraper) loadURL(ctx context.Context, url string) (string, error) {
|
|||
return "", errors.New("not valid json")
|
||||
}
|
||||
|
||||
if s.config.DebugOptions != nil && s.config.DebugOptions.PrintHTML {
|
||||
if s.definition.DebugOptions != nil && s.definition.DebugOptions.PrintHTML {
|
||||
logger.Infof("loadURL (%s) response: \n%s", url, docStr)
|
||||
}
|
||||
|
||||
return docStr, err
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for url-queries
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
type jsonURLScraper struct {
|
||||
jsonScraper
|
||||
definition ByURLDefinition
|
||||
}
|
||||
|
||||
func (s *jsonURLScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
scraper, err := s.getJsonScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q := s.getJsonQuery(doc, u)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q := s.getJsonQuery(doc, url)
|
||||
// if these just return the return values from scraper.scrape* functions then
|
||||
// it ends up returning ScrapedContent(nil) rather than nil
|
||||
switch ty {
|
||||
|
|
@ -119,11 +107,15 @@ func (s *jsonScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeCont
|
|||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeByName(ctx context.Context, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
|
||||
scraper := s.getJsonScraper()
|
||||
type jsonNameScraper struct {
|
||||
jsonScraper
|
||||
definition ByNameDefinition
|
||||
}
|
||||
|
||||
if scraper == nil {
|
||||
return nil, fmt.Errorf("%w: name %v", ErrNotFound, s.scraper.Scraper)
|
||||
func (s *jsonNameScraper) scrapeByName(ctx context.Context, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
|
||||
scraper, err := s.getJsonScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
const placeholder = "{}"
|
||||
|
|
@ -131,7 +123,7 @@ func (s *jsonScraper) scrapeByName(ctx context.Context, name string, ty ScrapeCo
|
|||
// replace the placeholder string with the URL-escaped name
|
||||
escapedName := url.QueryEscape(name)
|
||||
|
||||
url := s.scraper.QueryURL
|
||||
url := s.definition.QueryURL
|
||||
url = strings.ReplaceAll(url, placeholder, escapedName)
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -172,18 +164,22 @@ func (s *jsonScraper) scrapeByName(ctx context.Context, name string, ty ScrapeCo
|
|||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
type jsonFragmentScraper struct {
|
||||
jsonScraper
|
||||
definition ByFragmentDefinition
|
||||
}
|
||||
|
||||
func (s *jsonFragmentScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromScene(scene)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
||||
if s.definition.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.definition.QueryURLReplacements)
|
||||
}
|
||||
url := queryURL.constructURL(s.scraper.QueryURL)
|
||||
url := queryURL.constructURL(s.definition.QueryURL)
|
||||
|
||||
scraper := s.getJsonScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
scraper, err := s.getJsonScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -196,7 +192,7 @@ func (s *jsonScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scen
|
|||
return scraper.scrapeScene(ctx, q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeByFragment(ctx context.Context, input Input) (ScrapedContent, error) {
|
||||
func (s *jsonFragmentScraper) scrapeByFragment(ctx context.Context, input Input) (ScrapedContent, error) {
|
||||
switch {
|
||||
case input.Gallery != nil:
|
||||
return nil, fmt.Errorf("%w: cannot use a json scraper as a gallery fragment scraper", ErrNotSupported)
|
||||
|
|
@ -210,15 +206,14 @@ func (s *jsonScraper) scrapeByFragment(ctx context.Context, input Input) (Scrape
|
|||
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromScrapedScene(scene)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
||||
if s.definition.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.definition.QueryURLReplacements)
|
||||
}
|
||||
url := queryURL.constructURL(s.scraper.QueryURL)
|
||||
url := queryURL.constructURL(s.definition.QueryURL)
|
||||
|
||||
scraper := s.getJsonScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
scraper, err := s.getJsonScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -231,18 +226,17 @@ func (s *jsonScraper) scrapeByFragment(ctx context.Context, input Input) (Scrape
|
|||
return scraper.scrapeScene(ctx, q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*models.ScrapedImage, error) {
|
||||
func (s *jsonFragmentScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*models.ScrapedImage, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromImage(image)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
||||
if s.definition.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.definition.QueryURLReplacements)
|
||||
}
|
||||
url := queryURL.constructURL(s.scraper.QueryURL)
|
||||
url := queryURL.constructURL(s.definition.QueryURL)
|
||||
|
||||
scraper := s.getJsonScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
scraper, err := s.getJsonScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -255,18 +249,17 @@ func (s *jsonScraper) scrapeImageByImage(ctx context.Context, image *models.Imag
|
|||
return scraper.scrapeImage(ctx, q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (s *jsonFragmentScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromGallery(gallery)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
||||
if s.definition.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.definition.QueryURLReplacements)
|
||||
}
|
||||
url := queryURL.constructURL(s.scraper.QueryURL)
|
||||
url := queryURL.constructURL(s.definition.QueryURL)
|
||||
|
||||
scraper := s.getJsonScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
scraper, err := s.getJsonScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ jsonScrapers:
|
|||
}
|
||||
`
|
||||
|
||||
c := &config{}
|
||||
c := &Definition{}
|
||||
err := yaml.Unmarshal([]byte(yamlStr), &c)
|
||||
|
||||
if err != nil {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
537
pkg/scraper/mapped_config.go
Normal file
537
pkg/scraper/mapped_config.go
Normal file
|
|
@ -0,0 +1,537 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
"github.com/stashapp/stash/pkg/sliceutil"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
type commonMappedConfig map[string]string
|
||||
|
||||
type mappedConfig map[string]mappedScraperAttrConfig
|
||||
|
||||
func (s mappedConfig) applyCommon(c commonMappedConfig, src string) string {
|
||||
if c == nil {
|
||||
return src
|
||||
}
|
||||
|
||||
ret := src
|
||||
for commonKey, commonVal := range c {
|
||||
ret = strings.ReplaceAll(ret, commonKey, commonVal)
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// extractHostname parses a URL string and returns the hostname.
|
||||
// Returns empty string if the URL cannot be parsed.
|
||||
func extractHostname(urlStr string) string {
|
||||
if urlStr == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
u, err := url.Parse(urlStr)
|
||||
if err != nil {
|
||||
logger.Warnf("Error parsing URL '%s': %s", urlStr, err.Error())
|
||||
return ""
|
||||
}
|
||||
|
||||
return u.Hostname()
|
||||
}
|
||||
|
||||
type isMultiFunc func(key string) bool
|
||||
|
||||
func (s mappedConfig) process(ctx context.Context, q mappedQuery, common commonMappedConfig, isMulti isMultiFunc) mappedResults {
|
||||
var ret mappedResults
|
||||
|
||||
for k, attrConfig := range s {
|
||||
|
||||
if attrConfig.Fixed != "" {
|
||||
// TODO - not sure if this needs to set _all_ indexes for the key
|
||||
const i = 0
|
||||
// Support {inputURL} and {inputHostname} placeholders in fixed values
|
||||
value := strings.ReplaceAll(attrConfig.Fixed, "{inputURL}", q.getURL())
|
||||
value = strings.ReplaceAll(value, "{inputHostname}", extractHostname(q.getURL()))
|
||||
ret = ret.setSingleValue(i, k, value)
|
||||
} else {
|
||||
selector := attrConfig.Selector
|
||||
selector = s.applyCommon(common, selector)
|
||||
// Support {inputURL} and {inputHostname} placeholders in selectors
|
||||
selector = strings.ReplaceAll(selector, "{inputURL}", q.getURL())
|
||||
selector = strings.ReplaceAll(selector, "{inputHostname}", extractHostname(q.getURL()))
|
||||
|
||||
found, err := q.runQuery(selector)
|
||||
if err != nil {
|
||||
logger.Warnf("key '%v': %v", k, err)
|
||||
}
|
||||
|
||||
if len(found) > 0 {
|
||||
result := s.postProcess(ctx, q, attrConfig, found)
|
||||
|
||||
// HACK - if the key is URLs, then we need to set the value as a multi-value
|
||||
isMulti := isMulti != nil && isMulti(k)
|
||||
if isMulti {
|
||||
ret = ret.setMultiValue(0, k, result)
|
||||
} else {
|
||||
for i, text := range result {
|
||||
ret = ret.setSingleValue(i, k, text)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (s mappedConfig) postProcess(ctx context.Context, q mappedQuery, attrConfig mappedScraperAttrConfig, found []string) []string {
|
||||
// check if we're concatenating the results into a single result
|
||||
var ret []string
|
||||
if attrConfig.hasConcat() {
|
||||
result := attrConfig.concatenateResults(found)
|
||||
result = attrConfig.postProcess(ctx, result, q)
|
||||
if attrConfig.hasSplit() {
|
||||
results := attrConfig.splitString(result)
|
||||
// skip cleaning when the query is used for searching
|
||||
if q.getType() == SearchQuery {
|
||||
return results
|
||||
}
|
||||
results = attrConfig.cleanResults(results)
|
||||
return results
|
||||
}
|
||||
|
||||
ret = []string{result}
|
||||
} else {
|
||||
for _, text := range found {
|
||||
text = attrConfig.postProcess(ctx, text, q)
|
||||
if attrConfig.hasSplit() {
|
||||
return attrConfig.splitString(text)
|
||||
}
|
||||
|
||||
ret = append(ret, text)
|
||||
}
|
||||
// skip cleaning when the query is used for searching
|
||||
if q.getType() == SearchQuery {
|
||||
return ret
|
||||
}
|
||||
ret = attrConfig.cleanResults(ret)
|
||||
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
type mappedSceneScraperConfig struct {
|
||||
mappedConfig
|
||||
|
||||
Tags mappedConfig `yaml:"Tags"`
|
||||
Performers mappedPerformerScraperConfig `yaml:"Performers"`
|
||||
Studio mappedConfig `yaml:"Studio"`
|
||||
Movies mappedConfig `yaml:"Movies"`
|
||||
Groups mappedConfig `yaml:"Groups"`
|
||||
}
|
||||
type _mappedSceneScraperConfig mappedSceneScraperConfig
|
||||
|
||||
const (
|
||||
mappedScraperConfigSceneTags = "Tags"
|
||||
mappedScraperConfigScenePerformers = "Performers"
|
||||
mappedScraperConfigSceneStudio = "Studio"
|
||||
mappedScraperConfigSceneMovies = "Movies"
|
||||
mappedScraperConfigSceneGroups = "Groups"
|
||||
)
|
||||
|
||||
func (s *mappedSceneScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
// HACK - unmarshal to map first, then remove known scene sub-fields, then
|
||||
// remarshal to yaml and pass that down to the base map
|
||||
parentMap := make(map[string]interface{})
|
||||
if err := unmarshal(parentMap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// move the known sub-fields to a separate map
|
||||
thisMap := make(map[string]interface{})
|
||||
|
||||
thisMap[mappedScraperConfigSceneTags] = parentMap[mappedScraperConfigSceneTags]
|
||||
thisMap[mappedScraperConfigScenePerformers] = parentMap[mappedScraperConfigScenePerformers]
|
||||
thisMap[mappedScraperConfigSceneStudio] = parentMap[mappedScraperConfigSceneStudio]
|
||||
thisMap[mappedScraperConfigSceneMovies] = parentMap[mappedScraperConfigSceneMovies]
|
||||
thisMap[mappedScraperConfigSceneGroups] = parentMap[mappedScraperConfigSceneGroups]
|
||||
|
||||
delete(parentMap, mappedScraperConfigSceneTags)
|
||||
delete(parentMap, mappedScraperConfigScenePerformers)
|
||||
delete(parentMap, mappedScraperConfigSceneStudio)
|
||||
delete(parentMap, mappedScraperConfigSceneMovies)
|
||||
delete(parentMap, mappedScraperConfigSceneGroups)
|
||||
|
||||
// re-unmarshal the sub-fields
|
||||
yml, err := yaml.Marshal(thisMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// needs to be a different type to prevent infinite recursion
|
||||
c := _mappedSceneScraperConfig{}
|
||||
if err := yaml.Unmarshal(yml, &c); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*s = mappedSceneScraperConfig(c)
|
||||
|
||||
yml, err = yaml.Marshal(parentMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type mappedGalleryScraperConfig struct {
|
||||
mappedConfig
|
||||
|
||||
Tags mappedConfig `yaml:"Tags"`
|
||||
Performers mappedConfig `yaml:"Performers"`
|
||||
Studio mappedConfig `yaml:"Studio"`
|
||||
}
|
||||
|
||||
type _mappedGalleryScraperConfig mappedGalleryScraperConfig
|
||||
|
||||
func (s *mappedGalleryScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
// HACK - unmarshal to map first, then remove known scene sub-fields, then
|
||||
// remarshal to yaml and pass that down to the base map
|
||||
parentMap := make(map[string]interface{})
|
||||
if err := unmarshal(parentMap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// move the known sub-fields to a separate map
|
||||
thisMap := make(map[string]interface{})
|
||||
|
||||
thisMap[mappedScraperConfigSceneTags] = parentMap[mappedScraperConfigSceneTags]
|
||||
thisMap[mappedScraperConfigScenePerformers] = parentMap[mappedScraperConfigScenePerformers]
|
||||
thisMap[mappedScraperConfigSceneStudio] = parentMap[mappedScraperConfigSceneStudio]
|
||||
|
||||
delete(parentMap, mappedScraperConfigSceneTags)
|
||||
delete(parentMap, mappedScraperConfigScenePerformers)
|
||||
delete(parentMap, mappedScraperConfigSceneStudio)
|
||||
|
||||
// re-unmarshal the sub-fields
|
||||
yml, err := yaml.Marshal(thisMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// needs to be a different type to prevent infinite recursion
|
||||
c := _mappedGalleryScraperConfig{}
|
||||
if err := yaml.Unmarshal(yml, &c); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*s = mappedGalleryScraperConfig(c)
|
||||
|
||||
yml, err = yaml.Marshal(parentMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type mappedImageScraperConfig struct {
|
||||
mappedConfig
|
||||
|
||||
Tags mappedConfig `yaml:"Tags"`
|
||||
Performers mappedConfig `yaml:"Performers"`
|
||||
Studio mappedConfig `yaml:"Studio"`
|
||||
}
|
||||
type _mappedImageScraperConfig mappedImageScraperConfig
|
||||
|
||||
func (s *mappedImageScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
// HACK - unmarshal to map first, then remove known scene sub-fields, then
|
||||
// remarshal to yaml and pass that down to the base map
|
||||
parentMap := make(map[string]interface{})
|
||||
if err := unmarshal(parentMap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// move the known sub-fields to a separate map
|
||||
thisMap := make(map[string]interface{})
|
||||
|
||||
thisMap[mappedScraperConfigSceneTags] = parentMap[mappedScraperConfigSceneTags]
|
||||
thisMap[mappedScraperConfigScenePerformers] = parentMap[mappedScraperConfigScenePerformers]
|
||||
thisMap[mappedScraperConfigSceneStudio] = parentMap[mappedScraperConfigSceneStudio]
|
||||
|
||||
delete(parentMap, mappedScraperConfigSceneTags)
|
||||
delete(parentMap, mappedScraperConfigScenePerformers)
|
||||
delete(parentMap, mappedScraperConfigSceneStudio)
|
||||
|
||||
// re-unmarshal the sub-fields
|
||||
yml, err := yaml.Marshal(thisMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// needs to be a different type to prevent infinite recursion
|
||||
c := _mappedImageScraperConfig{}
|
||||
if err := yaml.Unmarshal(yml, &c); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*s = mappedImageScraperConfig(c)
|
||||
|
||||
yml, err = yaml.Marshal(parentMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type mappedPerformerScraperConfig struct {
|
||||
mappedConfig
|
||||
|
||||
Tags mappedConfig `yaml:"Tags"`
|
||||
}
|
||||
type _mappedPerformerScraperConfig mappedPerformerScraperConfig
|
||||
|
||||
const (
|
||||
mappedScraperConfigPerformerTags = "Tags"
|
||||
)
|
||||
|
||||
func (s *mappedPerformerScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
// HACK - unmarshal to map first, then remove known scene sub-fields, then
|
||||
// remarshal to yaml and pass that down to the base map
|
||||
parentMap := make(map[string]interface{})
|
||||
if err := unmarshal(parentMap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// move the known sub-fields to a separate map
|
||||
thisMap := make(map[string]interface{})
|
||||
|
||||
thisMap[mappedScraperConfigPerformerTags] = parentMap[mappedScraperConfigPerformerTags]
|
||||
|
||||
delete(parentMap, mappedScraperConfigPerformerTags)
|
||||
|
||||
// re-unmarshal the sub-fields
|
||||
yml, err := yaml.Marshal(thisMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// needs to be a different type to prevent infinite recursion
|
||||
c := _mappedPerformerScraperConfig{}
|
||||
if err := yaml.Unmarshal(yml, &c); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*s = mappedPerformerScraperConfig(c)
|
||||
|
||||
yml, err = yaml.Marshal(parentMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type mappedMovieScraperConfig struct {
|
||||
mappedConfig
|
||||
|
||||
Studio mappedConfig `yaml:"Studio"`
|
||||
Tags mappedConfig `yaml:"Tags"`
|
||||
}
|
||||
type _mappedMovieScraperConfig mappedMovieScraperConfig
|
||||
|
||||
const (
|
||||
mappedScraperConfigMovieStudio = "Studio"
|
||||
mappedScraperConfigMovieTags = "Tags"
|
||||
)
|
||||
|
||||
func (s *mappedMovieScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
// HACK - unmarshal to map first, then remove known movie sub-fields, then
|
||||
// remarshal to yaml and pass that down to the base map
|
||||
parentMap := make(map[string]interface{})
|
||||
if err := unmarshal(parentMap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// move the known sub-fields to a separate map
|
||||
thisMap := make(map[string]interface{})
|
||||
|
||||
thisMap[mappedScraperConfigMovieStudio] = parentMap[mappedScraperConfigMovieStudio]
|
||||
delete(parentMap, mappedScraperConfigMovieStudio)
|
||||
|
||||
thisMap[mappedScraperConfigMovieTags] = parentMap[mappedScraperConfigMovieTags]
|
||||
delete(parentMap, mappedScraperConfigMovieTags)
|
||||
|
||||
// re-unmarshal the sub-fields
|
||||
yml, err := yaml.Marshal(thisMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// needs to be a different type to prevent infinite recursion
|
||||
c := _mappedMovieScraperConfig{}
|
||||
if err := yaml.Unmarshal(yml, &c); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*s = mappedMovieScraperConfig(c)
|
||||
|
||||
yml, err = yaml.Marshal(parentMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type mappedScraperAttrConfig struct {
|
||||
Selector string `yaml:"selector"`
|
||||
Fixed string `yaml:"fixed"`
|
||||
PostProcess []mappedPostProcessAction `yaml:"postProcess"`
|
||||
Concat string `yaml:"concat"`
|
||||
Split string `yaml:"split"`
|
||||
|
||||
postProcessActions []postProcessAction
|
||||
|
||||
// Deprecated: use PostProcess instead
|
||||
ParseDate string `yaml:"parseDate"`
|
||||
Replace mappedRegexConfigs `yaml:"replace"`
|
||||
SubScraper *mappedScraperAttrConfig `yaml:"subScraper"`
|
||||
}
|
||||
|
||||
type _mappedScraperAttrConfig mappedScraperAttrConfig
|
||||
|
||||
func (c *mappedScraperAttrConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
// try unmarshalling into a string first
|
||||
if err := unmarshal(&c.Selector); err != nil {
|
||||
// if it's a type error then we try to unmarshall to the full object
|
||||
var typeErr *yaml.TypeError
|
||||
if !errors.As(err, &typeErr) {
|
||||
return err
|
||||
}
|
||||
|
||||
// unmarshall to full object
|
||||
// need it as a separate object
|
||||
t := _mappedScraperAttrConfig{}
|
||||
if err = unmarshal(&t); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*c = mappedScraperAttrConfig(t)
|
||||
}
|
||||
|
||||
return c.convertPostProcessActions()
|
||||
}
|
||||
|
||||
func (c *mappedScraperAttrConfig) convertPostProcessActions() error {
|
||||
// ensure we don't have the old deprecated fields and the new post process field
|
||||
if len(c.PostProcess) > 0 {
|
||||
if c.ParseDate != "" || len(c.Replace) > 0 || c.SubScraper != nil {
|
||||
return errors.New("cannot include postProcess and (parseDate, replace, subScraper) deprecated fields")
|
||||
}
|
||||
|
||||
// convert xpathPostProcessAction actions to postProcessActions
|
||||
for _, a := range c.PostProcess {
|
||||
action, err := a.ToPostProcessAction()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.postProcessActions = append(c.postProcessActions, action)
|
||||
}
|
||||
|
||||
c.PostProcess = nil
|
||||
} else {
|
||||
// convert old deprecated fields if present
|
||||
// in same order as they used to be executed
|
||||
if len(c.Replace) > 0 {
|
||||
action := postProcessReplace(c.Replace)
|
||||
c.postProcessActions = append(c.postProcessActions, &action)
|
||||
c.Replace = nil
|
||||
}
|
||||
|
||||
if c.SubScraper != nil {
|
||||
action := postProcessSubScraper(*c.SubScraper)
|
||||
c.postProcessActions = append(c.postProcessActions, &action)
|
||||
c.SubScraper = nil
|
||||
}
|
||||
|
||||
if c.ParseDate != "" {
|
||||
action := postProcessParseDate(c.ParseDate)
|
||||
c.postProcessActions = append(c.postProcessActions, &action)
|
||||
c.ParseDate = ""
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c mappedScraperAttrConfig) hasConcat() bool {
|
||||
return c.Concat != ""
|
||||
}
|
||||
|
||||
func (c mappedScraperAttrConfig) hasSplit() bool {
|
||||
return c.Split != ""
|
||||
}
|
||||
|
||||
func (c mappedScraperAttrConfig) concatenateResults(nodes []string) string {
|
||||
separator := c.Concat
|
||||
return strings.Join(nodes, separator)
|
||||
}
|
||||
|
||||
func (c mappedScraperAttrConfig) cleanResults(nodes []string) []string {
|
||||
cleaned := sliceutil.Unique(nodes) // remove duplicate values
|
||||
cleaned = sliceutil.Delete(cleaned, "") // remove empty values
|
||||
return cleaned
|
||||
}
|
||||
|
||||
func (c mappedScraperAttrConfig) splitString(value string) []string {
|
||||
separator := c.Split
|
||||
var res []string
|
||||
|
||||
if separator == "" {
|
||||
return []string{value}
|
||||
}
|
||||
|
||||
for _, str := range strings.Split(value, separator) {
|
||||
if str != "" {
|
||||
res = append(res, str)
|
||||
}
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func (c mappedScraperAttrConfig) postProcess(ctx context.Context, value string, q mappedQuery) string {
|
||||
for _, action := range c.postProcessActions {
|
||||
value = action.Apply(ctx, value, q)
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
333
pkg/scraper/mapped_postprocessing.go
Normal file
333
pkg/scraper/mapped_postprocessing.go
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/stashapp/stash/pkg/javascript"
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
)
|
||||
|
||||
type mappedRegexConfig struct {
|
||||
Regex string `yaml:"regex"`
|
||||
With string `yaml:"with"`
|
||||
}
|
||||
|
||||
type mappedRegexConfigs []mappedRegexConfig
|
||||
|
||||
func (c mappedRegexConfig) apply(value string) string {
|
||||
if c.Regex != "" {
|
||||
re, err := regexp.Compile(c.Regex)
|
||||
if err != nil {
|
||||
logger.Warnf("Error compiling regex '%s': %s", c.Regex, err.Error())
|
||||
return value
|
||||
}
|
||||
|
||||
ret := re.ReplaceAllString(value, c.With)
|
||||
|
||||
// trim leading and trailing whitespace
|
||||
// this is done to maintain backwards compatibility with existing
|
||||
// scrapers
|
||||
ret = strings.TrimSpace(ret)
|
||||
|
||||
logger.Debugf(`Replace: '%s' with '%s'`, c.Regex, c.With)
|
||||
logger.Debugf("Before: %s", value)
|
||||
logger.Debugf("After: %s", ret)
|
||||
return ret
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
func (c mappedRegexConfigs) apply(value string) string {
|
||||
// apply regex in order
|
||||
for _, config := range c {
|
||||
value = config.apply(value)
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
type postProcessAction interface {
|
||||
Apply(ctx context.Context, value string, q mappedQuery) string
|
||||
}
|
||||
|
||||
type postProcessParseDate string
|
||||
|
||||
func (p *postProcessParseDate) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
parseDate := string(*p)
|
||||
|
||||
const internalDateFormat = "2006-01-02"
|
||||
|
||||
valueLower := strings.ToLower(value)
|
||||
if valueLower == "today" || valueLower == "yesterday" { // handle today, yesterday
|
||||
dt := time.Now()
|
||||
if valueLower == "yesterday" { // subtract 1 day from now
|
||||
dt = dt.AddDate(0, 0, -1)
|
||||
}
|
||||
return dt.Format(internalDateFormat)
|
||||
}
|
||||
|
||||
if parseDate == "" {
|
||||
return value
|
||||
}
|
||||
|
||||
if parseDate == "unix" {
|
||||
// try to parse the date using unix timestamp format
|
||||
// if it fails, then just fall back to the original value
|
||||
timeAsInt, err := strconv.ParseInt(value, 10, 64)
|
||||
if err != nil {
|
||||
logger.Warnf("Error parsing date string '%s' using unix timestamp format : %s", value, err.Error())
|
||||
return value
|
||||
}
|
||||
parsedValue := time.Unix(timeAsInt, 0)
|
||||
|
||||
return parsedValue.Format(internalDateFormat)
|
||||
}
|
||||
|
||||
// try to parse the date using the pattern
|
||||
// if it fails, then just fall back to the original value
|
||||
parsedValue, err := time.Parse(parseDate, value)
|
||||
if err != nil {
|
||||
logger.Warnf("Error parsing date string '%s' using format '%s': %s", value, parseDate, err.Error())
|
||||
return value
|
||||
}
|
||||
|
||||
// convert it into our date format
|
||||
return parsedValue.Format(internalDateFormat)
|
||||
}
|
||||
|
||||
type postProcessSubtractDays bool
|
||||
|
||||
func (p *postProcessSubtractDays) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
const internalDateFormat = "2006-01-02"
|
||||
|
||||
i, err := strconv.Atoi(value)
|
||||
if err != nil {
|
||||
logger.Warnf("Error parsing day string %s: %s", value, err)
|
||||
return value
|
||||
}
|
||||
|
||||
dt := time.Now()
|
||||
dt = dt.AddDate(0, 0, -i)
|
||||
return dt.Format(internalDateFormat)
|
||||
}
|
||||
|
||||
type postProcessReplace mappedRegexConfigs
|
||||
|
||||
func (c *postProcessReplace) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
replace := mappedRegexConfigs(*c)
|
||||
return replace.apply(value)
|
||||
}
|
||||
|
||||
type postProcessSubScraper mappedScraperAttrConfig
|
||||
|
||||
func (p *postProcessSubScraper) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
subScrapeConfig := mappedScraperAttrConfig(*p)
|
||||
|
||||
logger.Debugf("Sub-scraping for: %s", value)
|
||||
ss := q.subScrape(ctx, value)
|
||||
|
||||
if ss != nil {
|
||||
found, err := ss.runQuery(subScrapeConfig.Selector)
|
||||
if err != nil {
|
||||
logger.Warnf("subscrape for '%v': %v", value, err)
|
||||
}
|
||||
|
||||
if len(found) > 0 {
|
||||
// check if we're concatenating the results into a single result
|
||||
var result string
|
||||
if subScrapeConfig.hasConcat() {
|
||||
result = subScrapeConfig.concatenateResults(found)
|
||||
} else {
|
||||
result = found[0]
|
||||
}
|
||||
|
||||
result = subScrapeConfig.postProcess(ctx, result, ss)
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
type postProcessMap map[string]string
|
||||
|
||||
func (p *postProcessMap) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
// return the mapped value if present
|
||||
m := *p
|
||||
mapped, ok := m[value]
|
||||
|
||||
if ok {
|
||||
return mapped
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
type postProcessFeetToCm bool
|
||||
|
||||
func (p *postProcessFeetToCm) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
const foot_in_cm = 30.48
|
||||
const inch_in_cm = 2.54
|
||||
|
||||
reg := regexp.MustCompile("[0-9]+")
|
||||
filtered := reg.FindAllString(value, -1)
|
||||
|
||||
var feet float64
|
||||
var inches float64
|
||||
if len(filtered) > 0 {
|
||||
feet, _ = strconv.ParseFloat(filtered[0], 64)
|
||||
}
|
||||
if len(filtered) > 1 {
|
||||
inches, _ = strconv.ParseFloat(filtered[1], 64)
|
||||
}
|
||||
|
||||
var centimeters = feet*foot_in_cm + inches*inch_in_cm
|
||||
|
||||
// Return rounded integer string
|
||||
return strconv.Itoa(int(math.Round(centimeters)))
|
||||
}
|
||||
|
||||
type postProcessLbToKg bool
|
||||
|
||||
func (p *postProcessLbToKg) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
const lb_in_kg = 0.45359237
|
||||
w, err := strconv.ParseFloat(value, 64)
|
||||
if err == nil {
|
||||
w *= lb_in_kg
|
||||
value = strconv.Itoa(int(math.Round(w)))
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
type postProcessJavascript string
|
||||
|
||||
func (p *postProcessJavascript) Apply(ctx context.Context, value string, q mappedQuery) string {
|
||||
vm := javascript.NewVM()
|
||||
if err := vm.Set("value", value); err != nil {
|
||||
logger.Warnf("javascript failed to set value: %v", err)
|
||||
return value
|
||||
}
|
||||
|
||||
log := &javascript.Log{
|
||||
Logger: logger.Logger,
|
||||
Prefix: "",
|
||||
ProgressChan: make(chan float64),
|
||||
}
|
||||
|
||||
if err := log.AddToVM("log", vm); err != nil {
|
||||
logger.Logger.Errorf("error adding log API: %w", err)
|
||||
}
|
||||
|
||||
util := &javascript.Util{}
|
||||
if err := util.AddToVM("util", vm); err != nil {
|
||||
logger.Logger.Errorf("error adding util API: %w", err)
|
||||
}
|
||||
|
||||
script, err := javascript.CompileScript("", "(function() { "+string(*p)+"})()")
|
||||
if err != nil {
|
||||
logger.Warnf("javascript failed to compile: %v", err)
|
||||
return value
|
||||
}
|
||||
|
||||
output, err := vm.RunProgram(script)
|
||||
if err != nil {
|
||||
logger.Warnf("javascript failed to run: %v", err)
|
||||
return value
|
||||
}
|
||||
|
||||
// assume output is string
|
||||
return output.String()
|
||||
}
|
||||
|
||||
type mappedPostProcessAction struct {
|
||||
ParseDate string `yaml:"parseDate"`
|
||||
SubtractDays bool `yaml:"subtractDays"`
|
||||
Replace mappedRegexConfigs `yaml:"replace"`
|
||||
SubScraper *mappedScraperAttrConfig `yaml:"subScraper"`
|
||||
Map map[string]string `yaml:"map"`
|
||||
FeetToCm bool `yaml:"feetToCm"`
|
||||
LbToKg bool `yaml:"lbToKg"`
|
||||
Javascript string `yaml:"javascript"`
|
||||
}
|
||||
|
||||
func (a mappedPostProcessAction) ToPostProcessAction() (postProcessAction, error) {
|
||||
var found string
|
||||
var ret postProcessAction
|
||||
|
||||
ensureOnly := func(field string) error {
|
||||
if found != "" {
|
||||
return fmt.Errorf("post-process actions must have a single field, found %s and %s", found, field)
|
||||
}
|
||||
found = field
|
||||
return nil
|
||||
}
|
||||
|
||||
if a.ParseDate != "" {
|
||||
found = "parseDate"
|
||||
action := postProcessParseDate(a.ParseDate)
|
||||
ret = &action
|
||||
}
|
||||
if len(a.Replace) > 0 {
|
||||
if err := ensureOnly("replace"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
action := postProcessReplace(a.Replace)
|
||||
ret = &action
|
||||
}
|
||||
if a.SubScraper != nil {
|
||||
if err := ensureOnly("subScraper"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
action := postProcessSubScraper(*a.SubScraper)
|
||||
ret = &action
|
||||
}
|
||||
if a.Map != nil {
|
||||
if err := ensureOnly("map"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
action := postProcessMap(a.Map)
|
||||
ret = &action
|
||||
}
|
||||
if a.FeetToCm {
|
||||
if err := ensureOnly("feetToCm"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
action := postProcessFeetToCm(a.FeetToCm)
|
||||
ret = &action
|
||||
}
|
||||
if a.LbToKg {
|
||||
if err := ensureOnly("lbToKg"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
action := postProcessLbToKg(a.LbToKg)
|
||||
ret = &action
|
||||
}
|
||||
if a.SubtractDays {
|
||||
if err := ensureOnly("subtractDays"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
action := postProcessSubtractDays(a.SubtractDays)
|
||||
ret = &action
|
||||
}
|
||||
if a.Javascript != "" {
|
||||
if err := ensureOnly("javascript"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
action := postProcessJavascript(a.Javascript)
|
||||
ret = &action
|
||||
}
|
||||
|
||||
if ret == nil {
|
||||
return nil, errors.New("invalid post-process action")
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
276
pkg/scraper/mapped_result.go
Normal file
276
pkg/scraper/mapped_result.go
Normal file
|
|
@ -0,0 +1,276 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
type mappedResult map[string]interface{}
|
||||
type mappedResults []mappedResult
|
||||
|
||||
func (r mappedResult) string(key string) (string, bool) {
|
||||
v, ok := r[key]
|
||||
if !ok {
|
||||
return "", false
|
||||
}
|
||||
|
||||
val, ok := v.(string)
|
||||
if !ok {
|
||||
logger.Errorf("String field %s is %T in mappedResult", key, r[key])
|
||||
}
|
||||
|
||||
return val, true
|
||||
}
|
||||
|
||||
func (r mappedResult) mustString(key string) string {
|
||||
v, ok := r[key]
|
||||
if !ok {
|
||||
logger.Errorf("Missing required string field %s in mappedResult", key)
|
||||
return ""
|
||||
}
|
||||
|
||||
val, ok := v.(string)
|
||||
if !ok {
|
||||
logger.Errorf("String field %s is %T in mappedResult", key, r[key])
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
func (r mappedResult) stringPtr(key string) *string {
|
||||
val, ok := r.string(key)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return &val
|
||||
}
|
||||
|
||||
func (r mappedResult) stringSlice(key string) []string {
|
||||
v, ok := r[key]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
// need to try both []string and string
|
||||
val, ok := v.([]string)
|
||||
|
||||
if ok {
|
||||
return val
|
||||
}
|
||||
|
||||
// try single string
|
||||
singleVal, ok := v.(string)
|
||||
if !ok {
|
||||
logger.Errorf("String slice field %s is %T in mappedResult", key, r[key])
|
||||
return nil
|
||||
}
|
||||
|
||||
return []string{singleVal}
|
||||
}
|
||||
|
||||
func (r mappedResult) IntPtr(key string) *int {
|
||||
v, ok := r[key]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
val, ok := v.(int)
|
||||
if !ok {
|
||||
logger.Errorf("Int field %s is %T in mappedResult", key, r[key])
|
||||
return nil
|
||||
}
|
||||
|
||||
return &val
|
||||
}
|
||||
|
||||
func (r mappedResults) setSingleValue(index int, key string, value string) mappedResults {
|
||||
if index >= len(r) {
|
||||
r = append(r, make(mappedResult))
|
||||
}
|
||||
|
||||
logger.Debugf(`[%d][%s] = %s`, index, key, value)
|
||||
r[index][key] = value
|
||||
return r
|
||||
}
|
||||
|
||||
func (r mappedResults) setMultiValue(index int, key string, value []string) mappedResults {
|
||||
if index >= len(r) {
|
||||
r = append(r, make(mappedResult))
|
||||
}
|
||||
|
||||
logger.Debugf(`[%d][%s] = %s`, index, key, value)
|
||||
r[index][key] = value
|
||||
return r
|
||||
}
|
||||
|
||||
func (r mappedResults) scrapedTags() []*models.ScrapedTag {
|
||||
if len(r) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
ret := make([]*models.ScrapedTag, len(r))
|
||||
for i, result := range r {
|
||||
ret[i] = result.scrapedTag()
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResult) scrapedTag() *models.ScrapedTag {
|
||||
return &models.ScrapedTag{
|
||||
Name: r.mustString("Name"),
|
||||
}
|
||||
}
|
||||
|
||||
func (r mappedResult) scrapedPerformer() *models.ScrapedPerformer {
|
||||
ret := &models.ScrapedPerformer{
|
||||
Name: r.stringPtr("Name"),
|
||||
Disambiguation: r.stringPtr("Disambiguation"),
|
||||
Gender: r.stringPtr("Gender"),
|
||||
URL: r.stringPtr("URL"),
|
||||
URLs: r.stringSlice("URLs"),
|
||||
Twitter: r.stringPtr("Twitter"),
|
||||
Birthdate: r.stringPtr("Birthdate"),
|
||||
Ethnicity: r.stringPtr("Ethnicity"),
|
||||
Country: r.stringPtr("Country"),
|
||||
EyeColor: r.stringPtr("EyeColor"),
|
||||
Height: r.stringPtr("Height"),
|
||||
Measurements: r.stringPtr("Measurements"),
|
||||
FakeTits: r.stringPtr("FakeTits"),
|
||||
PenisLength: r.stringPtr("PenisLength"),
|
||||
Circumcised: r.stringPtr("Circumcised"),
|
||||
CareerLength: r.stringPtr("CareerLength"),
|
||||
Tattoos: r.stringPtr("Tattoos"),
|
||||
Piercings: r.stringPtr("Piercings"),
|
||||
Aliases: r.stringPtr("Aliases"),
|
||||
Image: r.stringPtr("Image"),
|
||||
Images: r.stringSlice("Images"),
|
||||
Details: r.stringPtr("Details"),
|
||||
DeathDate: r.stringPtr("DeathDate"),
|
||||
HairColor: r.stringPtr("HairColor"),
|
||||
Weight: r.stringPtr("Weight"),
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResults) scrapedPerformers() []*models.ScrapedPerformer {
|
||||
if len(r) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
ret := make([]*models.ScrapedPerformer, len(r))
|
||||
for i, result := range r {
|
||||
ret[i] = result.scrapedPerformer()
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResult) scrapedScene() *models.ScrapedScene {
|
||||
ret := &models.ScrapedScene{
|
||||
Title: r.stringPtr("Title"),
|
||||
Code: r.stringPtr("Code"),
|
||||
Details: r.stringPtr("Details"),
|
||||
Director: r.stringPtr("Director"),
|
||||
URL: r.stringPtr("URL"),
|
||||
URLs: r.stringSlice("URLs"),
|
||||
Date: r.stringPtr("Date"),
|
||||
Image: r.stringPtr("Image"),
|
||||
Duration: r.IntPtr("Duration"),
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResult) scrapedImage() *models.ScrapedImage {
|
||||
ret := &models.ScrapedImage{
|
||||
Title: r.stringPtr("Title"),
|
||||
Code: r.stringPtr("Code"),
|
||||
Details: r.stringPtr("Details"),
|
||||
Photographer: r.stringPtr("Photographer"),
|
||||
URLs: r.stringSlice("URLs"),
|
||||
Date: r.stringPtr("Date"),
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResult) scrapedGallery() *models.ScrapedGallery {
|
||||
ret := &models.ScrapedGallery{
|
||||
Title: r.stringPtr("Title"),
|
||||
Code: r.stringPtr("Code"),
|
||||
Details: r.stringPtr("Details"),
|
||||
Photographer: r.stringPtr("Photographer"),
|
||||
URL: r.stringPtr("URL"),
|
||||
URLs: r.stringSlice("URLs"),
|
||||
Date: r.stringPtr("Date"),
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResult) scrapedStudio() *models.ScrapedStudio {
|
||||
ret := &models.ScrapedStudio{
|
||||
Name: r.mustString("Name"),
|
||||
URL: r.stringPtr("URL"),
|
||||
URLs: r.stringSlice("URLs"),
|
||||
Image: r.stringPtr("Image"),
|
||||
Details: r.stringPtr("Details"),
|
||||
Aliases: r.stringPtr("Aliases"),
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResult) scrapedMovie() *models.ScrapedMovie {
|
||||
ret := &models.ScrapedMovie{
|
||||
Name: r.stringPtr("Name"),
|
||||
Aliases: r.stringPtr("Aliases"),
|
||||
URLs: r.stringSlice("URLs"),
|
||||
Duration: r.stringPtr("Duration"),
|
||||
Date: r.stringPtr("Date"),
|
||||
Director: r.stringPtr("Director"),
|
||||
Synopsis: r.stringPtr("Synopsis"),
|
||||
FrontImage: r.stringPtr("FrontImage"),
|
||||
BackImage: r.stringPtr("BackImage"),
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResult) scrapedGroup() *models.ScrapedGroup {
|
||||
ret := &models.ScrapedGroup{
|
||||
Name: r.stringPtr("Name"),
|
||||
Aliases: r.stringPtr("Aliases"),
|
||||
URL: r.stringPtr("URL"),
|
||||
URLs: r.stringSlice("URLs"),
|
||||
Duration: r.stringPtr("Duration"),
|
||||
Date: r.stringPtr("Date"),
|
||||
Director: r.stringPtr("Director"),
|
||||
Synopsis: r.stringPtr("Synopsis"),
|
||||
FrontImage: r.stringPtr("FrontImage"),
|
||||
BackImage: r.stringPtr("BackImage"),
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResults) scrapedMovies() []*models.ScrapedMovie {
|
||||
if len(r) == 0 {
|
||||
return nil
|
||||
}
|
||||
ret := make([]*models.ScrapedMovie, len(r))
|
||||
for i, result := range r {
|
||||
ret[i] = result.scrapedMovie()
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (r mappedResults) scrapedGroups() []*models.ScrapedGroup {
|
||||
if len(r) == 0 {
|
||||
return nil
|
||||
}
|
||||
ret := make([]*models.ScrapedGroup, len(r))
|
||||
for i, result := range r {
|
||||
ret[i] = result.scrapedGroup()
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
908
pkg/scraper/mapped_result_test.go
Normal file
908
pkg/scraper/mapped_result_test.go
Normal file
|
|
@ -0,0 +1,908 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestMappedResultString verifies the string accessor: it returns the
// value with ok=true for string entries, ok=false for missing keys, and
// an empty string — still with ok=true — when the stored value has a
// non-string type.
func TestMappedResultString(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResult
		key           string
		expectedValue string
		expectedOk    bool
	}{
		{
			name:          "valid string",
			data:          mappedResult{"name": "test"},
			key:           "name",
			expectedValue: "test",
			expectedOk:    true,
		},
		{
			name:          "missing key",
			data:          mappedResult{},
			key:           "missing",
			expectedValue: "",
			expectedOk:    false,
		},
		{
			name:          "wrong type still returns ok true but empty value",
			data:          mappedResult{"num": 123},
			key:           "num",
			expectedValue: "",
			expectedOk:    true, // logs error but returns ok=true
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			val, ok := test.data.string(test.key)
			assert.Equal(t, test.expectedValue, val)
			assert.Equal(t, test.expectedOk, ok)
		})
	}
}

// TestMappedResultMustString verifies that mustString always returns a
// plain string, falling back to "" for missing keys and wrong types.
func TestMappedResultMustString(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResult
		key           string
		expectedValue string
	}{
		{
			name:          "valid string",
			data:          mappedResult{"name": "test"},
			key:           "name",
			expectedValue: "test",
		},
		{
			name:          "missing key returns empty string",
			data:          mappedResult{},
			key:           "missing",
			expectedValue: "",
		},
		{
			name:          "wrong type returns empty string",
			data:          mappedResult{"num": 123},
			key:           "num",
			expectedValue: "",
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			val := test.data.mustString(test.key)
			assert.Equal(t, test.expectedValue, val)
		})
	}
}

// TestMappedResultStringPtr verifies that stringPtr returns nil for
// missing keys, and a non-nil pointer otherwise — including a pointer
// to "" when the stored value has a non-string type.
func TestMappedResultStringPtr(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResult
		key           string
		expectedValue *string
	}{
		{
			name:          "valid string",
			data:          mappedResult{"name": "test"},
			key:           "name",
			expectedValue: strPtr("test"),
		},
		{
			name:          "missing key returns nil",
			data:          mappedResult{},
			key:           "missing",
			expectedValue: nil,
		},
		{
			name:          "wrong type returns non-nil pointer to empty string",
			data:          mappedResult{"num": 123},
			key:           "num",
			expectedValue: strPtr(""), // string() returns empty string but ok=true
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			val := test.data.stringPtr(test.key)
			if test.expectedValue == nil {
				assert.Nil(t, val)
			} else {
				assert.NotNil(t, val)
				assert.Equal(t, *test.expectedValue, *val)
			}
		})
	}
}

// TestMappedResultStringSlice verifies that stringSlice returns the
// stored slice, wraps a single string into a one-element slice, and
// returns nil for missing keys or other types.
func TestMappedResultStringSlice(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResult
		key           string
		expectedValue []string
	}{
		{
			name:          "valid slice",
			data:          mappedResult{"tags": []string{"a", "b", "c"}},
			key:           "tags",
			expectedValue: []string{"a", "b", "c"},
		},
		{
			name:          "missing key returns nil",
			data:          mappedResult{},
			key:           "missing",
			expectedValue: nil,
		},
		{
			name:          "single value converted to slice",
			data:          mappedResult{"tags": "not a slice"},
			key:           "tags",
			expectedValue: []string{"not a slice"},
		},
		{
			name:          "wrong type returns nil",
			data:          mappedResult{"tags": 123},
			key:           "tags",
			expectedValue: nil,
		},
		{
			name:          "empty slice",
			data:          mappedResult{"tags": []string{}},
			key:           "tags",
			expectedValue: []string{},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			val := test.data.stringSlice(test.key)
			assert.Equal(t, test.expectedValue, val)
		})
	}
}

// TestMappedResultIntPtr verifies that IntPtr returns a pointer for int
// values (including zero) and nil for missing keys or non-int types.
func TestMappedResultIntPtr(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResult
		key           string
		expectedValue *int
	}{
		{
			name:          "valid int",
			data:          mappedResult{"duration": 120},
			key:           "duration",
			expectedValue: intPtr(120),
		},
		{
			name:          "missing key returns nil",
			data:          mappedResult{},
			key:           "missing",
			expectedValue: nil,
		},
		{
			name:          "wrong type returns nil",
			data:          mappedResult{"duration": "120"},
			key:           "duration",
			expectedValue: nil,
		},
		{
			name:          "zero value",
			data:          mappedResult{"duration": 0},
			key:           "duration",
			expectedValue: intPtr(0),
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			val := test.data.IntPtr(test.key)
			assert.Equal(t, test.expectedValue, val)
		})
	}
}
|
||||
|
||||
// TestMappedResultsSetSingleValue verifies that setSingleValue stores a
// scalar value at the given index, appending a new result when the
// index equals the current length and panicking on a sparse index.
func TestMappedResultsSetSingleValue(t *testing.T) {
	tests := []struct {
		name           string
		initialResults mappedResults
		index          int
		key            string
		value          string
		expectedLen    int
		shouldPanic    bool
	}{
		{
			name:           "append to empty",
			initialResults: mappedResults{},
			index:          0,
			key:            "name",
			value:          "test",
			expectedLen:    1,
			shouldPanic:    false,
		},
		{
			name:           "set in existing",
			initialResults: mappedResults{mappedResult{}},
			index:          0,
			key:            "name",
			value:          "test",
			expectedLen:    1,
			shouldPanic:    false,
		},
		{
			name:           "append to existing",
			initialResults: mappedResults{mappedResult{}},
			index:          1,
			key:            "name",
			value:          "test",
			expectedLen:    2,
			shouldPanic:    false,
		},
		{
			name:           "sparse index causes panic",
			initialResults: mappedResults{mappedResult{}},
			index:          5,
			key:            "name",
			value:          "test",
			expectedLen:    6,
			shouldPanic:    true,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			if test.shouldPanic {
				assert.Panics(t, func() {
					test.initialResults.setSingleValue(test.index, test.key, test.value)
				})
			} else {
				results := test.initialResults.setSingleValue(test.index, test.key, test.value)
				assert.Equal(t, test.expectedLen, len(results))
				assert.Equal(t, test.value, results[test.index][test.key])
			}
		})
	}
}

// TestMappedResultsSetMultiValue verifies that setMultiValue stores a
// string-slice value at the given index, appending a new result when
// the index equals the current length.
func TestMappedResultsSetMultiValue(t *testing.T) {
	tests := []struct {
		name           string
		initialResults mappedResults
		index          int
		key            string
		value          []string
		expectedLen    int
	}{
		{
			name:           "append to empty",
			initialResults: mappedResults{},
			index:          0,
			key:            "tags",
			value:          []string{"a", "b"},
			expectedLen:    1,
		},
		{
			name:           "set in existing",
			initialResults: mappedResults{mappedResult{}},
			index:          0,
			key:            "tags",
			value:          []string{"a", "b"},
			expectedLen:    1,
		},
		{
			name:           "append to existing",
			initialResults: mappedResults{mappedResult{}},
			index:          1,
			key:            "tags",
			value:          []string{"x", "y"},
			expectedLen:    2,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			results := test.initialResults.setMultiValue(test.index, test.key, test.value)
			assert.Equal(t, test.expectedLen, len(results))
			assert.Equal(t, test.value, results[test.index][test.key])
		})
	}
}
|
||||
|
||||
// TestMappedResultScrapedTag verifies conversion of a mapped result to
// a ScrapedTag; a missing Name yields an empty tag name.
func TestMappedResultScrapedTag(t *testing.T) {
	tests := []struct {
		name         string
		data         mappedResult
		expectedName string
	}{
		{
			name:         "valid tag",
			data:         mappedResult{"Name": "Action"},
			expectedName: "Action",
		},
		{
			name:         "missing name",
			data:         mappedResult{},
			expectedName: "",
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			tag := test.data.scrapedTag()
			assert.NotNil(t, tag)
			assert.Equal(t, test.expectedName, tag.Name)
		})
	}
}

// TestMappedResultsScrapedTags verifies conversion of mapped results to
// a slice of tags: nil for no results, otherwise one tag per result in
// order.
func TestMappedResultsScrapedTags(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResults
		expectedCount int
		expectedNames []string
	}{
		{
			name:          "empty results",
			data:          mappedResults{},
			expectedCount: 0,
		},
		{
			name: "single tag",
			data: mappedResults{
				mappedResult{"Name": "Action"},
			},
			expectedCount: 1,
			expectedNames: []string{"Action"},
		},
		{
			name: "multiple tags",
			data: mappedResults{
				mappedResult{"Name": "Action"},
				mappedResult{"Name": "Drama"},
				mappedResult{"Name": "Comedy"},
			},
			expectedCount: 3,
			expectedNames: []string{"Action", "Drama", "Comedy"},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			tags := test.data.scrapedTags()
			if test.expectedCount == 0 {
				assert.Nil(t, tags)
			} else {
				assert.NotNil(t, tags)
				assert.Equal(t, test.expectedCount, len(tags))
				for i, expectedName := range test.expectedNames {
					assert.Equal(t, expectedName, tags[i].Name)
				}
			}
		})
	}
}

// TestMappedResultScrapedPerformer verifies conversion to a
// ScrapedPerformer with both a fully-populated and an empty result.
func TestMappedResultScrapedPerformer(t *testing.T) {
	tests := []struct {
		name     string
		data     mappedResult
		validate func(t *testing.T, p *models.ScrapedPerformer)
	}{
		{
			name: "full performer",
			data: mappedResult{
				"Name":           "Jane Doe",
				"Disambiguation": "Actress",
				"Gender":         "Female",
				"URL":            "https://example.com/jane",
				"URLs":           []string{"url1", "url2"},
				"Twitter":        "@jane",
				"Birthdate":      "1990-01-01",
				"Ethnicity":      "Caucasian",
				"Country":        "USA",
				"EyeColor":       "Blue",
				"Height":         "5'6\"",
				"Measurements":   "36-24-36",
				"FakeTits":       "No",
				"PenisLength":    "N/A",
				"Circumcised":    "N/A",
				"CareerLength":   "10 years",
				"Tattoos":        "Yes",
				"Piercings":      "Yes",
				"Aliases":        "Jane Smith",
				"Image":          "image.jpg",
				"Images":         []string{"img1", "img2"},
				"Details":        "Some details",
				"DeathDate":      "N/A",
				"HairColor":      "Blonde",
				"Weight":         "130 lbs",
			},
			validate: func(t *testing.T, p *models.ScrapedPerformer) {
				assert.NotNil(t, p)
				assert.Equal(t, "Jane Doe", *p.Name)
				assert.Equal(t, "Actress", *p.Disambiguation)
				assert.Equal(t, "Female", *p.Gender)
				assert.Equal(t, "https://example.com/jane", *p.URL)
				assert.Equal(t, []string{"url1", "url2"}, p.URLs)
				assert.Equal(t, "@jane", *p.Twitter)
				assert.Equal(t, "Blonde", *p.HairColor)
				assert.Equal(t, "130 lbs", *p.Weight)
			},
		},
		{
			name: "minimal performer",
			data: mappedResult{},
			validate: func(t *testing.T, p *models.ScrapedPerformer) {
				assert.NotNil(t, p)
				assert.Nil(t, p.Name)
				assert.Nil(t, p.Gender)
				assert.Empty(t, p.URLs)
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			performer := test.data.scrapedPerformer()
			test.validate(t, performer)
		})
	}
}

// TestMappedResultsScrapedPerformers verifies the slice conversion:
// nil for no results, otherwise one performer per result.
func TestMappedResultsScrapedPerformers(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResults
		expectedCount int
	}{
		{
			name:          "empty results",
			data:          mappedResults{},
			expectedCount: 0,
		},
		{
			name: "single performer",
			data: mappedResults{
				mappedResult{"Name": "Jane Doe"},
			},
			expectedCount: 1,
		},
		{
			name: "multiple performers",
			data: mappedResults{
				mappedResult{"Name": "Jane Doe"},
				mappedResult{"Name": "John Doe"},
				mappedResult{"Name": "Alice"},
			},
			expectedCount: 3,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			performers := test.data.scrapedPerformers()
			if test.expectedCount == 0 {
				assert.Nil(t, performers)
			} else {
				assert.NotNil(t, performers)
				assert.Equal(t, test.expectedCount, len(performers))
			}
		})
	}
}

// TestMappedResultScrapedScene verifies conversion to a ScrapedScene,
// including the int-typed Duration field.
func TestMappedResultScrapedScene(t *testing.T) {
	tests := []struct {
		name     string
		data     mappedResult
		validate func(t *testing.T, s *models.ScrapedScene)
	}{
		{
			name: "full scene",
			data: mappedResult{
				"Title":    "Scene Title",
				"Code":     "CODE123",
				"Details":  "Scene details",
				"Director": "John Smith",
				"URL":      "https://example.com/scene",
				"URLs":     []string{"url1", "url2"},
				"Date":     "2020-01-01",
				"Image":    "scene.jpg",
				"Duration": 3600,
			},
			validate: func(t *testing.T, s *models.ScrapedScene) {
				assert.NotNil(t, s)
				assert.Equal(t, "Scene Title", *s.Title)
				assert.Equal(t, "CODE123", *s.Code)
				assert.Equal(t, "Scene details", *s.Details)
				assert.Equal(t, "John Smith", *s.Director)
				assert.Equal(t, "https://example.com/scene", *s.URL)
				assert.Equal(t, []string{"url1", "url2"}, s.URLs)
				assert.Equal(t, "2020-01-01", *s.Date)
				assert.Equal(t, "scene.jpg", *s.Image)
				assert.Equal(t, 3600, *s.Duration)
			},
		},
		{
			name: "minimal scene",
			data: mappedResult{},
			validate: func(t *testing.T, s *models.ScrapedScene) {
				assert.NotNil(t, s)
				assert.Nil(t, s.Title)
				assert.Nil(t, s.Duration)
				assert.Empty(t, s.URLs)
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			scene := test.data.scrapedScene()
			test.validate(t, scene)
		})
	}
}
|
||||
|
||||
// TestMappedResultScrapedImage verifies conversion to a ScrapedImage
// with both a fully-populated and an empty result.
func TestMappedResultScrapedImage(t *testing.T) {
	tests := []struct {
		name     string
		data     mappedResult
		validate func(t *testing.T, i *models.ScrapedImage)
	}{
		{
			name: "full image",
			data: mappedResult{
				"Title":        "Image Title",
				"Code":         "IMG123",
				"Details":      "Image details",
				"Photographer": "Jane Photographer",
				"URLs":         []string{"url1", "url2"},
				"Date":         "2020-06-15",
			},
			validate: func(t *testing.T, i *models.ScrapedImage) {
				assert.NotNil(t, i)
				assert.Equal(t, "Image Title", *i.Title)
				assert.Equal(t, "IMG123", *i.Code)
				assert.Equal(t, "Image details", *i.Details)
				assert.Equal(t, "Jane Photographer", *i.Photographer)
				assert.Equal(t, []string{"url1", "url2"}, i.URLs)
				assert.Equal(t, "2020-06-15", *i.Date)
			},
		},
		{
			name: "minimal image",
			data: mappedResult{},
			validate: func(t *testing.T, i *models.ScrapedImage) {
				assert.NotNil(t, i)
				assert.Nil(t, i.Title)
				assert.Empty(t, i.URLs)
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			image := test.data.scrapedImage()
			test.validate(t, image)
		})
	}
}

// TestMappedResultScrapedGallery verifies conversion to a
// ScrapedGallery with both a fully-populated and an empty result.
func TestMappedResultScrapedGallery(t *testing.T) {
	tests := []struct {
		name     string
		data     mappedResult
		validate func(t *testing.T, g *models.ScrapedGallery)
	}{
		{
			name: "full gallery",
			data: mappedResult{
				"Title":        "Gallery Title",
				"Code":         "GAL123",
				"Details":      "Gallery details",
				"Photographer": "Jane Photographer",
				"URL":          "https://example.com/gallery",
				"URLs":         []string{"url1", "url2"},
				"Date":         "2020-07-20",
			},
			validate: func(t *testing.T, g *models.ScrapedGallery) {
				assert.NotNil(t, g)
				assert.Equal(t, "Gallery Title", *g.Title)
				assert.Equal(t, "GAL123", *g.Code)
				assert.Equal(t, "Gallery details", *g.Details)
				assert.Equal(t, "Jane Photographer", *g.Photographer)
				assert.Equal(t, "https://example.com/gallery", *g.URL)
				assert.Equal(t, []string{"url1", "url2"}, g.URLs)
				assert.Equal(t, "2020-07-20", *g.Date)
			},
		},
		{
			name: "minimal gallery",
			data: mappedResult{},
			validate: func(t *testing.T, g *models.ScrapedGallery) {
				assert.NotNil(t, g)
				assert.Nil(t, g.Title)
				assert.Empty(t, g.URLs)
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			gallery := test.data.scrapedGallery()
			test.validate(t, gallery)
		})
	}
}

// TestMappedResultScrapedStudio verifies conversion to a ScrapedStudio;
// Name is a plain string, so a missing Name becomes "".
func TestMappedResultScrapedStudio(t *testing.T) {
	tests := []struct {
		name     string
		data     mappedResult
		validate func(t *testing.T, st *models.ScrapedStudio)
	}{
		{
			name: "full studio",
			data: mappedResult{
				"Name":    "Studio Name",
				"URL":     "https://example.com/studio",
				"URLs":    []string{"url1", "url2"},
				"Image":   "studio.jpg",
				"Details": "Studio details",
				"Aliases": "Studio Alias",
			},
			validate: func(t *testing.T, st *models.ScrapedStudio) {
				assert.NotNil(t, st)
				assert.Equal(t, "Studio Name", st.Name)
				assert.Equal(t, "https://example.com/studio", *st.URL)
				assert.Equal(t, []string{"url1", "url2"}, st.URLs)
				assert.Equal(t, "studio.jpg", *st.Image)
				assert.Equal(t, "Studio details", *st.Details)
				assert.Equal(t, "Studio Alias", *st.Aliases)
			},
		},
		{
			name: "minimal studio",
			data: mappedResult{},
			validate: func(t *testing.T, st *models.ScrapedStudio) {
				assert.NotNil(t, st)
				assert.Equal(t, "", st.Name) // mustString returns empty string
				assert.Nil(t, st.URL)
				assert.Empty(t, st.URLs)
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			studio := test.data.scrapedStudio()
			test.validate(t, studio)
		})
	}
}

// TestMappedResultScrapedMovie verifies conversion to a ScrapedMovie
// with both a fully-populated and an empty result.
func TestMappedResultScrapedMovie(t *testing.T) {
	tests := []struct {
		name     string
		data     mappedResult
		validate func(t *testing.T, m *models.ScrapedMovie)
	}{
		{
			name: "full movie",
			data: mappedResult{
				"Name":       "Movie Title",
				"Aliases":    "Movie Alias",
				"URLs":       []string{"url1", "url2"},
				"Duration":   "120 minutes",
				"Date":       "2020-05-10",
				"Director":   "John Director",
				"Synopsis":   "Movie synopsis",
				"FrontImage": "front.jpg",
				"BackImage":  "back.jpg",
			},
			validate: func(t *testing.T, m *models.ScrapedMovie) {
				assert.NotNil(t, m)
				assert.Equal(t, "Movie Title", *m.Name)
				assert.Equal(t, "Movie Alias", *m.Aliases)
				assert.Equal(t, []string{"url1", "url2"}, m.URLs)
				assert.Equal(t, "120 minutes", *m.Duration)
				assert.Equal(t, "2020-05-10", *m.Date)
				assert.Equal(t, "John Director", *m.Director)
				assert.Equal(t, "Movie synopsis", *m.Synopsis)
				assert.Equal(t, "front.jpg", *m.FrontImage)
				assert.Equal(t, "back.jpg", *m.BackImage)
			},
		},
		{
			name: "minimal movie",
			data: mappedResult{},
			validate: func(t *testing.T, m *models.ScrapedMovie) {
				assert.NotNil(t, m)
				assert.Nil(t, m.Name)
				assert.Empty(t, m.URLs)
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			movie := test.data.scrapedMovie()
			test.validate(t, movie)
		})
	}
}

// TestMappedResultsScrapedMovies verifies the slice conversion:
// nil for no results, otherwise one movie per result.
func TestMappedResultsScrapedMovies(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResults
		expectedCount int
	}{
		{
			name:          "empty results",
			data:          mappedResults{},
			expectedCount: 0,
		},
		{
			name: "single movie",
			data: mappedResults{
				mappedResult{"Name": "Movie 1"},
			},
			expectedCount: 1,
		},
		{
			name: "multiple movies",
			data: mappedResults{
				mappedResult{"Name": "Movie 1"},
				mappedResult{"Name": "Movie 2"},
				mappedResult{"Name": "Movie 3"},
			},
			expectedCount: 3,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			movies := test.data.scrapedMovies()
			if test.expectedCount == 0 {
				assert.Nil(t, movies)
			} else {
				assert.NotNil(t, movies)
				assert.Equal(t, test.expectedCount, len(movies))
			}
		})
	}
}

// TestMappedResultScrapedGroup verifies conversion to a ScrapedGroup
// with both a fully-populated and an empty result.
func TestMappedResultScrapedGroup(t *testing.T) {
	tests := []struct {
		name     string
		data     mappedResult
		validate func(t *testing.T, g *models.ScrapedGroup)
	}{
		{
			name: "full group",
			data: mappedResult{
				"Name":       "Group Title",
				"Aliases":    "Group Alias",
				"URL":        "https://example.com/group",
				"URLs":       []string{"url1", "url2"},
				"Duration":   "240 minutes",
				"Date":       "2020-08-15",
				"Director":   "Jane Director",
				"Synopsis":   "Group synopsis",
				"FrontImage": "front.jpg",
				"BackImage":  "back.jpg",
			},
			validate: func(t *testing.T, g *models.ScrapedGroup) {
				assert.NotNil(t, g)
				assert.Equal(t, "Group Title", *g.Name)
				assert.Equal(t, "Group Alias", *g.Aliases)
				assert.Equal(t, "https://example.com/group", *g.URL)
				assert.Equal(t, []string{"url1", "url2"}, g.URLs)
				assert.Equal(t, "240 minutes", *g.Duration)
				assert.Equal(t, "2020-08-15", *g.Date)
				assert.Equal(t, "Jane Director", *g.Director)
				assert.Equal(t, "Group synopsis", *g.Synopsis)
				assert.Equal(t, "front.jpg", *g.FrontImage)
				assert.Equal(t, "back.jpg", *g.BackImage)
			},
		},
		{
			name: "minimal group",
			data: mappedResult{},
			validate: func(t *testing.T, g *models.ScrapedGroup) {
				assert.NotNil(t, g)
				assert.Nil(t, g.Name)
				assert.Empty(t, g.URLs)
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			group := test.data.scrapedGroup()
			test.validate(t, group)
		})
	}
}

// TestMappedResultsScrapedGroups verifies the slice conversion:
// nil for no results, otherwise one group per result.
func TestMappedResultsScrapedGroups(t *testing.T) {
	tests := []struct {
		name          string
		data          mappedResults
		expectedCount int
	}{
		{
			name:          "empty results",
			data:          mappedResults{},
			expectedCount: 0,
		},
		{
			name: "single group",
			data: mappedResults{
				mappedResult{"Name": "Group 1"},
			},
			expectedCount: 1,
		},
		{
			name: "multiple groups",
			data: mappedResults{
				mappedResult{"Name": "Group 1"},
				mappedResult{"Name": "Group 2"},
				mappedResult{"Name": "Group 3"},
			},
			expectedCount: 3,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			groups := test.data.scrapedGroups()
			if test.expectedCount == 0 {
				assert.Nil(t, groups)
			} else {
				assert.NotNil(t, groups)
				assert.Equal(t, test.expectedCount, len(groups))
			}
		})
	}
}

// Helper functions

// strPtr returns a pointer to a copy of s.
func strPtr(s string) *string {
	return &s
}

// intPtr returns a pointer to a copy of i.
func intPtr(i int) *int {
	return &i
}
|
||||
|
|
@ -25,7 +25,7 @@ xPathScrapers:
|
|||
- anything
|
||||
`
|
||||
|
||||
c := &config{}
|
||||
c := &Definition{}
|
||||
err := yaml.Unmarshal([]byte(yamlStr), &c)
|
||||
|
||||
if err == nil {
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ func (p queryURLParameters) constructURL(url string) string {
|
|||
}
|
||||
|
||||
// replaceURL does a partial URL Replace ( only url parameter is used)
|
||||
func replaceURL(url string, scraperConfig scraperTypeConfig) string {
|
||||
func replaceURL(url string, scraperConfig ByURLDefinition) string {
|
||||
u := url
|
||||
queryURL := queryURLParameterFromURL(u)
|
||||
if scraperConfig.QueryURLReplacements != nil {
|
||||
|
|
|
|||
|
|
@ -208,22 +208,11 @@ func galleryInputFromGallery(gallery *models.Gallery) galleryInput {
|
|||
var ErrScraperScript = errors.New("scraper script error")
|
||||
|
||||
type scriptScraper struct {
|
||||
scraper scraperTypeConfig
|
||||
config config
|
||||
definition Definition
|
||||
globalConfig GlobalConfig
|
||||
}
|
||||
|
||||
func newScriptScraper(scraper scraperTypeConfig, config config, globalConfig GlobalConfig) *scriptScraper {
|
||||
return &scriptScraper{
|
||||
scraper: scraper,
|
||||
config: config,
|
||||
globalConfig: globalConfig,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scriptScraper) runScraperScript(ctx context.Context, inString string, out interface{}) error {
|
||||
command := s.scraper.Script
|
||||
|
||||
func (s *scriptScraper) runScraperScript(ctx context.Context, command []string, inString string, out interface{}) error {
|
||||
var cmd *exec.Cmd
|
||||
if python.IsPythonCommand(command[0]) {
|
||||
pythonPath := s.globalConfig.GetPythonPath()
|
||||
|
|
@ -233,7 +222,7 @@ func (s *scriptScraper) runScraperScript(ctx context.Context, inString string, o
|
|||
logger.Warnf("%s", err)
|
||||
} else {
|
||||
cmd = p.Command(ctx, command[1:])
|
||||
envVariable, _ := filepath.Abs(filepath.Dir(filepath.Dir(s.config.path)))
|
||||
envVariable, _ := filepath.Abs(filepath.Dir(filepath.Dir(s.definition.path)))
|
||||
python.AppendPythonPath(cmd, envVariable)
|
||||
}
|
||||
}
|
||||
|
|
@ -243,7 +232,7 @@ func (s *scriptScraper) runScraperScript(ctx context.Context, inString string, o
|
|||
cmd = stashExec.CommandContext(ctx, command[0], command[1:]...)
|
||||
}
|
||||
|
||||
cmd.Dir = filepath.Dir(s.config.path)
|
||||
cmd.Dir = filepath.Dir(s.definition.path)
|
||||
|
||||
stdin, err := cmd.StdinPipe()
|
||||
if err != nil {
|
||||
|
|
@ -273,7 +262,7 @@ func (s *scriptScraper) runScraperScript(ctx context.Context, inString string, o
|
|||
return errors.New("error running scraper script")
|
||||
}
|
||||
|
||||
go handleScraperStderr(s.config.Name, stderr)
|
||||
go handleScraperStderr(s.definition.Name, stderr)
|
||||
|
||||
logger.Debugf("Scraper script <%s> started", strings.Join(cmd.Args, " "))
|
||||
|
||||
|
|
@ -312,7 +301,39 @@ func (s *scriptScraper) runScraperScript(ctx context.Context, inString string, o
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeByName(ctx context.Context, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
|
||||
// scrape runs the scraper script given by command with the supplied
// JSON input string and decodes the script output into the model that
// matches the requested content type. Movie and group requests both
// decode into ScrapedMovie. Unknown content types return
// ErrNotSupported.
func (s *scriptScraper) scrape(ctx context.Context, command []string, input string, ty ScrapeContentType) (ScrapedContent, error) {
	switch ty {
	case ScrapeContentTypePerformer:
		var performer *models.ScrapedPerformer
		err := s.runScraperScript(ctx, command, input, &performer)
		return performer, err
	case ScrapeContentTypeGallery:
		var gallery *models.ScrapedGallery
		err := s.runScraperScript(ctx, command, input, &gallery)
		return gallery, err
	case ScrapeContentTypeScene:
		var scene *models.ScrapedScene
		err := s.runScraperScript(ctx, command, input, &scene)
		return scene, err
	case ScrapeContentTypeMovie, ScrapeContentTypeGroup:
		var movie *models.ScrapedMovie
		err := s.runScraperScript(ctx, command, input, &movie)
		return movie, err
	case ScrapeContentTypeImage:
		var image *models.ScrapedImage
		err := s.runScraperScript(ctx, command, input, &image)
		return image, err
	}

	return nil, ErrNotSupported
}
|
||||
|
||||
// scriptNameScraper combines a scriptScraper with a by-name scraping
// definition, which supplies the script command to execute.
type scriptNameScraper struct {
	scriptScraper
	definition ByNameDefinition
}
|
||||
|
||||
func (s *scriptNameScraper) scrapeByName(ctx context.Context, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
|
||||
input := `{"name": "` + name + `"}`
|
||||
|
||||
var ret []ScrapedContent
|
||||
|
|
@ -320,7 +341,7 @@ func (s *scriptScraper) scrapeByName(ctx context.Context, name string, ty Scrape
|
|||
switch ty {
|
||||
case ScrapeContentTypePerformer:
|
||||
var performers []models.ScrapedPerformer
|
||||
err = s.runScraperScript(ctx, input, &performers)
|
||||
err = s.runScraperScript(ctx, s.definition.Script, input, &performers)
|
||||
if err == nil {
|
||||
for _, p := range performers {
|
||||
v := p
|
||||
|
|
@ -329,7 +350,7 @@ func (s *scriptScraper) scrapeByName(ctx context.Context, name string, ty Scrape
|
|||
}
|
||||
case ScrapeContentTypeScene:
|
||||
var scenes []models.ScrapedScene
|
||||
err = s.runScraperScript(ctx, input, &scenes)
|
||||
err = s.runScraperScript(ctx, s.definition.Script, input, &scenes)
|
||||
if err == nil {
|
||||
for _, s := range scenes {
|
||||
v := s
|
||||
|
|
@ -343,7 +364,21 @@ func (s *scriptScraper) scrapeByName(ctx context.Context, name string, ty Scrape
|
|||
return ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeByFragment(ctx context.Context, input Input) (ScrapedContent, error) {
|
||||
type scriptURLScraper struct {
|
||||
scriptScraper
|
||||
definition ByURLDefinition
|
||||
}
|
||||
|
||||
func (s *scriptURLScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
return s.scrape(ctx, s.definition.Script, `{"url": "`+url+`"}`, ty)
|
||||
}
|
||||
|
||||
type scriptFragmentScraper struct {
|
||||
scriptScraper
|
||||
definition ByFragmentDefinition
|
||||
}
|
||||
|
||||
func (s *scriptFragmentScraper) scrapeByFragment(ctx context.Context, input Input) (ScrapedContent, error) {
|
||||
var inString []byte
|
||||
var err error
|
||||
var ty ScrapeContentType
|
||||
|
|
@ -363,41 +398,10 @@ func (s *scriptScraper) scrapeByFragment(ctx context.Context, input Input) (Scra
|
|||
return nil, err
|
||||
}
|
||||
|
||||
return s.scrape(ctx, string(inString), ty)
|
||||
return s.scrape(ctx, s.definition.Script, string(inString), ty)
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
return s.scrape(ctx, `{"url": "`+url+`"}`, ty)
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrape(ctx context.Context, input string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
switch ty {
|
||||
case ScrapeContentTypePerformer:
|
||||
var performer *models.ScrapedPerformer
|
||||
err := s.runScraperScript(ctx, input, &performer)
|
||||
return performer, err
|
||||
case ScrapeContentTypeGallery:
|
||||
var gallery *models.ScrapedGallery
|
||||
err := s.runScraperScript(ctx, input, &gallery)
|
||||
return gallery, err
|
||||
case ScrapeContentTypeScene:
|
||||
var scene *models.ScrapedScene
|
||||
err := s.runScraperScript(ctx, input, &scene)
|
||||
return scene, err
|
||||
case ScrapeContentTypeMovie, ScrapeContentTypeGroup:
|
||||
var movie *models.ScrapedMovie
|
||||
err := s.runScraperScript(ctx, input, &movie)
|
||||
return movie, err
|
||||
case ScrapeContentTypeImage:
|
||||
var image *models.ScrapedImage
|
||||
err := s.runScraperScript(ctx, input, &image)
|
||||
return image, err
|
||||
}
|
||||
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
func (s *scriptFragmentScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
inString, err := json.Marshal(sceneInputFromScene(scene))
|
||||
|
||||
if err != nil {
|
||||
|
|
@ -406,12 +410,12 @@ func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sc
|
|||
|
||||
var ret *models.ScrapedScene
|
||||
|
||||
err = s.runScraperScript(ctx, string(inString), &ret)
|
||||
err = s.runScraperScript(ctx, s.definition.Script, string(inString), &ret)
|
||||
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (s *scriptFragmentScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
inString, err := json.Marshal(galleryInputFromGallery(gallery))
|
||||
|
||||
if err != nil {
|
||||
|
|
@ -420,12 +424,12 @@ func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mod
|
|||
|
||||
var ret *models.ScrapedGallery
|
||||
|
||||
err = s.runScraperScript(ctx, string(inString), &ret)
|
||||
err = s.runScraperScript(ctx, s.definition.Script, string(inString), &ret)
|
||||
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*models.ScrapedImage, error) {
|
||||
func (s *scriptFragmentScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*models.ScrapedImage, error) {
|
||||
inString, err := json.Marshal(imageToUpdateInput(image))
|
||||
|
||||
if err != nil {
|
||||
|
|
@ -434,7 +438,7 @@ func (s *scriptScraper) scrapeImageByImage(ctx context.Context, image *models.Im
|
|||
|
||||
var ret *models.ScrapedImage
|
||||
|
||||
err = s.runScraperScript(ctx, string(inString), &ret)
|
||||
err = s.runScraperScript(ctx, s.definition.Script, string(inString), &ret)
|
||||
|
||||
return ret, err
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,15 +14,13 @@ import (
|
|||
)
|
||||
|
||||
type stashScraper struct {
|
||||
scraper scraperTypeConfig
|
||||
config config
|
||||
config Definition
|
||||
globalConfig GlobalConfig
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
func newStashScraper(scraper scraperTypeConfig, client *http.Client, config config, globalConfig GlobalConfig) *stashScraper {
|
||||
func newStashScraper(client *http.Client, config Definition, globalConfig GlobalConfig) *stashScraper {
|
||||
return &stashScraper{
|
||||
scraper: scraper,
|
||||
config: config,
|
||||
client: client,
|
||||
globalConfig: globalConfig,
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@ import (
|
|||
|
||||
const scrapeDefaultSleep = time.Second * 2
|
||||
|
||||
func loadURL(ctx context.Context, loadURL string, client *http.Client, scraperConfig config, globalConfig GlobalConfig) (io.Reader, error) {
|
||||
driverOptions := scraperConfig.DriverOptions
|
||||
func loadURL(ctx context.Context, loadURL string, client *http.Client, def Definition, globalConfig GlobalConfig) (io.Reader, error) {
|
||||
driverOptions := def.DriverOptions
|
||||
if driverOptions != nil && driverOptions.UseCDP {
|
||||
// get the page using chrome dp
|
||||
return urlFromCDP(ctx, loadURL, *driverOptions, globalConfig)
|
||||
|
|
@ -37,7 +37,7 @@ func loadURL(ctx context.Context, loadURL string, client *http.Client, scraperCo
|
|||
return nil, err
|
||||
}
|
||||
|
||||
jar, err := scraperConfig.jar()
|
||||
jar, err := def.jar()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating cookie jar: %w", err)
|
||||
}
|
||||
|
|
@ -83,7 +83,7 @@ func loadURL(ctx context.Context, loadURL string, client *http.Client, scraperCo
|
|||
}
|
||||
|
||||
bodyReader := bytes.NewReader(body)
|
||||
printCookies(jar, scraperConfig, "Jar cookies found for scraper urls")
|
||||
printCookies(jar, def, "Jar cookies found for scraper urls")
|
||||
return charset.NewReader(bodyReader, resp.Header.Get("Content-Type"))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ package scraper
|
|||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
|
@ -19,49 +18,36 @@ import (
|
|||
)
|
||||
|
||||
type xpathScraper struct {
|
||||
scraper scraperTypeConfig
|
||||
config config
|
||||
definition Definition
|
||||
globalConfig GlobalConfig
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
func newXpathScraper(scraper scraperTypeConfig, client *http.Client, config config, globalConfig GlobalConfig) *xpathScraper {
|
||||
return &xpathScraper{
|
||||
scraper: scraper,
|
||||
config: config,
|
||||
globalConfig: globalConfig,
|
||||
client: client,
|
||||
func (s *xpathScraper) getXpathScraper(name string) (*mappedScraper, error) {
|
||||
ret, ok := s.definition.XPathScrapers[name]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("xpath scraper with name %s not found in config", name)
|
||||
}
|
||||
return &ret, nil
|
||||
}
|
||||
|
||||
func (s *xpathScraper) getXpathScraper() *mappedScraper {
|
||||
return s.config.XPathScrapers[s.scraper.Scraper]
|
||||
type xpathURLScraper struct {
|
||||
xpathScraper
|
||||
definition ByURLDefinition
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeURL(ctx context.Context, url string) (*html.Node, *mappedScraper, error) {
|
||||
scraper := s.getXpathScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return doc, scraper, nil
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
func (s *xpathURLScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error) {
|
||||
scraper, err := s.getXpathScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q := s.getXPathQuery(doc, u)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q := s.getXPathQuery(doc, url)
|
||||
// if these just return the return values from scraper.scrape* functions then
|
||||
// it ends up returning ScrapedContent(nil) rather than nil
|
||||
switch ty {
|
||||
|
|
@ -100,11 +86,15 @@ func (s *xpathScraper) scrapeByURL(ctx context.Context, url string, ty ScrapeCon
|
|||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeByName(ctx context.Context, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
|
||||
scraper := s.getXpathScraper()
|
||||
type xpathNameScraper struct {
|
||||
xpathScraper
|
||||
definition ByNameDefinition
|
||||
}
|
||||
|
||||
if scraper == nil {
|
||||
return nil, fmt.Errorf("%w: name %v", ErrNotFound, s.scraper.Scraper)
|
||||
func (s *xpathNameScraper) scrapeByName(ctx context.Context, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
|
||||
scraper, err := s.getXpathScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
const placeholder = "{}"
|
||||
|
|
@ -112,7 +102,7 @@ func (s *xpathScraper) scrapeByName(ctx context.Context, name string, ty ScrapeC
|
|||
// replace the placeholder string with the URL-escaped name
|
||||
escapedName := url.QueryEscape(name)
|
||||
|
||||
url := s.scraper.QueryURL
|
||||
url := s.definition.QueryURL
|
||||
url = strings.ReplaceAll(url, placeholder, escapedName)
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -151,18 +141,22 @@ func (s *xpathScraper) scrapeByName(ctx context.Context, name string, ty ScrapeC
|
|||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
type xpathFragmentScraper struct {
|
||||
xpathScraper
|
||||
definition ByFragmentDefinition
|
||||
}
|
||||
|
||||
func (s *xpathFragmentScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromScene(scene)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
||||
if s.definition.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.definition.QueryURLReplacements)
|
||||
}
|
||||
url := queryURL.constructURL(s.scraper.QueryURL)
|
||||
url := queryURL.constructURL(s.definition.QueryURL)
|
||||
|
||||
scraper := s.getXpathScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
scraper, err := s.getXpathScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -175,7 +169,7 @@ func (s *xpathScraper) scrapeSceneByScene(ctx context.Context, scene *models.Sce
|
|||
return scraper.scrapeScene(ctx, q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeByFragment(ctx context.Context, input Input) (ScrapedContent, error) {
|
||||
func (s *xpathFragmentScraper) scrapeByFragment(ctx context.Context, input Input) (ScrapedContent, error) {
|
||||
switch {
|
||||
case input.Gallery != nil:
|
||||
return nil, fmt.Errorf("%w: cannot use an xpath scraper as a gallery fragment scraper", ErrNotSupported)
|
||||
|
|
@ -189,15 +183,14 @@ func (s *xpathScraper) scrapeByFragment(ctx context.Context, input Input) (Scrap
|
|||
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromScrapedScene(scene)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
||||
if s.definition.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.definition.QueryURLReplacements)
|
||||
}
|
||||
url := queryURL.constructURL(s.scraper.QueryURL)
|
||||
url := queryURL.constructURL(s.definition.QueryURL)
|
||||
|
||||
scraper := s.getXpathScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
scraper, err := s.getXpathScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -210,18 +203,17 @@ func (s *xpathScraper) scrapeByFragment(ctx context.Context, input Input) (Scrap
|
|||
return scraper.scrapeScene(ctx, q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (s *xpathFragmentScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromGallery(gallery)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
||||
if s.definition.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.definition.QueryURLReplacements)
|
||||
}
|
||||
url := queryURL.constructURL(s.scraper.QueryURL)
|
||||
url := queryURL.constructURL(s.definition.QueryURL)
|
||||
|
||||
scraper := s.getXpathScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
scraper, err := s.getXpathScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -234,18 +226,17 @@ func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *mode
|
|||
return scraper.scrapeGallery(ctx, q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*models.ScrapedImage, error) {
|
||||
func (s *xpathFragmentScraper) scrapeImageByImage(ctx context.Context, image *models.Image) (*models.ScrapedImage, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromImage(image)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.scraper.QueryURLReplacements)
|
||||
if s.definition.QueryURLReplacements != nil {
|
||||
queryURL.applyReplacements(s.definition.QueryURLReplacements)
|
||||
}
|
||||
url := queryURL.constructURL(s.scraper.QueryURL)
|
||||
url := queryURL.constructURL(s.definition.QueryURL)
|
||||
|
||||
scraper := s.getXpathScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
scraper, err := s.getXpathScraper(s.definition.Scraper)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
|
@ -259,14 +250,14 @@ func (s *xpathScraper) scrapeImageByImage(ctx context.Context, image *models.Ima
|
|||
}
|
||||
|
||||
func (s *xpathScraper) loadURL(ctx context.Context, url string) (*html.Node, error) {
|
||||
r, err := loadURL(ctx, url, s.client, s.config, s.globalConfig)
|
||||
r, err := loadURL(ctx, url, s.client, s.definition, s.globalConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load URL %q: %w", url, err)
|
||||
}
|
||||
|
||||
ret, err := html.Parse(r)
|
||||
|
||||
if err == nil && s.config.DebugOptions != nil && s.config.DebugOptions.PrintHTML {
|
||||
if err == nil && s.definition.DebugOptions != nil && s.definition.DebugOptions.PrintHTML {
|
||||
var b bytes.Buffer
|
||||
if err := html.Render(&b, ret); err != nil {
|
||||
logger.Warnf("could not render HTML: %v", err)
|
||||
|
|
|
|||
|
|
@ -674,10 +674,10 @@ func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []strin
|
|||
}
|
||||
|
||||
if expectedName != actualName {
|
||||
t.Errorf("Expected performer name %s, got %s", expectedName, actualName)
|
||||
t.Errorf("Expected performer name %q, got %q", expectedName, actualName)
|
||||
}
|
||||
if expectedURL != actualURL {
|
||||
t.Errorf("Expected performer URL %s, got %s", expectedName, actualName)
|
||||
t.Errorf("Expected performer URL %q, got %q", expectedURL, actualURL)
|
||||
}
|
||||
i++
|
||||
}
|
||||
|
|
@ -780,7 +780,7 @@ xPathScrapers:
|
|||
Name: //studio
|
||||
`
|
||||
|
||||
c := &config{}
|
||||
c := &Definition{}
|
||||
err := yaml.Unmarshal([]byte(yamlStr), &c)
|
||||
|
||||
if err != nil {
|
||||
|
|
@ -892,7 +892,7 @@ xPathScrapers:
|
|||
selector: //span
|
||||
`
|
||||
|
||||
c := &config{}
|
||||
c := &Definition{}
|
||||
err := yaml.Unmarshal([]byte(yamlStr), &c)
|
||||
|
||||
if err != nil {
|
||||
|
|
@ -904,12 +904,8 @@ xPathScrapers:
|
|||
|
||||
client := &http.Client{}
|
||||
ctx := context.Background()
|
||||
s := newGroupScraper(*c, globalConfig)
|
||||
us, ok := s.(urlScraper)
|
||||
if !ok {
|
||||
t.Error("couldn't convert scraper into url scraper")
|
||||
}
|
||||
content, err := us.viaURL(ctx, client, ts.URL, ScrapeContentTypePerformer)
|
||||
s := scraperFromDefinition(*c, globalConfig)
|
||||
content, err := s.viaURL(ctx, client, ts.URL, ScrapeContentTypePerformer)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Error scraping performer: %s", err.Error())
|
||||
|
|
|
|||
Loading…
Reference in a new issue