mirror of
https://github.com/stashapp/stash.git
synced 2026-02-08 08:21:32 +01:00
* Remove reflection from mapped value processing * AI generated unit tests * Move mappedConfig to separate file * Rename group to configScraper * Separate mapped post-processing code into separate file * Update test after group rename * Check map entry when returning scraper * Refactor config into definition * Support single string for string slice translation * Rename config.go to definition.go * Rename configScraper to definedScraper * Rename config_scraper.go to defined_scraper.go
333 lines
7.9 KiB
Go
333 lines
7.9 KiB
Go
package scraper
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"math"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/stashapp/stash/pkg/javascript"
|
|
"github.com/stashapp/stash/pkg/logger"
|
|
)
|
|
|
|
type (
	// mappedRegexConfig is a single regular-expression replacement rule read
	// from the YAML keys "regex" and "with".
	mappedRegexConfig struct {
		// Regex is the pattern to search for. An empty pattern makes the
		// rule a no-op.
		Regex string `yaml:"regex"`
		// With is the replacement text substituted for every match.
		With string `yaml:"with"`
	}

	// mappedRegexConfigs is an ordered list of replacement rules.
	mappedRegexConfigs []mappedRegexConfig
)
|
|
|
|
func (c mappedRegexConfig) apply(value string) string {
|
|
if c.Regex != "" {
|
|
re, err := regexp.Compile(c.Regex)
|
|
if err != nil {
|
|
logger.Warnf("Error compiling regex '%s': %s", c.Regex, err.Error())
|
|
return value
|
|
}
|
|
|
|
ret := re.ReplaceAllString(value, c.With)
|
|
|
|
// trim leading and trailing whitespace
|
|
// this is done to maintain backwards compatibility with existing
|
|
// scrapers
|
|
ret = strings.TrimSpace(ret)
|
|
|
|
logger.Debugf(`Replace: '%s' with '%s'`, c.Regex, c.With)
|
|
logger.Debugf("Before: %s", value)
|
|
logger.Debugf("After: %s", ret)
|
|
return ret
|
|
}
|
|
|
|
return value
|
|
}
|
|
|
|
func (c mappedRegexConfigs) apply(value string) string {
|
|
// apply regex in order
|
|
for _, config := range c {
|
|
value = config.apply(value)
|
|
}
|
|
|
|
return value
|
|
}
|
|
|
|
type postProcessAction interface {
|
|
Apply(ctx context.Context, value string, q mappedQuery) string
|
|
}
|
|
|
|
type postProcessParseDate string
|
|
|
|
func (p *postProcessParseDate) Apply(ctx context.Context, value string, q mappedQuery) string {
|
|
parseDate := string(*p)
|
|
|
|
const internalDateFormat = "2006-01-02"
|
|
|
|
valueLower := strings.ToLower(value)
|
|
if valueLower == "today" || valueLower == "yesterday" { // handle today, yesterday
|
|
dt := time.Now()
|
|
if valueLower == "yesterday" { // subtract 1 day from now
|
|
dt = dt.AddDate(0, 0, -1)
|
|
}
|
|
return dt.Format(internalDateFormat)
|
|
}
|
|
|
|
if parseDate == "" {
|
|
return value
|
|
}
|
|
|
|
if parseDate == "unix" {
|
|
// try to parse the date using unix timestamp format
|
|
// if it fails, then just fall back to the original value
|
|
timeAsInt, err := strconv.ParseInt(value, 10, 64)
|
|
if err != nil {
|
|
logger.Warnf("Error parsing date string '%s' using unix timestamp format : %s", value, err.Error())
|
|
return value
|
|
}
|
|
parsedValue := time.Unix(timeAsInt, 0)
|
|
|
|
return parsedValue.Format(internalDateFormat)
|
|
}
|
|
|
|
// try to parse the date using the pattern
|
|
// if it fails, then just fall back to the original value
|
|
parsedValue, err := time.Parse(parseDate, value)
|
|
if err != nil {
|
|
logger.Warnf("Error parsing date string '%s' using format '%s': %s", value, parseDate, err.Error())
|
|
return value
|
|
}
|
|
|
|
// convert it into our date format
|
|
return parsedValue.Format(internalDateFormat)
|
|
}
|
|
|
|
type postProcessSubtractDays bool
|
|
|
|
func (p *postProcessSubtractDays) Apply(ctx context.Context, value string, q mappedQuery) string {
|
|
const internalDateFormat = "2006-01-02"
|
|
|
|
i, err := strconv.Atoi(value)
|
|
if err != nil {
|
|
logger.Warnf("Error parsing day string %s: %s", value, err)
|
|
return value
|
|
}
|
|
|
|
dt := time.Now()
|
|
dt = dt.AddDate(0, 0, -i)
|
|
return dt.Format(internalDateFormat)
|
|
}
|
|
|
|
type postProcessReplace mappedRegexConfigs
|
|
|
|
func (c *postProcessReplace) Apply(ctx context.Context, value string, q mappedQuery) string {
|
|
replace := mappedRegexConfigs(*c)
|
|
return replace.apply(value)
|
|
}
|
|
|
|
type postProcessSubScraper mappedScraperAttrConfig
|
|
|
|
func (p *postProcessSubScraper) Apply(ctx context.Context, value string, q mappedQuery) string {
|
|
subScrapeConfig := mappedScraperAttrConfig(*p)
|
|
|
|
logger.Debugf("Sub-scraping for: %s", value)
|
|
ss := q.subScrape(ctx, value)
|
|
|
|
if ss != nil {
|
|
found, err := ss.runQuery(subScrapeConfig.Selector)
|
|
if err != nil {
|
|
logger.Warnf("subscrape for '%v': %v", value, err)
|
|
}
|
|
|
|
if len(found) > 0 {
|
|
// check if we're concatenating the results into a single result
|
|
var result string
|
|
if subScrapeConfig.hasConcat() {
|
|
result = subScrapeConfig.concatenateResults(found)
|
|
} else {
|
|
result = found[0]
|
|
}
|
|
|
|
result = subScrapeConfig.postProcess(ctx, result, ss)
|
|
return result
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
type postProcessMap map[string]string
|
|
|
|
func (p *postProcessMap) Apply(ctx context.Context, value string, q mappedQuery) string {
|
|
// return the mapped value if present
|
|
m := *p
|
|
mapped, ok := m[value]
|
|
|
|
if ok {
|
|
return mapped
|
|
}
|
|
|
|
return value
|
|
}
|
|
|
|
type postProcessFeetToCm bool
|
|
|
|
func (p *postProcessFeetToCm) Apply(ctx context.Context, value string, q mappedQuery) string {
|
|
const foot_in_cm = 30.48
|
|
const inch_in_cm = 2.54
|
|
|
|
reg := regexp.MustCompile("[0-9]+")
|
|
filtered := reg.FindAllString(value, -1)
|
|
|
|
var feet float64
|
|
var inches float64
|
|
if len(filtered) > 0 {
|
|
feet, _ = strconv.ParseFloat(filtered[0], 64)
|
|
}
|
|
if len(filtered) > 1 {
|
|
inches, _ = strconv.ParseFloat(filtered[1], 64)
|
|
}
|
|
|
|
var centimeters = feet*foot_in_cm + inches*inch_in_cm
|
|
|
|
// Return rounded integer string
|
|
return strconv.Itoa(int(math.Round(centimeters)))
|
|
}
|
|
|
|
type postProcessLbToKg bool
|
|
|
|
func (p *postProcessLbToKg) Apply(ctx context.Context, value string, q mappedQuery) string {
|
|
const lb_in_kg = 0.45359237
|
|
w, err := strconv.ParseFloat(value, 64)
|
|
if err == nil {
|
|
w *= lb_in_kg
|
|
value = strconv.Itoa(int(math.Round(w)))
|
|
}
|
|
return value
|
|
}
|
|
|
|
type postProcessJavascript string
|
|
|
|
func (p *postProcessJavascript) Apply(ctx context.Context, value string, q mappedQuery) string {
|
|
vm := javascript.NewVM()
|
|
if err := vm.Set("value", value); err != nil {
|
|
logger.Warnf("javascript failed to set value: %v", err)
|
|
return value
|
|
}
|
|
|
|
log := &javascript.Log{
|
|
Logger: logger.Logger,
|
|
Prefix: "",
|
|
ProgressChan: make(chan float64),
|
|
}
|
|
|
|
if err := log.AddToVM("log", vm); err != nil {
|
|
logger.Logger.Errorf("error adding log API: %w", err)
|
|
}
|
|
|
|
util := &javascript.Util{}
|
|
if err := util.AddToVM("util", vm); err != nil {
|
|
logger.Logger.Errorf("error adding util API: %w", err)
|
|
}
|
|
|
|
script, err := javascript.CompileScript("", "(function() { "+string(*p)+"})()")
|
|
if err != nil {
|
|
logger.Warnf("javascript failed to compile: %v", err)
|
|
return value
|
|
}
|
|
|
|
output, err := vm.RunProgram(script)
|
|
if err != nil {
|
|
logger.Warnf("javascript failed to run: %v", err)
|
|
return value
|
|
}
|
|
|
|
// assume output is string
|
|
return output.String()
|
|
}
|
|
|
|
type mappedPostProcessAction struct {
|
|
ParseDate string `yaml:"parseDate"`
|
|
SubtractDays bool `yaml:"subtractDays"`
|
|
Replace mappedRegexConfigs `yaml:"replace"`
|
|
SubScraper *mappedScraperAttrConfig `yaml:"subScraper"`
|
|
Map map[string]string `yaml:"map"`
|
|
FeetToCm bool `yaml:"feetToCm"`
|
|
LbToKg bool `yaml:"lbToKg"`
|
|
Javascript string `yaml:"javascript"`
|
|
}
|
|
|
|
func (a mappedPostProcessAction) ToPostProcessAction() (postProcessAction, error) {
|
|
var found string
|
|
var ret postProcessAction
|
|
|
|
ensureOnly := func(field string) error {
|
|
if found != "" {
|
|
return fmt.Errorf("post-process actions must have a single field, found %s and %s", found, field)
|
|
}
|
|
found = field
|
|
return nil
|
|
}
|
|
|
|
if a.ParseDate != "" {
|
|
found = "parseDate"
|
|
action := postProcessParseDate(a.ParseDate)
|
|
ret = &action
|
|
}
|
|
if len(a.Replace) > 0 {
|
|
if err := ensureOnly("replace"); err != nil {
|
|
return nil, err
|
|
}
|
|
action := postProcessReplace(a.Replace)
|
|
ret = &action
|
|
}
|
|
if a.SubScraper != nil {
|
|
if err := ensureOnly("subScraper"); err != nil {
|
|
return nil, err
|
|
}
|
|
action := postProcessSubScraper(*a.SubScraper)
|
|
ret = &action
|
|
}
|
|
if a.Map != nil {
|
|
if err := ensureOnly("map"); err != nil {
|
|
return nil, err
|
|
}
|
|
action := postProcessMap(a.Map)
|
|
ret = &action
|
|
}
|
|
if a.FeetToCm {
|
|
if err := ensureOnly("feetToCm"); err != nil {
|
|
return nil, err
|
|
}
|
|
action := postProcessFeetToCm(a.FeetToCm)
|
|
ret = &action
|
|
}
|
|
if a.LbToKg {
|
|
if err := ensureOnly("lbToKg"); err != nil {
|
|
return nil, err
|
|
}
|
|
action := postProcessLbToKg(a.LbToKg)
|
|
ret = &action
|
|
}
|
|
if a.SubtractDays {
|
|
if err := ensureOnly("subtractDays"); err != nil {
|
|
return nil, err
|
|
}
|
|
action := postProcessSubtractDays(a.SubtractDays)
|
|
ret = &action
|
|
}
|
|
if a.Javascript != "" {
|
|
if err := ensureOnly("javascript"); err != nil {
|
|
return nil, err
|
|
}
|
|
action := postProcessJavascript(a.Javascript)
|
|
ret = &action
|
|
}
|
|
|
|
if ret == nil {
|
|
return nil, errors.New("invalid post-process action")
|
|
}
|
|
|
|
return ret, nil
|
|
}
|