stash/pkg/scraper/mapped_postprocessing.go
WithoutPants 88eb46380c
Refactor scraper package (#6495)
* Remove reflection from mapped value processing
* AI generated unit tests
* Move mappedConfig to separate file
* Rename group to configScraper
* Separate mapped post-processing code into separate file
* Update test after group rename
* Check map entry when returning scraper
* Refactor config into definition
* Support single string for string slice translation
* Rename config.go to definition.go
* Rename configScraper to definedScraper
* Rename config_scraper.go to defined_scraper.go
2026-02-04 11:07:51 +11:00

333 lines
7.9 KiB
Go

package scraper
import (
"context"
"errors"
"fmt"
"math"
"regexp"
"strconv"
"strings"
"time"
"github.com/stashapp/stash/pkg/javascript"
"github.com/stashapp/stash/pkg/logger"
)
type mappedRegexConfig struct {
Regex string `yaml:"regex"`
With string `yaml:"with"`
}
type mappedRegexConfigs []mappedRegexConfig
func (c mappedRegexConfig) apply(value string) string {
if c.Regex != "" {
re, err := regexp.Compile(c.Regex)
if err != nil {
logger.Warnf("Error compiling regex '%s': %s", c.Regex, err.Error())
return value
}
ret := re.ReplaceAllString(value, c.With)
// trim leading and trailing whitespace
// this is done to maintain backwards compatibility with existing
// scrapers
ret = strings.TrimSpace(ret)
logger.Debugf(`Replace: '%s' with '%s'`, c.Regex, c.With)
logger.Debugf("Before: %s", value)
logger.Debugf("After: %s", ret)
return ret
}
return value
}
func (c mappedRegexConfigs) apply(value string) string {
// apply regex in order
for _, config := range c {
value = config.apply(value)
}
return value
}
type postProcessAction interface {
Apply(ctx context.Context, value string, q mappedQuery) string
}
type postProcessParseDate string
func (p *postProcessParseDate) Apply(ctx context.Context, value string, q mappedQuery) string {
parseDate := string(*p)
const internalDateFormat = "2006-01-02"
valueLower := strings.ToLower(value)
if valueLower == "today" || valueLower == "yesterday" { // handle today, yesterday
dt := time.Now()
if valueLower == "yesterday" { // subtract 1 day from now
dt = dt.AddDate(0, 0, -1)
}
return dt.Format(internalDateFormat)
}
if parseDate == "" {
return value
}
if parseDate == "unix" {
// try to parse the date using unix timestamp format
// if it fails, then just fall back to the original value
timeAsInt, err := strconv.ParseInt(value, 10, 64)
if err != nil {
logger.Warnf("Error parsing date string '%s' using unix timestamp format : %s", value, err.Error())
return value
}
parsedValue := time.Unix(timeAsInt, 0)
return parsedValue.Format(internalDateFormat)
}
// try to parse the date using the pattern
// if it fails, then just fall back to the original value
parsedValue, err := time.Parse(parseDate, value)
if err != nil {
logger.Warnf("Error parsing date string '%s' using format '%s': %s", value, parseDate, err.Error())
return value
}
// convert it into our date format
return parsedValue.Format(internalDateFormat)
}
type postProcessSubtractDays bool
func (p *postProcessSubtractDays) Apply(ctx context.Context, value string, q mappedQuery) string {
const internalDateFormat = "2006-01-02"
i, err := strconv.Atoi(value)
if err != nil {
logger.Warnf("Error parsing day string %s: %s", value, err)
return value
}
dt := time.Now()
dt = dt.AddDate(0, 0, -i)
return dt.Format(internalDateFormat)
}
type postProcessReplace mappedRegexConfigs
func (c *postProcessReplace) Apply(ctx context.Context, value string, q mappedQuery) string {
replace := mappedRegexConfigs(*c)
return replace.apply(value)
}
type postProcessSubScraper mappedScraperAttrConfig
func (p *postProcessSubScraper) Apply(ctx context.Context, value string, q mappedQuery) string {
subScrapeConfig := mappedScraperAttrConfig(*p)
logger.Debugf("Sub-scraping for: %s", value)
ss := q.subScrape(ctx, value)
if ss != nil {
found, err := ss.runQuery(subScrapeConfig.Selector)
if err != nil {
logger.Warnf("subscrape for '%v': %v", value, err)
}
if len(found) > 0 {
// check if we're concatenating the results into a single result
var result string
if subScrapeConfig.hasConcat() {
result = subScrapeConfig.concatenateResults(found)
} else {
result = found[0]
}
result = subScrapeConfig.postProcess(ctx, result, ss)
return result
}
}
return ""
}
type postProcessMap map[string]string
func (p *postProcessMap) Apply(ctx context.Context, value string, q mappedQuery) string {
// return the mapped value if present
m := *p
mapped, ok := m[value]
if ok {
return mapped
}
return value
}
type postProcessFeetToCm bool
func (p *postProcessFeetToCm) Apply(ctx context.Context, value string, q mappedQuery) string {
const foot_in_cm = 30.48
const inch_in_cm = 2.54
reg := regexp.MustCompile("[0-9]+")
filtered := reg.FindAllString(value, -1)
var feet float64
var inches float64
if len(filtered) > 0 {
feet, _ = strconv.ParseFloat(filtered[0], 64)
}
if len(filtered) > 1 {
inches, _ = strconv.ParseFloat(filtered[1], 64)
}
var centimeters = feet*foot_in_cm + inches*inch_in_cm
// Return rounded integer string
return strconv.Itoa(int(math.Round(centimeters)))
}
type postProcessLbToKg bool
func (p *postProcessLbToKg) Apply(ctx context.Context, value string, q mappedQuery) string {
const lb_in_kg = 0.45359237
w, err := strconv.ParseFloat(value, 64)
if err == nil {
w *= lb_in_kg
value = strconv.Itoa(int(math.Round(w)))
}
return value
}
type postProcessJavascript string
func (p *postProcessJavascript) Apply(ctx context.Context, value string, q mappedQuery) string {
vm := javascript.NewVM()
if err := vm.Set("value", value); err != nil {
logger.Warnf("javascript failed to set value: %v", err)
return value
}
log := &javascript.Log{
Logger: logger.Logger,
Prefix: "",
ProgressChan: make(chan float64),
}
if err := log.AddToVM("log", vm); err != nil {
logger.Logger.Errorf("error adding log API: %w", err)
}
util := &javascript.Util{}
if err := util.AddToVM("util", vm); err != nil {
logger.Logger.Errorf("error adding util API: %w", err)
}
script, err := javascript.CompileScript("", "(function() { "+string(*p)+"})()")
if err != nil {
logger.Warnf("javascript failed to compile: %v", err)
return value
}
output, err := vm.RunProgram(script)
if err != nil {
logger.Warnf("javascript failed to run: %v", err)
return value
}
// assume output is string
return output.String()
}
type mappedPostProcessAction struct {
ParseDate string `yaml:"parseDate"`
SubtractDays bool `yaml:"subtractDays"`
Replace mappedRegexConfigs `yaml:"replace"`
SubScraper *mappedScraperAttrConfig `yaml:"subScraper"`
Map map[string]string `yaml:"map"`
FeetToCm bool `yaml:"feetToCm"`
LbToKg bool `yaml:"lbToKg"`
Javascript string `yaml:"javascript"`
}
func (a mappedPostProcessAction) ToPostProcessAction() (postProcessAction, error) {
var found string
var ret postProcessAction
ensureOnly := func(field string) error {
if found != "" {
return fmt.Errorf("post-process actions must have a single field, found %s and %s", found, field)
}
found = field
return nil
}
if a.ParseDate != "" {
found = "parseDate"
action := postProcessParseDate(a.ParseDate)
ret = &action
}
if len(a.Replace) > 0 {
if err := ensureOnly("replace"); err != nil {
return nil, err
}
action := postProcessReplace(a.Replace)
ret = &action
}
if a.SubScraper != nil {
if err := ensureOnly("subScraper"); err != nil {
return nil, err
}
action := postProcessSubScraper(*a.SubScraper)
ret = &action
}
if a.Map != nil {
if err := ensureOnly("map"); err != nil {
return nil, err
}
action := postProcessMap(a.Map)
ret = &action
}
if a.FeetToCm {
if err := ensureOnly("feetToCm"); err != nil {
return nil, err
}
action := postProcessFeetToCm(a.FeetToCm)
ret = &action
}
if a.LbToKg {
if err := ensureOnly("lbToKg"); err != nil {
return nil, err
}
action := postProcessLbToKg(a.LbToKg)
ret = &action
}
if a.SubtractDays {
if err := ensureOnly("subtractDays"); err != nil {
return nil, err
}
action := postProcessSubtractDays(a.SubtractDays)
ret = &action
}
if a.Javascript != "" {
if err := ensureOnly("javascript"); err != nil {
return nil, err
}
action := postProcessJavascript(a.Javascript)
ret = &action
}
if ret == nil {
return nil, errors.New("invalid post-process action")
}
return ret, nil
}