mirror of
https://github.com/stashapp/stash.git
synced 2026-05-09 05:05:29 +02:00
Merge 5ee96ac68a into 01a7583364
This commit is contained in:
commit
ede9c8e807
5 changed files with 144 additions and 10 deletions
14
go.mod
14
go.mod
|
|
@ -15,6 +15,8 @@ require (
|
|||
github.com/disintegration/imaging v1.6.2
|
||||
github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d
|
||||
github.com/doug-martin/goqu/v9 v9.18.0
|
||||
github.com/enetx/g v1.0.224
|
||||
github.com/enetx/surf v1.0.199
|
||||
github.com/feederbox826/gosx-notifier v0.2.2
|
||||
github.com/go-chi/chi/v5 v5.2.2
|
||||
github.com/go-chi/cors v1.2.1
|
||||
|
|
@ -48,7 +50,7 @@ require (
|
|||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/spf13/cast v1.6.0
|
||||
github.com/spf13/pflag v1.0.6
|
||||
github.com/stretchr/testify v1.10.0
|
||||
github.com/stretchr/testify v1.11.1
|
||||
github.com/tidwall/gjson v1.16.0
|
||||
github.com/vearutop/statigz v1.4.0
|
||||
github.com/vektah/dataloaden v0.3.0
|
||||
|
|
@ -70,6 +72,7 @@ require (
|
|||
|
||||
require (
|
||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||
github.com/andybalholm/brotli v1.2.1 // indirect
|
||||
github.com/antchfx/xpath v1.3.6 // indirect
|
||||
github.com/asticode/go-astikit v0.20.0 // indirect
|
||||
github.com/asticode/go-astits v1.8.0 // indirect
|
||||
|
|
@ -78,6 +81,10 @@ require (
|
|||
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/dlclark/regexp2 v1.7.0 // indirect
|
||||
github.com/enetx/http v1.0.28 // indirect
|
||||
github.com/enetx/http2 v1.0.26 // indirect
|
||||
github.com/enetx/http3 v1.0.7 // indirect
|
||||
github.com/enetx/iter v0.0.0-20250912135656-f1583323588f // indirect
|
||||
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||
github.com/go-json-experiment/json v0.0.0-20250725192818-e39067aee2d2 // indirect
|
||||
github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
|
||||
|
|
@ -92,6 +99,7 @@ require (
|
|||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/hcl v1.0.0 // indirect
|
||||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/klauspost/compress v1.18.5 // indirect
|
||||
github.com/knadh/koanf/maps v0.1.2 // indirect
|
||||
github.com/magiconair/properties v1.8.7 // indirect
|
||||
github.com/mattn/go-colorable v0.1.14 // indirect
|
||||
|
|
@ -106,6 +114,9 @@ require (
|
|||
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/quic-go/qpack v0.6.0 // indirect
|
||||
github.com/quic-go/quic-go v0.59.0 // indirect
|
||||
github.com/refraction-networking/utls v1.8.3-0.20260301010127-aa6edf4b11af // indirect
|
||||
github.com/rs/zerolog v1.30.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/sosodev/duration v1.3.1 // indirect
|
||||
|
|
@ -118,6 +129,7 @@ require (
|
|||
github.com/tidwall/match v1.1.1 // indirect
|
||||
github.com/tidwall/pretty v1.2.1 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.6 // indirect
|
||||
github.com/wzshiming/socks5 v0.7.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
|
||||
go.uber.org/atomic v1.11.0 // indirect
|
||||
go.yaml.in/yaml/v3 v3.0.3 // indirect
|
||||
|
|
|
|||
44
go.sum
44
go.sum
|
|
@ -81,8 +81,10 @@ github.com/anacrolix/missinggo v1.1.0/go.mod h1:MBJu3Sk/k3ZfGYcS7z18gwfu72Ey/xop
|
|||
github.com/anacrolix/tagflag v0.0.0-20180109131632-2146c8d41bf0/go.mod h1:1m2U/K6ZT+JZG0+bdMK6qauP49QT4wE5pmhJXOKKCHw=
|
||||
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ=
|
||||
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
|
||||
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
|
||||
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
|
||||
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
|
||||
github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eTWro=
|
||||
github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
|
||||
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||
github.com/antchfx/htmlquery v1.3.5 h1:aYthDDClnG2a2xePf6tys/UyyM/kRcsFRm+ifhFKoU0=
|
||||
|
|
@ -174,6 +176,22 @@ github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d/go.mod h1:DngW8
|
|||
github.com/doug-martin/goqu/v9 v9.18.0 h1:/6bcuEtAe6nsSMVK/M+fOiXUNfyFF3yYtE07DBPFMYY=
|
||||
github.com/doug-martin/goqu/v9 v9.18.0/go.mod h1:nf0Wc2/hV3gYK9LiyqIrzBEVGlI8qW3GuDCEobC4wBQ=
|
||||
github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
|
||||
github.com/enetx/g v1.0.223 h1:9J8y76uiBLvNlMDyaLyCBUWELKNaHsRST+93Zxm2+No=
|
||||
github.com/enetx/g v1.0.223/go.mod h1:dxOHnNkhdZkwwOvgbJKcniq187TqyLO3DuTJpmk1tLQ=
|
||||
github.com/enetx/g v1.0.224 h1:H/uonguFE4qG8YCn5bSpZX5Wh+wTSb+jgf3I2ZM25XM=
|
||||
github.com/enetx/g v1.0.224/go.mod h1:lxhby3LjP8jOTGbxJ/PCd+2Zq1gYiSBbtL/llPhAg5c=
|
||||
github.com/enetx/http v1.0.28 h1:IaNSSDFlAVVdHnYhNIR9wAN7GY4TWL/kkvYC3jOaueY=
|
||||
github.com/enetx/http v1.0.28/go.mod h1:1f4mytfF/SfjATEJnynpwGS6aa1ALjb8DtmYgFVblY0=
|
||||
github.com/enetx/http2 v1.0.26 h1:wy3lYGVwnIUY4Q+gyPPQCJ1a+BMXD1B7Unpyc/Csrxc=
|
||||
github.com/enetx/http2 v1.0.26/go.mod h1:t54ex5HIS8V1+2j6cvEOv6umlrHsbUPFKQ54nYB58Nk=
|
||||
github.com/enetx/http3 v1.0.7 h1:daFhveKBtv8rRallCjaHErzzSHIrq07ovoSvVkvhcMM=
|
||||
github.com/enetx/http3 v1.0.7/go.mod h1:sqpVGZ9F1/wCiW6sjBUS2errKAh3SUYn6VlWE7LL6KM=
|
||||
github.com/enetx/iter v0.0.0-20250912135656-f1583323588f h1:GUW+4AWfECIEJ9oAxgEAVGCpaozMCjRiUYnuR6Q0bCQ=
|
||||
github.com/enetx/iter v0.0.0-20250912135656-f1583323588f/go.mod h1:oMZN8hGLUpi7QBlMEUqailocNy0NFAO/7Lu+Nwh9HMM=
|
||||
github.com/enetx/surf v1.0.198 h1:TJkyEyy5M+GnLZlGKmByeXwG7K2vv7F5L+0SgwlDu7g=
|
||||
github.com/enetx/surf v1.0.198/go.mod h1:BtLmZDYAny66azybFr9UdFVnwy8WRV4FTAzElsd7bvE=
|
||||
github.com/enetx/surf v1.0.199 h1:RtqcwlyLM8O4U+43laNnNJwx5hALkH5cJRxDX1F2VjM=
|
||||
github.com/enetx/surf v1.0.199/go.mod h1:c6g53gi273RBiZFO4THWIqpn5n9RLC6vw5WpUwHrT4U=
|
||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
|
||||
|
|
@ -396,6 +414,8 @@ github.com/kermieisinthehouse/systray v1.2.4 h1:pdH5vnl+KKjRrVCRU4g/2W1/0HVzuuJ6
|
|||
github.com/kermieisinthehouse/systray v1.2.4/go.mod h1:axh6C/jNuSyC0QGtidZJURc9h+h41HNoMySoLVrhVR4=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
|
||||
github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
|
||||
github.com/knadh/koanf/maps v0.1.2 h1:RBfmAW5CnZT+PJ1CVc1QSJKf4Xu9kxfQgYVQSu8hpbo=
|
||||
github.com/knadh/koanf/maps v0.1.2/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI=
|
||||
github.com/knadh/koanf/parsers/yaml v1.1.0 h1:3ltfm9ljprAHt4jxgeYLlFPmUaunuCgu1yILuTXRdM4=
|
||||
|
|
@ -518,13 +538,19 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b
|
|||
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
|
||||
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
|
||||
github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
|
||||
github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
|
||||
github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SAw=
|
||||
github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
|
||||
github.com/refraction-networking/utls v1.8.3-0.20260301010127-aa6edf4b11af h1:er2acxbi3N1nvEq6HXHUAR1nTWEJmQfqiGR8EVT9rfs=
|
||||
github.com/refraction-networking/utls v1.8.3-0.20260301010127-aa6edf4b11af/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
|
||||
github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E=
|
||||
github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo=
|
||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||
github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
|
|
@ -590,8 +616,8 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
|
|||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
|
||||
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
|
||||
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
|
||||
|
|
@ -615,10 +641,14 @@ github.com/vektah/gqlparser/v2 v2.5.27/go.mod h1:D1/VCZtV3LPnQrcPBeR/q5jkSQIPti0
|
|||
github.com/vektra/mockery/v2 v2.10.0 h1:MiiQWxwdq7/ET6dCXLaJzSGEN17k758H7JHS9kOdiks=
|
||||
github.com/vektra/mockery/v2 v2.10.0/go.mod h1:m/WO2UzWzqgVX3nvqpRQq70I4Z7jbSCRhdmkgtp+Ab4=
|
||||
github.com/willf/bitset v1.1.9/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
|
||||
github.com/wzshiming/socks5 v0.7.0 h1:euJ+U48WrvVngi+opC8vAnpZ5sK12y1C2hPvb1f48Rg=
|
||||
github.com/wzshiming/socks5 v0.7.0/go.mod h1:BvCAqlzocQN5xwLjBZDBbvWlrx8sCYSSbHEOf2wZgT0=
|
||||
github.com/xWTF/chardet v0.0.0-20230208095535-c780f2ac244e h1:GruPsb+44XvYAzuAgJW1d1WHqmcI73L2XSjsbx/eJZw=
|
||||
github.com/xWTF/chardet v0.0.0-20230208095535-c780f2ac244e/go.mod h1:wA8kQ8WFipMciY9WcWzqQgZordm/P7l8IZdvx1crwmc=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
||||
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
|
||||
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
|
||||
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
|
|
@ -643,6 +673,8 @@ go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqe
|
|||
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
|
||||
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
|
||||
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
|
||||
go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko=
|
||||
go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o=
|
||||
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
|
||||
go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=
|
||||
go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE=
|
||||
|
|
|
|||
|
|
@ -211,6 +211,7 @@ type header struct {
|
|||
|
||||
type scraperDriverOptions struct {
|
||||
UseCDP bool `yaml:"useCDP"`
|
||||
UseSurf bool `yaml:"useSurf"`
|
||||
Sleep int `yaml:"sleep"`
|
||||
Clicks []*clickOptions `yaml:"clicks"`
|
||||
Cookies []*cookieOptions `yaml:"cookies"`
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ import (
|
|||
"github.com/chromedp/cdproto/fetch"
|
||||
"github.com/chromedp/cdproto/network"
|
||||
"github.com/chromedp/chromedp"
|
||||
"github.com/enetx/g"
|
||||
"github.com/enetx/surf"
|
||||
jsoniter "github.com/json-iterator/go"
|
||||
"golang.org/x/net/html/charset"
|
||||
|
||||
|
|
@ -27,9 +29,13 @@ const scrapeDefaultSleep = time.Second * 2
|
|||
|
||||
func loadURL(ctx context.Context, loadURL string, client *http.Client, def Definition, globalConfig GlobalConfig) (io.Reader, error) {
|
||||
driverOptions := def.DriverOptions
|
||||
if driverOptions != nil && driverOptions.UseCDP {
|
||||
// get the page using chrome dp
|
||||
return urlFromCDP(ctx, loadURL, *driverOptions, globalConfig)
|
||||
if driverOptions != nil {
|
||||
switch {
|
||||
case driverOptions.UseCDP:
|
||||
return urlFromCDP(ctx, loadURL, *driverOptions, globalConfig)
|
||||
case driverOptions.UseSurf:
|
||||
return urlFromSurf(ctx, loadURL, *driverOptions, def, globalConfig)
|
||||
}
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, loadURL, nil)
|
||||
|
|
@ -71,11 +77,75 @@ func loadURL(ctx context.Context, loadURL string, client *http.Client, def Defin
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 400 {
|
||||
return nil, fmt.Errorf("http error %d:%s", resp.StatusCode, http.StatusText(resp.StatusCode))
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bodyReader := bytes.NewReader(body)
|
||||
printCookies(jar, def, "Jar cookies found for scraper urls")
|
||||
return charset.NewReader(bodyReader, resp.Header.Get("Content-Type"))
|
||||
}
|
||||
|
||||
// func urlFromSurf uses enetx/surf with TLS browser emulation to bypass fingerprint-based blocking.
|
||||
// this is a step down from CDP but faster and more lightweight and can succeed where CDP might fail
|
||||
func urlFromSurf(ctx context.Context, loadURL string, driverOptions scraperDriverOptions, def Definition, globalConfig GlobalConfig) (io.Reader, error) {
|
||||
// get cookies
|
||||
jar, err := def.jar()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating cookie jar: %w", err)
|
||||
}
|
||||
|
||||
// create client
|
||||
builder := surf.NewClient().
|
||||
Builder().
|
||||
Impersonate().Chrome()
|
||||
|
||||
// get global proxy
|
||||
proxyURL := globalConfig.GetProxy()
|
||||
if proxyURL != "" {
|
||||
builder = builder.Proxy(g.String(proxyURL))
|
||||
}
|
||||
|
||||
client := builder.
|
||||
Build().
|
||||
Unwrap().
|
||||
Std() // use stdlib adapter
|
||||
|
||||
// pass in jar directly
|
||||
client.Jar = jar
|
||||
|
||||
// try to mimic normal method above
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, loadURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// remove User-Agent header. This undermines TLS fingerprinting
|
||||
// because of GREASE (RFC 8701)
|
||||
// older fingerprints + UAs are still sustainable.
|
||||
for _, h := range driverOptions.Headers {
|
||||
if h.Key != "" {
|
||||
if strings.ToLower(h.Key) == "user-agent" {
|
||||
continue
|
||||
}
|
||||
req.Header.Set(h.Key, h.Value)
|
||||
logger.Debugf("[scraper] adding header <%s:%s>", h.Key, h.Value)
|
||||
}
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 400 {
|
||||
return nil, fmt.Errorf("http error %d:%s", resp.StatusCode, http.StatusText(resp.StatusCode))
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -594,6 +594,25 @@ driver:
|
|||
|
||||
> **⚠️ Note:** each `click` adds an extra delay of `clicks sleep` seconds, so the above adds `2+4+1+2+2=11` seconds to the loading time of the page.
|
||||
|
||||
### TLS emulation (surf) support
|
||||
|
||||
Some websites use Transport Layer Security (TLS) fingerprinting to identify and block automated traffic. These fingerprints can't be passed through headers so we use the [surf](https://github.com/enetx/surf) library for TLS emulation. TLS emulation is a technique that allows scrapers to mimic the TLS fingerprint of a real browser, making it more difficult for websites to detect and block the scraper.
|
||||
|
||||
It should be faster and more reliable than Chrome DevTools Protocol (CDP) or Python based script scrapers in most cases, but it does not execute JavaScript, so it is not suitable for websites that require JavaScript to load content.
|
||||
|
||||
If website works well in the browser but is inexplicably blocked when accessing through curl/Python/Stash, then TLS emulation may be worth trying.
|
||||
|
||||
TLS emulation can be enabled for a specific configuration by adding the following to the root of the YAML configuration:
|
||||
|
||||
```yaml
|
||||
driver:
|
||||
useSurf: true
|
||||
```
|
||||
|
||||
Enabling `useSurf` driver will automatically remove the `User-Agent` [header](#headers), since overriding it would create a mismatch between TLS fingerprint and User-Agent, defeating the entire purpose of emulation.
|
||||
|
||||
> **⚠️ Note:** Proxies that do not support SOCKS5 will not be able to take full advantage of TLS emulation and might fail in rare cases.
|
||||
|
||||
### Cookie support
|
||||
|
||||
In some websites the use of cookies is needed to bypass a welcoming message or some other kind of protection. Stash supports the setting of cookies for the direct xpath scraper and the CDP based one. Due to implementation issues the usage varies a bit.
|
||||
|
|
|
|||
Loading…
Reference in a new issue