From 3473bd1a63118cf9a35407e13878dc2a39457555 Mon Sep 17 00:00:00 2001 From: feederbox826 Date: Sun, 3 May 2026 22:35:06 -0400 Subject: [PATCH] [docs] add surf docs, update --- go.mod | 6 +++--- go.sum | 6 ++++++ .../src/docs/en/Manual/ScraperDevelopment.md | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index a6cb896f2..d15c42b61 100644 --- a/go.mod +++ b/go.mod @@ -15,8 +15,8 @@ require ( github.com/disintegration/imaging v1.6.2 github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d github.com/doug-martin/goqu/v9 v9.18.0 - github.com/enetx/g v1.0.223 - github.com/enetx/surf v1.0.198 + github.com/enetx/g v1.0.224 + github.com/enetx/surf v1.0.199 github.com/feederbox826/gosx-notifier v0.2.2 github.com/go-chi/chi/v5 v5.2.2 github.com/go-chi/cors v1.2.1 @@ -72,7 +72,7 @@ require ( require ( github.com/agnivade/levenshtein v1.2.1 // indirect - github.com/andybalholm/brotli v1.2.0 // indirect + github.com/andybalholm/brotli v1.2.1 // indirect github.com/antchfx/xpath v1.3.6 // indirect github.com/asticode/go-astikit v0.20.0 // indirect github.com/asticode/go-astits v1.8.0 // indirect diff --git a/go.sum b/go.sum index 4ff22adfb..d7a17118d 100644 --- a/go.sum +++ b/go.sum @@ -83,6 +83,8 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNg github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ= github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= +github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eTWro= +github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= 
github.com/antchfx/htmlquery v1.3.5 h1:aYthDDClnG2a2xePf6tys/UyyM/kRcsFRm+ifhFKoU0= @@ -176,6 +178,8 @@ github.com/doug-martin/goqu/v9 v9.18.0/go.mod h1:nf0Wc2/hV3gYK9LiyqIrzBEVGlI8qW3 github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/enetx/g v1.0.223 h1:9J8y76uiBLvNlMDyaLyCBUWELKNaHsRST+93Zxm2+No= github.com/enetx/g v1.0.223/go.mod h1:dxOHnNkhdZkwwOvgbJKcniq187TqyLO3DuTJpmk1tLQ= +github.com/enetx/g v1.0.224 h1:H/uonguFE4qG8YCn5bSpZX5Wh+wTSb+jgf3I2ZM25XM= +github.com/enetx/g v1.0.224/go.mod h1:lxhby3LjP8jOTGbxJ/PCd+2Zq1gYiSBbtL/llPhAg5c= github.com/enetx/http v1.0.28 h1:IaNSSDFlAVVdHnYhNIR9wAN7GY4TWL/kkvYC3jOaueY= github.com/enetx/http v1.0.28/go.mod h1:1f4mytfF/SfjATEJnynpwGS6aa1ALjb8DtmYgFVblY0= github.com/enetx/http2 v1.0.26 h1:wy3lYGVwnIUY4Q+gyPPQCJ1a+BMXD1B7Unpyc/Csrxc= @@ -186,6 +190,8 @@ github.com/enetx/iter v0.0.0-20250912135656-f1583323588f h1:GUW+4AWfECIEJ9oAxgEA github.com/enetx/iter v0.0.0-20250912135656-f1583323588f/go.mod h1:oMZN8hGLUpi7QBlMEUqailocNy0NFAO/7Lu+Nwh9HMM= github.com/enetx/surf v1.0.198 h1:TJkyEyy5M+GnLZlGKmByeXwG7K2vv7F5L+0SgwlDu7g= github.com/enetx/surf v1.0.198/go.mod h1:BtLmZDYAny66azybFr9UdFVnwy8WRV4FTAzElsd7bvE= +github.com/enetx/surf v1.0.199 h1:RtqcwlyLM8O4U+43laNnNJwx5hALkH5cJRxDX1F2VjM= +github.com/enetx/surf v1.0.199/go.mod h1:c6g53gi273RBiZFO4THWIqpn5n9RLC6vw5WpUwHrT4U= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= diff --git a/ui/v2.5/src/docs/en/Manual/ScraperDevelopment.md b/ui/v2.5/src/docs/en/Manual/ScraperDevelopment.md index 858fb89a0..021776ad1 100644 --- a/ui/v2.5/src/docs/en/Manual/ScraperDevelopment.md +++ 
b/ui/v2.5/src/docs/en/Manual/ScraperDevelopment.md @@ -594,6 +594,25 @@ driver: > **⚠️ Note:** each `click` adds an extra delay of `clicks sleep` seconds, so the above adds `2+4+1+2+2=11` seconds to the loading time of the page. +### TLS emulation (surf) support + +Some websites use TLS fingerprinting to identify automated traffic. A TLS fingerprint is derived from the TLS handshake itself, so it cannot be changed by setting request headers. Compared to the previous approach of using Python libraries, [surf](https://github.com/enetx/surf) TLS emulation is faster and more reliable in most cases. + +Consider TLS emulation if a site works well in a browser but is inexplicably blocked when accessed through curl, Python, or Stash. + +TLS emulation can be enabled for a specific configuration by adding the following to the root of the YAML configuration: + +```yaml +driver: + useSurf: true +``` + +This is only supported on Stash v0.32+. + +The `User-Agent` [header](#headers) is automatically removed, since overriding it would create a mismatch between the TLS fingerprint and the User-Agent, defeating the entire purpose of emulation. + +> **⚠️ Note:** Proxies that do not support SOCKS5 will not be able to take full advantage of TLS emulation and might fail in rare cases. + ### Cookie support In some websites the use of cookies is needed to bypass a welcoming message or some other kind of protection. Stash supports the setting of cookies for the direct xpath scraper and the CDP based one. Due to implementation issues the usage varies a bit.