From 3a04a5518f0c4d9c15c2ca38e6eb8b8b0379513a Mon Sep 17 00:00:00 2001 From: feederbox826 Date: Sun, 3 May 2026 21:50:39 -0400 Subject: [PATCH] [scraper] copy all headers except for user-agent --- pkg/scraper/url.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pkg/scraper/url.go b/pkg/scraper/url.go index 9d95a69a6..2ee8306af 100644 --- a/pkg/scraper/url.go +++ b/pkg/scraper/url.go @@ -34,7 +34,7 @@ func loadURL(ctx context.Context, loadURL string, client *http.Client, def Defin case driverOptions.UseCDP: return urlFromCDP(ctx, loadURL, *driverOptions, globalConfig) case driverOptions.UseSurf: - return urlFromSurf(ctx, loadURL, def, globalConfig) + return urlFromSurf(ctx, loadURL, *driverOptions, def, globalConfig) } } @@ -94,7 +94,7 @@ func loadURL(ctx context.Context, loadURL string, client *http.Client, def Defin // func urlFromSurf uses enetx/surf with TLS browser emulation to bypass fingerprint-based blocking. // this is a step down from CDP but faster and more lightweight and can succeed where CDP might fail -func urlFromSurf(ctx context.Context, loadURL string, def Definition, globalConfig GlobalConfig) (io.Reader, error) { +func urlFromSurf(ctx context.Context, loadURL string, driverOptions scraperDriverOptions, def Definition, globalConfig GlobalConfig) (io.Reader, error) { // get cookies jar, err := def.jar() if err != nil { @@ -125,6 +125,19 @@ func urlFromSurf(ctx context.Context, loadURL string, def Definition, globalConf if err != nil { return nil, err } + // skip the User-Agent header: overriding it undermines TLS fingerprinting + // because of GREASE (RFC 8701) + // older fingerprints + UAs are still sustainable. + for _, h := range driverOptions.Headers { + if h.Key != "" { + if strings.ToLower(h.Key) == "user-agent" { + continue + } + req.Header.Set(h.Key, h.Value) + logger.Debugf("[scraper] adding header <%s:%s>", h.Key, h.Value) + } + } + resp, err := client.Do(req) if err != nil { return nil, err