Add User Agent to image download reqs (#1222)

This commit is contained in:
bnkai 2021-03-23 23:12:11 +02:00 committed by GitHub
parent 73a8bad1bc
commit 68d4a4fe42
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 0 deletions

View file

@ -2,6 +2,7 @@ package scraper
import (
"crypto/tls"
"fmt"
"io/ioutil"
"net/http"
"strings"
@ -113,6 +114,10 @@ func getImage(url string, globalConfig GlobalConfig) (*string, error) {
return nil, err
}
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("http error %d", resp.StatusCode)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)

View file

@ -79,6 +79,10 @@ func loadURL(url string, scraperConfig config, globalConfig GlobalConfig) (io.Re
if err != nil {
return nil, err
}
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("http error %d", resp.StatusCode)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)

View file

@ -2,6 +2,7 @@ package utils
import (
"crypto/md5"
"crypto/tls"
"encoding/base64"
"fmt"
"io/ioutil"
@ -33,6 +34,10 @@ func ProcessImageInput(imageInput string) ([]byte, error) {
// ReadImageFromURL returns image data from a URL
func ReadImageFromURL(url string) ([]byte, error) {
client := &http.Client{
Transport: &http.Transport{ // ignore insecure certificates
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
},
Timeout: imageGetTimeout,
}
@ -47,6 +52,7 @@ func ReadImageFromURL(url string) ([]byte, error) {
if req.URL.Scheme != "" {
req.Header.Set("Referer", req.URL.Scheme+"://"+req.Host+"/")
}
req.Header.Set("User-Agent", GetUserAgent())
resp, err := client.Do(req)
@ -54,6 +60,10 @@ func ReadImageFromURL(url string) ([]byte, error) {
return nil, err
}
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("http error %d", resp.StatusCode)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)

36
pkg/utils/user_agent.go Normal file
View file

@ -0,0 +1,36 @@
package utils
import "runtime"
// Browser User-Agent strings sent with outgoing HTTP requests.
// The values were taken from https://user-agents.net.
const (
	// Safari is a desktop Safari 14.0.3 User-Agent on macOS 10.15.6.
	// NOTE(review): the trailing "/iY0wnXbs-59" token does not look like part
	// of a stock Safari User-Agent — confirm whether it is intentional.
	Safari = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15/iY0wnXbs-59"

	// FirefoxWindows is a Firefox 88 User-Agent on 64-bit Windows 10.
	FirefoxWindows = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0"

	// FirefoxLinux is a Firefox 88 User-Agent on x86_64 Linux.
	FirefoxLinux = "Mozilla/5.0 (X11; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0"

	// FirefoxLinuxArm is a Firefox 86 User-Agent on 32-bit ARM (armv7l) Linux.
	FirefoxLinuxArm = "Mozilla/5.0 (X11; Linux armv7l; rv:86.0) Gecko/20100101 Firefox/86.0"

	// FirefoxLinuxArm64 is a Firefox 86 User-Agent on 64-bit ARM (aarch64) Linux.
	FirefoxLinuxArm64 = "Mozilla/5.0 (X11; Linux aarch64; rv:86.0) Gecko/20100101 Firefox/86.0"
)
// GetUserAgent picks the browser User-Agent string that corresponds to the
// operating system and CPU architecture this binary is running on.
//
// macOS ("darwin") gets the Safari string and Windows gets the
// Firefox-on-Windows string, regardless of architecture. Linux on "arm" or
// "arm64" gets the matching Firefox-on-ARM string. Every other combination,
// including other Linux architectures and any other operating system, falls
// back to the Firefox on x86_64 Linux string.
func GetUserAgent() string {
	goos := runtime.GOOS
	goarch := runtime.GOARCH

	switch {
	case goos == "darwin":
		return Safari
	case goos == "windows":
		return FirefoxWindows
	case goos == "linux" && goarch == "arm":
		return FirefoxLinuxArm
	case goos == "linux" && goarch == "arm64":
		return FirefoxLinuxArm64
	}

	// amd64 Linux, unrecognised Linux architectures and unrecognised
	// operating systems all share the same fallback.
	return FirefoxLinux
}