From 5ca13c71b38fefb014704dd8ec373239c3656d6f Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Mon, 13 Sep 2021 13:24:35 -0500
Subject: [PATCH] Adding flaresolverr_proxy.

---
 fanficfare/configurable.py       |  11 ++-
 fanficfare/fetcher.py            |  18 +++-
 fanficfare/flaresolverr_proxy.py | 190 +++++++++++++++++++++++++++++++
 3 files changed, 215 insertions(+), 4 deletions(-)
 create mode 100644 fanficfare/flaresolverr_proxy.py

diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py
index d1ab85e8..d784d0c5 100644
--- a/fanficfare/configurable.py
+++ b/fanficfare/configurable.py
@@ -42,6 +42,7 @@ except ImportError:
 from . import exceptions
 from . import fetcher
 from . import nsapa_proxy
+from . import flaresolverr_proxy
 
 ## has to be up here for brotli-dict to load correctly.
 from .browsercache import BrowserCache
@@ -197,6 +198,7 @@ def get_valid_set_options():
                'use_cloudscraper':(None,None,boollist),
                'use_basic_cache':(None,None,boollist),
                'use_nsapa_proxy':(None,None,boollist),
+               'use_flaresolverr_proxy':(None,None,boollist),
 
                ## currently, browser_cache_path is assumed to be
                ## shared and only ffnet uses it so far
@@ -491,6 +493,9 @@ def get_valid_keywords():
                  'use_nsapa_proxy',
                  'nsapa_proxy_address',
                  'nsapa_proxy_port',
+                 'use_flaresolverr_proxy',
+                 'flaresolverr_proxy_address',
+                 'flaresolverr_proxy_port',
                  'browser_cache_path',
                  'browser_cache_age_limit',
                  'user_agent',
@@ -989,7 +994,11 @@ class Configuration(ConfigParser):
             cookiejar = self.get_fetcher().get_cookiejar()
             # save and re-apply cookiejar when make_new.
         if not self.fetcher or make_new:
-            if self.getConfig('use_nsapa_proxy',False):
+
+            if self.getConfig('use_flaresolverr_proxy',False):
+                logger.debug("use_flaresolverr_proxy:%s"%self.getConfig('use_flaresolverr_proxy'))
+                fetchcls = flaresolverr_proxy.FlareSolverr_ProxyFetcher
+            elif self.getConfig('use_nsapa_proxy',False):
                 logger.debug("use_nsapa_proxy:%s"%self.getConfig('use_nsapa_proxy'))
                 fetchcls = nsapa_proxy.NSAPA_ProxyFetcher
             elif self.getConfig('use_cloudscraper',False):
diff --git a/fanficfare/fetcher.py b/fanficfare/fetcher.py
index 57984c97..2714bf05 100644
--- a/fanficfare/fetcher.py
+++ b/fanficfare/fetcher.py
@@ -291,10 +291,11 @@ class BrowserCacheDecorator(FetcherDecorator):
             usecache=usecache)
 
 class FetcherResponse(object):
-    def __init__(self,content,redirecturl=None,fromcache=False):
+    def __init__(self,content,redirecturl=None,fromcache=False,json=None):
         self.content = content
         self.redirecturl = redirecturl
         self.fromcache = fromcache
+        self.json = json
 
 class BasicCookieJar(LWPCookieJar,object):
     def __init__(self,*args,**kargs):
@@ -453,7 +454,7 @@ class RequestsFetcher(Fetcher):
     def use_verify(self):
         return not self.getConfig('use_ssl_unverified_context',False)
 
-    def request(self,method,url,headers=None,parameters=None):
+    def request(self,method,url,headers=None,parameters=None,json=None):
         '''Returns a FetcherResponse regardless of mechanism'''
         if method not in ('GET','POST'):
             raise NotImplementedError()
@@ -463,14 +464,25 @@ class RequestsFetcher(Fetcher):
             resp = self.get_requests_session().request(method, url,
                                                        headers=headers,
                                                        data=parameters,
+                                                       json=json,
                                                        verify=self.use_verify())
             logger.debug("response code:%s"%resp.status_code)
             resp.raise_for_status() # raises RequestsHTTPError if error code.
             # consider 'cached' if from file.
             fromcache = resp.url.startswith('file:')
+            ## currently only saving response json if the input was json.
+            ## for flaresolverr_proxy
+            resp_json = None
+            if json:
+                try:
+                    resp_json = resp.json()
+                except ValueError:
+                    logger.debug("response body was not valid JSON")
+            # logger.debug(resp_json)
             return FetcherResponse(resp.content,
                                    resp.url,
-                                   fromcache)
+                                   fromcache,
+                                   resp_json)
         except RequestsHTTPError as e:
             ## not RequestsHTTPError(requests.exceptions.HTTPError) or
             ## .six.moves.urllib.error import HTTPError because we
diff --git a/fanficfare/flaresolverr_proxy.py b/fanficfare/flaresolverr_proxy.py
new file mode 100644
index 00000000..57f7057e
--- /dev/null
+++ b/fanficfare/flaresolverr_proxy.py
@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import base64
+import time
+import json
+import logging
+logger = logging.getLogger(__name__)
+
+from . import exceptions
+from .fetcher import RequestsFetcher, FetcherResponse, make_log
+from .six.moves.http_cookiejar import Cookie
+
+class FlareSolverr_ProxyFetcher(RequestsFetcher):
+    '''Fetcher that sends every request through a FlareSolverr proxy.'''
+
+    def __init__(self, getConfig_fn, getConfigList_fn):
+        logger.debug("using FlareSolverr_ProxyFetcher")
+        super(FlareSolverr_ProxyFetcher, self).__init__(getConfig_fn,
+                                                        getConfigList_fn)
+        # Plain RequestsFetcher.request, used to talk to the proxy itself.
+        self.super_request = super(FlareSolverr_ProxyFetcher,self).request
+
+    def request(self, method, url, headers=None, parameters=None):
+        '''Returns a FetcherResponse regardless of mechanism.
+
+        The request is wrapped in a FlareSolverr 'request.get' or
+        'request.post' command and POSTed as JSON to the proxy at
+        flaresolverr_proxy_address:flaresolverr_proxy_port (defaults
+        localhost:8191).  Cookies from self.cookiejar are sent along
+        and cookies in the solution are stored back into it.
+
+        NOTE: headers and parameters are accepted for interface
+        compatibility but are not forwarded to the proxy.
+
+        Raises NotImplementedError for methods other than GET/POST
+        and exceptions.HTTPErrorFFF when the proxy reports an error
+        or the solved page's status is not 200.
+        '''
+        if method not in ('GET','POST'):
+            raise NotImplementedError()
+
+        ## XXX
+        ##
+        ## create, use then destroy a session on the proxy?  Would
+        ## need to add some kind of 'end session' thing.  Proc wide
+        ## singleton with session value?
+
+        logger.debug(
+            make_log('FlareSolverr_ProxyFetcher', method, url, hit='REQ', bar='-'))
+        cmd = ('request.'+method).lower()
+
+        resp = self.super_request('POST',
+                                  'http://'+self.getConfig("flaresolverr_proxy_address", "localhost")+\
+                                      ':'+self.getConfig("flaresolverr_proxy_port", '8191')+'/v1',
+                                  headers={'Content-Type':'application/json'},
+                                  json={'cmd': cmd,
+                                        'url':url,
+                                        #'userAgent': 'Mozilla/5.0',
+                                        'maxTimeout': 60000,
+                                        'download': True,
+                                        # causes response to be base64
+                                        # encoded which makes images
+                                        # work.
+                                        'cookies':cookiejar_to_jsonable(self.cookiejar)
+                                        }
+                                  )
+        fs_json = resp.json
+        if not isinstance(fs_json, dict):
+            # resp.json is None when the proxy's reply could not be
+            # parsed as JSON; report that instead of a TypeError.
+            raise exceptions.HTTPErrorFFF(
+                url,
+                428,
+                "FlareSolverr proxy returned a non-JSON response"
+                )
+
+        # 'solution' may be missing or null in error replies.
+        solution = fs_json.get('solution') or {}
+        if fs_json.get('status') == 'ok' and 'status' in solution:
+            status_code = solution['status']
+            logger.debug("response code:%s"%status_code)
+            logger.debug(json.dumps(fs_json, sort_keys=True,
+                                    indent=2, separators=(',', ':')))
+            data = base64.b64decode(solution.get('response') or '')
+            url = solution.get('url', url)
+            for c in cookiejson_to_jarable(solution.get('cookies') or []):
+                self.cookiejar.set_cookie(c)
+        else:
+            logger.debug("flaresolverr error resp:")
+            logger.debug(json.dumps(fs_json, sort_keys=True,
+                                    indent=2, separators=(',', ':')))
+            status_code = 428  # 404 & 410 trip StoryDoesNotExist
+                               # 428 ('Precondition Required') gets the
+                               # error_msg through to the user.
+            data = fs_json.get('message', 'FlareSolverr proxy error')
+        if status_code != 200:
+            raise exceptions.HTTPErrorFFF(
+                url,
+                status_code,
+                data
+                )
+
+        return FetcherResponse(data,
+                               url,
+                               False)
+
+def cookiejar_to_jsonable(cookiejar):
+    '''Return the jar's cookies as a list of JSON-serializable dicts.'''
+    retval = []
+    for c in cookiejar:
+        cval = {
+            'name':c.name,
+            'value':c.value,
+            'domain':c.domain,
+            'path':c.path,
+            }
+        if c.expires:
+            cval['expires'] = c.expires
+
+        retval.append(cval)
+    return retval
+
+def cookiejson_to_jarable(data):
+    '''Return a list of Cookie objects built from FlareSolverr cookie dicts.'''
+    retval = []
+    for c in data:
+        path = c.get('path')
+        expires = c.get('expires')
+        # FlareSolverr reports session cookies with expires -1 (see
+        # the sample below).  Stored as-is, the jar would consider
+        # them already expired, so keep them as expires=None/discard.
+        session = expires is None or expires < 0
+        retval.append(Cookie(None, # version
+                             c['name'],
+                             c['value'],
+                             None, # port
+                             False, # port_specified,
+                             c['domain'],
+                             True, # domain_specified,
+                             c['domain'].startswith('.'), # domain_initial_dot,
+                             path,
+                             bool(path), # path_specified,
+                             c.get('secure', False),
+                             None if session else expires,
+                             session, # discard
+                             None, # comment,
+                             None, # comment_url,
+                             {}, # rest
+                             ))
+    return retval
+
+# "cookies":[
+#     {
+#         "domain":"www.hentai-foundry.com",
+#         "expires":-1,
+#         "httpOnly":false,
+#         "name":"YII_CSRF_TOKEN",
+#         "path":"/",
+#         "secure":false,
+#         "session":true,
+#         "size":164,
+#         "value":"952f8cf13b88ad98a3ea485a7360b9671f026b85s%3A88%3A%22YWFRTn43ekJFUkFzeUJrSXdmQTRzbXgya3pCNGd1d26UvTvOzIHijrHnfb3ttZYX2RAJX4HbBjbBWifMIUjjJQ%3D%3D%22%3B"
+#     },
+#     {
+#         "domain":"www.hentai-foundry.com",
+#         "expires":-1,
+#         "httpOnly":false,
+#         "name":"PHPSESSID",
+#         "path":"/",
+#         "secure":false,
+#         "session":true,
+#         "size":59,
+#         "value":"Uiw6N47QIPB29hs-gHC161vH%2CUjjMbrtNrVKb0ZxatDtkdoj"
+#     }
+# ],