Mirror of https://github.com/JimmXinu/FanFicFare.git
Refactor sleep and progressbar into FetcherDecorators
This commit is contained in:
parent 67d4eb46ee
commit 3f6793b301

3 changed files with 169 additions and 60 deletions
@@ -528,7 +528,8 @@ def get_configuration(url,
                       chaptercount=None,
                       output_filename=None):
     try:
-        configuration = Configuration(adapters.getConfigSectionsFor(url), options.format)
+        configuration = Configuration(adapters.getConfigSectionsFor(url),
+                                      options.format)
     except exceptions.UnknownSite as e:
         if options.list or options.normalize or options.downloadlist:
             # list for page doesn't have to be a supported site.
@@ -537,8 +537,10 @@ class Configuration(ConfigParser):
         site = sections[-1] # first section is site DN.
         ConfigParser.__init__(self)

-        self.fetcher = None # the network layer for getting pages
-        self.cache = None # the caching layer for getting pages
+        self.fetcher = None # the network layer for getting pages
+        # the caching layer for getting pages, created now for
+        # get_empty_pagecache() etc.
+        self.cache = fetcher.BaseCache()
         self.opener = None # used for _filelist

         self.lightweight = lightweight
@@ -579,8 +581,10 @@ class Configuration(ConfigParser):

         self.url_config_set = False

-        ## XXX make sure fetcher & cache exist--revisit
-        self.get_fetcher()
+        # ## XXX make sure fetcher & cache exist--revisit Breaks
+        # ## use_cloudscraper in CLI because CONFIG FILES HAVEN'T BEEN
+        # ## READ YET.
+        # self.get_fetcher()

     def section_url_names(self,domain,section_url_f):
         ## domain is passed as a method to limit the damage if/when an
@@ -951,14 +955,29 @@ class Configuration(ConfigParser):

     def get_fetcher(self):
         if not self.fetcher:
             logger.error(self.getConfig('use_cloudscraper'))
             if self.getConfig('use_cloudscraper',False):
                 fetchcls = fetcher.CloudScraperFetcher
             else:
                 fetchcls = fetcher.RequestsFetcher
             self.fetcher = fetchcls(self.getConfig,
                                     self.getConfigList)
-            self.cache = fetcher.BaseCache()
+
+            ########################################################
+            ## Adding fetcher decorators. Order matters--last in,
+            ## first called. If ProgressBarDecorator is added before
+            ## Cache, it's never called for cache hits, for example.
+
+            fetcher.SleepDecorator().decorate_fetcher(self.fetcher)
+
+            # cache decorator terminates the chain when found. Cache
+            # created in __init__ because of get_empty_pagecache()
+            # etc, but not used until now.
+            self.cache.decorate_fetcher(self.fetcher)
+
+            if self.getConfig('progressbar'):
+                fetcher.ProgressBarDecorator().decorate_fetcher(self.fetcher)
+
         return self.fetcher

     ## XXX which should be in requestable?
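The ordering comment in get_fetcher() is the heart of the refactor: each decorator re-binds the fetcher's do_request to a functools.partial that keeps a reference to the previous do_request, so the last decorator applied becomes the outermost wrapper and runs first. The following standalone sketch (hypothetical toy classes, not FanFicFare's own) shows why adding the cache decorator after the sleep decorator lets cache hits skip the sleep entirely:

from functools import partial

class ToyFetcher(object):
    def do_request(self, url):
        print("network fetch: " + url)
        return "page for " + url

class ToyDecorator(object):
    def decorate(self, fetcher):
        # Re-bind do_request to a wrapper that remembers the previous
        # do_request (chainfn) -- same shape as decorate_fetcher() below.
        fetcher.do_request = partial(self.wrapped, fetcher, fetcher.do_request)

    def wrapped(self, fetcher, chainfn, url):
        return chainfn(url)

class ToySleep(ToyDecorator):
    def wrapped(self, fetcher, chainfn, url):
        print("sleep (only reached when the cache misses)")
        return chainfn(url)

class ToyCache(ToyDecorator):
    def __init__(self):
        self.pages = {}

    def wrapped(self, fetcher, chainfn, url):
        if url in self.pages:
            print("cache hit -- inner decorators never run")
            return self.pages[url]
        self.pages[url] = chainfn(url)
        return self.pages[url]

f = ToyFetcher()
ToySleep().decorate(f)   # added first -> innermost, called last
ToyCache().decorate(f)   # added last  -> outermost, called first
f.do_request("https://example.com/1")  # sleeps, fetches, caches
f.do_request("https://example.com/1")  # cache hit; ToySleep never runs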
@@ -48,18 +48,125 @@ logger = logging.getLogger(__name__)
 # import http.client as http_client
 # http_client.HTTPConnection.debuglevel = 5

-'''
-baseclass should be a Fetcher
-
-BaseCacheFetcher class is dynamically created each time
-create_cachedfetcher() is called. Need to share underlying data
-structure or do differently to share cache between configuration
-objects?
-
-Make something like LWPCookieJar is for cookies?
-'''
-class BaseCache(object):
+class FetcherDecorator(object):
+    def __init__(self):
+        pass
+
+    def decorate_fetcher(self,fetcher):
+        # replace fetcher's do_request with a func that wraps it.
+        # can be chained.
+        fetcher.do_request = partial(self.fetcher_do_request,
+                                     fetcher,
+                                     fetcher.do_request)
+
+    def fetcher_do_request(self,
+                           fetcher,
+                           chainfn,
+                           method,
+                           url,
+                           parameters=None,
+                           extrasleep=None,
+                           referer=None,
+                           usecache=True):
+        ## can use fetcher.getConfig()/getConfigList().
+        fetchresp = chainfn(
+            method,
+            url,
+            parameters=parameters,
+            extrasleep=extrasleep,
+            referer=referer,
+            usecache=usecache)
+
+        return fetchresp
+
+class ProgressBarDecorator(FetcherDecorator):
+    def fetcher_do_request(self,
+                           fetcher,
+                           chainfn,
+                           method,
+                           url,
+                           parameters=None,
+                           extrasleep=None,
+                           referer=None,
+                           usecache=True):
+        logger.debug("ProgressBarDecorator fetcher_do_request")
+        fetchresp = chainfn(
+            method,
+            url,
+            parameters=parameters,
+            extrasleep=extrasleep,
+            referer=referer,
+            usecache=usecache)
+        ## added ages ago for CLI to give a line of dots showing it's
+        ## doing something.
+        logger.debug("..")
+        sys.stdout.write('.')
+        sys.stdout.flush()
+        return fetchresp
+
+class SleepDecorator(FetcherDecorator):
+    def __init__(self):
+        super(SleepDecorator,self).__init__()
+        self.override_sleep = None
+
+    def decorate_fetcher(self,fetcher):
+        super(SleepDecorator,self).decorate_fetcher(fetcher)
+        fetcher.set_sleep = partial(self.fetcher_set_sleep,
+                                    fetcher,
+                                    fetcher.set_sleep)
+
+    def fetcher_set_sleep(self,
+                          fetcher,
+                          chainfn,
+                          val):
+        logger.debug("\n===========\n set sleep time %s\n==========="%val)
+        self.override_sleep = val
+        return chainfn(val)
+
+    def fetcher_do_request(self,
+                           fetcher,
+                           chainfn,
+                           method,
+                           url,
+                           parameters=None,
+                           extrasleep=None,
+                           referer=None,
+                           usecache=True):
+        logger.debug("SleepDecorator fetcher_do_request")
+        fetchresp = chainfn(
+            method,
+            url,
+            parameters=parameters,
+            extrasleep=extrasleep,
+            referer=referer,
+            usecache=usecache)
+
+        # don't sleep cached results. Usually MemCache results will
+        # be before sleep, but check fetchresp.fromcache for file://
+        # and other intermediate caches.
+        if not fetchresp.fromcache:
+            if extrasleep:
+                logger.debug("extra sleep:%s"%extrasleep)
+                time.sleep(float(extrasleep))
+            t = None
+            if self.override_sleep:
+                t = float(self.override_sleep)
+            elif fetcher.getConfig('slow_down_sleep_time'):
+                t = float(fetcher.getConfig('slow_down_sleep_time'))
+            ## sleep randomly between 0.5 time and 1.5 time.
+            ## So 8 would be between 4 and 12.
+            if t:
+                rt = random.uniform(t*0.5, t*1.5)
+                logger.debug("random sleep(%0.2f-%0.2f):%0.2f"%(t*0.5, t*1.5,rt))
+                time.sleep(rt)
+        else:
+            logger.debug("Skip sleeps")
+
+        return fetchresp
+
+class BaseCache(FetcherDecorator):
     def __init__(self):
         super(BaseCache,self).__init__()
         self.pagecache = self.get_empty_pagecache()
         self.save_cache_file = None
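With FetcherDecorator as a base class, new cross-cutting behavior only needs to override fetcher_do_request and delegate to chainfn. A hedged sketch of a hypothetical timing decorator follows; it is not part of this commit, and the import path is assumed from the repository layout:

import time
import logging
from fanficfare import fetcher  # assumed import path

logger = logging.getLogger(__name__)

class TimingDecorator(fetcher.FetcherDecorator):
    # Hypothetical example: log how long each request takes.
    def fetcher_do_request(self,
                           fetcher_obj,
                           chainfn,
                           method,
                           url,
                           parameters=None,
                           extrasleep=None,
                           referer=None,
                           usecache=True):
        start = time.time()
        fetchresp = chainfn(method,
                            url,
                            parameters=parameters,
                            extrasleep=extrasleep,
                            referer=referer,
                            usecache=usecache)
        logger.debug("%s fetched in %.2fs" % (url, time.time() - start))
        return fetchresp

# Wiring would mirror Configuration.get_fetcher():
#   TimingDecorator().decorate_fetcher(self.fetcher)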
@@ -95,23 +202,21 @@ class BaseCache(object):
         with open(self.save_cache_file,'wb') as jout:
             pickle.dump(self.get_pagecache(),jout,protocol=2)

-    def decorate_fetcher(self,fetcher):
-        # replace
-        fetcher.do_request = partial(self.do_request,fetcher.do_request)
-
-    def do_request(self,
-                   chainfn,
-                   method,
-                   url,
-                   parameters=None,
-                   extrasleep=None,
-                   referer=None,
-                   usecache=True):
+    def fetcher_do_request(self,
+                           fetcher,
+                           chainfn,
+                           method,
+                           url,
+                           parameters=None,
+                           extrasleep=None,
+                           referer=None,
+                           usecache=True):
         '''
         When should cache be cleared or not used? logins, primarily
         Note that usecache=False prevents lookup, but cache still saves
         result
         '''
+        logger.debug("BaseCache fetcher_do_request")
         cachekey=self.make_cachekey(url, parameters)

         if usecache and self.has_cachekey(cachekey) and not cachekey.startswith('file:'):
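The docstring above makes a subtle promise worth spelling out: usecache=False only disables the lookup; the fresh response is still written back into the cache afterwards (in the continuation of this method, next hunk). A minimal standalone sketch of that semantic, using hypothetical names:

def cached_request(cache, fetch, url, usecache=True):
    # usecache=False skips the lookup...
    if usecache and url in cache:
        return cache[url]
    data = fetch(url)
    # ...but the fresh result is still recorded for later requests.
    cache[url] = data
    return data

pages = {}
# forces a fetch (e.g. after a login), yet still caches the result:
cached_request(pages, lambda u: "<html/>", "https://example.com", usecache=False)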
@@ -131,9 +236,14 @@ class BaseCache(object):

         data = fetchresp.content

-        self.set_to_cache(cachekey,data,fetchresp.redirecturl)
-        if url != fetchresp.redirecturl: # cache both?
-            self.set_to_cache(cachekey,data,url)
+        ## don't re-cache, which includes file://, marked fromcache
+        ## down in RequestsFetcher. I can foresee using the dev CLI
+        ## saved-cache and wondering why file changes aren't showing
+        ## up.
+        if not fetchresp.fromcache:
+            self.set_to_cache(cachekey,data,fetchresp.redirecturl)
+            if url != fetchresp.redirecturl: # cache both?
+                self.set_to_cache(cachekey,data,url)
         return fetchresp

 class FetcherResponse(object):
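The fromcache flag on FetcherResponse is what keeps the decorators from stepping on each other: RequestsFetcher marks file:// responses as cached (last hunk below), and both BaseCache (above) and SleepDecorator check the flag so already-cached data is neither re-cached nor slept on. A rough stand-in for the response object and the sleep check, assuming the constructor signature used in this commit:

import random
import time

class FetcherResponseSketch(object):
    # Minimal stand-in for FetcherResponse(content, redirecturl, fromcache).
    def __init__(self, content, redirecturl=None, fromcache=False):
        self.content = content
        self.redirecturl = redirecturl
        self.fromcache = fromcache

def sleep_if_fresh(resp, sleep_time):
    # Mirrors SleepDecorator: cached results (including file://) never sleep;
    # fresh fetches sleep a random 0.5x-1.5x of slow_down_sleep_time,
    # so sleep_time=8 sleeps somewhere between 4 and 12 seconds.
    if not resp.fromcache:
        time.sleep(random.uniform(sleep_time * 0.5, sleep_time * 1.5))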
@@ -147,7 +257,6 @@ class Fetcher(object):
         self.getConfig = getConfig_fn
         self.getConfigList = getConfigList_fn

-        self.override_sleep = None
         self.cookiejar = None

     def get_cookiejar(self,filename=None):
@@ -178,31 +287,10 @@ class Fetcher(object):
                                 ignore_discard=True,
                                 ignore_expires=True)

-    def progressbar(self):
-        if self.getConfig('progressbar'):
-            sys.stdout.write('.')
-            sys.stdout.flush()
-
     # used by plugin for ffnet variable timing
     ## this will need to be moved. XXX
     def set_sleep(self,val):
-        # logger.debug("\n===========\n set sleep time %s\n==========="%val)
-        self.override_sleep = val
-
-    def do_sleep(self,extrasleep=None):
-        if extrasleep:
-            logger.debug("extra sleep:%s"%extrasleep)
-            time.sleep(float(extrasleep))
-        t = None
-        if self.override_sleep:
-            t = float(self.override_sleep)
-        elif self.getConfig('slow_down_sleep_time'):
-            t = float(self.getConfig('slow_down_sleep_time'))
-        ## sleep randomly between 0.5 time and 1.5 time.
-        ## So 8 would be between 4 and 12.
-        if t:
-            rt = random.uniform(t*0.5, t*1.5)
-            logger.debug("random sleep(%0.2f-%0.2f):%0.2f"%(t*0.5, t*1.5,rt))
-            time.sleep(rt)
+        pass

     def make_headers(self,url,referer=None):
         headers = {}
@@ -230,15 +318,13 @@ class Fetcher(object):
         sleeps. Passed into fetchs so it can be bypassed when
         cache hits.
         '''
+        logger.debug("fetcher do_request")
         headers = self.make_headers(url,referer=referer)
         fetchresp = self.request(method,url,
                                  headers=headers,
                                  parameters=parameters)
         data = fetchresp.content
         self.save_cookiejar()
-        self.progressbar()
-        if not url.startswith('file:'): # don't sleep for file: URLs.
-            self.do_sleep(extrasleep)
         return fetchresp

     def condition_url(self, url):
@@ -320,8 +406,11 @@ class RequestsFetcher(Fetcher):
                                 verify=verify)
            logger.debug("response code:%s"%resp.status_code)
            resp.raise_for_status() # raises RequestsHTTPError if error code.
+           # consider 'cached' if from file.
+           fromcache = resp.url.startswith('file:')
            return FetcherResponse(resp.content,
-                                  resp.url)
+                                  resp.url,
+                                  fromcache)
        except RequestsHTTPError as e:
            ## not RequestsHTTPError(requests.exceptions.HTTPError) or
            ## .six.moves.urllib.error import HTTPError because we