From eb63b8bae51ca2c5ce33b116868a51f3d431bce5 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Thu, 28 Jan 2021 13:35:32 -0600 Subject: [PATCH] Refactor cookiejar into Fetcher. --- calibre-plugin/fff_plugin.py | 12 ++++-- calibre-plugin/jobs.py | 5 ++- fanficfare/cli.py | 41 +++++++++++--------- fanficfare/configurable.py | 18 ++++----- fanficfare/fetcher.py | 73 ++++++++++++++++++++---------------- 5 files changed, 85 insertions(+), 64 deletions(-) diff --git a/calibre-plugin/fff_plugin.py b/calibre-plugin/fff_plugin.py index f225c096..c4a4292d 100644 --- a/calibre-plugin/fff_plugin.py +++ b/calibre-plugin/fff_plugin.py @@ -1263,9 +1263,14 @@ class FanFicFarePlugin(InterfaceAction): if 'pagecache' not in options: options['pagecache'] = configuration.get_empty_pagecache() configuration.set_pagecache(options['pagecache']) - if 'cookiejar' not in options: - options['cookiejar'] = configuration.get_empty_cookiejar() - configuration.set_cookiejar(options['cookiejar']) + + ## save and share cookiejar between all downloads. + if 'cookiejar' in options: + configuration.set_cookiejar(options['cookiejar']) + else: + ## *not* giving a cookiejar filename now so it's only in + ## *memory and not writing to disk all the time. + options['cookiejar'] = configuration.get_cookiejar() if collision in (CALIBREONLY, CALIBREONLYSAVECOL): ## Getting metadata from configured column. @@ -1689,6 +1694,7 @@ class FanFicFarePlugin(InterfaceAction): cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar', dir=options['tdir']) + ## assumed to be a LWPCookieJar options['cookiejar'].save(cookiejarfile.name, ignore_discard=True, ignore_expires=True) diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index 796ea777..d9725524 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -179,8 +179,11 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x): adapter.password = book['password'] adapter.setChaptersRange(book['begin'],book['end']) + ## each download starts with a new copy of the cookiejar + ## from the FG process. They are not shared between BG + ## downloads. configuration.load_cookiejar(options['cookiejarfile']) - #logger.debug("cookiejar:%s"%configuration.cookiejar) + configuration.set_pagecache(options['pagecache']) story = adapter.getStoryMetadataOnly() diff --git a/fanficfare/cli.py b/fanficfare/cli.py index 2dc50487..77244d6a 100644 --- a/fanficfare/cli.py +++ b/fanficfare/cli.py @@ -62,7 +62,6 @@ try: get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub) from calibre_plugins.fanficfare_plugin.fanficfare.geturls import get_urls_from_page, get_urls_from_imap from calibre_plugins.fanficfare_plugin.fanficfare.six.moves import configparser - from calibre_plugins.fanficfare_plugin.fanficfare.six.moves import http_cookiejar as cl from calibre_plugins.fanficfare_plugin.fanficfare.six import text_type as unicode except ImportError: from fanficfare import adapters, writers, exceptions @@ -71,7 +70,6 @@ except ImportError: get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub) from fanficfare.geturls import get_urls_from_page, get_urls_from_imap from fanficfare.six.moves import configparser - from fanficfare.six.moves import http_cookiejar as cl from fanficfare.six import text_type as unicode def write_story(config, adapter, writeformat, @@ -248,17 +246,6 @@ def main(argv=None, parser.print_help(); return - if options.save_cache: - try: - with open(global_cache,'rb') as jin: - options.pagecache = pickle_load(jin) - options.cookiejar = cl.LWPCookieJar() - options.cookiejar.load(global_cookies) - except Exception as e: - ## This is not uncommon, will happen when starting a new - ## cache, for example. - print("Didn't load --save-cache %s"%e) - if options.list: configuration = get_configuration(options.list, passed_defaultsini, @@ -611,18 +598,36 @@ def get_configuration(url, if options.progressbar: configuration.set('overrides','progressbar','true') - ## Share pagecache and cookiejar between multiple downloads. + ## All CLI downloads are sequential and share one cookiejar, + ## loaded the first time through here. + if not hasattr(options,'cookiejar'): + ## only loaded/saved if has a filename + ## only has a filename if options.save_cache + if options.save_cache: + options.cookiejar = configuration.get_cookiejar(filename=global_cookies) + else: + options.cookiejar = configuration.get_cookiejar() + else: + configuration.set_cookiejar(options.cookiejar) + + ## Share pagecache between multiple downloads. if not hasattr(options,'pagecache'): options.pagecache = configuration.get_empty_pagecache() - if not hasattr(options,'cookiejar'): - options.cookiejar = configuration.get_empty_cookiejar() + + cookie_file = None if options.save_cache: + cookie_file = global_cookies save_cache = global_cache - save_cookies = global_cookies + try: + with open(global_cache,'rb') as jin: + options.pagecache = pickle_load(jin) + except Exception as e: + ## This is not uncommon, will happen when starting a new + ## cache, for example. + print("Didn't load --save-cache %s"%e) else: save_cache = save_cookies = None configuration.set_pagecache(options.pagecache,save_cache) - configuration.set_cookiejar(options.cookiejar,save_cookies) return configuration diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index 878edd4a..f3842dbe 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -962,19 +962,17 @@ class Configuration(ConfigParser): def set_sleep(self,val): return self.get_fetcher().set_sleep(val) - def get_empty_cookiejar(self): - return self.get_fetcher().cache.get_empty_cookiejar() + def get_cookiejar(self,filename=None): + return self.get_fetcher().get_cookiejar(filename) - def get_cookiejar(self): - return self.get_fetcher().cache.get_cookiejar() + def set_cookiejar(self,cookiejar): + self.get_fetcher().set_cookiejar(cookiejar) - def set_cookiejar(self,cookiejar,cookiejar_file=None): - self.get_fetcher().cache.set_cookiejar(cookiejar,cookiejar_file) - ## XXX will need to move cookiejar - return self.get_fetcher().set_cookiejar(cookiejar,cookiejar_file) + def load_cookiejar(self,filename=None): + self.get_fetcher().load_cookiejar(filename) - def load_cookiejar(self,filename): - return self.get_fetcher().cache.load_cookiejar(filename) + def save_cookiejar(self,filename=None): + self.get_fetcher().save_cookiejar(filename) def get_empty_pagecache(self): return self.get_fetcher().cache.get_empty_pagecache() diff --git a/fanficfare/fetcher.py b/fanficfare/fetcher.py index c4c677ab..d87a43bb 100644 --- a/fanficfare/fetcher.py +++ b/fanficfare/fetcher.py @@ -21,7 +21,7 @@ import random # py2 vs py3 transition from .six.moves.urllib.parse import quote_plus -from .six.moves import http_cookiejar as cl +from .six.moves.http_cookiejar import LWPCookieJar from .six import text_type as unicode from .six import ensure_binary, ensure_text @@ -51,28 +51,8 @@ logger = logging.getLogger(__name__) class Cache(object): def __init__(self): - self.cookiejar = self.get_empty_cookiejar() - self.pagecache = self.get_empty_pagecache() self.save_cache_file = None - self.save_cookiejar_file = None - - def get_empty_cookiejar(self): - return cl.LWPCookieJar() - - def get_cookiejar(self): - return self.cookiejar - - def set_cookiejar(self,cj,save_cookiejar_file=None): - self.cookiejar = cj - self.save_cookiejar_file = save_cookiejar_file - - def load_cookiejar(self,filename): - ''' - Needs to be called after adapter create, but before any fetchs - are done. Takes file *name*. - ''' - self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True) def get_empty_pagecache(self): return {} @@ -105,9 +85,6 @@ class Cache(object): if self.save_cache_file: with open(self.save_cache_file,'wb') as jout: pickle.dump(self._get_pagecache(),jout,protocol=2) - if self.save_cookiejar_file: - self.get_cookiejar().save(self.save_cookiejar_file) - class FetcherResponse(object): def __init__(self,content,redirecturl=None,fromcache=False): @@ -124,8 +101,35 @@ class Fetcher(object): self.cache = Cache() - def set_cookiejar(self,cj,save_cookiejar_file=None): - pass + self.cookiejar = None + + def get_cookiejar(self,filename=None): + if self.cookiejar is None: + self.cookiejar = LWPCookieJar(filename=filename) + if filename: + try: + self.cookiejar.load(ignore_discard=True, ignore_expires=True) + except: + logger.debug("Failed to load cookiejar(%s), going on without."%filename) + return self.cookiejar + + def set_cookiejar(self,cookiejar): + self.cookiejar = cookiejar + + def load_cookiejar(self,filename): + ''' + Needs to be called after adapter create, but before any fetchs + are done. Takes file *name*. + ''' + # get_cookiejar() creates an empty jar if not already. + self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True) + + def save_cookiejar(self,filename=None): + if filename or self.get_cookiejar().filename: + ## raises exception on save w/o filename + self.get_cookiejar().save(filename or self.get_cookiejar().filename, + ignore_discard=True, + ignore_expires=True) def _progressbar(self): if self.getConfig('progressbar'): @@ -201,6 +205,9 @@ class Fetcher(object): headers=headers, parameters=parameters) data = fetchresp.content + + self.save_cookiejar() + self._progressbar() self.cache.set_to_cache(cachekey,data,fetchresp.redirecturl) if url != fetchresp.redirecturl: # cache both? @@ -232,10 +239,12 @@ class RequestsFetcher(Fetcher): super(RequestsFetcher,self).__init__(getConfig_fn,getConfigList_fn) self.requests_session = None self.retries = self.make_retries() - self.cookiejar = None - def set_cookiejar(self,cj,save_cookiejar_file=None): - self.cookiejar = cj + def set_cookiejar(self,cookiejar): + super(RequestsFetcher,self).set_cookiejar(cookiejar) + ## in case where cookiejar is set second + if self.requests_session: + self.requests_session.cookies = self.cookiejar def make_retries(self): return Retry(total=4, @@ -257,7 +266,8 @@ class RequestsFetcher(Fetcher): if not self.requests_session: self.requests_session = self.make_sesssion() self.do_mounts(self.requests_session) - if self.cookiejar: + ## in case where cookiejar is set first + if self.cookiejar is not None: # present but *empty* jar==False self.requests_session.cookies = self.cookiejar return self.requests_session @@ -317,8 +327,7 @@ class CloudScraperFetcher(RequestsFetcher): def make_headers(self,url,referer=None): headers = super(CloudScraperFetcher,self).make_headers(url, - referer=referer, - headers=headers) + referer=referer) ## let cloudscraper do its thing with UA. if 'User-Agent' in headers: del headers['User-Agent']