Refactor cookiejar into Fetcher.

This commit is contained in:
Jim Miller 2021-01-28 13:35:32 -06:00
parent aa5706f372
commit eb63b8bae5
5 changed files with 85 additions and 64 deletions

View file

@ -1263,9 +1263,14 @@ class FanFicFarePlugin(InterfaceAction):
if 'pagecache' not in options:
options['pagecache'] = configuration.get_empty_pagecache()
configuration.set_pagecache(options['pagecache'])
if 'cookiejar' not in options:
options['cookiejar'] = configuration.get_empty_cookiejar()
configuration.set_cookiejar(options['cookiejar'])
## save and share cookiejar between all downloads.
if 'cookiejar' in options:
configuration.set_cookiejar(options['cookiejar'])
else:
## *not* giving a cookiejar filename now, so it's kept only in
## memory and not written to disk all the time.
options['cookiejar'] = configuration.get_cookiejar()
if collision in (CALIBREONLY, CALIBREONLYSAVECOL):
## Getting metadata from configured column.
@ -1689,6 +1694,7 @@ class FanFicFarePlugin(InterfaceAction):
cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar',
dir=options['tdir'])
## assumed to be a LWPCookieJar
options['cookiejar'].save(cookiejarfile.name,
ignore_discard=True,
ignore_expires=True)

View file

@ -179,8 +179,11 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
adapter.password = book['password']
adapter.setChaptersRange(book['begin'],book['end'])
## each download starts with a new copy of the cookiejar
## from the FG process. They are not shared between BG
## downloads.
configuration.load_cookiejar(options['cookiejarfile'])
#logger.debug("cookiejar:%s"%configuration.cookiejar)
configuration.set_pagecache(options['pagecache'])
story = adapter.getStoryMetadataOnly()

View file

@ -62,7 +62,6 @@ try:
get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub)
from calibre_plugins.fanficfare_plugin.fanficfare.geturls import get_urls_from_page, get_urls_from_imap
from calibre_plugins.fanficfare_plugin.fanficfare.six.moves import configparser
from calibre_plugins.fanficfare_plugin.fanficfare.six.moves import http_cookiejar as cl
from calibre_plugins.fanficfare_plugin.fanficfare.six import text_type as unicode
except ImportError:
from fanficfare import adapters, writers, exceptions
@ -71,7 +70,6 @@ except ImportError:
get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub)
from fanficfare.geturls import get_urls_from_page, get_urls_from_imap
from fanficfare.six.moves import configparser
from fanficfare.six.moves import http_cookiejar as cl
from fanficfare.six import text_type as unicode
def write_story(config, adapter, writeformat,
@ -248,17 +246,6 @@ def main(argv=None,
parser.print_help();
return
if options.save_cache:
try:
with open(global_cache,'rb') as jin:
options.pagecache = pickle_load(jin)
options.cookiejar = cl.LWPCookieJar()
options.cookiejar.load(global_cookies)
except Exception as e:
## This is not uncommon, will happen when starting a new
## cache, for example.
print("Didn't load --save-cache %s"%e)
if options.list:
configuration = get_configuration(options.list,
passed_defaultsini,
@ -611,18 +598,36 @@ def get_configuration(url,
if options.progressbar:
configuration.set('overrides','progressbar','true')
## Share pagecache and cookiejar between multiple downloads.
## All CLI downloads are sequential and share one cookiejar,
## loaded the first time through here.
if not hasattr(options,'cookiejar'):
## only loaded/saved if has a filename
## only has a filename if options.save_cache
if options.save_cache:
options.cookiejar = configuration.get_cookiejar(filename=global_cookies)
else:
options.cookiejar = configuration.get_cookiejar()
else:
configuration.set_cookiejar(options.cookiejar)
## Share pagecache between multiple downloads.
if not hasattr(options,'pagecache'):
options.pagecache = configuration.get_empty_pagecache()
if not hasattr(options,'cookiejar'):
options.cookiejar = configuration.get_empty_cookiejar()
cookie_file = None
if options.save_cache:
cookie_file = global_cookies
save_cache = global_cache
save_cookies = global_cookies
try:
with open(global_cache,'rb') as jin:
options.pagecache = pickle_load(jin)
except Exception as e:
## This is not uncommon, will happen when starting a new
## cache, for example.
print("Didn't load --save-cache %s"%e)
else:
save_cache = save_cookies = None
configuration.set_pagecache(options.pagecache,save_cache)
configuration.set_cookiejar(options.cookiejar,save_cookies)
return configuration

View file

@ -962,19 +962,17 @@ class Configuration(ConfigParser):
def set_sleep(self,val):
return self.get_fetcher().set_sleep(val)
def get_empty_cookiejar(self):
return self.get_fetcher().cache.get_empty_cookiejar()
def get_cookiejar(self,filename=None):
return self.get_fetcher().get_cookiejar(filename)
def get_cookiejar(self):
return self.get_fetcher().cache.get_cookiejar()
def set_cookiejar(self,cookiejar):
self.get_fetcher().set_cookiejar(cookiejar)
def set_cookiejar(self,cookiejar,cookiejar_file=None):
self.get_fetcher().cache.set_cookiejar(cookiejar,cookiejar_file)
## XXX will need to move cookiejar
return self.get_fetcher().set_cookiejar(cookiejar,cookiejar_file)
def load_cookiejar(self,filename=None):
self.get_fetcher().load_cookiejar(filename)
def load_cookiejar(self,filename):
return self.get_fetcher().cache.load_cookiejar(filename)
def save_cookiejar(self,filename=None):
self.get_fetcher().save_cookiejar(filename)
def get_empty_pagecache(self):
return self.get_fetcher().cache.get_empty_pagecache()

View file

@ -21,7 +21,7 @@ import random
# py2 vs py3 transition
from .six.moves.urllib.parse import quote_plus
from .six.moves import http_cookiejar as cl
from .six.moves.http_cookiejar import LWPCookieJar
from .six import text_type as unicode
from .six import ensure_binary, ensure_text
@ -51,28 +51,8 @@ logger = logging.getLogger(__name__)
class Cache(object):
def __init__(self):
self.cookiejar = self.get_empty_cookiejar()
self.pagecache = self.get_empty_pagecache()
self.save_cache_file = None
self.save_cookiejar_file = None
def get_empty_cookiejar(self):
return cl.LWPCookieJar()
def get_cookiejar(self):
return self.cookiejar
def set_cookiejar(self,cj,save_cookiejar_file=None):
self.cookiejar = cj
self.save_cookiejar_file = save_cookiejar_file
def load_cookiejar(self,filename):
'''
Needs to be called after adapter create, but before any fetchs
are done. Takes file *name*.
'''
self.get_cookiejar().load(filename, ignore_discard=True, ignore_expires=True)
def get_empty_pagecache(self):
return {}
@ -105,9 +85,6 @@ class Cache(object):
if self.save_cache_file:
with open(self.save_cache_file,'wb') as jout:
pickle.dump(self._get_pagecache(),jout,protocol=2)
if self.save_cookiejar_file:
self.get_cookiejar().save(self.save_cookiejar_file)
class FetcherResponse(object):
def __init__(self,content,redirecturl=None,fromcache=False):
@ -124,8 +101,35 @@ class Fetcher(object):
self.cache = Cache()
def set_cookiejar(self,cj,save_cookiejar_file=None):
pass
self.cookiejar = None
def get_cookiejar(self,filename=None):
    '''
    Return this fetcher's cookiejar, creating it on first use.

    When no jar exists yet, a new LWPCookieJar is created with
    *filename* as its file name.  If *filename* is given, the jar is
    then loaded from that file on a best-effort basis: a failed load
    is logged at debug level and the (empty) jar is still returned.

    If a jar already exists, it is returned as-is and *filename* is
    ignored.
    '''
    if self.cookiejar is None:
        self.cookiejar = LWPCookieJar(filename=filename)
        if filename:
            try:
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)
            except Exception as e:
                ## Deliberately best-effort, but only for ordinary
                ## errors: a bare except would also swallow
                ## KeyboardInterrupt and SystemExit.
                logger.debug("Failed to load cookiejar(%s), going on without. (%s)",
                             filename, e)
    return self.cookiejar
def set_cookiejar(self,cookiejar):
    '''
    Replace the cookiejar held by this fetcher with *cookiejar*.
    '''
    self.cookiejar = cookiejar
def load_cookiejar(self,filename):
    '''
    Read cookies from the file named *filename* into this fetcher's
    cookiejar.  Needs to be called after the adapter is created, but
    before any fetches are done.  Takes a file *name*.
    '''
    # get_cookiejar() hands back the existing jar, or makes an empty
    # one if there isn't one yet.
    jar = self.get_cookiejar()
    jar.load(filename, ignore_discard=True, ignore_expires=True)
def save_cookiejar(self,filename=None):
    '''
    Write this fetcher's cookiejar to disk.

    The target is *filename* if given, otherwise the jar's own
    filename.  When neither is set, nothing is written.
    '''
    jar = self.get_cookiejar()
    target = filename or jar.filename
    if not target:
        ## nowhere to save to; saving w/o a filename raises an exception
        return
    jar.save(target, ignore_discard=True, ignore_expires=True)
def _progressbar(self):
if self.getConfig('progressbar'):
@ -201,6 +205,9 @@ class Fetcher(object):
headers=headers,
parameters=parameters)
data = fetchresp.content
self.save_cookiejar()
self._progressbar()
self.cache.set_to_cache(cachekey,data,fetchresp.redirecturl)
if url != fetchresp.redirecturl: # cache both?
@ -232,10 +239,12 @@ class RequestsFetcher(Fetcher):
super(RequestsFetcher,self).__init__(getConfig_fn,getConfigList_fn)
self.requests_session = None
self.retries = self.make_retries()
self.cookiejar = None
def set_cookiejar(self,cj,save_cookiejar_file=None):
self.cookiejar = cj
def set_cookiejar(self,cookiejar):
    '''
    Store *cookiejar* through the parent class's set_cookiejar() and,
    if a requests session has already been created, point that
    session's cookies at the stored jar.
    '''
    super(RequestsFetcher,self).set_cookiejar(cookiejar)
    ## in case where cookiejar is set second
    if self.requests_session:
        self.requests_session.cookies = self.cookiejar
def make_retries(self):
return Retry(total=4,
@ -257,7 +266,8 @@ class RequestsFetcher(Fetcher):
if not self.requests_session:
self.requests_session = self.make_sesssion()
self.do_mounts(self.requests_session)
if self.cookiejar:
## in case where cookiejar is set first
if self.cookiejar is not None: # present but *empty* jar==False
self.requests_session.cookies = self.cookiejar
return self.requests_session
@ -317,8 +327,7 @@ class CloudScraperFetcher(RequestsFetcher):
def make_headers(self,url,referer=None):
headers = super(CloudScraperFetcher,self).make_headers(url,
referer=referer,
headers=headers)
referer=referer)
## let cloudscraper do its thing with UA.
if 'User-Agent' in headers:
del headers['User-Agent']