diff --git a/calibre-plugin/fff_plugin.py b/calibre-plugin/fff_plugin.py index 34f15d73..1ee48537 100644 --- a/calibre-plugin/fff_plugin.py +++ b/calibre-plugin/fff_plugin.py @@ -1257,13 +1257,14 @@ class FanFicFarePlugin(InterfaceAction): adapter = get_fff_adapter(url,fileform) ## chapter range for title_chapter_range_pattern adapter.setChaptersRange(book['begin'],book['end']) + ## save and share cookiejar and pagecache between all ## downloads. configuration = adapter.get_configuration() - if 'pagecache' not in options: - options['pagecache'] = configuration.get_empty_pagecache() - configuration.set_pagecache(options['pagecache']) - + if 'pagecache' in options: + configuration.set_pagecache(options['pagecache']) + else: + options['pagecache'] = configuration.get_pagecache() ## save and share cookiejar between all downloads. if 'cookiejar' in options: configuration.set_cookiejar(options['cookiejar']) @@ -1692,6 +1693,12 @@ class FanFicFarePlugin(InterfaceAction): msgl) return + pagecachefile = PersistentTemporaryFile(suffix='.pagecache', + dir=options['tdir']) + options['pagecache'].save_cache(pagecachefile.name) + options['pagecachefile'] = pagecachefile.name + del options['pagecache'] ## can't be pickled. + cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar', dir=options['tdir']) ## assumed to be a LWPCookieJar diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index f54a5bab..4d75cf7f 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -183,11 +183,10 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x): adapter.setChaptersRange(book['begin'],book['end']) ## each download starts with a new copy of the cookiejar - ## from the FG process. They are not shared between BG - ## downloads. + ## and pagecache from the FG process. They are not shared + ## between BG downloads at this time. 
configuration.load_cookiejar(options['cookiejarfile']) - - configuration.set_pagecache(options['pagecache']) + configuration.get_pagecache().load_cache(options['pagecachefile']) story = adapter.getStoryMetadataOnly() if not story.getMetadata("series") and 'calibre_series' in book: diff --git a/fanficfare/cli.py b/fanficfare/cli.py index b647c685..80a697e7 100644 --- a/fanficfare/cli.py +++ b/fanficfare/cli.py @@ -26,18 +26,6 @@ import logging import pprint import string import os, sys -import pickle - -if sys.version_info < (2, 7): - sys.exit('This program requires Python 2.7 or newer.') -elif sys.version_info < (3, 0): - reload(sys) # Reload restores 'hidden' setdefaultencoding method - sys.setdefaultencoding("utf-8") - def pickle_load(f): - return pickle.load(f) -else: # > 3.0 - def pickle_load(f): - return pickle.load(f,encoding="bytes") version="3.99.1" os.environ['CURRENT_VERSION_ID']=version @@ -58,6 +46,7 @@ try: # running under calibre from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions from calibre_plugins.fanficfare_plugin.fanficfare.configurable import Configuration + from calibre_plugins.fanficfare_plugin.fanficfare.fetcher import BasicCache from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import ( get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub) from calibre_plugins.fanficfare_plugin.fanficfare.geturls import get_urls_from_page, get_urls_from_imap @@ -66,6 +55,7 @@ try: except ImportError: from fanficfare import adapters, writers, exceptions from fanficfare.configurable import Configuration + from fanficfare.fetcher import BasicCache from fanficfare.epubutils import ( get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub) from fanficfare.geturls import get_urls_from_page, get_urls_from_imap @@ -527,9 +517,17 @@ def get_configuration(url, options, chaptercount=None, output_filename=None): + ## Share pagecache between multiple downloads. 
+ if not hasattr(options,'pagecache'): + if options.save_cache: + options.pagecache = BasicCache(global_cache) #configuration.get_empty_pagecache() + else: + options.pagecache = BasicCache() + logger.debug(options.pagecache.pagecache.keys()) try: configuration = Configuration(adapters.getConfigSectionsFor(url), - options.format) + options.format, + pagecache=options.pagecache) except exceptions.UnknownSite as e: if options.list or options.normalize or options.downloadlist: # list for page doesn't have to be a supported site. @@ -611,25 +609,6 @@ def get_configuration(url, else: configuration.set_cookiejar(options.cookiejar) - ## Share pagecache between multiple downloads. - if not hasattr(options,'pagecache'): - options.pagecache = configuration.get_empty_pagecache() - - cookie_file = None - if options.save_cache: - cookie_file = global_cookies - save_cache = global_cache - try: - with open(global_cache,'rb') as jin: - options.pagecache = pickle_load(jin) - except Exception as e: - ## This is not uncommon, will happen when starting a new - ## cache, for example. - print("Didn't load --save-cache %s"%e) - else: - save_cache = save_cookies = None - configuration.set_pagecache(options.pagecache,save_cache) - return configuration if __name__ == '__main__': diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index 2e947970..db250c26 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -533,15 +533,14 @@ def make_generate_cover_settings(param): class Configuration(ConfigParser): - def __init__(self, sections, fileform, lightweight=False): + def __init__(self, sections, fileform, lightweight=False, pagecache=None): site = sections[-1] # first section is site DN. ConfigParser.__init__(self) self.fetcher = None # the network layer for getting pages the self.sleeper = None - # caching layer for getting pages, created now for - # get_empty_pagecache() etc. 
- self.cache = fetcher.BasicCache() + # caching layer for getting pages, create one if not given. + self.pagecache = pagecache or fetcher.BasicCache() self.opener = None # used for _filelist self.lightweight = lightweight @@ -582,11 +581,6 @@ class Configuration(ConfigParser): self.url_config_set = False - # ## XXX make sure fetcher & cache exist--revisit Breaks - # ## use_cloudscraper in CLI because CONFIG FILES HAVEN'T BEEN - # ## READ YET. - # self.get_fetcher() - def section_url_names(self,domain,section_url_f): ## domain is passed as a method to limit the damage if/when an ## adapter screws up _section_url @@ -954,8 +948,8 @@ class Configuration(ConfigParser): #### methods for fetching. Moved here from base_adapter when #### *_filelist feature was added. - def get_fetcher(self): - if not self.fetcher: + def get_fetcher(self, make_new = False): + if not self.fetcher or make_new: logger.debug("use_cloudscraper:%s"%self.getConfig('use_cloudscraper')) if self.getConfig('use_cloudscraper',False): fetchcls = fetcher.CloudScraperFetcher @@ -976,8 +970,8 @@ class Configuration(ConfigParser): ## cache decorator terminates the chain when found. 
logger.debug("use_pagecache:%s"%self.getConfig('use_pagecache')) - if self.getConfig('use_pagecache'): - fetcher.BasicCacheDecorator(self.cache).decorate_fetcher(self.fetcher) + if self.getConfig('use_pagecache') and self.pagecache is not None: + fetcher.BasicCacheDecorator(self.pagecache).decorate_fetcher(self.fetcher) if self.getConfig('progressbar'): fetcher.ProgressBarDecorator().decorate_fetcher(self.fetcher) @@ -1003,14 +997,15 @@ class Configuration(ConfigParser): def save_cookiejar(self,filename=None): self.get_fetcher().save_cookiejar(filename) - def get_empty_pagecache(self): - return self.cache.get_empty_pagecache() - def get_pagecache(self): - return self.cache.get_pagecache() + return self.pagecache - def set_pagecache(self,cache,cache_file=None): - return self.cache.set_pagecache(cache,cache_file) + ## replace cache, then replace fetcher (while keeping cookiejar) + ## to replace fetcher decorators. + def set_pagecache(self,cache): + self.pagecache = cache + cookiejar = self.get_fetcher().get_cookiejar() + self.get_fetcher(make_new=True).set_cookiejar(cookiejar) # extended by adapter, writer and story for ease of calling configuration. 
class Configurable(object): diff --git a/fanficfare/fetcher.py b/fanficfare/fetcher.py index c4beb236..7030660b 100644 --- a/fanficfare/fetcher.py +++ b/fanficfare/fetcher.py @@ -28,7 +28,19 @@ from .six import ensure_binary, ensure_text import time import logging import sys + import pickle +if sys.version_info < (2, 7): + sys.exit('This program requires Python 2.7 or newer.') +elif sys.version_info < (3, 0): + reload(sys) # Reload restores 'hidden' setdefaultencoding method + sys.setdefaultencoding("utf-8") + def pickle_load(f): + return pickle.load(f) +else: # > 3.0 + def pickle_load(f): + return pickle.load(f,encoding="bytes") + from functools import partial import threading @@ -140,6 +152,7 @@ class SleepDecorator(FetcherDecorator): # be before sleep, but check fetchresp.fromcache for file:// # and other intermediate caches. if not fetchresp.fromcache: + t = None if extrasleep: logger.debug("extra sleep:%s"%extrasleep) time.sleep(float(extrasleep)) @@ -159,22 +172,27 @@ return fetchresp class BasicCache(object): - def __init__(self): + def __init__(self,filename=None): self.cache_lock = threading.RLock() - self.pagecache = self.get_empty_pagecache() - self.save_cache_file = None + self.pagecache = {} + self.filename = filename + if self.filename: + try: + self.load_cache() + except: + logger.debug("Failed to load cache(%s), going on without."%filename) - def get_empty_pagecache(self): - return {} + def load_cache(self,filename=None): + logger.debug(filename or self.filename) + with open(filename or self.filename,'rb') as jin: + self.pagecache = pickle_load(jin) + logger.debug(self.pagecache.keys()) - def get_pagecache(self): - with self.cache_lock: - return self.pagecache - - def set_pagecache(self,d,save_cache_file=None): - with self.cache_lock: - self.save_cache_file = save_cache_file - self.pagecache=d + def save_cache(self,filename=None): + logger.debug(filename or self.filename) + with open(filename or
self.filename,'wb') as jout: + pickle.dump(self.pagecache,jout,protocol=2) + logger.debug("wrote") def make_cachekey(self, url, parameters=None): with self.cache_lock: @@ -185,19 +204,18 @@ class BasicCache(object): def has_cachekey(self,cachekey): with self.cache_lock: - return cachekey in self.get_pagecache() + return cachekey in self.pagecache def get_from_cache(self,cachekey): with self.cache_lock: - return self.get_pagecache().get(cachekey,None) + return self.pagecache.get(cachekey,None) def set_to_cache(self,cachekey,data,redirectedurl): with self.cache_lock: - self.get_pagecache()[cachekey] = (data,ensure_text(redirectedurl)) - if self.save_cache_file: - with open(self.save_cache_file,'wb') as jout: - pickle.dump(self.get_pagecache(),jout,protocol=2) - + self.pagecache[cachekey] = (data,ensure_text(redirectedurl)) + logger.debug("set_to_cache:%s"%self.filename) + if self.filename: + self.save_cache() class BasicCacheDecorator(FetcherDecorator): def __init__(self,cache): @@ -222,11 +240,10 @@ class BasicCacheDecorator(FetcherDecorator): cachekey=self.cache.make_cachekey(url, parameters) if usecache and self.cache.has_cachekey(cachekey) and not cachekey.startswith('file:'): - logger.debug("#####################################\npagecache(%s) HIT: %s"%(method,safe_url(cachekey))) + logger.debug("\n>>>> pagecache(%s) HIT: %s"%(method,safe_url(cachekey))) data,redirecturl = self.cache.get_from_cache(cachekey) return FetcherResponse(data,redirecturl=redirecturl,fromcache=True) - - logger.debug("#####################################\npagecache(%s) MISS: %s"%(method,safe_url(cachekey))) + logger.debug("\n<<<< pagecache(%s) MISS: %s"%(method,safe_url(cachekey))) fetchresp = chainfn( method,