BasicCache save/load working.

This commit is contained in:
Jim Miller 2021-01-30 20:01:26 -06:00
parent e4d81f0dff
commit 3c20a4c247
5 changed files with 79 additions and 82 deletions

View file

@ -1257,13 +1257,14 @@ class FanFicFarePlugin(InterfaceAction):
adapter = get_fff_adapter(url,fileform) adapter = get_fff_adapter(url,fileform)
## chapter range for title_chapter_range_pattern ## chapter range for title_chapter_range_pattern
adapter.setChaptersRange(book['begin'],book['end']) adapter.setChaptersRange(book['begin'],book['end'])
## save and share cookiejar and pagecache between all ## save and share cookiejar and pagecache between all
## downloads. ## downloads.
configuration = adapter.get_configuration() configuration = adapter.get_configuration()
if 'pagecache' not in options: if 'pagecache' in options:
options['pagecache'] = configuration.get_empty_pagecache() configuration.set_pagecache(options['pagecache'])
configuration.set_pagecache(options['pagecache']) else:
options['pagecache'] = configuration.get_pagecache()
## save and share cookiejar between all downloads. ## save and share cookiejar between all downloads.
if 'cookiejar' in options: if 'cookiejar' in options:
configuration.set_cookiejar(options['cookiejar']) configuration.set_cookiejar(options['cookiejar'])
@ -1692,6 +1693,12 @@ class FanFicFarePlugin(InterfaceAction):
msgl) msgl)
return return
pagecachefile = PersistentTemporaryFile(suffix='.pagecache',
dir=options['tdir'])
options['pagecache'].save_cache(pagecachefile.name)
options['pagecachefile'] = pagecachefile.name
del options['pagecache'] ## can't be pickled.
cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar', cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar',
dir=options['tdir']) dir=options['tdir'])
## assumed to be a LWPCookieJar ## assumed to be a LWPCookieJar

View file

@ -183,11 +183,10 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
adapter.setChaptersRange(book['begin'],book['end']) adapter.setChaptersRange(book['begin'],book['end'])
## each download starts with a new copy of the cookiejar ## each download starts with a new copy of the cookiejar
## from the FG process. They are not shared between BG ## and pagecache from the FG process. They are not shared
## downloads. ## between BG downloads at this time.
configuration.load_cookiejar(options['cookiejarfile']) configuration.load_cookiejar(options['cookiejarfile'])
configuration.get_pagecache().load_cache(options['pagecachefile'])
configuration.set_pagecache(options['pagecache'])
story = adapter.getStoryMetadataOnly() story = adapter.getStoryMetadataOnly()
if not story.getMetadata("series") and 'calibre_series' in book: if not story.getMetadata("series") and 'calibre_series' in book:

View file

@ -26,18 +26,6 @@ import logging
import pprint import pprint
import string import string
import os, sys import os, sys
import pickle
if sys.version_info < (2, 7):
sys.exit('This program requires Python 2.7 or newer.')
elif sys.version_info < (3, 0):
reload(sys) # Reload restores 'hidden' setdefaultencoding method
sys.setdefaultencoding("utf-8")
def pickle_load(f):
return pickle.load(f)
else: # > 3.0
def pickle_load(f):
return pickle.load(f,encoding="bytes")
version="3.99.1" version="3.99.1"
os.environ['CURRENT_VERSION_ID']=version os.environ['CURRENT_VERSION_ID']=version
@ -58,6 +46,7 @@ try:
# running under calibre # running under calibre
from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions
from calibre_plugins.fanficfare_plugin.fanficfare.configurable import Configuration from calibre_plugins.fanficfare_plugin.fanficfare.configurable import Configuration
from calibre_plugins.fanficfare_plugin.fanficfare.fetcher import BasicCache
from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import ( from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import (
get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub) get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub)
from calibre_plugins.fanficfare_plugin.fanficfare.geturls import get_urls_from_page, get_urls_from_imap from calibre_plugins.fanficfare_plugin.fanficfare.geturls import get_urls_from_page, get_urls_from_imap
@ -66,6 +55,7 @@ try:
except ImportError: except ImportError:
from fanficfare import adapters, writers, exceptions from fanficfare import adapters, writers, exceptions
from fanficfare.configurable import Configuration from fanficfare.configurable import Configuration
from fanficfare.fetcher import BasicCache
from fanficfare.epubutils import ( from fanficfare.epubutils import (
get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub) get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub)
from fanficfare.geturls import get_urls_from_page, get_urls_from_imap from fanficfare.geturls import get_urls_from_page, get_urls_from_imap
@ -527,9 +517,17 @@ def get_configuration(url,
options, options,
chaptercount=None, chaptercount=None,
output_filename=None): output_filename=None):
## Share pagecache between multiple downloads.
if not hasattr(options,'pagecache'):
if options.save_cache:
options.pagecache = BasicCache(global_cache) #configuration.get_empty_pagecache()
else:
options.pagecache = BasicCache()
logger.debug(options.pagecache.pagecache.keys())
try: try:
configuration = Configuration(adapters.getConfigSectionsFor(url), configuration = Configuration(adapters.getConfigSectionsFor(url),
options.format) options.format,
pagecache=options.pagecache)
except exceptions.UnknownSite as e: except exceptions.UnknownSite as e:
if options.list or options.normalize or options.downloadlist: if options.list or options.normalize or options.downloadlist:
# list for page doesn't have to be a supported site. # list for page doesn't have to be a supported site.
@ -611,25 +609,6 @@ def get_configuration(url,
else: else:
configuration.set_cookiejar(options.cookiejar) configuration.set_cookiejar(options.cookiejar)
## Share pagecache between multiple downloads.
if not hasattr(options,'pagecache'):
options.pagecache = configuration.get_empty_pagecache()
cookie_file = None
if options.save_cache:
cookie_file = global_cookies
save_cache = global_cache
try:
with open(global_cache,'rb') as jin:
options.pagecache = pickle_load(jin)
except Exception as e:
## This is not uncommon, will happen when starting a new
## cache, for example.
print("Didn't load --save-cache %s"%e)
else:
save_cache = save_cookies = None
configuration.set_pagecache(options.pagecache,save_cache)
return configuration return configuration
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -533,15 +533,14 @@ def make_generate_cover_settings(param):
class Configuration(ConfigParser): class Configuration(ConfigParser):
def __init__(self, sections, fileform, lightweight=False): def __init__(self, sections, fileform, lightweight=False, pagecache=None):
site = sections[-1] # first section is site DN. site = sections[-1] # first section is site DN.
ConfigParser.__init__(self) ConfigParser.__init__(self)
self.fetcher = None # the network layer for getting pages the self.fetcher = None # the network layer for getting pages the
self.sleeper = None self.sleeper = None
# caching layer for getting pages, created now for # caching layer for getting pages, create one if not given.
# get_empty_pagecache() etc. self.pagecache = pagecache or fetcher.BasicCache()
self.cache = fetcher.BasicCache()
self.opener = None # used for _filelist self.opener = None # used for _filelist
self.lightweight = lightweight self.lightweight = lightweight
@ -582,11 +581,6 @@ class Configuration(ConfigParser):
self.url_config_set = False self.url_config_set = False
# ## XXX make sure fetcher & cache exist--revisit Breaks
# ## use_cloudscraper in CLI because CONFIG FILES HAVEN'T BEEN
# ## READ YET.
# self.get_fetcher()
def section_url_names(self,domain,section_url_f): def section_url_names(self,domain,section_url_f):
## domain is passed as a method to limit the damage if/when an ## domain is passed as a method to limit the damage if/when an
## adapter screws up _section_url ## adapter screws up _section_url
@ -954,8 +948,8 @@ class Configuration(ConfigParser):
#### methods for fetching. Moved here from base_adapter when #### methods for fetching. Moved here from base_adapter when
#### *_filelist feature was added. #### *_filelist feature was added.
def get_fetcher(self): def get_fetcher(self, make_new = False):
if not self.fetcher: if not self.fetcher or make_new:
logger.debug("use_cloudscraper:%s"%self.getConfig('use_cloudscraper')) logger.debug("use_cloudscraper:%s"%self.getConfig('use_cloudscraper'))
if self.getConfig('use_cloudscraper',False): if self.getConfig('use_cloudscraper',False):
fetchcls = fetcher.CloudScraperFetcher fetchcls = fetcher.CloudScraperFetcher
@ -976,8 +970,8 @@ class Configuration(ConfigParser):
## cache decorator terminates the chain when found. ## cache decorator terminates the chain when found.
logger.debug("use_pagecache:%s"%self.getConfig('use_pagecache')) logger.debug("use_pagecache:%s"%self.getConfig('use_pagecache'))
if self.getConfig('use_pagecache'): if self.getConfig('use_pagecache') and self.pagecache is not None:
fetcher.BasicCacheDecorator(self.cache).decorate_fetcher(self.fetcher) fetcher.BasicCacheDecorator(self.pagecache).decorate_fetcher(self.fetcher)
if self.getConfig('progressbar'): if self.getConfig('progressbar'):
fetcher.ProgressBarDecorator().decorate_fetcher(self.fetcher) fetcher.ProgressBarDecorator().decorate_fetcher(self.fetcher)
@ -1003,14 +997,15 @@ class Configuration(ConfigParser):
def save_cookiejar(self,filename=None): def save_cookiejar(self,filename=None):
self.get_fetcher().save_cookiejar(filename) self.get_fetcher().save_cookiejar(filename)
def get_empty_pagecache(self):
return self.cache.get_empty_pagecache()
def get_pagecache(self): def get_pagecache(self):
return self.cache.get_pagecache() return self.pagecache
def set_pagecache(self,cache,cache_file=None): ## replace cache, then replace fetcher (while keeping cookiejar)
return self.cache.set_pagecache(cache,cache_file) ## to replace fetcher decorators.
def set_pagecache(self,cache):
self.pagecache = cache
cookiejar = self.get_fetcher().get_cookiejar()
self.get_fetcher(make_new=True).set_cookiejar(cookiejar)
# extended by adapter, writer and story for ease of calling configuration. # extended by adapter, writer and story for ease of calling configuration.
class Configurable(object): class Configurable(object):

View file

@ -28,7 +28,19 @@ from .six import ensure_binary, ensure_text
import time import time
import logging import logging
import sys import sys
import pickle import pickle
if sys.version_info < (2, 7):
sys.exit('This program requires Python 2.7 or newer.')
elif sys.version_info < (3, 0):
reload(sys) # Reload restores 'hidden' setdefaultencoding method
sys.setdefaultencoding("utf-8")
def pickle_load(f):
return pickle.load(f)
else: # > 3.0
def pickle_load(f):
return pickle.load(f,encoding="bytes")
from functools import partial from functools import partial
import threading import threading
@ -140,6 +152,7 @@ class SleepDecorator(FetcherDecorator):
# be before sleep, but check fetchresp.fromcache for file:// # be before sleep, but check fetchresp.fromcache for file://
# and other intermediate caches. # and other intermediate caches.
if not fetchresp.fromcache: if not fetchresp.fromcache:
t = None
if extrasleep: if extrasleep:
logger.debug("extra sleep:%s"%extrasleep) logger.debug("extra sleep:%s"%extrasleep)
time.sleep(float(extrasleep)) time.sleep(float(extrasleep))
@ -159,22 +172,28 @@ class SleepDecorator(FetcherDecorator):
return fetchresp return fetchresp
class BasicCache(object): class BasicCache(object):
def __init__(self): def __init__(self,filename=None):
self.cache_lock = threading.RLock() self.cache_lock = threading.RLock()
self.pagecache = self.get_empty_pagecache() self.pagecache = {}
self.save_cache_file = None self.filename = filename
if self.filename:
try:
self.load_cache()
except:
raise
logger.debug("Failed to load cache(%s), going on without."%filename)
def get_empty_pagecache(self): def load_cache(self,filename=None):
return {} logger.debug(filename or self.filename)
with open(filename or self.filename,'rb') as jin:
self.pagecache = pickle_load(jin)
logger.debug(self.pagecache.keys())
def get_pagecache(self): def save_cache(self,filename=None):
with self.cache_lock: logger.debug(filename or self.filename)
return self.pagecache with open(filename or self.filename,'wb') as jout:
pickle.dump(self.pagecache,jout,protocol=2)
def set_pagecache(self,d,save_cache_file=None): logger.debug("wrote")
with self.cache_lock:
self.save_cache_file = save_cache_file
self.pagecache=d
def make_cachekey(self, url, parameters=None): def make_cachekey(self, url, parameters=None):
with self.cache_lock: with self.cache_lock:
@ -185,19 +204,18 @@ class BasicCache(object):
def has_cachekey(self,cachekey): def has_cachekey(self,cachekey):
with self.cache_lock: with self.cache_lock:
return cachekey in self.get_pagecache() return cachekey in self.pagecache
def get_from_cache(self,cachekey): def get_from_cache(self,cachekey):
with self.cache_lock: with self.cache_lock:
return self.get_pagecache().get(cachekey,None) return self.pagecache.get(cachekey,None)
def set_to_cache(self,cachekey,data,redirectedurl): def set_to_cache(self,cachekey,data,redirectedurl):
with self.cache_lock: with self.cache_lock:
self.get_pagecache()[cachekey] = (data,ensure_text(redirectedurl)) self.pagecache[cachekey] = (data,ensure_text(redirectedurl))
if self.save_cache_file: logger.debug("set_to_cache:%s"%self.filename)
with open(self.save_cache_file,'wb') as jout: if self.filename:
pickle.dump(self.get_pagecache(),jout,protocol=2) self.save_cache()
class BasicCacheDecorator(FetcherDecorator): class BasicCacheDecorator(FetcherDecorator):
def __init__(self,cache): def __init__(self,cache):
@ -222,11 +240,10 @@ class BasicCacheDecorator(FetcherDecorator):
cachekey=self.cache.make_cachekey(url, parameters) cachekey=self.cache.make_cachekey(url, parameters)
if usecache and self.cache.has_cachekey(cachekey) and not cachekey.startswith('file:'): if usecache and self.cache.has_cachekey(cachekey) and not cachekey.startswith('file:'):
logger.debug("#####################################\npagecache(%s) HIT: %s"%(method,safe_url(cachekey))) logger.debug("\n>>>> pagecache(%s) HIT: %s"%(method,safe_url(cachekey)))
data,redirecturl = self.cache.get_from_cache(cachekey) data,redirecturl = self.cache.get_from_cache(cachekey)
return FetcherResponse(data,redirecturl=redirecturl,fromcache=True) return FetcherResponse(data,redirecturl=redirecturl,fromcache=True)
logger.debug("\n<<<< pagecache(%s) MISS: %s"%(method,safe_url(cachekey)))
logger.debug("#####################################\npagecache(%s) MISS: %s"%(method,safe_url(cachekey)))
fetchresp = chainfn( fetchresp = chainfn(
method, method,