BasicCache save/load working.

Jim Miller 2021-01-30 20:01:26 -06:00
parent e4d81f0dff
commit 3c20a4c247
5 changed files with 79 additions and 82 deletions


@@ -1257,13 +1257,14 @@ class FanFicFarePlugin(InterfaceAction):
         adapter = get_fff_adapter(url,fileform)
         ## chapter range for title_chapter_range_pattern
         adapter.setChaptersRange(book['begin'],book['end'])
-        ## save and share cookiejar and pagecache between all
-        ## downloads.
         configuration = adapter.get_configuration()
-        if 'pagecache' not in options:
-            options['pagecache'] = configuration.get_empty_pagecache()
-        configuration.set_pagecache(options['pagecache'])
+        if 'pagecache' in options:
+            configuration.set_pagecache(options['pagecache'])
+        else:
+            options['pagecache'] = configuration.get_pagecache()
+        ## save and share cookiejar between all downloads.
         if 'cookiejar' in options:
             configuration.set_cookiejar(options['cookiejar'])
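
The foreground (FG) hunk above inverts the old flow: instead of pushing an empty cache into the configuration, the first download now adopts whatever cache the configuration built, and later downloads in the same session push that shared object back in. A runnable toy of the pattern, with hypothetical names (not FanFicFare's API):

class ToyConfig(object):
    def __init__(self):
        self._cache = {}
    def get_pagecache(self):
        return self._cache
    def set_pagecache(self, cache):
        self._cache = cache

def setup(options, config):
    if 'pagecache' in options:
        config.set_pagecache(options['pagecache'])    # later download: reuse
    else:
        options['pagecache'] = config.get_pagecache() # first download: seed

options = {}
first, second = ToyConfig(), ToyConfig()
setup(options, first)
first.get_pagecache()['http://example.com'] = ('<html/>', 'http://example.com')
setup(options, second)
assert second.get_pagecache() is options['pagecache'] # one shared cache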
@@ -1692,6 +1693,12 @@ class FanFicFarePlugin(InterfaceAction):
                          msgl)
             return
+        pagecachefile = PersistentTemporaryFile(suffix='.pagecache',
+                                                dir=options['tdir'])
+        options['pagecache'].save_cache(pagecachefile.name)
+        options['pagecachefile'] = pagecachefile.name
+        del options['pagecache'] ## can't be pickled.
         cookiejarfile = PersistentTemporaryFile(suffix='.cookiejar',
                                                 dir=options['tdir'])
         ## assumed to be a LWPCookieJar
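
The `del options['pagecache']` above is forced by pickling: `options` is shipped to the background (BG) job via pickle, and BasicCache carries a `threading.RLock`, which pickle rejects. A minimal sketch of the problem and of the save-to-file workaround (hypothetical class name; standard-library calls only):

import pickle
import tempfile
import threading

class CacheWithLock(object):               # hypothetical stand-in for BasicCache
    def __init__(self):
        self.cache_lock = threading.RLock()
        self.pagecache = {'GET:http://example.com': ('<html/>', 'http://example.com')}

obj = CacheWithLock()
try:
    pickle.dumps(obj)                      # TypeError: cannot pickle RLock
except TypeError as e:
    print("as expected: %s" % e)

# Workaround mirroring the hunk: persist only the plain-dict payload and
# pass the temp file's name through options instead of the live object.
with tempfile.NamedTemporaryFile(suffix='.pagecache', delete=False) as f:
    pickle.dump(obj.pagecache, f, protocol=2)
    options_pagecachefile = f.name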


@@ -183,11 +183,10 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
         adapter.setChaptersRange(book['begin'],book['end'])
         ## each download starts with a new copy of the cookiejar
-        ## from the FG process. They are not shared between BG
-        ## downloads.
+        ## and pagecache from the FG process. They are not shared
+        ## between BG downloads at this time.
         configuration.load_cookiejar(options['cookiejarfile'])
-        configuration.set_pagecache(options['pagecache'])
+        configuration.get_pagecache().load_cache(options['pagecachefile'])
         story = adapter.getStoryMetadataOnly()
         if not story.getMetadata("series") and 'calibre_series' in book:
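
On the worker side, each BG download reloads the cache from that file, so no live objects cross the process boundary. A sketch of the load step (hypothetical helper name); `encoding="bytes"` matches the `pickle_load()` shim added to fetcher.py below and tolerates Python-2-written pickles:

import pickle

def load_cache_file(path):                 # hypothetical helper name
    with open(path, 'rb') as jin:
        return pickle.load(jin, encoding='bytes')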


@@ -26,18 +26,6 @@ import logging
 import pprint
 import string
 import os, sys
-import pickle
-if sys.version_info < (2, 7):
-    sys.exit('This program requires Python 2.7 or newer.')
-elif sys.version_info < (3, 0):
-    reload(sys) # Reload restores 'hidden' setdefaultencoding method
-    sys.setdefaultencoding("utf-8")
-    def pickle_load(f):
-        return pickle.load(f)
-else: # > 3.0
-    def pickle_load(f):
-        return pickle.load(f,encoding="bytes")
 version="3.99.1"
 os.environ['CURRENT_VERSION_ID']=version
@@ -58,6 +46,7 @@ try:
     # running under calibre
     from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions
     from calibre_plugins.fanficfare_plugin.fanficfare.configurable import Configuration
+    from calibre_plugins.fanficfare_plugin.fanficfare.fetcher import BasicCache
     from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import (
         get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub)
     from calibre_plugins.fanficfare_plugin.fanficfare.geturls import get_urls_from_page, get_urls_from_imap
@@ -66,6 +55,7 @@ try:
 except ImportError:
     from fanficfare import adapters, writers, exceptions
     from fanficfare.configurable import Configuration
+    from fanficfare.fetcher import BasicCache
     from fanficfare.epubutils import (
         get_dcsource_chaptercount, get_update_data, reset_orig_chapters_epub)
     from fanficfare.geturls import get_urls_from_page, get_urls_from_imap
@@ -527,9 +517,17 @@ def get_configuration(url,
                       options,
                       chaptercount=None,
                       output_filename=None):
+    ## Share pagecache between multiple downloads.
+    if not hasattr(options,'pagecache'):
+        if options.save_cache:
+            options.pagecache = BasicCache(global_cache) #configuration.get_empty_pagecache()
+        else:
+            options.pagecache = BasicCache()
+        logger.debug(options.pagecache.pagecache.keys())
     try:
         configuration = Configuration(adapters.getConfigSectionsFor(url),
-                                      options.format)
+                                      options.format,
+                                      pagecache=options.pagecache)
     except exceptions.UnknownSite as e:
         if options.list or options.normalize or options.downloadlist:
             # list for page doesn't have to be a supported site.
@@ -611,25 +609,6 @@ def get_configuration(url,
         else:
             configuration.set_cookiejar(options.cookiejar)
-    ## Share pagecache between multiple downloads.
-    if not hasattr(options,'pagecache'):
-        options.pagecache = configuration.get_empty_pagecache()
-    cookie_file = None
-    if options.save_cache:
-        cookie_file = global_cookies
-        save_cache = global_cache
-        try:
-            with open(global_cache,'rb') as jin:
-                options.pagecache = pickle_load(jin)
-        except Exception as e:
-            ## This is not uncommon, will happen when starting a new
-            ## cache, for example.
-            print("Didn't load --save-cache %s"%e)
-    else:
-        save_cache = save_cookies = None
-    configuration.set_pagecache(options.pagecache,save_cache)
     return configuration

if __name__ == '__main__':
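
With the old inline load/except logic gone, the CLI now leans on the cache constructor loading the file itself when `--save-cache` supplies one. A condensed sketch of that idea (`SketchCache` is illustrative; the real class is `fanficfare.fetcher.BasicCache`):

import os
import pickle

class SketchCache(object):                 # illustrative; see fetcher.BasicCache
    def __init__(self, filename=None):
        self.pagecache = {}
        self.filename = filename
        if filename and os.path.exists(filename):
            try:
                with open(filename, 'rb') as jin:
                    self.pagecache = pickle.load(jin)
            except Exception as e:
                # Missing or stale file just means a cold cache.
                print("Didn't load cache file: %s" % e)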


@@ -533,15 +533,14 @@ def make_generate_cover_settings(param):
 class Configuration(ConfigParser):
-    def __init__(self, sections, fileform, lightweight=False):
+    def __init__(self, sections, fileform, lightweight=False, pagecache=None):
         site = sections[-1] # first section is site DN.
         ConfigParser.__init__(self)
         self.fetcher = None # the network layer for getting pages
         self.sleeper = None
-        # caching layer for getting pages, created now for
-        # get_empty_pagecache() etc.
-        self.cache = fetcher.BasicCache()
+        # caching layer for getting pages, create one if not given.
+        self.pagecache = pagecache or fetcher.BasicCache()
         self.opener = None # used for _filelist
         self.lightweight = lightweight
@@ -582,11 +581,6 @@ class Configuration(ConfigParser):
         self.url_config_set = False
-        # ## XXX make sure fetcher & cache exist--revisit Breaks
-        # ## use_cloudscraper in CLI because CONFIG FILES HAVEN'T BEEN
-        # ## READ YET.
-        # self.get_fetcher()
     def section_url_names(self,domain,section_url_f):
         ## domain is passed as a method to limit the damage if/when an
         ## adapter screws up _section_url
@@ -954,8 +948,8 @@ class Configuration(ConfigParser):
     #### methods for fetching. Moved here from base_adapter when
     #### *_filelist feature was added.
-    def get_fetcher(self):
-        if not self.fetcher:
+    def get_fetcher(self, make_new = False):
+        if not self.fetcher or make_new:
             logger.debug("use_cloudscraper:%s"%self.getConfig('use_cloudscraper'))
             if self.getConfig('use_cloudscraper',False):
                 fetchcls = fetcher.CloudScraperFetcher
@@ -976,8 +970,8 @@ class Configuration(ConfigParser):
             ## cache decorator terminates the chain when found.
             logger.debug("use_pagecache:%s"%self.getConfig('use_pagecache'))
-            if self.getConfig('use_pagecache'):
-                fetcher.BasicCacheDecorator(self.cache).decorate_fetcher(self.fetcher)
+            if self.getConfig('use_pagecache') and self.pagecache is not None:
+                fetcher.BasicCacheDecorator(self.pagecache).decorate_fetcher(self.fetcher)
             if self.getConfig('progressbar'):
                 fetcher.ProgressBarDecorator().decorate_fetcher(self.fetcher)
@@ -1003,14 +997,15 @@ class Configuration(ConfigParser):
     def save_cookiejar(self,filename=None):
         self.get_fetcher().save_cookiejar(filename)
-    def get_empty_pagecache(self):
-        return self.cache.get_empty_pagecache()
     def get_pagecache(self):
-        return self.cache.get_pagecache()
+        return self.pagecache
-    def set_pagecache(self,cache,cache_file=None):
-        return self.cache.set_pagecache(cache,cache_file)
+    ## replace cache, then replace fetcher (while keeping cookiejar)
+    ## to replace fetcher decorators.
+    def set_pagecache(self,cache):
+        self.pagecache = cache
+        cookiejar = self.get_fetcher().get_cookiejar()
+        self.get_fetcher(make_new=True).set_cookiejar(cookiejar)

 # extended by adapter, writer and story for ease of calling configuration.
 class Configurable(object):
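
`set_pagecache()` must rebuild the fetcher because the cache decorator captures one cache object when `get_fetcher()` assembles the chain; only the cookiejar is carried over to the replacement. A condensed, self-contained sketch of why that binding is sticky (hypothetical names, not the real decorator classes):

class Fetcher(object):
    def request(self, url):
        return "network:" + url

class CacheDecorator(object):
    def __init__(self, cache):
        self.cache = cache                 # bound once, at decoration time
    def decorate(self, fetcher):
        inner = fetcher.request
        def request(url):
            if url in self.cache:
                return self.cache[url]     # hit short-circuits the chain
            resp = inner(url)
            self.cache[url] = resp
            return resp
        fetcher.request = request
        return fetcher

cache_a = {}
f = CacheDecorator(cache_a).decorate(Fetcher())
f.request("http://example.com")
assert "http://example.com" in cache_a    # writes go to cache_a forever;
                                          # swapping caches needs a new fetcher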


@@ -28,7 +28,19 @@ from .six import ensure_binary, ensure_text
 import time
 import logging
 import sys
+import pickle
+if sys.version_info < (2, 7):
+    sys.exit('This program requires Python 2.7 or newer.')
+elif sys.version_info < (3, 0):
+    reload(sys) # Reload restores 'hidden' setdefaultencoding method
+    sys.setdefaultencoding("utf-8")
+    def pickle_load(f):
+        return pickle.load(f)
+else: # > 3.0
+    def pickle_load(f):
+        return pickle.load(f,encoding="bytes")
 from functools import partial
 import threading
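
The shim moves here from cli.py because fetcher.py now owns the pickling. For context: `pickle.load()` grew the `encoding=` keyword in Python 3, and `encoding="bytes"` stops 8-bit strings in Python-2-written files from being decoded as ASCII. A minimal demonstration of the call shape (Python 3; the kwarg is a no-op for a py3-written pickle like this one):

import io
import pickle

payload = pickle.dumps({'url': 'data'}, protocol=2) # protocol 2 is py2-readable
restored = pickle.load(io.BytesIO(payload), encoding='bytes')
print(restored)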
@@ -140,6 +152,7 @@ class SleepDecorator(FetcherDecorator):
         # be before sleep, but check fetchresp.fromcache for file://
         # and other intermediate caches.
         if not fetchresp.fromcache:
+            t = None
             if extrasleep:
                 logger.debug("extra sleep:%s"%extrasleep)
                 time.sleep(float(extrasleep))
@@ -159,22 +172,28 @@ class SleepDecorator(FetcherDecorator):
         return fetchresp

 class BasicCache(object):
-    def __init__(self):
+    def __init__(self,filename=None):
         self.cache_lock = threading.RLock()
-        self.pagecache = self.get_empty_pagecache()
-        self.save_cache_file = None
+        self.pagecache = {}
+        self.filename = filename
+        if self.filename:
+            try:
+                self.load_cache()
+            except:
+                raise
+                logger.debug("Failed to load cache(%s), going on without."%filename)

-    def get_empty_pagecache(self):
-        return {}
+    def load_cache(self,filename=None):
+        logger.debug(filename or self.filename)
+        with open(filename or self.filename,'rb') as jin:
+            self.pagecache = pickle_load(jin)
+        logger.debug(self.pagecache.keys())

     def get_pagecache(self):
         with self.cache_lock:
             return self.pagecache

-    def set_pagecache(self,d,save_cache_file=None):
-        with self.cache_lock:
-            self.save_cache_file = save_cache_file
-            self.pagecache=d
+    def save_cache(self,filename=None):
+        logger.debug(filename or self.filename)
+        with open(filename or self.filename,'wb') as jout:
+            pickle.dump(self.pagecache,jout,protocol=2)
+        logger.debug("wrote")

     def make_cachekey(self, url, parameters=None):
         with self.cache_lock:
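
Assuming BasicCache behaves as the diff above shows, the new save/load API round-trips like this (`protocol=2` keeps the file readable from Python 2):

import tempfile
from fanficfare.fetcher import BasicCache

path = tempfile.NamedTemporaryFile(suffix='.pagecache', delete=False).name
cache = BasicCache()
cache.set_to_cache('GET:http://example.com', '<html/>', 'http://example.com')
cache.save_cache(path)    # explicit filename; none was given at construction

fresh = BasicCache(path)  # constructor loads the file when one is passed
assert fresh.get_from_cache('GET:http://example.com') is not None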
@@ -185,19 +204,18 @@ class BasicCache(object):
     def has_cachekey(self,cachekey):
         with self.cache_lock:
-            return cachekey in self.get_pagecache()
+            return cachekey in self.pagecache

     def get_from_cache(self,cachekey):
         with self.cache_lock:
-            return self.get_pagecache().get(cachekey,None)
+            return self.pagecache.get(cachekey,None)

     def set_to_cache(self,cachekey,data,redirectedurl):
         with self.cache_lock:
-            self.get_pagecache()[cachekey] = (data,ensure_text(redirectedurl))
-            if self.save_cache_file:
-                with open(self.save_cache_file,'wb') as jout:
-                    pickle.dump(self.get_pagecache(),jout,protocol=2)
+            self.pagecache[cachekey] = (data,ensure_text(redirectedurl))
+            logger.debug("set_to_cache:%s"%self.filename)
+            if self.filename:
+                self.save_cache()

 class BasicCacheDecorator(FetcherDecorator):
     def __init__(self,cache):
@@ -222,11 +240,10 @@ class BasicCacheDecorator(FetcherDecorator):
         cachekey=self.cache.make_cachekey(url, parameters)
         if usecache and self.cache.has_cachekey(cachekey) and not cachekey.startswith('file:'):
-            logger.debug("#####################################\npagecache(%s) HIT: %s"%(method,safe_url(cachekey)))
+            logger.debug("\n>>>> pagecache(%s) HIT: %s"%(method,safe_url(cachekey)))
             data,redirecturl = self.cache.get_from_cache(cachekey)
             return FetcherResponse(data,redirecturl=redirecturl,fromcache=True)

-        logger.debug("#####################################\npagecache(%s) MISS: %s"%(method,safe_url(cachekey)))
+        logger.debug("\n<<<< pagecache(%s) MISS: %s"%(method,safe_url(cachekey)))
         fetchresp = chainfn(
             method,