Mirror of https://github.com/JimmXinu/FanFicFare.git (synced 2025-12-06 08:52:55 +01:00)
Fix BrowserCache for images: cache partitioned by parent (story) page.
parent 06dc2add8f
commit 11b2d5643e

5 changed files with 45 additions and 32 deletions
@@ -31,11 +31,12 @@ class BrowserCache(object):
     Class to read web browser cache

     This wrapper class contains the actual impl object.
     """
-    def __init__(self, getConfig_fn, getConfigList_fn):
+    def __init__(self, site, getConfig_fn, getConfigList_fn):
         """Constructor for BrowserCache"""
         # import of child classes have to be inside the def to avoid circular import error
         for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2]:
-            self.browser_cache_impl = browser_cache_class.new_browser_cache(getConfig_fn,
+            self.browser_cache_impl = browser_cache_class.new_browser_cache(site,
+                                                                            getConfig_fn,
                                                                             getConfigList_fn)
             if self.browser_cache_impl is not None:
                 break

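For context, the wrapper's probing pattern that this hunk threads the new site argument through works roughly like the sketch below. The stand-in cache class, config callables, and the "browser_cache_path" key are illustrative assumptions, not FanFicFare's actual modules:

# Rough sketch of the BrowserCache probe loop above (illustrative only):
# each candidate cache class returns an instance if the configured cache
# directory matches its on-disk format, or None so the next class is tried.

class FakeCacheImpl(object):
    """Hypothetical stand-in for SimpleCache / BlockfileCache / FirefoxCache2."""
    @classmethod
    def new_browser_cache(cls, site, getConfig_fn, getConfigList_fn):
        # The real classes check is_cache_dir() on the configured path here.
        return cls() if getConfig_fn("browser_cache_path") else None

class BrowserCacheSketch(object):
    def __init__(self, site, getConfig_fn, getConfigList_fn):
        self.browser_cache_impl = None
        for browser_cache_class in [FakeCacheImpl]:
            self.browser_cache_impl = browser_cache_class.new_browser_cache(
                site, getConfig_fn, getConfigList_fn)
            if self.browser_cache_impl is not None:
                break

cache = BrowserCacheSketch("fanfiction.net",
                           lambda key: "/tmp/browser-cache",  # getConfig_fn stand-in
                           lambda key: [])                    # getConfigList_fn stand-in
print(cache.browser_cache_impl)  # a FakeCacheImpl instance
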
@@ -51,9 +51,10 @@ AGE_LIMIT_CONFIG="browser_cache_age_limit"
 class BaseBrowserCache(object):
     """Base class to read various formats of web browser cache file"""

-    def __init__(self, getConfig_fn, getConfigList_fn):
+    def __init__(self, site, getConfig_fn, getConfigList_fn):
         """Constructor for BaseBrowserCache"""
         ## only ever called by class method new_browser_cache()
+        self.site = site
         self.getConfig = getConfig_fn
         self.getConfigList = getConfigList_fn

@@ -66,11 +67,12 @@ class BaseBrowserCache(object):
             self.age_limit = float(age_limit) * 3600

     @classmethod
-    def new_browser_cache(cls, getConfig_fn, getConfigList_fn):
+    def new_browser_cache(cls, site, getConfig_fn, getConfigList_fn):
         """Return new instance of this BrowserCache class, or None if supplied directory not the correct cache type"""
         if cls.is_cache_dir(cls.expand_cache_dir(getConfig_fn(CACHE_DIR_CONFIG))):
             try:
-                return cls(getConfig_fn,
+                return cls(site,
+                           getConfig_fn,
                            getConfigList_fn)
             except BrowserCacheException:
                 return None

@@ -136,27 +138,36 @@ class BaseBrowserCache(object):
         """
         raise NotImplementedError()

-    def make_key_parts(self, url):
+    def make_key_parts(self, url, site=False):
         """
         Modern browser all also key their cache with the domain to
         reduce info leaking, but differently. However, some parts
-        are common
+        are common.
+
+        Now returns a list of domains, one for the story URL site and
+        one for the URLs own domain. Cache partitioning of images is
+        done based on the parent page (ie, the story site), but if
+        it's not found/expired/etc and called directly instead, then
+        it will be partitioned by the image URL instead. This way we
+        have both.
         """
         parsedUrl = urlparse(url)
         scheme = parsedUrl.scheme
-        domain = parsedUrl.netloc
-        # logger.debug(domain)
+        domains = [self.site, parsedUrl.netloc]

         # discard www. -- others likely needed to distinguish host
         # from domain. Something like tldextract ideally, but
         # dependencies
         # XXX forums?
-        domain = re.sub(r'^(www|m)\.',r'',domain)
+        ## only keep the first domain.TLD, more general than
+        ## discarding www.
+        domains = [ re.sub(r'.*?([^\.]+\.[^\.]+)$',r'\1',d) for d in domains ]
+        ## don't need both if they are the same. Could use a set() to
+        ## dedup, but want to preserve order.
+        if domains[0] == domains[1]:
+            domains.pop()

         # discard any #anchor part
         url = url.split('#')[0]

-        return (scheme, domain, url) # URL still contains domain, params, etc
+        return (scheme, domains, url) # URL still contains domain, params, etc

     def make_redirect_url(self,location,origurl):
         """

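To make the docstring above concrete, here is a stand-alone rendition of the new domain handling. The regex and the domains/pop logic are copied from the hunk; the wrapper function, the story site, and the image URL are illustrative assumptions only:

import re
from urllib.parse import urlparse  # FanFicFare imports this through its own compat layer

def key_parts_sketch(site, url):
    """Stand-alone rendition of make_key_parts() above (illustrative)."""
    parsedUrl = urlparse(url)
    scheme = parsedUrl.scheme
    # story-site domain first, then the URL's own domain
    domains = [site, parsedUrl.netloc]
    # keep only the final domain.TLD, same regex as the diff
    domains = [re.sub(r'.*?([^\.]+\.[^\.]+)$', r'\1', d) for d in domains]
    if domains[0] == domains[1]:
        domains.pop()           # identical domains would only duplicate keys
    url = url.split('#')[0]     # discard any #anchor part
    return (scheme, domains, url)

# Hypothetical story site and image URL, not taken from the commit:
print(key_parts_sketch("www.fanfiction.net",
                       "https://img.example.com/covers/123.jpg#top"))
# ('https', ['fanfiction.net', 'example.com'], 'https://img.example.com/covers/123.jpg')
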
@@ -39,10 +39,9 @@ class BaseChromiumCache(BaseBrowserCache):
-    # 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/11377932/2/Guilt
+    # 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/14161667/10/That-Time-I-Was-Reincarnated-In-Brockton-Bay
     def make_keys(self,url):
-        (scheme, domain, url) = self.make_key_parts(url)
-        return [ '1/0/_dk_'+scheme+'://'+domain+' '+scheme+'://'+domain+' '+url,
-                 '1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm '+url
-                 ]
+        (scheme, domains, url) = self.make_key_parts(url)
+        return [ '1/0/_dk_'+scheme+'://'+d+' '+scheme+'://'+d+' '+url for d in domains ] + \
+               [ '1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm '+url ]

     def make_age(self,response_time):
         return int(response_time/1000000)-EPOCH_DIFFERENCE

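With the same hypothetical inputs as the earlier sketch, the rewritten Chromium make_keys() above would emit one '_dk_'-partitioned key per domain plus the unchanged WebToEpub extension key, along these lines (values are illustrative, not from the commit):

# Illustrative key expansion for the Chromium make_keys() above:
scheme = 'https'
domains = ['fanfiction.net', 'example.com']
url = 'https://img.example.com/covers/123.jpg'

keys = ([ '1/0/_dk_'+scheme+'://'+d+' '+scheme+'://'+d+' '+url for d in domains ] +
        [ '1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm'
          ' chrome-extension://akiljllkbielkidmammnifcnibaigelm '+url ])
for k in keys:
    print(k)
# 1/0/_dk_https://fanfiction.net https://fanfiction.net https://img.example.com/covers/123.jpg
# 1/0/_dk_https://example.com https://example.com https://img.example.com/covers/123.jpg
# 1/0/_dk_chrome-extension://akil... chrome-extension://akil... https://img.example.com/covers/123.jpg
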
@@ -48,6 +48,7 @@ class FirefoxCache2(BaseBrowserCache):
         self.utc_offset = datetime.datetime.now() - utcnow().replace(tzinfo=None)

         # self.scan_cache_keys()
+        # logger.debug("cache site:%s"%self.site)
         # 1/0

     def scan_cache_keys(self):

@@ -59,7 +60,7 @@ class FirefoxCache2(BaseBrowserCache):
             if entry.stat().st_mtime > time.time() - 3600: # last hour only
                 with share_open(entry.path, "rb") as entry_file:
                     metadata = _read_entry_headers(entry_file)
-                    if '14055284' in metadata['key']:
+                    if 'Battle_of_Antarctica_9' in metadata['key']:
                         logger.debug("%s->%s"%(metadata['key'],metadata['key_hash']))

     @staticmethod

@@ -77,14 +78,12 @@ class FirefoxCache2(BaseBrowserCache):
         return False

     def make_keys(self,url):
-        (scheme,domain, url) = self.make_key_parts(url)
+        (scheme, domains, url) = self.make_key_parts(url)
         ## WebToEpub appears to leave just
         ## ':'+url
         ## May 2024, WebToEpub now uses '~FETCH,:'
-        return [ 'O^partitionKey=%28'+scheme+'%2C'+domain+'%29,:'+url,
-                 ':'+url,
-                 '~FETCH,:'+url
-                 ]
+        return [ 'O^partitionKey=%28'+scheme+'%2C'+d+'%29,:'+url for d in domains ] + \
+               [ ':'+url, '~FETCH,:'+url ]

     def make_key_path(self,key):
         logger.debug(key)

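The Firefox cache2 version builds its partitionKey entries the same way, keeping the bare ':' and '~FETCH,:' fallbacks. With the same hypothetical inputs as above (again, illustrative values, not data from the commit):

# Illustrative key expansion for the Firefox cache2 make_keys() above:
scheme = 'https'
domains = ['fanfiction.net', 'example.com']
url = 'https://img.example.com/covers/123.jpg'

keys = ([ 'O^partitionKey=%28'+scheme+'%2C'+d+'%29,:'+url for d in domains ] +
        [ ':'+url, '~FETCH,:'+url ])
for k in keys:
    print(k)
# O^partitionKey=%28https%2Cfanfiction.net%29,:https://img.example.com/covers/123.jpg
# O^partitionKey=%28https%2Cexample.com%29,:https://img.example.com/covers/123.jpg
# :https://img.example.com/covers/123.jpg
# ~FETCH,:https://img.example.com/covers/123.jpg
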
@@ -97,6 +96,7 @@ class FirefoxCache2(BaseBrowserCache):
     def get_data_key_impl(self, url, key):
         key_path = self.make_key_path(key)
         if os.path.isfile(key_path): # share_open()'s failure for non-existent is some win error.
+            logger.debug("found cache: %s"%key_path)
             with share_open(key_path, "rb") as entry_file:
                 metadata = _read_entry_headers(entry_file)
                 # import json

@@ -614,7 +614,8 @@ class Configuration(ConfigParser):

     def __init__(self, sections, fileform, lightweight=False,
                  basic_cache=None, browser_cache=None):
-        site = sections[-1] # first section is site DN.
+        self.site = sections[-1] # first section is site DN.
+        logger.debug("config site:%s"%self.site)
         ConfigParser.__init__(self)

         self.fetcher = None # the network layer for getting pages the

@@ -637,12 +638,12 @@ class Configuration(ConfigParser):
         for section in sections[:-1]:
             self.addConfigSection(section)

-        if site.startswith("www."):
-            sitewith = site
-            sitewithout = site.replace("www.","")
+        if self.site.startswith("www."):
+            sitewith = self.site
+            sitewithout = self.site.replace("www.","")
         else:
-            sitewith = "www."+site
-            sitewithout = site
+            sitewith = "www."+self.site
+            sitewithout = self.site

         self.addConfigSection(sitewith)
         self.addConfigSection(sitewithout)

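The www./non-www. section handling above is unchanged apart from reading self.site. As a quick sketch of what it derives (the function wrapper and sample site are illustrative, the string handling mirrors the diff):

def site_sections(site):
    # derive both www. and bare forms so both config sections get added
    if site.startswith("www."):
        sitewith = site
        sitewithout = site.replace("www.", "")
    else:
        sitewith = "www." + site
        sitewithout = site
    return sitewith, sitewithout

print(site_sections("fanfiction.net"))      # ('www.fanfiction.net', 'fanfiction.net')
print(site_sections("www.fanfiction.net"))  # ('www.fanfiction.net', 'fanfiction.net')
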
@@ -1088,7 +1089,8 @@ class Configuration(ConfigParser):
             ## make a data list of decorators to re-apply if
             ## there are many more.
             if self.browser_cache is None:
-                self.browser_cache = BrowserCache(self.getConfig,
+                self.browser_cache = BrowserCache(self.site,
+                                                  self.getConfig,
                                                   self.getConfigList)
             fetchers.BrowserCacheDecorator(self.browser_cache).decorate_fetcher(self.fetcher)
         except Exception as e: