mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 17:02:43 +01:00
Fix BrowserCache for images -- the cache is partitioned by the parent (story) page.
This commit is contained in:
parent
06dc2add8f
commit
11b2d5643e
5 changed files with 45 additions and 32 deletions
|
|
@ -31,11 +31,12 @@ class BrowserCache(object):
|
||||||
Class to read web browser cache
|
Class to read web browser cache
|
||||||
This wrapper class contains the actual impl object.
|
This wrapper class contains the actual impl object.
|
||||||
"""
|
"""
|
||||||
def __init__(self, getConfig_fn, getConfigList_fn):
|
def __init__(self, site, getConfig_fn, getConfigList_fn):
|
||||||
"""Constructor for BrowserCache"""
|
"""Constructor for BrowserCache"""
|
||||||
# import of child classes have to be inside the def to avoid circular import error
|
# import of child classes have to be inside the def to avoid circular import error
|
||||||
for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2]:
|
for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2]:
|
||||||
self.browser_cache_impl = browser_cache_class.new_browser_cache(getConfig_fn,
|
self.browser_cache_impl = browser_cache_class.new_browser_cache(site,
|
||||||
|
getConfig_fn,
|
||||||
getConfigList_fn)
|
getConfigList_fn)
|
||||||
if self.browser_cache_impl is not None:
|
if self.browser_cache_impl is not None:
|
||||||
break
|
break
|
||||||
|
|
|
||||||
|
|
@ -51,9 +51,10 @@ AGE_LIMIT_CONFIG="browser_cache_age_limit"
|
||||||
class BaseBrowserCache(object):
|
class BaseBrowserCache(object):
|
||||||
"""Base class to read various formats of web browser cache file"""
|
"""Base class to read various formats of web browser cache file"""
|
||||||
|
|
||||||
def __init__(self, getConfig_fn, getConfigList_fn):
|
def __init__(self, site, getConfig_fn, getConfigList_fn):
|
||||||
"""Constructor for BaseBrowserCache"""
|
"""Constructor for BaseBrowserCache"""
|
||||||
## only ever called by class method new_browser_cache()
|
## only ever called by class method new_browser_cache()
|
||||||
|
self.site = site
|
||||||
self.getConfig = getConfig_fn
|
self.getConfig = getConfig_fn
|
||||||
self.getConfigList = getConfigList_fn
|
self.getConfigList = getConfigList_fn
|
||||||
|
|
||||||
|
|
@ -66,11 +67,12 @@ class BaseBrowserCache(object):
|
||||||
self.age_limit = float(age_limit) * 3600
|
self.age_limit = float(age_limit) * 3600
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def new_browser_cache(cls, getConfig_fn, getConfigList_fn):
|
def new_browser_cache(cls, site, getConfig_fn, getConfigList_fn):
|
||||||
"""Return new instance of this BrowserCache class, or None if supplied directory not the correct cache type"""
|
"""Return new instance of this BrowserCache class, or None if supplied directory not the correct cache type"""
|
||||||
if cls.is_cache_dir(cls.expand_cache_dir(getConfig_fn(CACHE_DIR_CONFIG))):
|
if cls.is_cache_dir(cls.expand_cache_dir(getConfig_fn(CACHE_DIR_CONFIG))):
|
||||||
try:
|
try:
|
||||||
return cls(getConfig_fn,
|
return cls(site,
|
||||||
|
getConfig_fn,
|
||||||
getConfigList_fn)
|
getConfigList_fn)
|
||||||
except BrowserCacheException:
|
except BrowserCacheException:
|
||||||
return None
|
return None
|
||||||
|
|
@ -136,27 +138,36 @@ class BaseBrowserCache(object):
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def make_key_parts(self, url):
|
def make_key_parts(self, url, site=False):
|
||||||
"""
|
"""
|
||||||
Modern browser all also key their cache with the domain to
|
Modern browser all also key their cache with the domain to
|
||||||
reduce info leaking, but differently. However, some parts
|
reduce info leaking, but differently. However, some parts
|
||||||
are common
|
are common.
|
||||||
|
|
||||||
|
Now returns a list of domains, one for the story URL site and
|
||||||
|
one for the URL's own domain. Cache partitioning of images is
|
||||||
|
done based on the parent page (ie, the story site), but if
|
||||||
|
it's not found/expired/etc and called directly instead, then
|
||||||
|
it will be partitioned by the image URL instead. This way we
|
||||||
|
have both.
|
||||||
"""
|
"""
|
||||||
parsedUrl = urlparse(url)
|
parsedUrl = urlparse(url)
|
||||||
scheme = parsedUrl.scheme
|
scheme = parsedUrl.scheme
|
||||||
domain = parsedUrl.netloc
|
domains = [self.site, parsedUrl.netloc]
|
||||||
# logger.debug(domain)
|
|
||||||
|
|
||||||
# discard www. -- others likely needed to distinguish host
|
|
||||||
# from domain. Something like tldextract ideally, but
|
## only keep the first domain.TLD, more general than
|
||||||
# dependencies
|
## discarding www.
|
||||||
# XXX forums?
|
domains = [ re.sub(r'.*?([^\.]+\.[^\.]+)$',r'\1',d) for d in domains ]
|
||||||
domain = re.sub(r'^(www|m)\.',r'',domain)
|
## don't need both if they are the same. Could use a set() to
|
||||||
|
## dedup, but want to preserve order.
|
||||||
|
if domains[0] == domains[1]:
|
||||||
|
domains.pop()
|
||||||
|
|
||||||
# discard any #anchor part
|
# discard any #anchor part
|
||||||
url = url.split('#')[0]
|
url = url.split('#')[0]
|
||||||
|
|
||||||
return (scheme, domain, url) # URL still contains domain, params, etc
|
return (scheme, domains, url) # URL still contains domain, params, etc
|
||||||
|
|
||||||
def make_redirect_url(self,location,origurl):
|
def make_redirect_url(self,location,origurl):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -39,10 +39,9 @@ class BaseChromiumCache(BaseBrowserCache):
|
||||||
# 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/11377932/2/Guilt
|
# 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/11377932/2/Guilt
|
||||||
# 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/14161667/10/That-Time-I-Was-Reincarnated-In-Brockton-Bay
|
# 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/14161667/10/That-Time-I-Was-Reincarnated-In-Brockton-Bay
|
||||||
def make_keys(self,url):
|
def make_keys(self,url):
|
||||||
(scheme, domain, url) = self.make_key_parts(url)
|
(scheme, domains, url) = self.make_key_parts(url)
|
||||||
return [ '1/0/_dk_'+scheme+'://'+domain+' '+scheme+'://'+domain+' '+url,
|
return [ '1/0/_dk_'+scheme+'://'+d+' '+scheme+'://'+d+' '+url for d in domains ] + \
|
||||||
'1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm '+url
|
[ '1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm '+url ]
|
||||||
]
|
|
||||||
|
|
||||||
def make_age(self,response_time):
|
def make_age(self,response_time):
|
||||||
return int(response_time/1000000)-EPOCH_DIFFERENCE
|
return int(response_time/1000000)-EPOCH_DIFFERENCE
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@ class FirefoxCache2(BaseBrowserCache):
|
||||||
self.utc_offset = datetime.datetime.now() - utcnow().replace(tzinfo=None)
|
self.utc_offset = datetime.datetime.now() - utcnow().replace(tzinfo=None)
|
||||||
|
|
||||||
# self.scan_cache_keys()
|
# self.scan_cache_keys()
|
||||||
|
# logger.debug("cache site:%s"%self.site)
|
||||||
# 1/0
|
# 1/0
|
||||||
|
|
||||||
def scan_cache_keys(self):
|
def scan_cache_keys(self):
|
||||||
|
|
@ -59,7 +60,7 @@ class FirefoxCache2(BaseBrowserCache):
|
||||||
if entry.stat().st_mtime > time.time() - 3600: # last hour only
|
if entry.stat().st_mtime > time.time() - 3600: # last hour only
|
||||||
with share_open(entry.path, "rb") as entry_file:
|
with share_open(entry.path, "rb") as entry_file:
|
||||||
metadata = _read_entry_headers(entry_file)
|
metadata = _read_entry_headers(entry_file)
|
||||||
if '14055284' in metadata['key']:
|
if 'Battle_of_Antarctica_9' in metadata['key']:
|
||||||
logger.debug("%s->%s"%(metadata['key'],metadata['key_hash']))
|
logger.debug("%s->%s"%(metadata['key'],metadata['key_hash']))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -77,14 +78,12 @@ class FirefoxCache2(BaseBrowserCache):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def make_keys(self,url):
|
def make_keys(self,url):
|
||||||
(scheme,domain, url) = self.make_key_parts(url)
|
(scheme, domains, url) = self.make_key_parts(url)
|
||||||
## WebToEpub appears to leave just
|
## WebToEpub appears to leave just
|
||||||
## ':'+url
|
## ':'+url
|
||||||
## May 2024, WebToEpub now uses '~FETCH,:'
|
## May 2024, WebToEpub now uses '~FETCH,:'
|
||||||
return [ 'O^partitionKey=%28'+scheme+'%2C'+domain+'%29,:'+url,
|
return [ 'O^partitionKey=%28'+scheme+'%2C'+d+'%29,:'+url for d in domains ] + \
|
||||||
':'+url,
|
[ ':'+url, '~FETCH,:'+url ]
|
||||||
'~FETCH,:'+url
|
|
||||||
]
|
|
||||||
|
|
||||||
def make_key_path(self,key):
|
def make_key_path(self,key):
|
||||||
logger.debug(key)
|
logger.debug(key)
|
||||||
|
|
@ -97,6 +96,7 @@ class FirefoxCache2(BaseBrowserCache):
|
||||||
def get_data_key_impl(self, url, key):
|
def get_data_key_impl(self, url, key):
|
||||||
key_path = self.make_key_path(key)
|
key_path = self.make_key_path(key)
|
||||||
if os.path.isfile(key_path): # share_open()'s failure for non-existent is some win error.
|
if os.path.isfile(key_path): # share_open()'s failure for non-existent is some win error.
|
||||||
|
logger.debug("found cache: %s"%key_path)
|
||||||
with share_open(key_path, "rb") as entry_file:
|
with share_open(key_path, "rb") as entry_file:
|
||||||
metadata = _read_entry_headers(entry_file)
|
metadata = _read_entry_headers(entry_file)
|
||||||
# import json
|
# import json
|
||||||
|
|
|
||||||
|
|
@ -614,7 +614,8 @@ class Configuration(ConfigParser):
|
||||||
|
|
||||||
def __init__(self, sections, fileform, lightweight=False,
|
def __init__(self, sections, fileform, lightweight=False,
|
||||||
basic_cache=None, browser_cache=None):
|
basic_cache=None, browser_cache=None):
|
||||||
site = sections[-1] # first section is site DN.
|
self.site = sections[-1] # first section is site DN.
|
||||||
|
logger.debug("config site:%s"%self.site)
|
||||||
ConfigParser.__init__(self)
|
ConfigParser.__init__(self)
|
||||||
|
|
||||||
self.fetcher = None # the network layer for getting pages the
|
self.fetcher = None # the network layer for getting pages the
|
||||||
|
|
@ -637,12 +638,12 @@ class Configuration(ConfigParser):
|
||||||
for section in sections[:-1]:
|
for section in sections[:-1]:
|
||||||
self.addConfigSection(section)
|
self.addConfigSection(section)
|
||||||
|
|
||||||
if site.startswith("www."):
|
if self.site.startswith("www."):
|
||||||
sitewith = site
|
sitewith = self.site
|
||||||
sitewithout = site.replace("www.","")
|
sitewithout = self.site.replace("www.","")
|
||||||
else:
|
else:
|
||||||
sitewith = "www."+site
|
sitewith = "www."+self.site
|
||||||
sitewithout = site
|
sitewithout = self.site
|
||||||
|
|
||||||
self.addConfigSection(sitewith)
|
self.addConfigSection(sitewith)
|
||||||
self.addConfigSection(sitewithout)
|
self.addConfigSection(sitewithout)
|
||||||
|
|
@ -1088,7 +1089,8 @@ class Configuration(ConfigParser):
|
||||||
## make a data list of decorators to re-apply if
|
## make a data list of decorators to re-apply if
|
||||||
## there are many more.
|
## there are many more.
|
||||||
if self.browser_cache is None:
|
if self.browser_cache is None:
|
||||||
self.browser_cache = BrowserCache(self.getConfig,
|
self.browser_cache = BrowserCache(self.site,
|
||||||
|
self.getConfig,
|
||||||
self.getConfigList)
|
self.getConfigList)
|
||||||
fetchers.BrowserCacheDecorator(self.browser_cache).decorate_fetcher(self.fetcher)
|
fetchers.BrowserCacheDecorator(self.browser_cache).decorate_fetcher(self.fetcher)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue