From 895274ad247016c280ad9e93c2fa6c23e8eeb327 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Sat, 17 Dec 2022 11:27:06 -0600 Subject: [PATCH] Scandir for cache troubleshooting --- fanficfare/browsercache/base_browsercache.py | 2 +- fanficfare/browsercache/base_chromium.py | 1 + .../browsercache/browsercache_blockfile.py | 1 + .../browsercache/browsercache_firefox2.py | 24 +++++++-------- .../browsercache/browsercache_simple.py | 29 ++++++++++++++----- 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/fanficfare/browsercache/base_browsercache.py b/fanficfare/browsercache/base_browsercache.py index eff8e979..cae1d028 100644 --- a/fanficfare/browsercache/base_browsercache.py +++ b/fanficfare/browsercache/base_browsercache.py @@ -108,7 +108,7 @@ class BaseBrowserCache(object): """ parsedUrl = urlparse(url) domain = parsedUrl.netloc - logger.debug(domain) + # logger.debug(domain) # discard www. -- others likely needed to distinguish host # from domain. Something like tldextract ideally, but diff --git a/fanficfare/browsercache/base_chromium.py b/fanficfare/browsercache/base_chromium.py index 1ba48ba8..94a884c2 100644 --- a/fanficfare/browsercache/base_chromium.py +++ b/fanficfare/browsercache/base_chromium.py @@ -37,6 +37,7 @@ class BaseChromiumCache(BaseBrowserCache): # WebToEpub: akiljllkbielkidmammnifcnibaigelm appears to be a UID. # 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/11377932/2/Guilt + # 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/14161667/10/That-Time-I-Was-Reincarnated-In-Brockton-Bay def make_key(self,url): (domain, url) = self.make_key_parts(url) key = '1/0/_dk_https://'+domain+' https://'+domain+' '+url diff --git a/fanficfare/browsercache/browsercache_blockfile.py b/fanficfare/browsercache/browsercache_blockfile.py index 835d29c4..14bc3906 100644 --- a/fanficfare/browsercache/browsercache_blockfile.py +++ b/fanficfare/browsercache/browsercache_blockfile.py @@ -84,6 +84,7 @@ class BlockfileCache(BaseChromiumCache): if '/11377932/' in entry.keyToStr(): logger.debug(entry) logger.debug("data length:%s"%len(entry.data)) + @staticmethod def is_cache_dir(cache_dir): """Return True only if a directory is a valid Cache for this class""" diff --git a/fanficfare/browsercache/browsercache_firefox2.py b/fanficfare/browsercache/browsercache_firefox2.py index 4b4ab748..0ca13538 100644 --- a/fanficfare/browsercache/browsercache_firefox2.py +++ b/fanficfare/browsercache/browsercache_firefox2.py @@ -45,6 +45,17 @@ class FirefoxCache2(BaseBrowserCache): logger.debug("Using FirefoxCache2") #self.scan_cache_keys() + def scan_cache_keys(self): + """Scan cache entries to save entries in this cache""" + ## scandir and checking age *before* parsing saves a ton of + ## hits and time. + logger.debug("using scandir") + for entry in os.scandir(os.path.join(self.cache_dir,'entries')): + with share_open(entry.path, "rb") as entry_file: + metadata = _read_entry_headers(entry_file) + if '14093457' in metadata['key']: + logger.debug("%s->%s"%(metadata['key'],metadata['key_hash'])) + @staticmethod def is_cache_dir(cache_dir): """Return True only if a directory is a valid Cache for this class""" @@ -59,17 +70,6 @@ class FirefoxCache2(BaseBrowserCache): return True return False - def scan_cache_keys(self): - """Scan cache entries to save entries in this cache""" - ## scandir and checking age *before* parsing saves a ton of - ## hits and time. - logger.debug("using scandir") - for entry in os.scandir(os.path.join(self.cache_dir,'entries')): - with share_open(entry.path, "rb") as entry_file: - metadata = _read_entry_headers(entry_file) - if '14093457' in metadata['key']: - logger.debug("%s->%s"%(metadata['key'],metadata['key_hash'])) - def make_key(self,url): (domain, url) = self.make_key_parts(url) ## WebToEpub appears to leave just @@ -80,7 +80,7 @@ class FirefoxCache2(BaseBrowserCache): def make_key_path(self,url): key = self.make_key(url) hashkey = hashlib.sha1(key.encode('utf8')).hexdigest().upper() - logger.debug(hashkey) + # logger.debug(hashkey) fullkey = os.path.join(self.cache_dir, 'entries', hashkey) logger.debug(fullkey) return fullkey diff --git a/fanficfare/browsercache/browsercache_simple.py b/fanficfare/browsercache/browsercache_simple.py index 3aa33b71..0733e4bf 100644 --- a/fanficfare/browsercache/browsercache_simple.py +++ b/fanficfare/browsercache/browsercache_simple.py @@ -27,7 +27,7 @@ from ..six import ensure_binary, ensure_text from ..exceptions import BrowserCacheException from .share_open import share_open -from .base_chromium import BaseChromiumCache +from .base_chromium import BaseChromiumCache, EPOCH_DIFFERENCE import logging logger = logging.getLogger(__name__) @@ -51,6 +51,27 @@ class SimpleCache(BaseChromiumCache): """Constructor for SimpleCache""" super(SimpleCache,self).__init__(*args, **kargs) logger.debug("Using SimpleCache") + self.scan_cache_keys() + 1/0 + + def scan_cache_keys(self): + """Scan cache entries to save entries in this cache""" + ## scandir and checking age *before* parsing saves a ton of + ## hits and time. + logger.debug("using scandir") + for entry in os.scandir(self.cache_dir): + if re.match(r'^[0-9a-fA-F]{16}_[0-9]+$',os.path.basename(entry.path)): + with share_open(entry.path, "rb") as entry_file: + try: + file_key = _read_entry_file(entry.path,entry_file) + if 'www.fanfiction.net/s/14161667' in file_key: + (info_size, flags, request_time, response_time, header_size) = _read_meta_headers(entry_file) + logger.debug("file_key:%s"%file_key) + #logger.debug("response_time:%s"%response_time) + logger.debug("Creation Time: %s"%datetime.datetime.fromtimestamp(int(response_time/1000000)-EPOCH_DIFFERENCE)) + except Exception as e: + raise e + pass @staticmethod def is_cache_dir(cache_dir): @@ -109,12 +130,6 @@ class SimpleCache(BaseChromiumCache): try: ## --- need to check vs full key due to possible hash ## --- collision--can't just do url in key - ## --- location - ## --- age check - ## --- This nonsense opens the file *4* times. - - ## --- also make location code common across all three--and age check? - ## parts of make key? with share_open(en_fl, "rb") as entry_file: file_key = _read_entry_file(en_fl,entry_file) if file_key != fullkey: