Scandir for cache troubleshooting

This commit is contained in:
Jim Miller 2022-12-17 11:27:06 -06:00
parent bf13b81837
commit 895274ad24
5 changed files with 37 additions and 20 deletions

View file

@ -108,7 +108,7 @@ class BaseBrowserCache(object):
"""
parsedUrl = urlparse(url)
domain = parsedUrl.netloc
logger.debug(domain)
# logger.debug(domain)
# discard www. -- others likely needed to distinguish host
# from domain. Something like tldextract ideally, but

View file

@ -37,6 +37,7 @@ class BaseChromiumCache(BaseBrowserCache):
# WebToEpub: akiljllkbielkidmammnifcnibaigelm appears to be a UID.
# 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/11377932/2/Guilt
# 1/0/_dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/14161667/10/That-Time-I-Was-Reincarnated-In-Brockton-Bay
def make_key(self,url):
(domain, url) = self.make_key_parts(url)
key = '1/0/_dk_https://'+domain+' https://'+domain+' '+url

View file

@ -84,6 +84,7 @@ class BlockfileCache(BaseChromiumCache):
if '/11377932/' in entry.keyToStr():
logger.debug(entry)
logger.debug("data length:%s"%len(entry.data))
@staticmethod
def is_cache_dir(cache_dir):
"""Return True only if a directory is a valid Cache for this class"""

View file

@ -45,6 +45,17 @@ class FirefoxCache2(BaseBrowserCache):
logger.debug("Using FirefoxCache2")
#self.scan_cache_keys()
def scan_cache_keys(self):
    """Troubleshooting aid: debug-log selected keys found in this cache.

    Walks every directory entry under ``<cache_dir>/entries``, reads the
    entry headers with ``_read_entry_headers`` and, when the entry's key
    contains '14093457', logs ``key->key_hash`` at debug level.  Nothing
    is stored or returned.
    """
    ## scandir is used for the directory walk; no age check is
    ## performed in this method.
    logger.debug("using scandir")
    entries_dir = os.path.join(self.cache_dir, 'entries')
    for dir_entry in os.scandir(entries_dir):
        with share_open(dir_entry.path, "rb") as entry_fh:
            headers = _read_entry_headers(entry_fh)
            if '14093457' not in headers['key']:
                continue
            logger.debug("%s->%s" % (headers['key'], headers['key_hash']))
@staticmethod
def is_cache_dir(cache_dir):
"""Return True only if a directory is a valid Cache for this class"""
@ -59,17 +70,6 @@ class FirefoxCache2(BaseBrowserCache):
return True
return False
def scan_cache_keys(self):
    """Troubleshooting aid: debug-log selected keys found in this cache.

    Walks every directory entry under ``<cache_dir>/entries``, reads the
    entry headers with ``_read_entry_headers`` and, when the entry's key
    contains '14093457', logs ``key->key_hash`` at debug level.  Nothing
    is stored or returned.
    """
    ## scandir is used for the directory walk; no age check is
    ## performed in this method.
    logger.debug("using scandir")
    entries_dir = os.path.join(self.cache_dir, 'entries')
    for dir_entry in os.scandir(entries_dir):
        with share_open(dir_entry.path, "rb") as entry_fh:
            headers = _read_entry_headers(entry_fh)
            if '14093457' not in headers['key']:
                continue
            logger.debug("%s->%s" % (headers['key'], headers['key_hash']))
def make_key(self,url):
(domain, url) = self.make_key_parts(url)
## WebToEpub appears to leave just
@ -80,7 +80,7 @@ class FirefoxCache2(BaseBrowserCache):
def make_key_path(self,url):
# Return the filesystem path of the cache entry for `url`: the key built
# by self.make_key() is UTF-8 encoded, SHA-1 hashed, rendered as
# upper-case hex, and joined under <cache_dir>/entries.
key = self.make_key(url)
hashkey = hashlib.sha1(key.encode('utf8')).hexdigest().upper()
# NOTE(review): this text comes from a diff rendering whose +/- markers
# were stripped; the next two lines look like the removed and the added
# form of the same statement -- confirm which one is live in the file.
logger.debug(hashkey)
# logger.debug(hashkey)
fullkey = os.path.join(self.cache_dir, 'entries', hashkey)
# The resulting full path is still logged at debug level.
logger.debug(fullkey)
return fullkey

View file

@ -27,7 +27,7 @@ from ..six import ensure_binary, ensure_text
from ..exceptions import BrowserCacheException
from .share_open import share_open
from .base_chromium import BaseChromiumCache
from .base_chromium import BaseChromiumCache, EPOCH_DIFFERENCE
import logging
logger = logging.getLogger(__name__)
@ -51,6 +51,27 @@ class SimpleCache(BaseChromiumCache):
"""Constructor for SimpleCache"""
super(SimpleCache,self).__init__(*args, **kargs)
logger.debug("Using SimpleCache")
self.scan_cache_keys()
1/0
def scan_cache_keys(self):
    """Troubleshooting aid: debug-log matching entries of this cache.

    Scans ``self.cache_dir`` for files whose name is 16 hex digits, an
    underscore and a decimal number.  Each such file is opened and its
    key read with ``_read_entry_file``; when the key contains
    'www.fanfiction.net/s/14161667' the meta headers are read and the
    key plus the response time (divided by 1000000 and offset by
    EPOCH_DIFFERENCE) are logged at debug level.

    Nothing is stored or returned.  Any error raised while reading an
    entry propagates to the caller, as before.
    """
    logger.debug("using scandir")
    # Compiled once instead of being looked up for every directory entry.
    entry_name_re = re.compile(r'^[0-9a-fA-F]{16}_[0-9]+$')
    # The context manager releases the scandir handle even when reading
    # an entry raises part-way through the directory.
    with os.scandir(self.cache_dir) as dir_entries:
        for entry in dir_entries:
            if not entry_name_re.match(entry.name):
                continue
            with share_open(entry.path, "rb") as entry_file:
                file_key = _read_entry_file(entry.path,entry_file)
                if 'www.fanfiction.net/s/14161667' not in file_key:
                    continue
                # Only response_time is used; the full unpack is kept so
                # a header tuple of unexpected length still fails loudly.
                (_info_size, _flags, _request_time, response_time, _header_size) = _read_meta_headers(entry_file)
                logger.debug("file_key:%s"%file_key)
                #logger.debug("response_time:%s"%response_time)
                logger.debug("Creation Time: %s"%datetime.datetime.fromtimestamp(int(response_time/1000000)-EPOCH_DIFFERENCE))
@staticmethod
def is_cache_dir(cache_dir):
@ -109,12 +130,6 @@ class SimpleCache(BaseChromiumCache):
try:
## --- need to check vs full key due to possible hash
## --- collision--can't just do url in key
## --- location
## --- age check
## --- This nonsense opens the file *4* times.
## --- also make location code common across all three--and age check?
## parts of make key?
with share_open(en_fl, "rb") as entry_file:
file_key = _read_entry_file(en_fl,entry_file)
if file_key != fullkey: