mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Change blockfilecache to save uint32 addrs instead of original cache key. Hashing cache key proved unreliable in some cases.
This commit is contained in:
parent
162dcf5fbd
commit
e2a3b48481
3 changed files with 41 additions and 38 deletions
|
|
@ -162,10 +162,7 @@ class BaseBrowserCache(object):
|
|||
return
|
||||
if 'fanfiction.net/' in cache_url or 'ficbook.net/' in cache_url:
|
||||
minurl = self.minimal_url(self.cache_key_to_url(cache_url))
|
||||
# if 'ficbook.net/' in cache_url:
|
||||
# logger.debug("add:\n%s\n%s\n%s\n%s"%(cache_url,minurl,key,self.make_datetime(cached_time)))
|
||||
# if '13425439/4/' in cache_url:
|
||||
# logger.debug("add:\nurl:%s\nminurl:%s\nkey:%s\ncached_time:%s\ndatetime:%s\nnow:%s"%(cache_url,minurl,key,cached_time,self.make_datetime(cached_time),time.gmtime()))
|
||||
# logger.debug("%s -> %s"%(minurl,key))
|
||||
(existing_key,existing_time) = self.key_mapping.get(minurl,(None,None))
|
||||
if( existing_key is None
|
||||
or existing_time is None
|
||||
|
|
|
|||
|
|
@ -59,7 +59,12 @@ class BlockfileCache(BaseBrowserCache):
|
|||
return True
|
||||
|
||||
def map_cache_keys(self):
|
||||
"""Scan index file and cache entries to save entries in this cache"""
|
||||
"""
|
||||
Scan index file and cache entries to save entries in this cache.
|
||||
|
||||
Saving uint32 address as key--hashing to find key later proved
|
||||
unreliable.
|
||||
"""
|
||||
with share_open(os.path.join(self.cache_dir, "index"), 'rb') as index:
|
||||
# Skipping Header
|
||||
index.seek(92*4)
|
||||
|
|
@ -67,30 +72,43 @@ class BlockfileCache(BaseBrowserCache):
|
|||
for key in range(self.cacheBlock.tableSize):
|
||||
raw = struct.unpack('I', index.read(4))[0]
|
||||
if raw != 0:
|
||||
entry = CacheEntry(CacheAddress(raw, path=self.cache_dir))
|
||||
## 0 == unused hash index slot. I think.
|
||||
cacheaddr = CacheAddress(raw, path=self.cache_dir)
|
||||
# logger.debug("cacheaddr? %s"%cacheaddr)
|
||||
entry = CacheEntry(cacheaddr)
|
||||
# Checking if there is a next item in the bucket because
|
||||
# such entries are not stored in the Index File so they will
|
||||
# be ignored during iterative lookup in the hash table
|
||||
while entry.next != 0:
|
||||
# logger.debug("spinning on entry linked list?")
|
||||
self.add_key_mapping_entry(entry)
|
||||
entry = CacheEntry(CacheAddress(entry.next, path=self.cache_dir))
|
||||
cacheaddr = CacheAddress(entry.next, path=self.cache_dir)
|
||||
# logger.debug("cacheaddr? %s"%cacheaddr)
|
||||
entry = CacheEntry(cacheaddr)
|
||||
self.add_key_mapping_entry(entry)
|
||||
|
||||
def add_key_mapping_entry(self,entry):
|
||||
# if '/8096183/69/' in entry.keyToStr():
|
||||
# logger.debug(entry)
|
||||
# logger.debug("data length:%s"%len(entry.data))
|
||||
self.add_key_mapping(entry.keyToStr(),
|
||||
entry.keyToStr(),
|
||||
entry.address.addr,
|
||||
entry.creationTime)
|
||||
|
||||
def get_data_key(self,url):
|
||||
""" Return decoded data for specified key (a URL string) or None """
|
||||
entry = self.get_cache_entry(url)
|
||||
def get_data_key(self,addr):
|
||||
""" Return decoded data for specified key (a binary addr) or None """
|
||||
entry = self.get_cache_entry(addr)
|
||||
# logger.debug("get_data_key(%s)->%s"%(addr,entry))
|
||||
if entry:
|
||||
# entry = self.hash_cache[url]
|
||||
# logger.debug("has entry")
|
||||
for i in range(len(entry.data)):
|
||||
# logger.debug("data loop i:%s"%i)
|
||||
# logger.debug("entry.data[i].type:%s"%entry.data[i].type)
|
||||
if entry.data[i].type == CacheData.UNKNOWN:
|
||||
# Extracting data into a file
|
||||
data = entry.data[i].data()
|
||||
|
||||
# logger.debug("type = UNKNOWN, data len:%s"%len(data))
|
||||
# logger.debug("entry.httpHeader:%s"%entry.httpHeader)
|
||||
if entry.httpHeader != None and \
|
||||
b'content-encoding' in entry.httpHeader.headers:
|
||||
encoding = entry.httpHeader.headers.get(b'content-encoding','')
|
||||
|
|
@ -98,27 +116,9 @@ class BlockfileCache(BaseBrowserCache):
|
|||
return data
|
||||
return None
|
||||
|
||||
def get_cache_entry(self,url):
|
||||
url = ensure_binary(url,'utf8')
|
||||
# Compute the key and seeking to it
|
||||
# print("url:%s"%url)
|
||||
hash = SuperFastHash.superFastHash(url)
|
||||
# print("superFastHash:%s"%hash)
|
||||
key = hash & (self.cacheBlock.tableSize - 1)
|
||||
with share_open(os.path.join(self.cache_dir, "index"), 'rb') as index:
|
||||
index.seek(92*4 + key*4)
|
||||
|
||||
addr = struct.unpack('I', index.read(4))[0]
|
||||
# Checking if the address is initialized (i.e. used)
|
||||
if addr & 0x80000000 == 0:
|
||||
pass
|
||||
# print("%s is not in the cache" % url, file=sys.stderr)
|
||||
|
||||
# Follow the chained list in the bucket
|
||||
else:
|
||||
entry = CacheEntry(CacheAddress(addr, path=self.cache_dir))
|
||||
while entry.hash != hash and entry.next != 0:
|
||||
entry = CacheEntry(CacheAddress(entry.next, path=self.cache_dir))
|
||||
if entry.hash == hash:
|
||||
return entry
|
||||
|
||||
def get_cache_entry(self,addr):
|
||||
cacheaddr = CacheAddress(addr, path=self.cache_dir)
|
||||
# logger.debug("cacheaddr? %s"%cacheaddr)
|
||||
entry = CacheEntry(cacheaddr)
|
||||
# logger.debug("entry? %s"%entry)
|
||||
return entry
|
||||
|
|
|
|||
|
|
@ -44,6 +44,9 @@ from six.moves import range
|
|||
|
||||
from ..share_open import share_open
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CacheEntry():
|
||||
"""
|
||||
See /net/disk_cache/disk_format.h for details.
|
||||
|
|
@ -57,6 +60,7 @@ class CacheEntry():
|
|||
Parse a Chrome Cache Entry at the given address
|
||||
"""
|
||||
self.httpHeader = None
|
||||
self.address = address
|
||||
with share_open(os.path.join(address.path,address.fileSelector), 'rb') as block:
|
||||
|
||||
# Going to the right entry
|
||||
|
|
@ -89,7 +93,9 @@ class CacheEntry():
|
|||
addr = cacheAddress.CacheAddress(addr, address.path)
|
||||
self.data.append(cacheData.CacheData(addr, dataSize[index],
|
||||
True))
|
||||
except cacheAddress.CacheAddressError:
|
||||
except cacheAddress.CacheAddressError as e:
|
||||
# this happens tons? unused slots probably?
|
||||
# logger.debug("CacheEntry CacheAddressError:%s %s"%(address,e))
|
||||
pass
|
||||
|
||||
# Find the HTTP header if there is one
|
||||
|
|
|
|||
Loading…
Reference in a new issue