mirror of https://github.com/JimmXinu/FanFicFare.git (synced 2026-05-08 21:11:59 +02:00)
Change BrowserCache to on-demand, not scan
This commit is contained in: parent 1301fc3dc4, commit c3631f6ac7
14 changed files with 373 additions and 437 deletions
@@ -1276,12 +1276,6 @@ class FanFicFarePlugin(InterfaceAction):
        ## save and share caches and cookiejar between all downloads.
        configuration = adapter.get_configuration()
        ## browser cache before basic to avoid incidentally reloading
        if configuration.getConfig('use_browser_cache'):
            if 'browser_cache' in options:
                configuration.set_browser_cache(options['browser_cache'])
            else:
                options['browser_cache'] = configuration.get_browser_cache()
        if 'basic_cache' in options:
            configuration.set_basic_cache(options['basic_cache'])
        else:
@@ -1714,20 +1708,6 @@ class FanFicFarePlugin(InterfaceAction):
                             msgl)
            return

        ## save and pass cookiejar and caches to BG downloads.
        if 'browser_cache' in options:
            if not options['bgmeta']:
                ## With load-on-demand, the cache exists, but hasn't
                ## been loaded.  Once it is (file)loaded in jobs, it's
                ## marked as having been 'loaded'.  So don't send when
                ## bgmeta
                browser_cachefile = PersistentTemporaryFile(suffix='.browser_cache',
                                                            dir=options['tdir'])
                options['browser_cache'].save_cache(browser_cachefile.name)
                options['browser_cachefile'] = browser_cachefile.name
            ## can't be pickled by Calibre to send to BG proc
            del options['browser_cache']

        basic_cachefile = PersistentTemporaryFile(suffix='.basic_cache',
                                                  dir=options['tdir'])
        options['basic_cache'].save_cache(basic_cachefile.name)
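For context, the hunk above pickles the browser cache's key mapping into a temporary file so the background worker can re-load it: the live cache object holds implementation state that Calibre can't pickle across the FG/BG process boundary. A minimal sketch of the same handoff pattern (function names here are illustrative, not FanFicFare's API):

    import pickle
    import tempfile

    def save_mapping_for_worker(key_mapping, tdir):
        # persist only the picklable url->key mapping; the cache object
        # itself holds open files/impl state and can't cross processes
        with tempfile.NamedTemporaryFile(suffix='.browser_cache',
                                         dir=tdir, delete=False) as f:
            pickle.dump(key_mapping, f, protocol=2)
            return f.name

    def load_mapping_in_worker(filename):
        # worker side: re-hydrate the mapping from the temp file
        with open(filename, 'rb') as f:
            return pickle.load(f)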
@@ -236,13 +236,6 @@ def do_download_for_worker(book,options,merge,notification=lambda x,y:x):
        ## each site download job starts with a new copy of the
        ## cookiejar and basic_cache from the FG process.  They
        ## are not shared between different sites' BG downloads
        if configuration.getConfig('use_browser_cache'):
            if 'browser_cache' in options:
                configuration.set_browser_cache(options['browser_cache'])
            else:
                options['browser_cache'] = configuration.get_browser_cache()
                if 'browser_cachefile' in options:
                    options['browser_cache'].load_cache(options['browser_cachefile'])
        if 'basic_cache' in options:
            configuration.set_basic_cache(options['basic_cache'])
        else:
@@ -16,11 +16,12 @@
#

import os
from .basebrowsercache import BrowserCacheException, BaseBrowserCache
from ..exceptions import BrowserCacheException
from .base_browsercache import BaseBrowserCache
## SimpleCache and BlockfileCache are both flavors of cache used by Chrome.
from .simplecache import SimpleCache
from .blockfilecache import BlockfileCache
from .firefoxcache2 import FirefoxCache2
from .browsercache_simple import SimpleCache
from .browsercache_blockfile import BlockfileCache
from .browsercache_firefox2 import FirefoxCache2

import logging
logger = logging.getLogger(__name__)
@@ -30,11 +31,13 @@ class BrowserCache(object):
    Class to read web browser cache
    This wrapper class contains the actual impl object.
    """
    def __init__(self, cache_dir, age_limit=-1):
    def __init__(self, cache_dir, age_limit=-1, open_page_in_browser=False):
        """Constructor for BrowserCache"""
        # import of child classes have to be inside the def to avoid circular import error
        for browser_cache_class in [SimpleCache, BlockfileCache, FirefoxCache2]:
            self.browser_cache_impl = browser_cache_class.new_browser_cache(cache_dir,age_limit=age_limit)
            self.browser_cache_impl = browser_cache_class.new_browser_cache(cache_dir,
                                                                            age_limit=age_limit,
                                                                            open_page_in_browser=open_page_in_browser)
            if self.browser_cache_impl is not None:
                break
        if self.browser_cache_impl is None:
@@ -45,9 +48,3 @@ class BrowserCache(object):
        # logger.debug("get_data:%s"%url)
        d = self.browser_cache_impl.get_data(url)
        return d

    def load_cache(self,filename=None):
        self.browser_cache_impl.load_cache(filename)

    def save_cache(self,filename=None):
        self.browser_cache_impl.save_cache(filename)
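Usage of the wrapper, roughly: each impl class's new_browser_cache() returns None unless its is_cache_dir() recognizes the directory layout, so the first match in the list wins. A hedged sketch (the import path and cache directory are assumptions, not from the diff):

    from fanficfare.browsercache import BrowserCache  # import path assumed

    # A Firefox profile cache2 dir; SimpleCache/BlockfileCache would match
    # Chrome cache dirs instead.  age_limit is in hours.
    cache = BrowserCache('~/.mozilla/firefox/xyz.default/cache2', age_limit=4)
    data = cache.get_data('https://www.fanfiction.net/s/13278343/1/')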
@@ -1,10 +1,25 @@
import sys
import os
import time
import traceback
# -*- coding: utf-8 -*-

# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import time, datetime
import gzip
import zlib
import webbrowser
try:
    # py3 only, calls C libraries. CLI
    import brotli
@@ -13,208 +28,124 @@ except ImportError:
    # brotlidecpy, which is slower, but pure python
    from calibre_plugins.fanficfare_plugin import brotlidecpy as brotli

import pickle
if sys.version_info < (2, 7):
    sys.exit('This program requires Python 2.7 or newer.')
elif sys.version_info < (3, 0):
    reload(sys)  # Reload restores 'hidden' setdefaultencoding method
    sys.setdefaultencoding("utf-8")
    def pickle_load(f):
        return pickle.load(f)
else: # > 3.0
    def pickle_load(f):
        return pickle.load(f,encoding="bytes")

import logging
logger = logging.getLogger(__name__)

from ..six.moves.urllib.parse import urlparse, urlunparse
from ..six import ensure_text
# import cProfile
# def do_cprofile(func):
#     def profiled_func(*args, **kwargs):
#         profile = cProfile.Profile()
#         try:
#             profile.enable()
#             result = func(*args, **kwargs)
#             profile.disable()
#             return result
#         finally:
#             profile.print_stats(sort='time')
#     return profiled_func

def do_cprofile(func):
    def profiled_func(*args, **kwargs):
        t=0
        try:
            t = time.time()
            result = func(*args, **kwargs)
            t = time.time() - t
            return result
        finally:
            logger.debug("do_cprofile time:%s"%t)
    return profiled_func
class BrowserCacheException(Exception):
    pass

## difference in seconds between Jan 1 1601 and Jan 1 1970.  Chrome
## caches (so far) have kept time stamps as microseconds since
## 1-1-1601 a Windows/Cobol thing.
EPOCH_DIFFERENCE = 11644473600
import datetime
from ..exceptions import BrowserCacheException

class BaseBrowserCache(object):
    """Base class to read various formats of web browser cache file"""

    def __init__(self, cache_dir, age_limit=-1):
    def __init__(self, cache_dir, age_limit=-1,open_page_in_browser=False):
        """Constructor for BaseBrowserCache"""
        ## only ever
        if cache_dir is None:
            raise BrowserCacheException("BrowserCache must be initialized with a valid browser cache directory path")
        self.cache_dir = os.path.realpath(os.path.expanduser(cache_dir))
        if not os.path.isdir(self.cache_dir):
            raise BrowserCacheException("BrowserCache cache_dir does not exist: '%s (%s)'" %
                                        (cache_dir, self.cache_dir))
        self.age_comp_time = 0
        if age_limit is None or age_limit == '':
            self.age_limit = -1
        ## only ever called by class method new_browser_cache()
        self.cache_dir = cache_dir
        if age_limit is None or age_limit == '' or float(age_limit) < 0.0:
            self.age_limit = None
        else:
            self.age_limit = float(age_limit)
            self.set_age_comp_time()
        # switched from namedtuple or class to primitives because it's
        # dirt simple and I want to pickle it.
        # map of urls -> (cache_key, cache_time)
        self.key_mapping = {}

        self.mapping_loaded = False
            # set in hours, recorded in seconds
            self.age_limit = float(age_limit) * 3600
        self.open_page_in_browser = open_page_in_browser
    @classmethod
    def new_browser_cache(cls, cache_dir, age_limit=-1):
    def new_browser_cache(cls, cache_dir, age_limit=-1, open_page_in_browser=False):
        """Return new instance of this BrowserCache class, or None if supplied directory not the correct cache type"""
        cache_dir = os.path.realpath(os.path.expanduser(cache_dir))
        if cls.is_cache_dir(cache_dir):
            try:
                return cls(cache_dir,age_limit=age_limit)
                return cls(cache_dir,
                           age_limit=age_limit,
                           open_page_in_browser=open_page_in_browser)
            except BrowserCacheException:
                return None
        return None
    # Chromium uses 1601 epoch for... reasons?
    def set_age_comp_time(self):
        if self.age_limit > 0.0:
            ## now - age_limit as microseconds since Jan 1, 1601
            ## for direct comparison with cache values.
            self.age_comp_time = int(time.time() - (self.age_limit*3600) + EPOCH_DIFFERENCE)*1000000
            ## By doing this once, we save a lot of comparisons
            ## and extra saved data at the risk of using pages
            ## that would have expired during long download
            ## sessions.
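To make the age cutoff concrete: set_age_comp_time() converts "now minus age_limit" into Chrome's timescale once, so every cached entry is compared with a single integer test instead of converting each entry's timestamp. A small illustration of the conversion (helper names are mine, not FanFicFare's):

    import time

    EPOCH_DIFFERENCE = 11644473600  # seconds, 1601-01-01 to 1970-01-01

    def unix_to_chrome_us(unix_seconds):
        # Chrome timestamps are microseconds since 1601-01-01
        return int(unix_seconds + EPOCH_DIFFERENCE) * 1000000

    def chrome_us_to_unix(chrome_us):
        return int(chrome_us / 1000000) - EPOCH_DIFFERENCE

    cutoff = unix_to_chrome_us(time.time() - 4*3600)  # 4 hour age limit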
    ## just here for ease of applying @do_cprofile
    @do_cprofile
    def do_map_cache_keys(self):
        logger.debug("do_map_cache_keys()")
        self.map_cache_keys()
        self.mapping_loaded = True
        logger.debug("Cached %s entries"%len(self.key_mapping))

    def map_cache_keys(self):
        """Scan index file and cache entries to save entries in this cache"""
        raise NotImplementedError()
    def cache_key_to_url(self,key):
        '''
        Modern browsers partition cache by domain to avoid leaking information.
        '''
        key=ensure_text(key)
        # chromium examples seen so far:
        # _dk_https://fanfiction.net https://fanfiction.net https://www.fanfiction.net/s/13278343/1/The-Timeless-Vault-HP-travel
        # _dk_chrome-extension://akiljllkbielkidmammnifcnibaigelm chrome-extension://akiljllkbielkidmammnifcnibaigelm https://www.fanfiction.net/s/13278343/3/The-Timeless-Vault-HP-travel
        # 1610476847265546/_dk_https://fanfiction.net https://fanfiction.net https://www.fanfiction.net/s/13791057/1/A-Yule-Ball-Changes?__cf_chl_jschl_tk__=c80be......
        # firefox is different and overrides this
        return key.split(' ')[-1]

    ## should priority be given to keeping any particular domain cache?
    def minimal_url(self,url):
        '''
        ONLY tested with fanfiction.net & ficbook.net so far.

        Will need to split into separate functions for add and
        get--FireFox domain keys different.
        '''
        url=ensure_text(url)
        url = url.split('?')[0]
        if 'www.fanfiction.net/s/' in url or 'www.fictionpress.com/s/' in url:
            # remove title too.
            url = '/'.join(url.split('/')[:6])+'/'
        if 'ficbook.net/readfic/' in url:
            # remove #content_part
            url = url.split('#')[0]
        return url
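Worked example of minimal_url() on a fanfiction.net chapter URL: the query string and story-title slug are stripped so every variant of a chapter URL collapses to the same mapping key.

    url = 'https://www.fanfiction.net/s/13278343/1/The-Timeless-Vault-HP-travel?ref=x'
    url = url.split('?')[0]
    # -> 'https://www.fanfiction.net/s/13278343/1/The-Timeless-Vault-HP-travel'
    url = '/'.join(url.split('/')[:6])+'/'
    # -> 'https://www.fanfiction.net/s/13278343/1/'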
    def add_key_mapping(self,cache_url,key,cached_time=None):
        '''
        ONLY used with fanfiction.net & ficbook.net so far.
        '''
        if self.age_comp_time > cached_time:
            return
        if 'fanfiction.net/' in cache_url or 'fictionpress.com/' in cache_url or 'ficbook.net/' in cache_url:
            minurl = self.minimal_url(self.cache_key_to_url(cache_url))
            # logger.debug("%s -> %s"%(minurl,key))
            (existing_key,existing_time) = self.key_mapping.get(minurl,(None,None))
            if( existing_key is None
                or existing_time is None
                or existing_time < cached_time ):
                # logger.debug("replacing existing:%s < %s"%(existing_key and self.make_datetime(existing_time),self.make_datetime(cached_time)))
                self.key_mapping[minurl]=(key,cached_time)

    def get_key_mapping(self,url):
        # logger.debug("get_key_mapping:%s"%url)
        ## on demand map loading now.
        ## browser_cache is shared between configurations
        ## XXX Needs some locking if multi-threading implemented.
        if not self.mapping_loaded:
            try:
                self.do_map_cache_keys()
            except Exception as e:
                logger.debug(traceback.format_exc())
                raise BrowserCacheException("Browser Cache Failed to Load with error '%s'"%e)
        return self.key_mapping.get(self.minimal_url(url),(None,None))[0]
    def get_data(self, url):
        # logger.debug("\n\n===================================================\n\nurl:%s\n%s"%(url,self.minimal_url(url)))
        key = self.get_key_mapping(self.minimal_url(url))
        # logger.debug("key:%s"%key)
        if key:
            return self.get_data_key(key)
        else:
            return None

    def get_data_key(self,key):
        """ Return decoded data for specified key (a URL string) or None """
        return None

    @staticmethod
    def is_cache_dir(cache_dir):
        return os.path.isdir(cache_dir)  # This method only makes sense when overridden
        """Check given dir is a valid cache."""
        raise NotImplementedError()

    def make_datetime(self,i):
        return datetime.datetime(1601, 1, 1) + datetime.timedelta(microseconds=i)
    def get_data(self, url):
        """Return cached value for URL if found."""

    def load_cache(self,filename=None):
        logger.debug("load browser cache mappings(%s)"%(filename or self.filename))
        with open(filename or self.filename,'rb') as jin:
            self.key_mapping = pickle_load(jin)
            # logger.debug(self.basic_cache.keys())
            self.mapping_loaded = True
        ## XXX - need to add open_page_in_browser config keyword
        ## XXX - should number/sleep times be configurable?
        ##       derive from slow_down_sleep_time?
        rettuple = self.get_data_impl(url)
        sleeptries = [ 3, 10 ]
        while self.open_page_in_browser and rettuple is None and sleeptries:
            logger.debug("\n\nopen page in browser here %s\n"%url)
            webbrowser.open(url)
            time.sleep(sleeptries.pop(0))
            rettuple = self.get_data_impl(url)

    def save_cache(self,filename=None):
        with open(filename or self.filename,'wb') as jout:
            pickle.dump(self.key_mapping,jout,protocol=2)
            logger.debug("save browser cache mappings(%s)"%(filename or self.filename))
        if rettuple is None:
            return None

        (location,
         age,
         encoding,
         rawdata) = rettuple

        # age check
        logger.debug("age:%s"%datetime.datetime.fromtimestamp(age))
        logger.debug("now:%s"%datetime.datetime.fromtimestamp(time.time()))
        if not (self.age_limit is None or age > time.time()-self.age_limit):
            return None

        # recurse on location redirects
        if location:
            logger.debug("Do Redirect(%s)"%location)
            return self.get_data(self.make_redirect_url(location,url))

        # decompress
        return self.decompress(encoding,rawdata)
    def get_data_impl(self, url):
        """
        returns location, entry age, content-encoding and
        raw(compressed) data
        """
        raise NotImplementedError()

    def make_key(self, url):
        raise NotImplementedError()

    def make_key_parts(self, url):
        """
        Modern browser all also key their cache with the domain to
        reduce info leaking, but differently.  However, some parts
        are common
        """
        parsedUrl = urlparse(url)
        domain = parsedUrl.netloc
        logger.debug(domain)

        # discard www. -- others likely needed to distinguish host
        # from domain.  Something like tldextract ideally, but
        # dependencies
        domain = domain.replace('www.','')

        # discard any #anchor part
        url = url.split('#')[0]

        return (domain, url)  # URL still contains domain, params, etc

    def make_redirect_url(self,location,origurl):
        """
        Most redirects are relative, but not all.
        """
        pLoc = urlparse(location)
        pUrl = urlparse(origurl)
        # logger.debug(pLoc)
        # logger.debug(pUrl)
        return urlunparse((pLoc.scheme or pUrl.scheme,
                           pLoc.netloc or pUrl.netloc,
                           location.strip(),
                           '','',''))

    def decompress(self, encoding, data):
        encoding = ensure_text(encoding)
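The decompress() body is cut off above; judging from the gzip/zlib/brotli imports at the top of the file, it presumably dispatches on the Content-Encoding value. A minimal sketch of that dispatch (an assumption, not the exact FanFicFare code):

    import gzip
    import zlib

    def decompress(encoding, data):
        encoding = encoding.strip().lower()
        if encoding == 'gzip':
            return gzip.decompress(data)
        if encoding == 'deflate':
            # HTTP 'deflate' is sometimes raw deflate; a fallback with
            # zlib.decompress(data, -15) may be needed in practice
            return zlib.decompress(data)
        if encoding == 'br':
            import brotli  # or the pure-python brotlidecpy fallback
            return brotli.decompress(data)
        return data  # identity / unknown encoding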
fanficfare/browsercache/base_chromium.py (new file, 45 lines)
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-

# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import

import logging
logger = logging.getLogger(__name__)

from ..exceptions import BrowserCacheException

from . import BaseBrowserCache

## difference in seconds between Jan 1 1601 and Jan 1 1970.  Chrome
## caches (so far) have kept time stamps as microseconds since
## 1-1-1601 a Windows/Cobol thing.
EPOCH_DIFFERENCE = 11644473600

class BaseChromiumCache(BaseBrowserCache):
    def __init__(self, *args, **kargs):
        """Constructor for BaseChromiumCache"""
        super(BaseChromiumCache,self).__init__(*args, **kargs)
        # logger.debug("Using BaseChromiumCache")

    def make_key(self,url):
        (domain, url) = self.make_key_parts(url)
        key = '1/0/_dk_https://'+domain+' https://'+domain+' '+url
        logger.debug(key)
        return key

    def make_age(self,response_time):
        return int(response_time/1000000)-EPOCH_DIFFERENCE
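For a concrete sense of make_key(): make_key_parts() strips the www. prefix from the domain but leaves the URL intact, so a fanfiction.net chapter URL yields a partitioned key matching the '_dk_' examples listed in cache_key_to_url():

    # make_key('https://www.fanfiction.net/s/13278343/1/') produces:
    # '1/0/_dk_https://fanfiction.net https://fanfiction.net https://www.fanfiction.net/s/13278343/1/'
    # and make_age() converts Chrome's 1601-epoch microseconds to unix time:
    # make_age(13286670000000000) == 13286670000 - 11644473600 == 1642196400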
@@ -1,8 +1,24 @@
# -*- coding: utf-8 -*-

# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
from __future__ import print_function
import os
import struct
import sys
import time, datetime

# note share_open (on windows CLI) is implicitly readonly.
from .share_open import share_open
@@ -11,22 +27,19 @@ from .chromagnon.cacheAddress import CacheAddress
from .chromagnon.cacheBlock import CacheBlock
from .chromagnon.cacheData import CacheData
from .chromagnon.cacheEntry import CacheEntry
from .chromagnon.cacheParse import parse
from ..six.moves import range
from ..six import ensure_binary, ensure_text
from ..six import ensure_text

from . import BrowserCacheException, BaseBrowserCache
from .base_chromium import BaseChromiumCache

import logging
logger = logging.getLogger(__name__)

class BlockfileCacheException(BrowserCacheException):
    pass

INDEX_MAGIC_NUMBER = 0xC103CAC3
BLOCK_MAGIC_NUMBER = 0xC104CAC3


class BlockfileCache(BaseBrowserCache):
class BlockfileCache(BaseChromiumCache):
    """Class to access data stream in Chrome Disk Blockfile Cache format cache files"""

    def __init__(self, *args, **kargs):
@@ -58,64 +71,40 @@ class BlockfileCache(BaseBrowserCache):
            return False
        return True

    def map_cache_keys(self):
        """
        Scan index file and cache entries to save entries in this cache.

        Saving uint32 address as key--hashing to find key later proved
        unreliable.
        """
        with share_open(os.path.join(self.cache_dir, "index"), 'rb') as index:
            # Skipping Header
            index.seek(92*4)
            self.cache_keys = set()
            for key in range(self.cacheBlock.tableSize):
                raw = struct.unpack('I', index.read(4))[0]
                if raw != 0:
                    ## 0 == unused hash index slot. I think.
                    cacheaddr = CacheAddress(raw, path=self.cache_dir)
                    # logger.debug("cacheaddr? %s"%cacheaddr)
                    entry = CacheEntry(cacheaddr)
                    # Checking if there is a next item in the bucket because
                    # such entries are not stored in the Index File so they will
                    # be ignored during iterative lookup in the hash table
                    while entry.next != 0:
                        # logger.debug("spinning on entry linked list?")
                        self.add_key_mapping_entry(entry)
                        cacheaddr = CacheAddress(entry.next, path=self.cache_dir)
                        # logger.debug("cacheaddr? %s"%cacheaddr)
                        entry = CacheEntry(cacheaddr)
                    self.add_key_mapping_entry(entry)

    def add_key_mapping_entry(self,entry):
        self.add_key_mapping(entry.keyToStr(),
                             entry.address.addr,
                             entry.creationTime)

    def get_data_key(self,addr):
        """ Return decoded data for specified key (a binary addr) or None """
        entry = self.get_cache_entry(addr)
        # logger.debug("get_data_key(%s)->%s"%(addr,entry))
        if entry:
            # logger.debug("has entry")
            for i in range(len(entry.data)):
                # logger.debug("data loop i:%s"%i)
                # logger.debug("entry.data[i].type:%s"%entry.data[i].type)
                if entry.data[i].type == CacheData.UNKNOWN:
                    # Extracting data into a file
                    data = entry.data[i].data()
                    # logger.debug("type = UNKNOWN, data len:%s"%len(data))
                    # logger.debug("entry.httpHeader:%s"%entry.httpHeader)
                    if entry.httpHeader != None and \
                            b'content-encoding' in entry.httpHeader.headers:
                        encoding = entry.httpHeader.headers.get(b'content-encoding','')
                        data = self.decompress(encoding,data)
                    return data
    def get_data_impl(self, url):
        key = self.make_key(url)
        entry = None
        entrys = parse(self.cache_dir,[key.encode('utf8')])
        logger.debug(entrys)
        for entry in entrys:
            entry_name = entry.keyToStr()
            logger.debug("Name: %s"%entry_name)
            logger.debug("Hash: 0x%08x"%entry.hash)
            logger.debug("Usage Counter: %d"%entry.usageCounter)
            logger.debug("Reuse Counter: %d"%entry.reuseCounter)
            logger.debug("Creation Time: %s"%entry.creationTime)
            # logger.debug("Creation Time: %s"%datetime.datetime.fromtimestamp(int(entry.creationTime/1000000)-EPOCH_DIFFERENCE))
            logger.debug("Key: %s"%entry.keyToStr())
            logger.debug(entry.httpHeader.headers.get(b'location','(no location)'))
            if entry_name == key:
                location = ensure_text(entry.httpHeader.headers.get(b'location',''))
                ensure_text(entry.httpHeader.headers.get(b'content-encoding',''))
                rawdata = None if location else self.get_raw_data(entry)
                return (
                    location,
                    self.make_age(entry.creationTime),
                    ensure_text(entry.httpHeader.headers.get(b'content-encoding','')),
                    rawdata)
        return None

    def get_cache_entry(self,addr):
        cacheaddr = CacheAddress(addr, path=self.cache_dir)
        # logger.debug("cacheaddr? %s"%cacheaddr)
        entry = CacheEntry(cacheaddr)
        # logger.debug("entry? %s"%entry)
        return entry
    def get_raw_data(self,entry):
        for i in range(len(entry.data)):
            # logger.debug("data loop i:%s"%i)
            # logger.debug("entry.data[i].type:%s"%entry.data[i].type)
            if entry.data[i].type == CacheData.UNKNOWN:
                # Extracting data into a file
                data = entry.data[i].data()
                # logger.debug("type = UNKNOWN, data len:%s"%len(data))
                # logger.debug("entry.httpHeader:%s"%entry.httpHeader)
                return data
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2021 FanFicFare team
# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -26,28 +26,24 @@ import hashlib
import glob
import datetime
import time
import traceback

from . import BaseBrowserCache, BrowserCacheException
from ..six import ensure_binary, ensure_text

from . import BaseBrowserCache
from ..six import ensure_text
from ..six.moves.urllib.parse import urlparse
from ..exceptions import BrowserCacheException
from .share_open import share_open

import logging
logger = logging.getLogger(__name__)


class FirefoxCache2Exception(BrowserCacheException):
    pass

class FirefoxCache2(BaseBrowserCache):
    """Class to access data stream in Firefox Cache2 format cache files"""

    def __init__(self, *args, **kargs):
        """Constructor for FirefoxCache2"""
        BaseBrowserCache.__init__(self, *args, **kargs)
        super(FirefoxCache2,self).__init__(*args, **kargs)
        logger.debug("Using FirefoxCache2")
        # self.map_cache_keys()

    @staticmethod
    def is_cache_dir(cache_dir):
@@ -55,80 +51,53 @@ class FirefoxCache2(BaseBrowserCache):
        # logger.debug("\n\n1Starting cache check\n\n")
        if not os.path.isdir(cache_dir):
            return False
        try:
            ## check at least one entry file exists.
            for en_fl in glob.iglob(os.path.join(cache_dir, 'entries', '????????????????????????????????????????')):
                # logger.debug(en_fl)
                k = _validate_entry_file(en_fl)
                if k is not None:
                    return True
        except FirefoxCache2Exception:
            raise
        return False
        ## check at least one entry file exists.
        for en_fl in glob.iglob(os.path.join(cache_dir, 'entries', '????????????????????????????????????????')):
            # logger.debug(en_fl)
            k = _validate_entry_file(en_fl)
            if k is not None:
                return True
        return False

    # Firefox doesn't use 1601 epoch like Chrome does.
    def set_age_comp_time(self):
        if self.age_limit > 0.0 :
            self.age_comp_time = time.time() - (self.age_limit*3600)
    # def map_cache_keys(self):
    #     """Scan cache entries to save entries in this cache"""
    #     ## scandir and checking age *before* parsing saves a ton of
    #     ## hits and time.
    #     logger.debug("using scandir")
    #     for entry in os.scandir(os.path.join(self.cache_dir,'entries')):
    #         with share_open(entry.path, "rb") as entry_file:
    #             metadata = _read_entry_headers(entry_file)
    #             if 'squidge' in metadata['key']:
    #                 logger.debug("%s->%s"%(metadata['key'],metadata['key_hash']))

    def map_cache_keys(self):
        """Scan cache entries to save entries in this cache"""
        ## scandir and checking age *before* parsing saves a ton of
        ## hits and time.
        self.count=0
        if hasattr(os, 'scandir'):
            logger.debug("using scandir")
            for entry in os.scandir(os.path.join(self.cache_dir,'entries')):
                self.do_cache_key_entry(entry.path,entry.stat())
        else:
            logger.debug("using listdir")
            for en_fl in os.listdir(os.path.join(self.cache_dir,'entries')):
                en_path = os.path.join(self.cache_dir,'entries',en_fl)
                self.do_cache_key_entry(en_path,os.stat(en_path))
        logger.debug("Read %s entries"%self.count)
    def make_key(self,url):
        (domain, url) = self.make_key_parts(url)
        key = 'O^partitionKey=%28https%2C'+domain+'%29,:'+url
        return key

    def do_cache_key_entry(self,path,stats):
        if stats.st_mtime > self.age_comp_time:
            try:
                (cache_url,created) = _get_entry_file_created(path)
                # logger.debug("cache_url:%s"%cache_url)
                if cache_url:
                    self.add_key_mapping(cache_url,path,created)
                    self.count+=1
            except Exception as e:
                logger.warning("Cache file %s failed to load, skipping."%path)
                logger.debug(traceback.format_exc())
            # logger.debug("   file time: %s"%datetime.datetime.fromtimestamp(stats.st_mtime))
            # logger.debug("created time: %s"%datetime.datetime.fromtimestamp(created))
            # break

    def cache_key_to_url(self,key):
        '''
        Modern browsers partition cache by domain to avoid leaking information.
        '''
        key=ensure_text(key)
        if '14161667' in key:
            logger.debug(key)
        # firefox examples seen so far:
        # :https://a.disquscdn.com/1611314356/images/noavatar92.png
        # O^partitionKey=%28https%2Cgithub.com%29,:https://avatars.githubusercontent.com/u/2255859?s=60&v=4
        # a,~1611850038,:http://r3.o.lencr.org/
        # a,:https://www.yueimg.com/en/js/detail/rss.49e5ceab.js
        # everything after first :
        return key.split(':',1)[-1]

    # key == filename for firefox cache2
    def get_data_key(self, key):
        with share_open(key, "rb") as entry_file:
            metadata = _read_entry_headers(entry_file)
            entry_file.seek(0)
            encoding = metadata.get('response-headers',{}).get('content-encoding', '').strip().lower()
            return self.decompress(encoding,entry_file.read(metadata['readsize']))

    def make_datetime(self,i):
        return datetime.datetime.fromtimestamp(i)
    def make_key_path(self,url):
        key = self.make_key(url)
        hashkey = hashlib.sha1(key.encode('utf8')).hexdigest().upper()
        logger.debug(hashkey)
        fullkey = os.path.join(self.cache_dir, 'entries', hashkey)
        logger.debug(fullkey)
        return fullkey

    def get_data_impl(self, url):
        key_path = self.make_key_path(url)
        if os.path.isfile(key_path):  # share_open()'s failure for non-existent is some win error.
            with share_open(key_path, "rb") as entry_file:
                metadata = _read_entry_headers(entry_file)
                # redirect when Location header
                location = metadata.get('response-headers',{}).get('Location', '')
                entry_file.seek(0)
                rawdata = None if location else entry_file.read(metadata['readsize'])
                return (
                    location,
                    metadata['lastModInt'],
                    metadata.get('response-headers',{}).get('content-encoding', '').strip().lower(),
                    rawdata)
        return None

def _validate_entry_file(path):
    with share_open(path, "rb") as entry_file:
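The on-demand Firefox lookup above works because a cache2 entry's filename is simply the uppercase SHA-1 hex digest of its full cache key, so a URL can be mapped straight to a file with no directory scan:

    import hashlib
    import os

    def entry_path(cache_dir, url, domain):
        # key format per make_key() above
        key = 'O^partitionKey=%28https%2C'+domain+'%29,:'+url
        hashkey = hashlib.sha1(key.encode('utf8')).hexdigest().upper()
        return os.path.join(cache_dir, 'entries', hashkey)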
@@ -140,16 +109,8 @@ def _validate_entry_file(path):
        return None  # key in file does not match the hash, something is wrong
    return metadata['key']

chunkSize = 256 * 1024

def _get_entry_file_created(path):
    with share_open(path, "rb") as entry_file:
        metadata = _read_entry_headers(entry_file)
        if metadata['key_hash'] != os.path.basename(path):
            return None  # key in file does not match the hash, something is wrong
        return (metadata['key'], metadata['lastModInt'])

def _read_entry_headers(entry_file):
    chunkSize = 256 * 1024
    retval = {}

    ## seek to & read last 4 bytes,
@@ -1,15 +1,34 @@
# -*- coding: utf-8 -*-

# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import struct
import hashlib
import glob
import time
import time, datetime
import re
import traceback
from . import BaseBrowserCache, BrowserCacheException
from ..six import ensure_binary, ensure_text

from ..six import ensure_binary, ensure_text
from ..exceptions import BrowserCacheException
from .share_open import share_open

from .base_chromium import BaseChromiumCache

import logging
logger = logging.getLogger(__name__)
@@ -25,12 +44,12 @@ ENTRY_MAGIC_NUMBER = 0xfcfb6d1ba7725c30
EOF_MAGIC_NUMBER = 0xf4fa6f45970d41d8
THE_REAL_INDEX_MAGIC_NUMBER = 0x656e74657220796f

class SimpleCache(BaseBrowserCache):
class SimpleCache(BaseChromiumCache):
    """Class to access data stream in Chrome Simple Cache format cache files"""

    def __init__(self, *args, **kargs):
        """Constructor for SimpleCache"""
        BaseBrowserCache.__init__(self, *args, **kargs)
        super(SimpleCache,self).__init__(*args, **kargs)
        logger.debug("Using SimpleCache")

    @staticmethod
@@ -58,44 +77,8 @@ class SimpleCache(BaseBrowserCache):
            return False
        return False

    def map_cache_keys(self):
        """Scan index file and cache entries to save entries in this cache"""

        # can't use self.age_comp_time because it's set to 1601 epoch.
        if self.age_limit > 0.0 :
            file_comp_time = time.time() - (self.age_limit*3600)
        else:
            file_comp_time = 0

        self.count=0
        if hasattr(os, 'scandir'):
            logger.debug("using scandir")
            for entry in os.scandir(self.cache_dir):
                self.do_cache_key_entry(entry.path,entry.stat(),file_comp_time)
        else:
            logger.debug("using listdir")
            for en_fl in os.listdir(self.cache_dir):
                en_path = os.path.join(self.cache_dir,en_fl)
                self.do_cache_key_entry(en_path,os.stat(en_path),file_comp_time)
        logger.debug("Read %s entries"%self.count)

    def do_cache_key_entry(self,path,stats,file_comp_time):
        ## there are some other files in simple cache dir.
        # logger.debug("%s: %s > %s"%(os.path.basename(path),stats.st_mtime,file_comp_time))
        if( re.match(r'^[0-9a-fA-F]{16}_[0-9]+$',os.path.basename(path))
            and stats.st_mtime > file_comp_time ):
            try:
                (cache_url,created) = _get_entry_file_created(path)
                if '14161667' in cache_url:
                    logger.debug(path)
                    logger.debug(cache_url)
                self.add_key_mapping(cache_url,path,created)
                self.count+=1
            except Exception as e:
                logger.warning("Cache file %s failed to load, skipping."%path)
                logger.debug(traceback.format_exc())

    # key == filename for simple cache
    # NOT USED
    def get_data_key(self, key):
        headers = _get_headers(key)
        encoding = headers.get('content-encoding', '').strip().lower()
@@ -105,19 +88,52 @@ class SimpleCache(BaseBrowserCache):
            # logger.debug("\n\n%s\n\n"%key)
            raise

    def get_data_url(self, url):
        """ Return decoded data for specified key (a URL string) or None """
        glob_pattern = os.path.join(self.cache_dir, _key_hash(url) + '_?')
    def get_data_impl(self, url):
        """
        returns location, entry age(unix epoch), content-encoding and
        raw(compressed) data
        """
        logger.debug("simple get impl ================================= ")
        fullkey = self.make_key(url)
        hashkey = _key_hash(fullkey)
        glob_pattern = os.path.join(self.cache_dir, hashkey + '_?')
        # because hash collisions are so rare, this will usually only find zero or one file,
        # so there is no real savings to be had by reading the index file instead of going straight to the entry files
        url = ensure_text(url)
        logger.debug(url)
        logger.debug(glob_pattern)

        ## glob'ing for the collisions avoids ever trying to open
        ## non-existent files.
        for en_fl in glob.glob(glob_pattern):
            try:
                file_key = _validate_entry_file(en_fl)
                if file_key == url:
                    return self.get_data_key(en_fl)
                ## --- need to check vs full key due to possible hash
                ## --- collision--can't just do url in key
                ## --- location
                ## --- age check
                ## --- This nonsense opens the file *4* times.

                ## --- also make location code common across all three--and age check?
                ## parts of make key?
                with share_open(en_fl, "rb") as entry_file:
                    file_key = _read_entry_file(en_fl,entry_file)
                    if file_key != fullkey:
                        # theoretically, there can be hash collision.
                        continue
                    (info_size, flags, request_time, response_time, header_size) = _read_meta_headers(entry_file)
                    headers = _read_headers(entry_file,header_size)
                    logger.debug("file_key:%s"%file_key)
                    logger.debug("response_time:%s"%response_time)
                    # logger.debug("Creation Time: %s"%datetime.datetime.fromtimestamp(int(response_time/1000000)-EPOCH_DIFFERENCE))
                    logger.debug(headers)
                    location = headers.get('Location', '')
                    # don't need data when redirect
                    rawdata = None if location else _read_data_from_entry(entry_file)
                    return (
                        location,
                        self.make_age(response_time),
                        headers.get('content-encoding', '').strip().lower(),
                        rawdata)
            except SimpleCacheException:
                pass
        return None
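The simple-cache lookup flow above, reduced to its skeleton: hash the full key, glob for '<hash>_?' entry files, and verify the key stored inside each candidate, because hash collisions, while rare, are possible. In this sketch, key_hash and read_key stand in for the module's own _key_hash() and _read_entry_file() helpers, left abstract here:

    import glob
    import os

    def find_entry_file(cache_dir, fullkey, key_hash, read_key):
        # key_hash/read_key are stand-ins for _key_hash()/_read_entry_file()
        for en_fl in glob.glob(os.path.join(cache_dir, key_hash(fullkey) + '_?')):
            if read_key(en_fl) == fullkey:
                return en_fl
        return None  # miss: no entry file, or only colliding keys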
@@ -177,16 +193,22 @@ def _skip_to_start_of_stream(entry_file):
def _get_data_from_entry_file(path):
    """ Read the contents portion (stream 1 data) from the instance's cache entry file. Return a byte string """
    with share_open(path, "rb") as entry_file:
        entry_file.seek(0, os.SEEK_END)
        _skip_to_start_of_stream(entry_file)
        stream_size = _skip_to_start_of_stream(entry_file)
        ret = entry_file.read(stream_size)
        return _read_data_from_entry(entry_file)


def _read_data_from_entry(entry_file):
    """ Read the contents portion (stream 1 data) from the instance's cache entry. Return a byte string """
    entry_file.seek(0, os.SEEK_END)
    _skip_to_start_of_stream(entry_file)
    stream_size = _skip_to_start_of_stream(entry_file)
    ret = entry_file.read(stream_size)
    return ret


def _get_headers(path):
    with share_open(path, "rb") as entry_file:
        (info_size, flags, request_time, response_time, header_size) = _read_meta_headers(entry_file)
        logger.debug("request_time:%s, response_time:%s"%(request_time, response_time))
        return _read_headers(entry_file,header_size)
@@ -59,14 +59,14 @@ def superFastHash(data):
    if rem == 3:
        hash += get16bits (data)
        hash ^= (hash << 16) & 0xFFFFFFFF
        hash ^= (int(binascii.hexlify(data[2]), 16) << 18) & 0xFFFFFFFF
        hash ^= (int(binascii.hexlify(data[2:]), 16) << 18) & 0xFFFFFFFF
        hash += hash >> 11
    elif rem == 2:
        hash += get16bits (data)
        hash ^= (hash << 11) & 0xFFFFFFFF
        hash += hash >> 17
    elif rem == 1:
        hash += int(binascii.hexlify(data[0]), 16)
        hash += int(binascii.hexlify(data[0:]), 16)
        hash ^= (hash << 10) & 0xFFFFFFFF
        hash += hash >> 1
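The two one-character changes above are Python 3 compatibility fixes: indexing a bytes object returns an int, which binascii.hexlify() rejects, while slicing returns a bytes object with the same hex result under both Python 2 and 3.

    import binascii
    data = b'\x61\x62\x63'
    binascii.hexlify(data[2:])    # b'63' on py2 and py3
    # binascii.hexlify(data[2])   # py3: TypeError, data[2] is the int 99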
@@ -45,6 +45,7 @@ from .cacheBlock import CacheBlock
from .cacheData import CacheData
from .cacheEntry import CacheEntry

from ..share_open import share_open

def parse(path, urls=None):
    """
@@ -61,7 +62,7 @@ def parse(path, urls=None):
    if cacheBlock.type != CacheBlock.INDEX:
        raise Exception("Invalid Index File")

    index = open(path + "index", 'rb')
    index = share_open(path + "index", 'rb')

    # Skipping Header
    index.seek(92*4)
@@ -24,6 +24,9 @@ Need to jump through various hoops to *really* open
read-only--different hoops in CLI and Calibre, too.
'''

import logging
logger = logging.getLogger(__name__)

## CLI version:

import sys
@@ -42,6 +45,7 @@ if iswindows:
    import msvcrt

    def share_open(path,*args,**kargs):
        logger.debug("share_open(%s)"%path)
        # does need all three file share flags.
        handle = win32file.CreateFile(path,
                                      win32file.GENERIC_READ,
@@ -1058,7 +1058,8 @@ class Configuration(ConfigParser):
                ## there are many more.
                if self.browser_cache is None:
                    self.browser_cache = BrowserCache(self.getConfig("browser_cache_path"),
                                                      age_limit=self.getConfig("browser_cache_age_limit"))
                                                      age_limit=self.getConfig("browser_cache_age_limit"),
                                                      open_page_in_browser=self.getConfig("open_page_in_browser"))
                fetchers.BrowserCacheDecorator(self.browser_cache).decorate_fetcher(self.fetcher)
            except Exception as e:
                logger.warning("Failed to setup BrowserCache(%s)"%e)
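The new open_page_in_browser keyword presumably joins the existing browser-cache settings in the user's ini. A hypothetical personal.ini fragment (setting names taken from the getConfig() calls above; the cache path is only an example):

    [defaults]
    use_browser_cache:true
    browser_cache_path:/home/user/.cache/google-chrome/Default/Cache
    browser_cache_age_limit:4
    open_page_in_browser:true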
@@ -137,3 +137,7 @@ class HTTPErrorFFF(Exception):
            return "HTTP Error in FFF '%s'(%s)"%(self.error_msg,self.status_code)
        else:
            return "HTTP Error in FFF '%s'(%s) URL:'%s'"%(self.error_msg,self.status_code,self.url)

class BrowserCacheException(Exception):
    pass
@@ -19,6 +19,8 @@ from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)

import traceback

from .. import exceptions

from .base_fetcher import FetcherResponse
@@ -40,8 +42,15 @@ class BrowserCacheDecorator(FetcherDecorator):
                           usecache=True):
        # logger.debug("BrowserCacheDecorator fetcher_do_request")
        if usecache:
            d = self.cache.get_data(url)
            logger.debug(make_log('BrowserCache',method,url,d is not None))
            try:
                d = self.cache.get_data(url)
            except Exception as e:
                logger.debug(traceback.format_exc())
                raise exceptions.BrowserCacheException("Browser Cache Failed to Load with error '%s'"%e)

            # had a d = b'' which showed HIT, but failed.
            logger.debug(make_log('BrowserCache',method,url,True if d else False))
            # logger.debug(d)
            if d:
                return FetcherResponse(d,redirecturl=url,fromcache=True)
        ## make use_browser_cache true/false/only?
@@ -60,4 +69,3 @@ class BrowserCacheDecorator(FetcherDecorator):
                                          parameters=parameters,
                                          referer=referer,
                                          usecache=usecache)
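The decorator's control flow, stripped to its shape: consult the browser cache first and only fall through to the wrapped fetcher on a miss. A sketch under the names visible above; the exact signature and the fall-through call are assumptions:

    def fetcher_do_request(self, fetcher, method, url,
                           parameters, referer, usecache=True):
        if usecache:
            try:
                d = self.cache.get_data(url)
            except Exception as e:
                raise exceptions.BrowserCacheException(
                    "Browser Cache Failed to Load with error '%s'" % e)
            if d:  # b'' counts as a miss, hence truthiness, not `is not None`
                return FetcherResponse(d, redirecturl=url, fromcache=True)
        # cache miss: delegate to the wrapped fetcher
        return fetcher.fetcher_do_request(method, url,
                                          parameters=parameters,
                                          referer=referer,
                                          usecache=usecache)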