# -*- coding: utf-8 -*-

# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import time, datetime
import gzip
import zlib

try:
    # py3 only, calls C libraries. CLI
    import brotli
except ImportError:
    try:
        # Calibre doesn't include brotli, so use plugin packaged
        # brotlidecpy, which is slower, but pure python
        from calibre_plugins.fanficfare_plugin import brotlidecpy as brotli
    except ImportError:
        # Included for benefit of A-Shell for iOS users.  They need to
        # install brotlidecpy themselves and override pip to install
        # FFF without brotli
        # See:
        # https://github.com/JimmXinu/FanFicFare/issues/919
        # https://github.com/sidney/brotlidecpy
        import brotlidecpy as brotli

import logging
logger = logging.getLogger(__name__)

from ..six.moves.urllib.parse import urlparse, urlunparse
from ..six import ensure_text

from ..exceptions import BrowserCacheException

class BaseBrowserCache(object):
    """Base class to read various formats of web browser cache file"""

    def __init__(self, cache_dir, age_limit=-1):
        """Constructor for BaseBrowserCache"""
        ## only ever called by class method new_browser_cache()
        self.cache_dir = cache_dir
        if age_limit is None or age_limit == '' or float(age_limit) < 0.0:
            self.age_limit = None
        else:
            # set in hours, recorded in seconds
            self.age_limit = float(age_limit) * 3600

    @classmethod
    def new_browser_cache(cls, cache_dir, age_limit=-1):
        """Return new instance of this BrowserCache class, or None if
        supplied directory is not the correct cache type"""
        cache_dir = os.path.realpath(os.path.expanduser(cache_dir))
        if cls.is_cache_dir(cache_dir):
            try:
                return cls(cache_dir,
                           age_limit=age_limit)
            except BrowserCacheException:
                return None
        return None

    @staticmethod
    def is_cache_dir(cache_dir):
        """Check given dir is a valid cache."""
        raise NotImplementedError()

    def get_data(self, url):
        """Return cached value for URL if found."""
        # logger.debug("get_data:%s"%url)

        ## allow for a list of keys specifically for finding WebToEpub
        ## cached entries.
        rettuple = None
        for key in self.make_keys(url):
            logger.debug("Cache Key:%s"%key)
            entrytuple = self.get_data_key_impl(url, key)
            # use newest
            if entrytuple and (not rettuple or rettuple[1] < entrytuple[1]):
                rettuple = entrytuple

        if rettuple is None:
            return None

        (location, age, encoding, rawdata) = rettuple

        # age check
        logger.debug("age:%s"%datetime.datetime.fromtimestamp(age))
        logger.debug("now:%s"%datetime.datetime.fromtimestamp(time.time()))
        if not (self.age_limit is None or age > time.time()-self.age_limit):
            logger.debug("Cache entry found, rejected, past age limit")
            return None

        # recurse on location redirects
        if location:
            logger.debug("Do Redirect(%s)"%location)
            return self.get_data(self.make_redirect_url(location,url))

        # decompress
        return self.decompress(encoding,rawdata)

    def get_data_key_impl(self, url, key):
        """
        Returns (location, entry age, content-encoding, raw (compressed)
        data), or None if the key is not in the cache.
        """
        raise NotImplementedError()

    def make_keys(self, url):
        """
        Returns a list of keys to try--a list so both WebToEpub and
        normal cache entries can be found.
        Hashing is done inside get_data_key_impl().
        """
        raise NotImplementedError()

    def make_key_parts(self, url):
        """
        Modern browsers all key their cache with the domain to reduce
        info leaking, but each does it differently.  However, some
        parts are common.
        """
        parsedUrl = urlparse(url)
        scheme = parsedUrl.scheme
        domain = parsedUrl.netloc
        # logger.debug(domain)

        # discard www. -- others likely needed to distinguish host
        # from domain.  Something like tldextract would be ideal, but
        # it adds dependencies.
        # XXX forums?
        domain = domain.replace('www.','')

        # discard any #anchor part
        url = url.split('#')[0]

        return (scheme, domain, url) # URL still contains domain, params, etc

    def make_redirect_url(self,location,origurl):
        """
        Most redirects are relative, but not all; fill in scheme and
        netloc from the original URL when the location lacks them.
        """
        pLoc = urlparse(location)
        pUrl = urlparse(origurl)
        # logger.debug(pLoc)
        # logger.debug(pUrl)
        return urlunparse((pLoc.scheme or pUrl.scheme,
                           pLoc.netloc or pUrl.netloc,
                           location.strip(),
                           '','',''))

    def decompress(self, encoding, data):
        encoding = ensure_text(encoding)
        if encoding == 'gzip':
            return gzip.decompress(data)
        elif encoding == 'br':
            return brotli.decompress(data)
        elif encoding == 'deflate':
            return zlib.decompress(data)
        return data
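
## --------------------------------------------------------------------
## Illustrative sketch only: a minimal concrete subclass showing how
## the abstract hooks above (is_cache_dir, make_keys,
## get_data_key_impl) plug into get_data().  The class name, marker
## file, and one-file-per-URL layout here are hypothetical and do not
## correspond to any real browser cache format -- real backends
## (Chrome, Firefox, etc.) live in their own modules and are far more
## involved.
class ExampleDirBrowserCache(BaseBrowserCache):
    """Hypothetical cache: one uncompressed file per URL under cache_dir."""

    # Assumed marker file used only by this sketch to recognize the dir.
    MARKER_FILE = 'example_cache_marker'

    @staticmethod
    def is_cache_dir(cache_dir):
        # A real implementation sniffs the browser's own index/marker
        # files instead of a made-up marker like this one.
        return os.path.isdir(cache_dir) and \
            os.path.exists(os.path.join(cache_dir,
                                        ExampleDirBrowserCache.MARKER_FILE))

    def make_keys(self, url):
        # Real backends hash the (scheme, domain, url) parts from
        # make_key_parts(); here the key is just a filesystem-safe
        # flattening of the URL, and only one key is tried.
        (scheme, domain, url) = self.make_key_parts(url)
        return [url.replace('/', '_').replace(':', '_')]

    def get_data_key_impl(self, url, key):
        # Must return (location, age, encoding, rawdata) or None.
        path = os.path.join(self.cache_dir, key)
        if not os.path.exists(path):
            return None
        with open(path, 'rb') as f:
            rawdata = f.read()
        # No redirect, file mtime as entry age, no content-encoding,
        # so decompress() passes the data through unchanged.
        return (None, os.path.getmtime(path), '', rawdata)

## A typical call site might construct via new_browser_cache() so that
## an unrecognized directory yields None rather than raising, e.g.:
##
##     cache = ExampleDirBrowserCache.new_browser_cache('~/cache',
##                                                      age_limit=4)
##     if cache:
##         data = cache.get_data('https://example.com/story/123')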