# mirror of https://github.com/JimmXinu/FanFicFare.git
# synced 2026-01-13 19:52:31 +01:00
# -*- coding: utf-8 -*-

# Copyright 2022 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
|
|
import time, datetime
|
|
import gzip
|
|
import zlib
|
|
try:
|
|
# py3 only, calls C libraries. CLI
|
|
import brotli
|
|
except ImportError:
|
|
try:
|
|
# Calibre doesn't include brotli, so use plugin packaged
|
|
# brotlidecpy, which is slower, but pure python
|
|
from calibre_plugins.fanficfare_plugin import brotlidecpy as brotli
|
|
except ImportError:
|
|
# Included for benefit of A-Shell for iOS users. They need to
|
|
# install brotlidecpy themselves and override pip to install
|
|
# FFF without brotli
|
|
# See:
|
|
# https://github.com/JimmXinu/FanFicFare/issues/919
|
|
# https://github.com/sidney/brotlidecpy
|
|
import brotlidecpy as brotli
|
|
|
|
import logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
from ..six.moves.urllib.parse import urlparse, urlunparse
|
|
from ..six import ensure_text
|
|
|
|
from ..exceptions import BrowserCacheException
|
|
|
|
class BaseBrowserCache(object):
    """Base class to read various formats of web browser cache file"""

    def __init__(self, cache_dir, age_limit=-1):
        """
        Constructor for BaseBrowserCache.

        cache_dir: path of the browser cache directory.
        age_limit: maximum acceptable entry age in *hours*; None, ''
                   or a negative value disables the age check.
        """
        ## only ever called by class method new_browser_cache()
        self.cache_dir = cache_dir
        if age_limit is None or age_limit == '' or float(age_limit) < 0.0:
            self.age_limit = None
        else:
            # set in hours, recorded in seconds
            self.age_limit = float(age_limit) * 3600

    @classmethod
    def new_browser_cache(cls, cache_dir, age_limit=-1):
        """Return new instance of this BrowserCache class, or None if supplied directory not the correct cache type"""
        cache_dir = os.path.realpath(os.path.expanduser(cache_dir))
        if cls.is_cache_dir(cache_dir):
            try:
                return cls(cache_dir,
                           age_limit=age_limit)
            except BrowserCacheException:
                return None
        return None

    @staticmethod
    def is_cache_dir(cache_dir):
        """Check given dir is a valid cache."""
        raise NotImplementedError()

    def get_data(self, url):
        """
        Return cached (decompressed) value for URL if found and within
        age_limit, else None.  Follows Location redirects recursively.
        """
        # logger.debug("get_data:%s"%url)

        ## allow for a list of keys specifically for finding WebToEpub
        ## cached entries.
        rettuple = None
        for key in self.make_keys(url):
            logger.debug("Cache Key:%s"%key)
            entrytuple = self.get_data_key_impl(url, key)
            # use newest (entrytuple[1] is the entry's age/timestamp)
            if entrytuple and (not rettuple or rettuple[1] < entrytuple[1]):
                rettuple = entrytuple

        if rettuple is None:
            return None

        (location,
         age,
         encoding,
         rawdata) = rettuple

        # age check
        logger.debug("age:%s"%datetime.datetime.fromtimestamp(age))
        logger.debug("now:%s"%datetime.datetime.fromtimestamp(time.time()))
        if not (self.age_limit is None or age > time.time()-self.age_limit):
            logger.debug("Cache entry found, rejected, past age limit")
            return None

        # recurse on location redirects
        if location:
            logger.debug("Do Redirect(%s)"%location)
            return self.get_data(self.make_redirect_url(location,url))

        # decompress
        return self.decompress(encoding,rawdata)

    def get_data_key_impl(self, url, key):
        """
        returns location, entry age, content-encoding and
        raw(compressed) data
        """
        raise NotImplementedError()

    def make_keys(self, url):
        """
        Returns a list of keys to try--list for WebToEpub and normal

        Hashing done inside get_data_key_impl
        """
        raise NotImplementedError()

    def make_key_parts(self, url):
        """
        Modern browser all also key their cache with the domain to
        reduce info leaking, but differently. However, some parts
        are common

        Returns (scheme, domain, url-without-fragment).
        """
        parsedUrl = urlparse(url)
        scheme = parsedUrl.scheme
        domain = parsedUrl.netloc
        # logger.debug(domain)

        # discard www. -- others likely needed to distinguish host
        # from domain. Something like tldextract ideally, but
        # dependencies
        # XXX forums?
        # BUGFIX: only strip a *leading* 'www.'; replace() removed the
        # substring anywhere in the host (e.g. 'awww.example.com' ->
        # 'aexample.com'), corrupting the key.
        if domain.startswith('www.'):
            domain = domain[len('www.'):]

        # discard any #anchor part
        url = url.split('#')[0]

        return (scheme, domain, url) # URL still contains domain, params, etc

    def make_redirect_url(self,location,origurl):
        """
        Most redirects are relative, but not all.

        Resolve location against origurl; the query (if any) is kept
        inside the path component, fragment is dropped.
        """
        pLoc = urlparse(location)
        pUrl = urlparse(origurl)
        # logger.debug(pLoc)
        # logger.debug(pUrl)
        loc = location.strip()
        if pLoc.netloc:
            # BUGFIX: absolute Location -- rebuild only the
            # path(+params+query) part; previously the whole absolute
            # URL was used as the path, duplicating scheme://netloc
            # (e.g. 'http://a.net/http://a.net/x').
            loc = urlunparse(('', '', pLoc.path, pLoc.params, pLoc.query, ''))
        return urlunparse((pLoc.scheme or pUrl.scheme,
                           pLoc.netloc or pUrl.netloc,
                           loc,
                           '','',''))

    def decompress(self, encoding, data):
        """Decompress raw cache data per its Content-Encoding; data
        returned unchanged for unknown/identity encodings."""
        encoding = ensure_text(encoding)
        if encoding == 'gzip':
            return gzip.decompress(data)
        elif encoding == 'br':
            return brotli.decompress(data)
        elif encoding == 'deflate':
            return zlib.decompress(data)
        return data