Use brotlipython with the plugin — much slower, but pure Python. Not available on PyPI

This commit is contained in:
Jim Miller 2021-01-11 21:45:06 -06:00
parent 10fb77f00f
commit 04314d2b63
6 changed files with 3217 additions and 8 deletions

View file

@ -114,12 +114,19 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
if self.chromagnon_cache is None:
logger.debug("Start making self.chromagnon_cache")
self.chromagnon_cache = ChromeCache(self.getConfig("chrome_cache_path"))
try:
if not self.getConfig("chrome_cache_path"):
raise exceptions.FailedToDownload("FFnet Workaround: chrome_cache_path setting must be set.")
self.chromagnon_cache = ChromeCache(self.getConfig("chrome_cache_path"))
except PermissionError:
raise exceptions.FailedToDownload("Permission to Chrome Cache (%s) denied--Did you quit Chrome?" % self.getConfig("chrome_cache_path"))
logger.debug("Done making self.chromagnon_cache")
data = self.chromagnon_cache.get_cached_file(url)
logger.debug("%s:len(%s)"%(url,len(data)))
if data is None:
raise HTTPError(404,"Not found in Chrome Cache")
## XXX Do something to collect list of failed URLs?
## Turn on continue on fail?
raise exceptions.FailedToDownload("URL not found in Chrome Cache: %s" % url)
logger.debug("%s:len(%s)"%(url,len(data)))
return self.configuration._decode(data)
def use_pagecache(self):

View file

@ -39,7 +39,33 @@ import os
import struct
import sys
import re
import brotli
import time
def do_cprofile(func):
    """Decorator that prints the wall-clock time spent in each call to *func*.

    Used here to compare the speed of the C ``brotli`` decompressor against
    the pure-python ``brotlipython`` fallback.

    Returns the wrapped function; the wrapped call's return value (or raised
    exception) is passed through unchanged.
    """
    def profiled_func(*args, **kwargs):
        start = time.time()
        try:
            return func(*args, **kwargs)
        finally:
            # Compute elapsed time in the finally so it is correct even when
            # func raises; the previous version printed the raw start
            # timestamp on the exception path.
            print("time:%s" % (time.time() - start))
    return profiled_func
try:
    # Prefer the C-accelerated brotli module when it is installed.
    from brotli import decompress

    @do_cprofile
    def brotli_decompress(inbuf):
        """Decompress *inbuf* using the fast C brotli library."""
        return decompress(inbuf)
except ImportError:
    # Only a missing brotli module should trigger the fallback; a bare
    # except here would also hide KeyboardInterrupt/SystemExit and real
    # bugs raised inside the try body.
    # Calibre doesn't include brotli, so use packaged brotlipython
    # which is waaaay slower, but pure python.
    from brotlipython import brotlidec

    @do_cprofile
    def brotli_decompress(inbuf):
        """Decompress *inbuf* using the pure-python brotlipython fallback."""
        # brotlidec wants the output list, too, but returns it
        return brotlidec(inbuf, [])
import time
from . import csvOutput
@ -138,7 +164,7 @@ class ChromeCache(object):
if entry.httpHeader.headers[b'content-encoding'] == b"gzip":
data = gzip.decompress(data)
elif entry.httpHeader.headers[b'content-encoding'] == b"br":
data = brotli.decompress(data)
data = brotli_decompress(data)
return data
return None
@ -217,7 +243,6 @@ def exportToHTML(cache, outpath):
# print("unbrotli'ed:%s"%name)
except IOError:
page.write("Something wrong happened while unzipping")
brotli
else:
page.write('<a href="%s">%s</a>'%(name ,
entry.keyToStr().split('/')[-1]))

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -37,7 +37,7 @@ if __name__=="__main__":
os.chdir('../included_dependencies')
files=['bs4','chardet','html2text','soupsieve','backports',
'cloudscraper','requests','requests_toolbelt','urllib3',
'certifi','idna']
'certifi','idna','brotlipython.py','brotli-dict']
## Kept only for v2.85.1 support now.
createZipFile("../"+filename,"a",
files,

View file

@ -85,7 +85,8 @@ setup(
'chardet',
'html5lib',
'html2text',
'cloudscraper'],
'cloudscraper',
'brotli'],
# html5lib requires 'six', FFF includes it's own copy as fanficfare.six
# List additional groups of dependencies here (e.g. development