mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-04-29 02:12:10 +02:00
Add (optional, default on) convert support for data:image in-line <img>s.
This commit is contained in:
parent
12383b6342
commit
0fa697b418
6 changed files with 69 additions and 37 deletions
|
|
@ -1025,6 +1025,13 @@ remove_transparency: true
|
|||
## grayscale.
|
||||
#no_image_processing: false
|
||||
|
||||
## In-line images (<img src="data:image/...;base64,...") are converted
|
||||
## to files so the normal image processing can be applied, but only if
|
||||
## base64 encoded. Note that in-line images are also removed when
|
||||
## include_images:false. Also allows an in-line image to be the cover.
|
||||
## If set false, in-line images will be kept as-is.
|
||||
convert_inline_images:true
|
||||
|
||||
## If set true, FFF will compare all image files (of the same size)
|
||||
## looking for identical files with different URLs. fiction.live is
|
||||
## the only site currently (Sep 2020) known to benefit from this.
|
||||
|
|
@ -2379,6 +2386,7 @@ slow_down_sleep_time:2
|
|||
## datechapter_format. Otherwise it will default to
|
||||
## datePublished_format
|
||||
#datechapter_format:%%Y-%%m-%%d
|
||||
|
||||
[starslibrary.net]
|
||||
## Some sites require login (or login for some rated stories). The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -257,6 +257,7 @@ def get_valid_set_options():
|
|||
'grayscale_images':(None,['epub','html'],boollist),
|
||||
'no_image_processing':(None,['epub','html'],boollist),
|
||||
'dedup_img_files':(None,['epub','html'],boollist),
|
||||
'convert_inline_images':(None,['epub','html'],boollist),
|
||||
'normalize_text_links':(None,['epub','html'],boollist),
|
||||
'internalize_text_links':(None,['epub','html'],boollist),
|
||||
|
||||
|
|
@ -427,6 +428,7 @@ def get_valid_keywords():
|
|||
'cover_min_size',
|
||||
'no_image_processing',
|
||||
'dedup_img_files',
|
||||
'convert_inline_images',
|
||||
'non_breaking_spaces',
|
||||
'download_text_version',
|
||||
'nook_img_fix',
|
||||
|
|
|
|||
|
|
@ -1051,6 +1051,13 @@ remove_transparency: true
|
|||
## grayscale.
|
||||
#no_image_processing: false
|
||||
|
||||
## In-line images (<img src="data:image/...;base64,...") are converted
|
||||
## to files so the normal image processing can be applied, but only if
|
||||
## base64 encoded. Note that in-line images are also removed when
|
||||
## include_images:false. Also allows an in-line image to be the cover.
|
||||
## If set false, in-line images will be kept as-is.
|
||||
convert_inline_images:true
|
||||
|
||||
## If set true, FFF will compare all image files (of the same size)
|
||||
## looking for identical files with different URLs. fiction.live is
|
||||
## the only site currently (Sep 2020) known to benefit from this.
|
||||
|
|
|
|||
|
|
@ -128,6 +128,7 @@ def get_update_data(inputio,
|
|||
for img in soup.findAll('img'):
|
||||
newsrc=''
|
||||
longdesc=''
|
||||
## skip <img src="data:image..."
|
||||
if not img['src'].startswith('data:image'):
|
||||
try:
|
||||
newsrc=get_path_part(href)+img['src']
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ import json
|
|||
import datetime
|
||||
from math import floor
|
||||
from functools import partial
|
||||
import base64
|
||||
import hashlib
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -1194,41 +1196,52 @@ class Story(Configurable):
|
|||
|
||||
imgdata = None
|
||||
if url.startswith("data:image"):
|
||||
# don't do anything to in-line images.
|
||||
return (url, "inline image")
|
||||
## Mistakenly ended up with some // in image urls, like:
|
||||
## https://forums.spacebattles.com//styles/default/xenforo/clear.png
|
||||
## Removing one /, but not ://
|
||||
if not url.startswith("file:"): # keep file:///
|
||||
url = re.sub(r"([^:])//",r"\1/",url)
|
||||
if url.startswith("http") or url.startswith("file:") or parenturl == None:
|
||||
imgurl = url
|
||||
else:
|
||||
parsedUrl = urlparse(parenturl)
|
||||
if url.startswith("//") :
|
||||
imgurl = urlunparse(
|
||||
(parsedUrl.scheme,
|
||||
'',
|
||||
url,
|
||||
'','',''))
|
||||
elif url.startswith("/") :
|
||||
imgurl = urlunparse(
|
||||
(parsedUrl.scheme,
|
||||
parsedUrl.netloc,
|
||||
url,
|
||||
'','',''))
|
||||
if 'base64' in url and self.getConfig("convert_inline_images",True):
|
||||
head, base64data = url.split(',', 1)
|
||||
# logger.debug("%s len(%s)"%(head,len(base64data)))
|
||||
# Get the file extension (gif, jpeg, png)
|
||||
file_ext = head.split(';')[0].split('/')[1]
|
||||
|
||||
# Decode the image data
|
||||
imgdata = base64.b64decode(base64data)
|
||||
imgurl = "file:///fakefile/img-data-image/"+hashlib.md5(imgdata).hexdigest()+"."+file_ext
|
||||
else:
|
||||
toppath=""
|
||||
if parsedUrl.path.endswith("/"):
|
||||
toppath = parsedUrl.path
|
||||
# don't do anything to in-line images.
|
||||
return (url, "inline image")
|
||||
else:
|
||||
## Mistakenly ended up with some // in image urls, like:
|
||||
## https://forums.spacebattles.com//styles/default/xenforo/clear.png
|
||||
## Removing one /, but not ://
|
||||
if not url.startswith("file:"): # keep file:///
|
||||
url = re.sub(r"([^:])//",r"\1/",url)
|
||||
if url.startswith("http") or url.startswith("file:") or parenturl == None:
|
||||
imgurl = url
|
||||
else:
|
||||
parsedUrl = urlparse(parenturl)
|
||||
if url.startswith("//") :
|
||||
imgurl = urlunparse(
|
||||
(parsedUrl.scheme,
|
||||
'',
|
||||
url,
|
||||
'','',''))
|
||||
elif url.startswith("/") :
|
||||
imgurl = urlunparse(
|
||||
(parsedUrl.scheme,
|
||||
parsedUrl.netloc,
|
||||
url,
|
||||
'','',''))
|
||||
else:
|
||||
toppath = parsedUrl.path[:parsedUrl.path.rindex('/')+1]
|
||||
imgurl = urlunparse(
|
||||
(parsedUrl.scheme,
|
||||
parsedUrl.netloc,
|
||||
toppath + url,
|
||||
'','',''))
|
||||
# logger.debug("\n===========\nparsedUrl.path:%s\ntoppath:%s\nimgurl:%s\n\n"%(parsedUrl.path,toppath,imgurl))
|
||||
toppath=""
|
||||
if parsedUrl.path.endswith("/"):
|
||||
toppath = parsedUrl.path
|
||||
else:
|
||||
toppath = parsedUrl.path[:parsedUrl.path.rindex('/')+1]
|
||||
imgurl = urlunparse(
|
||||
(parsedUrl.scheme,
|
||||
parsedUrl.netloc,
|
||||
toppath + url,
|
||||
'','',''))
|
||||
# logger.debug("\n===========\nparsedUrl.path:%s\ntoppath:%s\nimgurl:%s\n\n"%(parsedUrl.path,toppath,imgurl))
|
||||
|
||||
# apply coverexclusion to explicit covers, too. Primarily for ffnet imageu.
|
||||
#print("[[[[[\n\n %s %s \n\n]]]]]]]"%(imgurl,coverexclusion))
|
||||
|
|
@ -1239,13 +1252,15 @@ class Story(Configurable):
|
|||
if imgurl not in self.imgurls:
|
||||
|
||||
try:
|
||||
if not imgdata:
|
||||
# might already have from data:image in-line
|
||||
imgdata = fetch(imgurl,referer=parenturl)
|
||||
if imgurl.endswith('failedtoload'):
|
||||
return ("failedtoload","failedtoload")
|
||||
|
||||
parsedUrl = urlparse(imgurl)
|
||||
if self.getConfig('no_image_processing'):
|
||||
(data,ext,mime) = no_convert_image(imgurl,
|
||||
fetch(imgurl,referer=parenturl))
|
||||
imgdata)
|
||||
else:
|
||||
try:
|
||||
sizes = [ int(x) for x in self.getConfigList('image_max_size',['580', '725']) ]
|
||||
|
|
@ -1264,7 +1279,7 @@ class Story(Configurable):
|
|||
logger.info("background_color(%s) needs to be a hexidecimal color--using ffffff instead."%bgcolor)
|
||||
bgcolor = 'ffffff'
|
||||
(data,ext,mime) = convert_image(imgurl,
|
||||
fetch(imgurl,referer=parenturl),
|
||||
imgdata,
|
||||
sizes,
|
||||
grayscale,
|
||||
removetrans,
|
||||
|
|
|
|||
Loading…
Reference in a new issue