Adding pass-through support for data:image in-line <img>s.

This commit is contained in:
Jim Miller 2020-11-15 10:02:45 -06:00
parent f91111de90
commit 12383b6342
3 changed files with 24 additions and 18 deletions

File diff suppressed because one or more lines are too long

View file

@@ -128,22 +128,23 @@ def get_update_data(inputio,
for img in soup.findAll('img'):
newsrc=''
longdesc=''
try:
newsrc=get_path_part(href)+img['src']
# remove all .. and the path part above it, if present.
# Mostly for epubs edited by Sigil.
newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
longdesc=img['longdesc']
data = epub.read(newsrc)
images[longdesc] = data
img['src'] = img['longdesc']
except Exception as e:
# don't report u'OEBPS/failedtoload',
# it indicates a failed download
# originally.
if newsrc != u'OEBPS/failedtoload':
logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
if not img['src'].startswith('data:image'):
try:
newsrc=get_path_part(href)+img['src']
# remove all .. and the path part above it, if present.
# Mostly for epubs edited by Sigil.
newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
longdesc=img['longdesc']
data = epub.read(newsrc)
images[longdesc] = data
img['src'] = img['longdesc']
except Exception as e:
# don't report u'OEBPS/failedtoload',
# it indicates a failed download
# originally.
if newsrc != u'OEBPS/failedtoload':
logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
bodysoup = soup.find('body')
# ffdl epubs have chapter title h3
h3 = bodysoup.find('h3')

View file

@@ -1192,12 +1192,16 @@ class Story(Configurable):
url = url.strip() # ran across an image with a space in the
# src. Browser handled it, so we'd better, too.
imgdata = None
if url.startswith("data:image"):
# don't do anything to in-line images.
return (url, "inline image")
## Mistakenly ended up with some // in image urls, like:
## https://forums.spacebattles.com//styles/default/xenforo/clear.png
## Removing one /, but not ://
if not url.startswith("file"): # keep file:///
if not url.startswith("file:"): # keep file:///
url = re.sub(r"([^:])//",r"\1/",url)
if url.startswith("http") or url.startswith("file") or parenturl == None:
if url.startswith("http") or url.startswith("file:") or parenturl == None:
imgurl = url
else:
parsedUrl = urlparse(parenturl)