mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-05 02:51:48 +02:00
Adding pass-through support for data:image in-line <img>s.
This commit is contained in:
parent
f91111de90
commit
12383b6342
3 changed files with 24 additions and 18 deletions
File diff suppressed because one or more lines are too long
|
|
@ -128,22 +128,23 @@ def get_update_data(inputio,
|
|||
for img in soup.findAll('img'):
|
||||
newsrc=''
|
||||
longdesc=''
|
||||
try:
|
||||
newsrc=get_path_part(href)+img['src']
|
||||
# remove all .. and the path part above it, if present.
|
||||
# Mostly for epubs edited by Sigil.
|
||||
newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
|
||||
longdesc=img['longdesc']
|
||||
data = epub.read(newsrc)
|
||||
images[longdesc] = data
|
||||
img['src'] = img['longdesc']
|
||||
except Exception as e:
|
||||
# don't report u'OEBPS/failedtoload',
|
||||
# it indicates a failed download
|
||||
# originally.
|
||||
if newsrc != u'OEBPS/failedtoload':
|
||||
logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
|
||||
logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
|
||||
if not img['src'].startswith('data:image'):
|
||||
try:
|
||||
newsrc=get_path_part(href)+img['src']
|
||||
# remove all .. and the path part above it, if present.
|
||||
# Mostly for epubs edited by Sigil.
|
||||
newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
|
||||
longdesc=img['longdesc']
|
||||
data = epub.read(newsrc)
|
||||
images[longdesc] = data
|
||||
img['src'] = img['longdesc']
|
||||
except Exception as e:
|
||||
# don't report u'OEBPS/failedtoload',
|
||||
# it indicates a failed download
|
||||
# originally.
|
||||
if newsrc != u'OEBPS/failedtoload':
|
||||
logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
|
||||
logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
|
||||
bodysoup = soup.find('body')
|
||||
# ffdl epubs have chapter title h3
|
||||
h3 = bodysoup.find('h3')
|
||||
|
|
|
|||
|
|
@ -1192,12 +1192,16 @@ class Story(Configurable):
|
|||
url = url.strip() # ran across an image with a space in the
|
||||
# src. Browser handled it, so we'd better, too.
|
||||
|
||||
imgdata = None
|
||||
if url.startswith("data:image"):
|
||||
# don't do anything to in-line images.
|
||||
return (url, "inline image")
|
||||
## Mistakenly ended up with some // in image urls, like:
|
||||
## https://forums.spacebattles.com//styles/default/xenforo/clear.png
|
||||
## Removing one /, but not ://
|
||||
if not url.startswith("file"): # keep file:///
|
||||
if not url.startswith("file:"): # keep file:///
|
||||
url = re.sub(r"([^:])//",r"\1/",url)
|
||||
if url.startswith("http") or url.startswith("file") or parenturl == None:
|
||||
if url.startswith("http") or url.startswith("file:") or parenturl == None:
|
||||
imgurl = url
|
||||
else:
|
||||
parsedUrl = urlparse(parenturl)
|
||||
|
|
|
|||
Loading…
Reference in a new issue