Add pass-through support for inline data:image URIs in <img> tags.

2026-05-05 02:51:48 +02:00 · 2020-11-15 10:02:45 -06:00 · 2020-11-15 10:02:45 -06:00 · 12383b6342
commit 12383b6342
parent f91111de90
3 changed files with 24 additions and 18 deletions
--- a/fanficfare/adapters/adapter_test1.py
+++ b/fanficfare/adapters/adapter_test1.py
--- a/fanficfare/epubutils.py
+++ b/fanficfare/epubutils.py
@ -128,22 +128,23 @@ def get_update_data(inputio,
                        for img in soup.findAll('img'):
                            newsrc=''
                            longdesc=''
-                            try:
-                                newsrc=get_path_part(href)+img['src']
-                                # remove all .. and the path part above it, if present.
-                                # Mostly for epubs edited by Sigil.
-                                newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
-                                longdesc=img['longdesc']
-                                data = epub.read(newsrc)
-                                images[longdesc] = data
-                                img['src'] = img['longdesc']
-                            except Exception as e:
-                                # don't report u'OEBPS/failedtoload',
-                                # it indicates a failed download
-                                # originally.
-                                if newsrc != u'OEBPS/failedtoload':
-                                    logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
-                                    logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
+                            if not img['src'].startswith('data:image'):
+                                try:
+                                    newsrc=get_path_part(href)+img['src']
+                                    # remove all .. and the path part above it, if present.
+                                    # Mostly for epubs edited by Sigil.
+                                    newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
+                                    longdesc=img['longdesc']
+                                    data = epub.read(newsrc)
+                                    images[longdesc] = data
+                                    img['src'] = img['longdesc']
+                                except Exception as e:
+                                    # don't report u'OEBPS/failedtoload',
+                                    # it indicates a failed download
+                                    # originally.
+                                    if newsrc != u'OEBPS/failedtoload':
+                                        logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
+                                        logger.warning("Exception: %s"%(unicode(e)),exc_info=True)
                        bodysoup = soup.find('body')
                        # ffdl epubs have chapter title h3
                        h3 = bodysoup.find('h3')
--- a/fanficfare/story.py
+++ b/fanficfare/story.py
@ -1192,12 +1192,16 @@ class Story(Configurable):
        url = url.strip() # ran across an image with a space in the
                          # src. Browser handled it, so we'd better, too.

+        imgdata = None
+        if url.startswith("data:image"):
+            # don't do anything to in-line images.
+            return (url, "inline image")
        ## Mistakenly ended up with some // in image urls, like:
        ## https://forums.spacebattles.com//styles/default/xenforo/clear.png
        ## Removing one /, but not ://
-        if not url.startswith("file"): # keep file:///
+        if not url.startswith("file:"): # keep file:///
            url = re.sub(r"([^:])//",r"\1/",url)
-        if url.startswith("http") or url.startswith("file") or parenturl == None:
+        if url.startswith("http") or url.startswith("file:") or parenturl == None:
            imgurl = url
        else:
            parsedUrl = urlparse(parenturl)