epubutils: Load all images, not just referenced. uuid5 will still allow use.

2026-05-09 05:21:13 +02:00 · 2026-01-15 08:57:51 -06:00 · 2026-01-15 08:57:51 -06:00 · ed02d61953
commit ed02d61953
parent b58d54b8ea
1 changed files with 18 additions and 1 deletions
--- a/fanficfare/epubutils.py
+++ b/fanficfare/epubutils.py
@@ -171,9 +171,11 @@ def get_update_data(inputio,
                                    newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
                                    longdesc=img['longdesc']
                                    img['src'] = img['longdesc']
+                                    # logger.debug("html -->img:%s"%longdesc)
                                    if longdesc not in images:
                                        data = epub.read(newsrc)
                                        images[longdesc] = (newsrc, data)
+                                        # logger.debug("-->html Add oldimages:%s"%newsrc)
                                except Exception as e:
                                    # don't report u'OEBPS/failedtoload',
                                    # it indicates a failed download
@@ -199,9 +201,11 @@ def get_update_data(inputio,
                                        # remove all .. and the path part above it, if present.
                                        # Mostly for epubs edited by Sigil.
                                        newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
+                                        # logger.debug("htmlcss -->img:%s"%href)
                                        if style_url not in images:
                                            data = epub.read(newsrc)
                                            images[style_url] = (newsrc, data)
+                                            # logger.debug("-->htmlcss Add oldimages:%s"%newsrc)
                                            # logger.debug("\nimg %s len(%s)\n"%(newsrc,len(data)))
                                    except Exception as e:
                                        logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
@@ -268,13 +272,26 @@ def get_update_data(inputio,
                            # remove all .. and the path part above it, if present.
                            # Mostly for epubs edited by Sigil.
                            newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
+                            # logger.debug("css -->img:%s"%href)
                            if style_url not in images:
                                data = epub.read(newsrc)
                                images[style_url] = (newsrc, data)
+                                # logger.debug("css -->Add oldimages:%s"%newsrc)
                                # logger.debug("\nimg %s len(%s)\n"%(newsrc,len(data)))
                        except Exception as e:
                            logger.warning("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
-
+            ## Load every image in the file.  This is partly redundant
+            ## with the chapter and css image scans above, but it also
+            ## keeps images that are no longer referenced because their
+            ## chapters were removed for a deliberate chapter reload.
+            ## Images are still discarded on epub write if not used.
+            if item.getAttribute("media-type").startswith("image/") and getsoups:
+                img_url = href.replace("OEBPS/","")
+                # logger.debug("-->img img:%s"%img_url)
+                if img_url not in images:
+                    data = epub.read(href)
+                    # logger.debug("-->img Add oldimages:%s"%href)
+                    images[img_url] = (img_url, data)
    try:
        calibrebookmark = epub.read("META-INF/calibre_bookmarks.txt")
    except: