Find and use epub3 cover on update--relies on Calibre's calibre:title-page property.

2025-12-06 08:52:55 +01:00 · 2025-11-01 12:48:03 -05:00 · 2025-11-01 12:48:03 -05:00 · 75a213beb9
commit 75a213beb9
parent ead830c60a
1 changed files with 52 additions and 43 deletions
--- a/fanficfare/epubutils.py
+++ b/fanficfare/epubutils.py
@ -33,6 +33,51 @@ def get_cover_data(inputio):
    # (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
    return get_update_data(inputio,getfilecount=True,getsoups=False)[4]

+def get_oldcover(epub,relpath,contentdom,item):
+    href=relpath+item.getAttribute("href")
+    src = None
+    try:
+        oldcoverhtmlhref = href
+        oldcoverhtmldata = epub.read(href)
+        oldcoverhtmltype = "application/xhtml+xml"
+        for item in contentdom.getElementsByTagName("item"):
+            if( relpath+item.getAttribute("href") == oldcoverhtmlhref ):
+                oldcoverhtmltype = item.getAttribute("media-type")
+                break
+        soup = make_soup(oldcoverhtmldata.decode("utf-8"))
+        # first img or image tag.
+        imgs = soup.find_all('img')
+        if imgs:
+            src = get_path_part(href)+imgs[0]['src']
+        else:
+            imgs = soup.find_all('image')
+            if imgs:
+                src=get_path_part(href)+imgs[0]['xlink:href']
+
+        if not src:
+            return None
+    except Exception as e:
+        ## Calibre's Polish Book corrupts sub-book covers.
+        logger.warning("Cover (x)html file %s not found"%href)
+        logger.warning("Exception: %s"%(unicode(e)))
+
+    try:
+        # remove all .. and the path part above it, if present.
+        # Mostly for epubs edited by Sigil.
+        src = re.sub(r"([^/]+/\.\./)","",src)
+        #print("epubutils: found pre-existing cover image:%s"%src)
+        oldcoverimghref = src
+        oldcoverimgdata = epub.read(src)
+        for item in contentdom.getElementsByTagName("item"):
+            if( relpath+item.getAttribute("href") == oldcoverimghref ):
+                oldcoverimgtype = item.getAttribute("media-type")
+                break
+        return (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
+    except Exception as e:
+        logger.warning("Cover Image %s not found"%src)
+        logger.warning("Exception: %s"%(unicode(e)))
+    return None
+
 def get_update_data(inputio,
                    getfilecount=True,
                    getsoups=True):
@ -61,48 +106,7 @@ def get_update_data(inputio,
    for item in contentdom.getElementsByTagName("reference"):
        if item.getAttribute("type") == "cover":
            # there is a cover (x)html file, save the soup for it.
-            href=relpath+item.getAttribute("href")
-            src = None
-            try:
-                oldcoverhtmlhref = href
-                oldcoverhtmldata = epub.read(href)
-                oldcoverhtmltype = "application/xhtml+xml"
-                for item in contentdom.getElementsByTagName("item"):
-                    if( relpath+item.getAttribute("href") == oldcoverhtmlhref ):
-                        oldcoverhtmltype = item.getAttribute("media-type")
-                        break
-                soup = make_soup(oldcoverhtmldata.decode("utf-8"))
-                # first img or image tag.
-                imgs = soup.find_all('img')
-                if imgs:
-                    src = get_path_part(href)+imgs[0]['src']
-                else:
-                    imgs = soup.find_all('image')
-                    if imgs:
-                        src=get_path_part(href)+imgs[0]['xlink:href']
-
-                if not src:
-                    continue
-            except Exception as e:
-                ## Calibre's Polish Book corrupts sub-book covers.
-                logger.warning("Cover (x)html file %s not found"%href)
-                logger.warning("Exception: %s"%(unicode(e)))
-
-            try:
-                # remove all .. and the path part above it, if present.
-                # Mostly for epubs edited by Sigil.
-                src = re.sub(r"([^/]+/\.\./)","",src)
-                #print("epubutils: found pre-existing cover image:%s"%src)
-                oldcoverimghref = src
-                oldcoverimgdata = epub.read(src)
-                for item in contentdom.getElementsByTagName("item"):
-                    if( relpath+item.getAttribute("href") == oldcoverimghref ):
-                        oldcoverimgtype = item.getAttribute("media-type")
-                        break
-                oldcover = (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
-            except Exception as e:
-                logger.warning("Cover Image %s not found"%src)
-                logger.warning("Exception: %s"%(unicode(e)))
+            oldcover = get_oldcover(epub,relpath,contentdom,item)

    filecount = 0
    soups = [] # list of xhmtl blocks
@ -115,8 +119,13 @@ def get_update_data(inputio,
            # First, count the 'chapter' files.  FFF uses file0000.xhtml,
            # but can also update epubs downloaded from Twisting the
            # Hellmouth, which uses chapter0.html.
-            if( item.getAttribute("media-type") == "application/xhtml+xml" ):
+            if item.getAttribute("media-type") == "application/xhtml+xml":
                href=relpath+item.getAttribute("href")
+                # for epub3--only works on Calibre tagged covers.
+                # Back tracking to find the cover *page* from the
+                # cover *image* isn't currently done.
+                if "calibre:title-page" in item.getAttribute("properties"):
+                    oldcover = get_oldcover(epub,relpath,contentdom,item)
                #print("---- item href:%s path part: %s"%(href,get_path_part(href)))
                if re.match(r'.*/log_page(_u\d+)?\.x?html',href):
                    try: