Find and use epub3 cover on update--relies on Calibre's calibre:title-page property.

2025-12-06 08:52:55 +01:00 · 2025-11-01 12:48:03 -05:00 · 2025-11-01 12:48:03 -05:00 · 75a213beb9
commit 75a213beb9
parent ead830c60a
1 changed files with 52 additions and 43 deletions
--- a/fanficfare/epubutils.py
+++ b/fanficfare/epubutils.py
@ -33,6 +33,51 @@ def get_cover_data(inputio):
    # (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
    return get_update_data(inputio,getfilecount=True,getsoups=False)[4]
 def get_oldcover(epub,relpath,contentdom,item):
    href=relpath+item.getAttribute("href")
    src = None
    try:
        oldcoverhtmlhref = href
        oldcoverhtmldata = epub.read(href)
        oldcoverhtmltype = "application/xhtml+xml"
        for item in contentdom.getElementsByTagName("item"):
            if( relpath+item.getAttribute("href") == oldcoverhtmlhref ):
                oldcoverhtmltype = item.getAttribute("media-type")
                break
        soup = make_soup(oldcoverhtmldata.decode("utf-8"))
        # first img or image tag.
        imgs = soup.find_all('img')
        if imgs:
            src = get_path_part(href)+imgs[0]['src']
        else:
            imgs = soup.find_all('image')
            if imgs:
                src=get_path_part(href)+imgs[0]['xlink:href']
        if not src:
            return None
    except Exception as e:
        ## Calibre's Polish Book corrupts sub-book covers.
        logger.warning("Cover (x)html file %s not found"%href)
        logger.warning("Exception: %s"%(unicode(e)))
    try:
        # remove all .. and the path part above it, if present.
        # Mostly for epubs edited by Sigil.
        src = re.sub(r"([^/]+/\.\./)","",src)
        #print("epubutils: found pre-existing cover image:%s"%src)
        oldcoverimghref = src
        oldcoverimgdata = epub.read(src)
        for item in contentdom.getElementsByTagName("item"):
            if( relpath+item.getAttribute("href") == oldcoverimghref ):
                oldcoverimgtype = item.getAttribute("media-type")
                break
        return (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
    except Exception as e:
        logger.warning("Cover Image %s not found"%src)
        logger.warning("Exception: %s"%(unicode(e)))
    return None
 def get_update_data(inputio,
                    getfilecount=True,
                    getsoups=True):
@ -61,48 +106,7 @@ def get_update_data(inputio,
    for item in contentdom.getElementsByTagName("reference"):
        if item.getAttribute("type") == "cover":
            # there is a cover (x)html file, save the soup for it.
-            href=relpath+item.getAttribute("href")
+            oldcover = get_oldcover(epub,relpath,contentdom,item)
            src = None
            try:
                oldcoverhtmlhref = href
                oldcoverhtmldata = epub.read(href)
                oldcoverhtmltype = "application/xhtml+xml"
                for item in contentdom.getElementsByTagName("item"):
                    if( relpath+item.getAttribute("href") == oldcoverhtmlhref ):
                        oldcoverhtmltype = item.getAttribute("media-type")
                        break
                soup = make_soup(oldcoverhtmldata.decode("utf-8"))
                # first img or image tag.
                imgs = soup.find_all('img')
                if imgs:
                    src = get_path_part(href)+imgs[0]['src']
                else:
                    imgs = soup.find_all('image')
                    if imgs:
                        src=get_path_part(href)+imgs[0]['xlink:href']
                if not src:
                    continue
            except Exception as e:
                ## Calibre's Polish Book corrupts sub-book covers.
                logger.warning("Cover (x)html file %s not found"%href)
                logger.warning("Exception: %s"%(unicode(e)))
            try:
                # remove all .. and the path part above it, if present.
                # Mostly for epubs edited by Sigil.
                src = re.sub(r"([^/]+/\.\./)","",src)
                #print("epubutils: found pre-existing cover image:%s"%src)
                oldcoverimghref = src
                oldcoverimgdata = epub.read(src)
                for item in contentdom.getElementsByTagName("item"):
                    if( relpath+item.getAttribute("href") == oldcoverimghref ):
                        oldcoverimgtype = item.getAttribute("media-type")
                        break
                oldcover = (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
            except Exception as e:
                logger.warning("Cover Image %s not found"%src)
                logger.warning("Exception: %s"%(unicode(e)))
    filecount = 0
    soups = [] # list of xhmtl blocks
@ -115,8 +119,13 @@ def get_update_data(inputio,
            # First, count the 'chapter' files.  FFF uses file0000.xhtml,
            # but can also update epubs downloaded from Twisting the
            # Hellmouth, which uses chapter0.html.
-            if( item.getAttribute("media-type") == "application/xhtml+xml" ):
+            if item.getAttribute("media-type") == "application/xhtml+xml":
                href=relpath+item.getAttribute("href")
                # for epub3--only works on Calibre tagged covers.
                # Back tracking to find the cover *page* from the
                # cover *image* isn't currently done.
                if "calibre:title-page" in item.getAttribute("properties"):
                    oldcover = get_oldcover(epub,relpath,contentdom,item)
                #print("---- item href:%s path part: %s"%(href,get_path_part(href)))
                if re.match(r'.*/log_page(_u\d+)?\.x?html',href):
                    try: