mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
Find and use epub3 cover on update--relies on Calibre's calibre:title-page property.
This commit is contained in:
parent
ead830c60a
commit
75a213beb9
1 changed files with 52 additions and 43 deletions
|
|
@ -33,6 +33,51 @@ def get_cover_data(inputio):
|
|||
# (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
|
||||
return get_update_data(inputio,getfilecount=True,getsoups=False)[4]
|
||||
|
||||
def get_oldcover(epub,relpath,contentdom,item):
|
||||
href=relpath+item.getAttribute("href")
|
||||
src = None
|
||||
try:
|
||||
oldcoverhtmlhref = href
|
||||
oldcoverhtmldata = epub.read(href)
|
||||
oldcoverhtmltype = "application/xhtml+xml"
|
||||
for item in contentdom.getElementsByTagName("item"):
|
||||
if( relpath+item.getAttribute("href") == oldcoverhtmlhref ):
|
||||
oldcoverhtmltype = item.getAttribute("media-type")
|
||||
break
|
||||
soup = make_soup(oldcoverhtmldata.decode("utf-8"))
|
||||
# first img or image tag.
|
||||
imgs = soup.find_all('img')
|
||||
if imgs:
|
||||
src = get_path_part(href)+imgs[0]['src']
|
||||
else:
|
||||
imgs = soup.find_all('image')
|
||||
if imgs:
|
||||
src=get_path_part(href)+imgs[0]['xlink:href']
|
||||
|
||||
if not src:
|
||||
return None
|
||||
except Exception as e:
|
||||
## Calibre's Polish Book corrupts sub-book covers.
|
||||
logger.warning("Cover (x)html file %s not found"%href)
|
||||
logger.warning("Exception: %s"%(unicode(e)))
|
||||
|
||||
try:
|
||||
# remove all .. and the path part above it, if present.
|
||||
# Mostly for epubs edited by Sigil.
|
||||
src = re.sub(r"([^/]+/\.\./)","",src)
|
||||
#print("epubutils: found pre-existing cover image:%s"%src)
|
||||
oldcoverimghref = src
|
||||
oldcoverimgdata = epub.read(src)
|
||||
for item in contentdom.getElementsByTagName("item"):
|
||||
if( relpath+item.getAttribute("href") == oldcoverimghref ):
|
||||
oldcoverimgtype = item.getAttribute("media-type")
|
||||
break
|
||||
return (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
|
||||
except Exception as e:
|
||||
logger.warning("Cover Image %s not found"%src)
|
||||
logger.warning("Exception: %s"%(unicode(e)))
|
||||
return None
|
||||
|
||||
def get_update_data(inputio,
|
||||
getfilecount=True,
|
||||
getsoups=True):
|
||||
|
|
@ -61,48 +106,7 @@ def get_update_data(inputio,
|
|||
for item in contentdom.getElementsByTagName("reference"):
|
||||
if item.getAttribute("type") == "cover":
|
||||
# there is a cover (x)html file, save the soup for it.
|
||||
href=relpath+item.getAttribute("href")
|
||||
src = None
|
||||
try:
|
||||
oldcoverhtmlhref = href
|
||||
oldcoverhtmldata = epub.read(href)
|
||||
oldcoverhtmltype = "application/xhtml+xml"
|
||||
for item in contentdom.getElementsByTagName("item"):
|
||||
if( relpath+item.getAttribute("href") == oldcoverhtmlhref ):
|
||||
oldcoverhtmltype = item.getAttribute("media-type")
|
||||
break
|
||||
soup = make_soup(oldcoverhtmldata.decode("utf-8"))
|
||||
# first img or image tag.
|
||||
imgs = soup.find_all('img')
|
||||
if imgs:
|
||||
src = get_path_part(href)+imgs[0]['src']
|
||||
else:
|
||||
imgs = soup.find_all('image')
|
||||
if imgs:
|
||||
src=get_path_part(href)+imgs[0]['xlink:href']
|
||||
|
||||
if not src:
|
||||
continue
|
||||
except Exception as e:
|
||||
## Calibre's Polish Book corrupts sub-book covers.
|
||||
logger.warning("Cover (x)html file %s not found"%href)
|
||||
logger.warning("Exception: %s"%(unicode(e)))
|
||||
|
||||
try:
|
||||
# remove all .. and the path part above it, if present.
|
||||
# Mostly for epubs edited by Sigil.
|
||||
src = re.sub(r"([^/]+/\.\./)","",src)
|
||||
#print("epubutils: found pre-existing cover image:%s"%src)
|
||||
oldcoverimghref = src
|
||||
oldcoverimgdata = epub.read(src)
|
||||
for item in contentdom.getElementsByTagName("item"):
|
||||
if( relpath+item.getAttribute("href") == oldcoverimghref ):
|
||||
oldcoverimgtype = item.getAttribute("media-type")
|
||||
break
|
||||
oldcover = (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
|
||||
except Exception as e:
|
||||
logger.warning("Cover Image %s not found"%src)
|
||||
logger.warning("Exception: %s"%(unicode(e)))
|
||||
oldcover = get_oldcover(epub,relpath,contentdom,item)
|
||||
|
||||
filecount = 0
|
||||
soups = [] # list of xhmtl blocks
|
||||
|
|
@ -115,8 +119,13 @@ def get_update_data(inputio,
|
|||
# First, count the 'chapter' files. FFF uses file0000.xhtml,
|
||||
# but can also update epubs downloaded from Twisting the
|
||||
# Hellmouth, which uses chapter0.html.
|
||||
if( item.getAttribute("media-type") == "application/xhtml+xml" ):
|
||||
if item.getAttribute("media-type") == "application/xhtml+xml":
|
||||
href=relpath+item.getAttribute("href")
|
||||
# for epub3--only works on Calibre tagged covers.
|
||||
# Back tracking to find the cover *page* from the
|
||||
# cover *image* isn't currently done.
|
||||
if "calibre:title-page" in item.getAttribute("properties"):
|
||||
oldcover = get_oldcover(epub,relpath,contentdom,item)
|
||||
#print("---- item href:%s path part: %s"%(href,get_path_part(href)))
|
||||
if re.match(r'.*/log_page(_u\d+)?\.x?html',href):
|
||||
try:
|
||||
|
|
|
|||
Loading…
Reference in a new issue