mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
Find and use epub3 cover on update--relies on Calibre's calibre:title-page property.
This commit is contained in:
parent
ead830c60a
commit
75a213beb9
1 changed files with 52 additions and 43 deletions
|
|
@ -33,6 +33,51 @@ def get_cover_data(inputio):
|
||||||
# (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
|
# (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
|
||||||
return get_update_data(inputio,getfilecount=True,getsoups=False)[4]
|
return get_update_data(inputio,getfilecount=True,getsoups=False)[4]
|
||||||
|
|
||||||
|
def _manifest_media_type(contentdom, relpath, href, default=None):
    """Return the media-type the OPF manifest declares for *href*.

    Scans every ``<item>`` element of *contentdom* and compares
    ``relpath + item's href`` against *href*.  Returns *default* when no
    manifest entry matches.
    """
    for entry in contentdom.getElementsByTagName("item"):
        if relpath + entry.getAttribute("href") == href:
            return entry.getAttribute("media-type")
    return default


def get_oldcover(epub,relpath,contentdom,item):
    """Locate the existing cover page and cover image inside an epub.

    Parameters:
      epub       -- object whose ``read(name)`` returns the bytes of a file
                    in the epub (presumably a zipfile.ZipFile -- confirm
                    against caller).
      relpath    -- path prefix prepended to manifest hrefs to build names
                    that ``epub.read`` accepts.
      contentdom -- parsed OPF document; only ``getElementsByTagName`` is
                    used on it.
      item       -- element whose ``href`` attribute points at the cover
                    (x)html file.

    Returns the tuple
      (oldcoverhtmlhref, oldcoverhtmltype, oldcoverhtmldata,
       oldcoverimghref, oldcoverimgtype, oldcoverimgdata)
    or None when the cover page or its image cannot be resolved.  Failures
    are logged as warnings and never raised: cover recovery is best-effort.
    """
    oldcoverhtmlhref = relpath + item.getAttribute("href")
    src = None
    try:
        oldcoverhtmldata = epub.read(oldcoverhtmlhref)
        # Fall back to xhtml when the manifest has no entry for the page.
        oldcoverhtmltype = _manifest_media_type(contentdom,
                                                relpath,
                                                oldcoverhtmlhref,
                                                "application/xhtml+xml")
        soup = make_soup(oldcoverhtmldata.decode("utf-8"))
        # first img or image tag.
        imgs = soup.find_all('img')
        if imgs:
            src = get_path_part(oldcoverhtmlhref)+imgs[0]['src']
        else:
            imgs = soup.find_all('image')
            if imgs:
                src = get_path_part(oldcoverhtmlhref)+imgs[0]['xlink:href']
    except Exception as e:
        ## Calibre's Polish Book corrupts sub-book covers.
        logger.warning("Cover (x)html file %s not found"%oldcoverhtmlhref)
        logger.warning("Exception: %s"%(unicode(e)))
        # Without the cover page there is no image reference to follow;
        # stop here instead of failing again on src=None below.
        return None

    if not src:
        return None

    try:
        # remove all .. and the path part above it, if present.
        # Mostly for epubs edited by Sigil.
        src = re.sub(r"([^/]+/\.\./)","",src)
        #print("epubutils: found pre-existing cover image:%s"%src)
        oldcoverimghref = src
        oldcoverimgdata = epub.read(src)
        oldcoverimgtype = _manifest_media_type(contentdom, relpath, oldcoverimghref)
    except Exception as e:
        logger.warning("Cover Image %s not found"%src)
        logger.warning("Exception: %s"%(unicode(e)))
        return None

    if oldcoverimgtype is None:
        # Image data exists but the manifest does not list it, so its
        # media-type is unknown; treat as no usable cover (same outcome as
        # before, but reported accurately).
        logger.warning("Cover Image %s has no manifest entry"%src)
        return None

    return (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
|
||||||
|
|
||||||
def get_update_data(inputio,
|
def get_update_data(inputio,
|
||||||
getfilecount=True,
|
getfilecount=True,
|
||||||
getsoups=True):
|
getsoups=True):
|
||||||
|
|
@ -61,48 +106,7 @@ def get_update_data(inputio,
|
||||||
for item in contentdom.getElementsByTagName("reference"):
|
for item in contentdom.getElementsByTagName("reference"):
|
||||||
if item.getAttribute("type") == "cover":
|
if item.getAttribute("type") == "cover":
|
||||||
# there is a cover (x)html file, save the soup for it.
|
# there is a cover (x)html file, save the soup for it.
|
||||||
href=relpath+item.getAttribute("href")
|
oldcover = get_oldcover(epub,relpath,contentdom,item)
|
||||||
src = None
|
|
||||||
try:
|
|
||||||
oldcoverhtmlhref = href
|
|
||||||
oldcoverhtmldata = epub.read(href)
|
|
||||||
oldcoverhtmltype = "application/xhtml+xml"
|
|
||||||
for item in contentdom.getElementsByTagName("item"):
|
|
||||||
if( relpath+item.getAttribute("href") == oldcoverhtmlhref ):
|
|
||||||
oldcoverhtmltype = item.getAttribute("media-type")
|
|
||||||
break
|
|
||||||
soup = make_soup(oldcoverhtmldata.decode("utf-8"))
|
|
||||||
# first img or image tag.
|
|
||||||
imgs = soup.find_all('img')
|
|
||||||
if imgs:
|
|
||||||
src = get_path_part(href)+imgs[0]['src']
|
|
||||||
else:
|
|
||||||
imgs = soup.find_all('image')
|
|
||||||
if imgs:
|
|
||||||
src=get_path_part(href)+imgs[0]['xlink:href']
|
|
||||||
|
|
||||||
if not src:
|
|
||||||
continue
|
|
||||||
except Exception as e:
|
|
||||||
## Calibre's Polish Book corrupts sub-book covers.
|
|
||||||
logger.warning("Cover (x)html file %s not found"%href)
|
|
||||||
logger.warning("Exception: %s"%(unicode(e)))
|
|
||||||
|
|
||||||
try:
|
|
||||||
# remove all .. and the path part above it, if present.
|
|
||||||
# Mostly for epubs edited by Sigil.
|
|
||||||
src = re.sub(r"([^/]+/\.\./)","",src)
|
|
||||||
#print("epubutils: found pre-existing cover image:%s"%src)
|
|
||||||
oldcoverimghref = src
|
|
||||||
oldcoverimgdata = epub.read(src)
|
|
||||||
for item in contentdom.getElementsByTagName("item"):
|
|
||||||
if( relpath+item.getAttribute("href") == oldcoverimghref ):
|
|
||||||
oldcoverimgtype = item.getAttribute("media-type")
|
|
||||||
break
|
|
||||||
oldcover = (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Cover Image %s not found"%src)
|
|
||||||
logger.warning("Exception: %s"%(unicode(e)))
|
|
||||||
|
|
||||||
filecount = 0
|
filecount = 0
|
||||||
soups = [] # list of xhmtl blocks
|
soups = [] # list of xhmtl blocks
|
||||||
|
|
@ -115,8 +119,13 @@ def get_update_data(inputio,
|
||||||
# First, count the 'chapter' files. FFF uses file0000.xhtml,
|
# First, count the 'chapter' files. FFF uses file0000.xhtml,
|
||||||
# but can also update epubs downloaded from Twisting the
|
# but can also update epubs downloaded from Twisting the
|
||||||
# Hellmouth, which uses chapter0.html.
|
# Hellmouth, which uses chapter0.html.
|
||||||
if( item.getAttribute("media-type") == "application/xhtml+xml" ):
|
if item.getAttribute("media-type") == "application/xhtml+xml":
|
||||||
href=relpath+item.getAttribute("href")
|
href=relpath+item.getAttribute("href")
|
||||||
|
# for epub3--only works on Calibre tagged covers.
|
||||||
|
# Back tracking to find the cover *page* from the
|
||||||
|
# cover *image* isn't currently done.
|
||||||
|
if "calibre:title-page" in item.getAttribute("properties"):
|
||||||
|
oldcover = get_oldcover(epub,relpath,contentdom,item)
|
||||||
#print("---- item href:%s path part: %s"%(href,get_path_part(href)))
|
#print("---- item href:%s path part: %s"%(href,get_path_part(href)))
|
||||||
if re.match(r'.*/log_page(_u\d+)?\.x?html',href):
|
if re.match(r'.*/log_page(_u\d+)?\.x?html',href):
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue