First version of cover image support. Needs more work--update merge is broken.

Jim Miller 2012-02-26 15:19:41 -06:00
parent a89cec6844
commit f0445f106c
10 changed files with 172 additions and 117 deletions

View file

@ -36,6 +36,7 @@ all_prefs = JSONConfig('plugins/fanfictiondownloader_plugin')
# take from here.
all_prefs.defaults['personal.ini'] = get_resources('plugin-example.ini')
all_prefs.defaults['updatemeta'] = True
all_prefs.defaults['updatecover'] = False
all_prefs.defaults['keeptags'] = False
all_prefs.defaults['urlsfromclip'] = True
all_prefs.defaults['updatedefault'] = True
@ -53,6 +54,7 @@ all_prefs.defaults['custom_cols'] = {}
# when config is called for the first time on a library.
copylist = ['personal.ini',
'updatemeta',
'updatecover',
'keeptags',
'urlsfromclip',
'updatedefault',
@ -144,6 +146,7 @@ class ConfigWidget(QWidget):
prefs['fileform'] = unicode(self.basic_tab.fileform.currentText())
prefs['collision'] = unicode(self.basic_tab.collision.currentText())
prefs['updatemeta'] = self.basic_tab.updatemeta.isChecked()
prefs['updatecover'] = self.basic_tab.updatecover.isChecked()
prefs['keeptags'] = self.basic_tab.keeptags.isChecked()
prefs['urlsfromclip'] = self.basic_tab.urlsfromclip.isChecked()
prefs['updatedefault'] = self.basic_tab.updatedefault.isChecked()
@ -234,6 +237,11 @@ class BasicTab(QWidget):
self.updatemeta.setChecked(prefs['updatemeta'])
self.l.addWidget(self.updatemeta)
self.updatecover = QCheckBox('Update Cover when Updating Metadata?',self)
self.updatecover.setToolTip('Update cover image when metadata is updated. EPUB only.')
self.updatecover.setChecked(prefs['updatecover'])
self.l.addWidget(self.updatecover)
self.keeptags = QCheckBox('Keep Existing Tags when Updating Metadata?',self)
self.keeptags.setToolTip('Existing tags will be kept and any new tags added.\nCompleted and In-Progress tags will still be updated, if known.\nLast Updated tags will be updated if lastupdate is in include_subject_tags.')
self.keeptags.setChecked(prefs['keeptags'])

View file

@ -637,7 +637,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
if options['collision'] == CALIBREONLY or \
(options['updatemeta'] and book['good']):
self._update_metadata(db, book['calibre_id'], book, mi)
self._update_metadata(db, book['calibre_id'], book, mi, options)
def _update_books_completed(self, book_list, options={}):
@ -656,6 +656,9 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
self.gui.library_view.model().current_changed(current, self.previous)
self.gui.tags_view.recount()
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
self.gui.status_bar.show_message(_('Finished Adding/Updating %d books.'%(len(update_list) + len(add_list))), 3000)
if len(update_list) + len(add_list) != len(book_list):
@ -736,7 +739,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
return book_id
def _update_metadata(self, db, book_id, book, mi):
def _update_metadata(self, db, book_id, book, mi, options):
if prefs['keeptags']:
old_tags = db.get_tags(book_id)
# remove old Completed/In-Progress only if there's a new one.
@ -755,6 +758,13 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
oldmi = db.get_metadata(book_id,index_is_id=True)
if not oldmi.languages:
mi.languages=['eng']
if options['fileform'] == 'epub' and prefs['updatecover']:
existingepub = db.format(book_id,'EPUB',index_is_id=True, as_file=True)
epubmi = get_metadata(existingepub,'EPUB')
if epubmi.cover_data[1] is not None:
db.set_cover(book_id, epubmi.cover_data[1])
#mi.cover = epubmi.cover_data[1]
db.set_metadata(book_id,mi)
@ -787,7 +797,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
if meta == 'status-I':
val = book['all_metadata']['status'] == 'In-Progress'
db.set_custom(book_id, val, label=label, commit=False)
db.commit()
def _get_clean_reading_lists(self,lists):
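The cover-update path added to _update_metadata above boils down to three calibre database calls: fetch the stored EPUB, read its embedded cover with get_metadata, and push that image into the library record. A minimal standalone sketch of the same idea; the helper name is illustrative and not part of the plugin, and it assumes a calibre db handle and a book_id whose book already has an EPUB format:

from calibre.ebooks.metadata.meta import get_metadata

def copy_epub_cover_to_calibre(db, book_id):
    # Fetch the stored EPUB as a file-like object (None if no EPUB format).
    existingepub = db.format(book_id, 'EPUB', index_is_id=True, as_file=True)
    if existingepub is None:
        return False
    # cover_data is an (extension, bytes) tuple; (None, None) when absent.
    epubmi = get_metadata(existingepub, 'EPUB')
    if epubmi.cover_data and epubmi.cover_data[1] is not None:
        # Replace the calibre library cover with the EPUB's cover image.
        db.set_cover(book_id, epubmi.cover_data[1])
        return True
    return False

Note the guard in the diff (options['fileform'] == 'epub' and prefs['updatecover']): only EPUB output goes through this path, and only when the new config checkbox is enabled.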

View file

@ -126,7 +126,6 @@ extratags: FanFiction
## number of seconds to sleep between calls to the story site. May be
## useful if pulling large numbers of stories or if the site is slow.
## Primarily for commandline.
#slow_down_sleep_time:0.5
## For use only with stand-alone CLI version--run a command on the
@ -231,14 +230,28 @@ output_css:
.u {text-decoration: underline;}
.bold {font-weight: bold;}
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.
#include_images:false
## If not set, the summary will have all html stripped for safety.
## Both this and include_images must be true to get images in the
## summary.
#keep_summary_html:false
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.
#include_images:false
## If set, the first image found will be made the cover image. If
## keep_summary_html is true, any images in summary will be before any
## in chapters.
#make_firstimage_cover: false
## If set, and there isn't already a cover image from the adapter or
## from make_firstimage_cover, this image will be made the cover.
## It can be either a 'file:' or 'http:' url.
## Note that if you enable make_firstimage_cover in [epub], but want
## to use default_cover_image for a specific site, use the site:format
## section, for example: [www.ficwad.com:epub]
#default_cover_image:file:///C:/Users/username/Desktop/nook/images/icon.png
#default_cover_image:http://www.somesite.com/someimage.gif
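To make the interaction between these settings concrete, here is one possible personal.ini fragment (the site section and file path are only examples, taken from the comments above). It turns on in-story images and first-image covers for all epub output, and gives one site a fixed default cover via its site:format section:

[epub]
include_images:true
keep_summary_html:true
make_firstimage_cover:true

[www.ficwad.com:epub]
default_cover_image:file:///C:/Users/username/Desktop/nook/images/icon.png

As described above, default_cover_image only applies when neither the adapter nor make_firstimage_cover has already produced a cover.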
## Resize images down to width, height, preserving aspect ratio.
## Nook size, with margin.

View file

@ -31,59 +31,7 @@ from time import time
from exceptions import KeyError
from xml.dom.minidom import parse, parseString, getDOMImplementation
def main(argv):
# read in args, anything starting with -- will be treated as --<variable>=<value>
usage = "usage: %prog [options] <input epub> [<input epub>...]"
parser = OptionParser(usage)
parser.add_option("-o", "--output", dest="outputopt", default="merge.epub",
help="Set OUTPUT file, Default: merge.epub", metavar="OUTPUT")
parser.add_option("-t", "--title", dest="titleopt", default=None,
help="Use TITLE as the metadata title. Default: '<first epub title> Anthology'", metavar="TITLE")
parser.add_option("-d", "--description", dest="descopt", default=None,
help="Use DESC as the metadata description. Default: '<epub title> by <author>' for each epub.", metavar="DESC")
parser.add_option("-a", "--author",
action="append", dest="authoropts", default=[],
help="Use AUTHOR as a metadata author, multiple authors may be given, Default: <All authors from epubs>", metavar="AUTHOR")
parser.add_option("-f", "--first",
action="store_true", dest="fromfirst", default=False,
help="Take all metadata from first input epub",)
parser.add_option("-n", "--titles-in-toc",
action="store_true", dest="titlenavpoints",
help="Put an entry in the TOC for each epub, in addition to each epub's chapters.",)
parser.add_option("-s", "--strip-title-toc",
action="store_true", dest="striptitletoc",
help="Strip any title_page.xhtml and toc_page.xhtml files.",)
(options, args) = parser.parse_args()
## Add .epub if not already there.
if not options.outputopt.lower().endswith(".epub"):
options.outputopt=options.outputopt+".epub"
print "output file: "+options.outputopt
doMerge(options.outputopt,
args,
options.authoropts,
options.titleopt,
options.descopt,
options.fromfirst,
options.titlenavpoints,
options.striptitletoc)
# output = StringIO.StringIO()
# files = []
# for file in args:
# f = open(file,"rb")
# fio = StringIO.StringIO(f.read())
# f.close()
# files.append(fio)
# doMerge(output,files,authoropts,titleopt,descopt,fromfirst,titlenavpoints,striptitletoc)
# out = open(outputopt,"wb")
# out.write(output.getvalue())
def doMerge(outputio,files,authoropts=[],titleopt=None,descopt=None,
fromfirst=False,
titlenavpoints=True,
@ -210,7 +158,7 @@ def doMerge(outputio,files,authoropts=[],titleopt=None,descopt=None,
href=bookdir+relpath+item.getAttribute("href")
href=href.encode('utf8')
#print "href:"+href
if not striptitletoc or not re.match(r'.*/(title|toc)_page\.xhtml',
if not striptitletoc or not re.match(r'.*/((title|toc)_page|cover)\.xhtml',
item.getAttribute("href")):
if href not in filelist:
try:
@ -225,7 +173,7 @@ def doMerge(outputio,files,authoropts=[],titleopt=None,descopt=None,
for itemref in metadom.getElementsByTagName("itemref"):
if not striptitletoc or not re.match(r'(title|toc)_page', itemref.getAttribute("idref")):
if not striptitletoc or not re.match(r'((title|toc)_page|cover)', itemref.getAttribute("idref")):
itemrefs.append(bookid+itemref.getAttribute("idref"))
booknum=booknum+1;
@ -386,4 +334,8 @@ def newTag(dom,name,attrs=None,text=None):
return tag
if __name__ == "__main__":
main(sys.argv[1:])
print('''
This version is only used by fanfictiondownloader now. See:
http://code.google.com/p/epubmerge/
...for a CLI epubmerge.py program and calibre plugin.
''')

View file

@ -142,13 +142,11 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
except:
pass
# fimfic is the first site with an explicit cover image.
story_img = soup.find('img',{'class':'story_image'})
if self.getConfig('keep_summary_html') and \
self.getConfig('include_images') and \
story_img:
self.setDescription(self.url,"<p><center>%s</center></p>%s"%(story_img,description_soup.text))
else:
self.setDescription(self.url,description_soup.text)
if self.getConfig('include_images') and story_img:
self.story.addImgUrl(self,self.url,story_img['src'],self._fetchUrlRaw,cover=True)
self.setDescription(self.url,description_soup.text)
#self.story.setMetadata('description', description_soup.text)
# Unfortunately, nowhere on the page is the year mentioned.

View file

@ -127,8 +127,8 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
('Chapter 5',self.url+"&chapter=6"),
('Chapter 6',self.url+"&chapter=6"),
('Chapter 7',self.url+"&chapter=6"),
# ('Chapter 8',self.url+"&chapter=6"),
# ('Chapter 9',self.url+"&chapter=6"),
('Chapter 8',self.url+"&chapter=6"),
('Chapter 9',self.url+"&chapter=6"),
# ('Chapter 0',self.url+"&chapter=6"),
# ('Chapter a',self.url+"&chapter=6"),
# ('Chapter b',self.url+"&chapter=6"),
@ -181,6 +181,8 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
<p><center>Centered text</center></p>
<p>Lorem '''+self.crazystring+''' <i>italics</i>, <b>bold</b>, <u>underline</u> consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
br breaks<br><br>
<a href="http://code.google.com/p/fanficdownloader/wiki/FanFictionDownLoaderPluginWithReadingList" title="Tilt-a-Whirl by Jim &amp; Sarah, on Flickr"><img src="http://i.imgur.com/bo8eD.png"></a><br/>
br breaks<br><br>
<hr>
horizontal rules

View file

@ -198,6 +198,16 @@ class BaseSiteAdapter(Configurable):
self.story.addChapter(removeEntities(title),
removeEntities(self.getChapterText(url)))
self.storyDone = True
# images are included, but the story supplied no cover: add default_cover_image as the cover.
if self.getConfig('include_images') and \
not self.story.cover and \
self.getConfig('default_cover_image'):
self.story.addImgUrl(self,
None,
self.getConfig('default_cover_image'),
self._fetchUrlRaw,
cover=True)
return self.story
def getStoryMetadataOnly(self):

View file

@ -154,6 +154,7 @@ class Story:
self.imgurls = []
self.imgtuples = []
self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.
self.cover=None
def setMetadata(self, key, value):
## still keeps &lt; &lt; and &amp;
@ -237,14 +238,14 @@ class Story:
# pass fetch in from the adapter in case we need the cookies it collected,
# and because this is a base Story class method.
def addImgUrl(self,configurable,parenturl,url,fetch):
def addImgUrl(self,configurable,parenturl,url,fetch,cover=False):
# appengine (web version) isn't allowed to do images--just
# gets too big too fast and breaks things.
if is_appengine:
return
if url.startswith("http") :
if url.startswith("http") or url.startswith("file") :
imgurl = url
elif parenturl != None:
parsedUrl = urlparse.urlparse(parenturl)
@ -261,21 +262,6 @@ class Story:
parsedUrl.path + url,
'','',''))
# using b64 encode of the url means that the same image ends
# up with the same name both now, in different chapters, and
# later with new update chapters. Numbering them didn't do
# that.
# newsrc = "images/%s.jpg"%(b64encode(imgurl))
# step = 20
# if newsrc > step:
# i = step
# while i < len(newsrc):
# newsrc = newsrc[:i]+"/"+newsrc[i:]
# i += step
# But, b64 names can get too big for zip (on windows, at
# least) to handle too quickly.
# This version, prefixing the images with the creation
# timestamp, still allows for dup images to be detected and
# not dup'ed in a single download. And it prevents 0.jpg from
@ -286,7 +272,7 @@ class Story:
# scanning all the pre-existing files on update. oldsrc is
# being saved on img tags just in case, however.
prefix=self.getMetadataRaw('dateCreated').strftime("%Y%m%d%H%M%S")
if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl)
sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
@ -294,12 +280,31 @@ class Story:
fetch(imgurl),
sizes,
configurable.getConfig('grayscale_images'))
self.imgurls.append(imgurl)
newsrc = "images/%s-%s.%s"%(
prefix,
self.imgurls.index(imgurl),
ext)
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
# explicit cover: make it the first image.
if cover:
if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']:
# remove existing cover, if there is one.
del self.imgurls[0]
del self.imgtuples[0]
self.imgurls.insert(0,imgurl)
newsrc = "images/cover.%s"%ext
self.cover=newsrc
self.imgtuples.insert(0,{'newsrc':newsrc,'mime':mime,'data':data})
else:
self.imgurls.append(imgurl)
# First image: copy, not link, because calibre will replace it with its cover.
if (len(self.imgurls)==1 and configurable.getConfig('make_firstimage_cover')):
newsrc = "images/cover.%s"%ext
self.cover=newsrc
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
self.imgurls.append(imgurl)
newsrc = "images/%s-%s.%s"%(
prefix,
self.imgurls.index(imgurl),
ext)
self.imgtuples.append({'newsrc':newsrc,'mime':mime,'data':data})
print("\nimgurl:%s\nnewsrc:%s\nimage size:%d\n"%(imgurl,newsrc,len(data)))
else:
newsrc = self.imgtuples[self.imgurls.index(imgurl)]['newsrc']
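The comment block above explains why image files are now named with a dateCreated timestamp prefix rather than a b64-encoded URL, and the new cover branches always use a fixed name. A rough illustration, with made-up values, of the filenames addImgUrl produces:

from datetime import datetime

# Stand-ins for Story state; real values come from the story's metadata.
date_created = datetime(2012, 2, 26, 15, 19, 41)
prefix = date_created.strftime("%Y%m%d%H%M%S")    # "20120226151941"
ext = "jpg"

# Ordinary image: timestamp prefix plus the image's position index,
# so the same URL maps to the same name within one download.
newsrc = "images/%s-%s.%s" % (prefix, 0, ext)     # "images/20120226151941-0.jpg"

# Explicit cover (cover=True) or first image with make_firstimage_cover:
# a fixed name, so a later explicit cover replaces a first-image one.
cover_src = "images/cover.%s" % ext               # "images/cover.jpg"

The timestamp prefix also keeps a new download's images from colliding with files left by an earlier update, without having to scan the pre-existing files.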

View file

@ -261,7 +261,52 @@ ${value}<br />
itemrefs = [] # list of strings -- idrefs from .opfs' spines
items.append(("ncx","toc.ncx","application/x-dtbncx+xml",None)) ## we'll generate the toc.ncx file,
## but it needs to be in the items manifest.
if self.getConfig('include_images'):
imgcount=0
for imgmap in self.story.getImgUrls():
imgfile = "OEBPS/"+imgmap['newsrc']
outputepub.writestr(imgfile,imgmap['data'])
items.append(("image%04d"%imgcount,
imgfile,
imgmap['mime'],
None))
imgcount+=1
items.append(("style","OEBPS/stylesheet.css","text/css",None))
guide = None
coverIO = None
if self.story.cover:
items.append(("cover","OEBPS/cover.xhtml","application/xhtml+xml",None))
itemrefs.append("cover")
#
# <meta name="cover" content="cover.jpg"/>
metadata.appendChild(newTag(contentdom,"meta",{"content":"image0000",
"name":"cover"}))
# cover stuff for later:
# at end of <package>:
# <guide>
# <reference type="cover" title="Cover" href="Text/cover.xhtml"/>
# </guide>
guide = newTag(contentdom,"guide")
guide.appendChild(newTag(contentdom,"reference",attrs={"type":"cover",
"title":"Cover",
"href":"cover.xhtml"}))
coverIO = StringIO.StringIO()
coverIO.write('''
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head><title>Cover</title><style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt; }
div { margin: 0pt; padding: 0pt; }
</style></head><body><div>
<img src="%s" alt="cover"/>
</div></body></html>
'''%self.story.cover)
if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
itemrefs.append("title_page")
@ -277,17 +322,6 @@ ${value}<br />
title))
itemrefs.append("file%04d"%i)
if self.getConfig('include_images'):
imgcount=0
for imgmap in self.story.getImgUrls():
imgfile = "OEBPS/"+imgmap['newsrc']
outputepub.writestr(imgfile,imgmap['data'])
items.append(("image%04d"%imgcount,
imgfile,
imgmap['mime'],
None))
imgcount+=1
manifest = contentdom.createElement("manifest")
package.appendChild(manifest)
for item in items:
@ -303,8 +337,17 @@ ${value}<br />
spine.appendChild(newTag(contentdom,"itemref",
attrs={"idref":itemref,
"linear":"yes"}))
# guide only exists if there's a cover.
if guide:
package.appendChild(guide)
# write content.opf to zip.
outputepub.writestr("content.opf",contentdom.toxml(encoding='utf-8'))
contentxml = contentdom.toxml(encoding='utf-8')
# tweak for brain damaged Nook STR.
contentxml = contentxml.replace('<meta content="image0000" name="cover"/>',
'<meta name="cover" content="image0000"/>')
outputepub.writestr("content.opf",contentxml)
contentdom.unlink()
del contentdom
@ -340,7 +383,7 @@ ${value}<br />
index=0
for item in items:
(id,href,type,title)=item
# only items to be skipped, toc.ncx, stylesheet.css, should have no title.
# only items to be skipped, cover.xhtml, images, toc.ncx, stylesheet.css, should have no title.
if title :
navPoint = newTag(tocncxdom,"navPoint",
attrs={'id':id,
@ -353,7 +396,7 @@ ${value}<br />
navPoint.appendChild(newTag(tocncxdom,"content",attrs={"src":href}))
index=index+1
# write toc.ncs to zip file
# write toc.ncx to zip file
outputepub.writestr("toc.ncx",tocncxdom.toxml(encoding='utf-8'))
tocncxdom.unlink()
del tocncxdom
@ -374,7 +417,11 @@ ${value}<br />
WIDE_TITLE_ENTRY = self.EPUB_TITLE_ENTRY # same, only wide in tables.
NO_TITLE_ENTRY = self.EPUB_NO_TITLE_ENTRY
TITLE_PAGE_END = self.EPUB_TITLE_PAGE_END
if coverIO:
outputepub.writestr("OEBPS/cover.xhtml",coverIO.getvalue())
coverIO.close()
titlepageIO = StringIO.StringIO()
self.writeTitlePage(out=titlepageIO,
START=TITLE_PAGE_START,

View file

@ -108,10 +108,6 @@ extratags: FanFiction
## useful if pulling large numbers of stories or if the site is slow.
#slow_down_sleep_time:0.5
## output background color--only used by html and epub (and ignored in
## epub by many readers). Must be hex code, # will be added.
background_color: ffffff
## Use regular expressions to find and replace (or remove) metadata.
## For example, you could change Sci-Fi=>SF, remove *-Centered tags,
## etc. See http://docs.python.org/library/re.html (look for re.sub)
@ -206,14 +202,28 @@ output_css:
.u {text-decoration: underline;}
.bold {font-weight: bold;}
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.
#include_images:false
## If not set, the summary will have all html stripped for safety.
## Both this and include_images must be true to get images in the
## summary.
#keep_summary_html:false
## include images from img tags in the body and summary of
## stories. Images will be converted to jpg for size if possible.
#include_images:false
## If set, the first image found will be made the cover image. If
## keep_summary_html is true, any images in summary will be before any
## in chapters.
#make_firstimage_cover: false
## If set, and there isn't already a cover image from the adapter or
## from make_firstimage_cover, this image will be made the cover.
## It can be either a 'file:' or 'http:' url.
## Note that if you enable make_firstimage_cover in [epub], but want
## to use default_cover_image for a specific site, use the site:format
## section, for example: [www.ficwad.com:epub]
#default_cover_image:file:///C:/Users/username/Desktop/nook/images/icon.png
#default_cover_image:http://www.somesite.com/someimage.gif
## Resize images down to width, height, preserving aspect ratio.
## Nook size, with margin.