diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py index 98d8e68b..a4fc3e15 100644 --- a/calibre-plugin/ffdl_plugin.py +++ b/calibre-plugin/ffdl_plugin.py @@ -35,7 +35,6 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML -#from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values) @@ -201,9 +200,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction): shortcut_name='About FanFictionDownLoader', triggered=self.about) - # self.update_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) - # self.get_list_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 ) - # Before we finalize, make sure we delete any actions for menus that are no longer displayed for menu_id, unique_name in self.old_actions_unique_map.iteritems(): if menu_id not in self.actions_unique_map: @@ -243,7 +239,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction): if len(self.gui.library_view.get_selected_ids()) > 0 and \ (prefs['addtolists'] or prefs['addtoreadlists']) : self._update_reading_lists(self.gui.library_view.get_selected_ids(),add) - #self.gui.library_view.model().refresh_ids(self.gui.library_view.get_selected_ids()) def get_list_urls(self): if len(self.gui.library_view.get_selected_ids()) > 0: @@ -524,15 +519,9 @@ class FanFictionDownLoaderPlugin(InterfaceAction): # 'book' can exist without epub. If there's no existing epub, # let it go and it will download it. 
if db.has_format(book_id,fileform,index_is_id=True): - #toupdateio = StringIO() - (epuburl,chaptercount) = get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB', - index_is_id=True))) - # (epuburl,chaptercount) = doMerge(toupdateio, - # [StringIO(db.format(book_id,'EPUB', - # index_is_id=True))], - # titlenavpoints=False, - # striptitletoc=True, - # forceunique=False) + (epuburl,chaptercount) = \ + get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB', + index_is_id=True))) urlchaptercount = int(story.getMetadata('numChapters')) if chaptercount == urlchaptercount: if collision == UPDATE: @@ -919,13 +908,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction): book['added'] = False return book - - - # def _convert_calibre_ids_to_books(self, db, ids): - # books = [] - # for book_id in ids: - # books.append(self._convert_calibre_id_to_book(db,book_id)) - # return books def _populate_book_from_calibre_id(self, book, db=None): mi = db.get_metadata(book['calibre_id'], index_is_id=True) diff --git a/calibre-plugin/jobs.py b/calibre-plugin/jobs.py index 3fdca553..55c9853e 100644 --- a/calibre-plugin/jobs.py +++ b/calibre-plugin/jobs.py @@ -12,10 +12,7 @@ import time, os, traceback from ConfigParser import SafeConfigParser from StringIO import StringIO -#from itertools import izip -#from threading import Event -#from calibre.gui2.convert.single import sort_formats_by_preference from calibre.utils.ipc.server import Server from calibre.utils.ipc.job import ParallelJob from calibre.utils.logging import Log @@ -23,7 +20,6 @@ from calibre.utils.logging import Log from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload, OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY) from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions -#from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge from 
calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data # ------------------------------------------------------------------------------ @@ -136,6 +132,8 @@ def do_download_for_worker(book,options): ## checks were done earlier, just update it. elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS): + # update now handled by pre-populating the old images and + # chapters in the adapter rather than merging epubs. urlchaptercount = int(story.getMetadata('numChapters')) (url,chaptercount, adapter.oldchapters, @@ -146,36 +144,6 @@ def do_download_for_worker(book,options): writer.writeStory(outfilename=outfile, forceOverwrite=True) - ## First, get existing epub with titlepage and tocpage stripped. - # updateio = StringIO() - # (epuburl,chaptercount) = doMerge(updateio, - # [book['epub_for_update']], - # titlenavpoints=False, - # striptitletoc=True, - # forceunique=False) - # ## Get updated title page/metadata by itself in an epub. - # ## Even if the title page isn't included, this carries the metadata. - # titleio = StringIO() - # writer.writeStory(outstream=titleio,metaonly=True) - - # newchaptersio = None - # if urlchaptercount > chaptercount : - # ## Go get the new chapters - # newchaptersio = StringIO() - # adapter.setChaptersRange(chaptercount+1,urlchaptercount) - - # adapter.config.set("overrides",'include_tocpage','false') - # adapter.config.set("overrides",'include_titlepage','false') - # writer.writeStory(outstream=newchaptersio) - - # ## Merge the three epubs together. 
- # doMerge(outfile, - # [titleio,updateio,newchaptersio], - # fromfirst=True, - # titlenavpoints=False, - # striptitletoc=False, - # forceunique=False) - book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\ (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount) diff --git a/downloader.py b/downloader.py index 58f0f7d9..ab20bf2f 100644 --- a/downloader.py +++ b/downloader.py @@ -113,12 +113,6 @@ def main(): try: ## Attempt to update an existing epub. if options.update: - # updateio = StringIO() - # (url,chaptercount) = doMerge(updateio, - # args, - # titlenavpoints=False, - # striptitletoc=True, - # forceunique=False) (url,chaptercount) = get_dcsource_chaptercount(args[0]) print "Updating %s, URL: %s" % (args[0],url) output_filename = args[0] @@ -163,46 +157,17 @@ def main(): print "%s contains %d chapters, more than source: %d." % (args[0],chaptercount,urlchaptercount) else: print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount) - ## Get updated title page/metadata by itself in an epub. - ## Even if the title page isn't included, this carries the metadata. - # titleio = StringIO() - # writeStory(config,adapter,"epub",metaonly=True,outstream=titleio) - - # newchaptersio = None if not options.metaonly: + + # update now handled by pre-populating the old + # images and chapters in the adapter rather than + # merging epubs. (url,chaptercount, adapter.oldchapters, adapter.oldimgs) = get_update_data(args[0]) writeStory(config,adapter,"epub") - ## Go get the new chapters only in another epub. 
- # newchaptersio = StringIO() - # adapter.setChaptersRange(chaptercount+1,urlchaptercount) - # config.set("overrides",'include_tocpage','false') - # config.set("overrides",'include_titlepage','false') - # writeStory(config,adapter,"epub",outstream=newchaptersio) - - # out = open("testing/titleio.epub","wb") - # out.write(titleio.getvalue()) - # out.close() - - # out = open("testing/updateio.epub","wb") - # out.write(updateio.getvalue()) - # out.close() - - # out = open("testing/newchaptersio.epub","wb") - # out.write(newchaptersio.getvalue()) - # out.close() - - ## Merge the three epubs together. - # doMerge(args[0], - # [titleio,updateio,newchaptersio], - # fromfirst=True, - # titlenavpoints=False, - # striptitletoc=False, - # forceunique=False) - else: # regular download if options.metaonly: diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py index 5f96c657..85398a7f 100644 --- a/fanficdownloader/adapters/base_adapter.py +++ b/fanficdownloader/adapters/base_adapter.py @@ -281,9 +281,9 @@ class BaseSiteAdapter(Configurable): acceptable_attributes = ['href','name'] #print("include_images:"+self.getConfig('include_images')) if self.getConfig('include_images'): - acceptable_attributes.extend(('src','alt','origsrc')) + acceptable_attributes.extend(('src','alt','longdesc')) for img in soup.findAll('img'): - img['origsrc']=img['src'] + img['longdesc']=img['src'] img['src']=self.story.addImgUrl(self,url,img['src'],fetch) for attr in soup._getAttrMap().keys(): @@ -306,7 +306,9 @@ class BaseSiteAdapter(Configurable): # removes paired, but empty tags. if t.string != None and len(t.string.strip()) == 0 : t.extract() + # Don't want body tags in chapter html--writers add them. + # This is primarily for epub updates. 
return re.sub(r"\r?\n?","",soup.__str__('utf8').decode('utf-8')) fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05", diff --git a/fanficdownloader/epubutils.py b/fanficdownloader/epubutils.py index 94d32f6a..3c854106 100644 --- a/fanficdownloader/epubutils.py +++ b/fanficdownloader/epubutils.py @@ -42,7 +42,7 @@ def get_update_data(inputio, filecount = 0 soups = [] # list of xhmtl blocks - images = {} # dict() origsrc->data + images = {} # dict() longdesc->data if getfilecount: # spin through the manifest--only place there are item tags. for item in contentdom.getElementsByTagName("item"): @@ -61,12 +61,12 @@ def get_update_data(inputio, # remove all .. and the path part above it, if present. # Most for epubs edited by Sigil. newsrc = re.sub(r"([^/]+/\.\./)","",newsrc) - origsrc=img['origsrc'] + longdesc=img['longdesc'] data = epub.read(newsrc) - images[origsrc] = data - img['src'] = img['origsrc'] + images[longdesc] = data + img['src'] = img['longdesc'] except Exception as e: - print("Image %s not found!\n(originally:%s)"%(newsrc,origsrc)) + print("Image %s not found!\n(originally:%s)"%(newsrc,longdesc)) print("Exception: %s"%(unicode(e))) traceback.print_exc() soup = soup.find('body') @@ -76,7 +76,7 @@ def get_update_data(inputio, filecount+=1 for k in images.keys(): - print("\torigsrc:%s\n\tData len:%s\n"%(k,len(images[k]))) + print("\tlongdesc:%s\n\tData len:%s\n"%(k,len(images[k]))) return (source,filecount,soups,images) def get_path_part(n): diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py index f25e72b5..11e31248 100644 --- a/fanficdownloader/story.py +++ b/fanficdownloader/story.py @@ -301,17 +301,7 @@ class Story: parsedUrl.path + url, '','','')) - # This version, prefixing the images with the creation - # timestamp, still allows for dup images to be detected and - # not dup'ed in a single download. And it prevents 0.jpg from - # earlier update being overwritten by the first image in newer - # chapter. 
It does not, however, prevent dup copies of the - # same image being d/l'ed and saved in different updates. A - # bit of corner case inefficiency I can live with rather than - # scanning all the pre-existing files on update. oldsrc is - # being saved on img tags just in case, however. - prefix='ffdl' #self.getMetadataRaw('dateCreated').strftime("%Y%m%d%H%M%S") - + prefix='ffdl' if imgurl not in self.imgurls: parsedUrl = urlparse.urlparse(imgurl) sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ] diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py index 4d18a884..bbe47fe4 100644 --- a/fanficdownloader/writers/writer_epub.py +++ b/fanficdownloader/writers/writer_epub.py @@ -280,6 +280,8 @@ ${value}
coverIO = None if self.story.cover: + # Note that the id of the cover xhtml *must* be 'cover' + # for it to work on Nook. items.append(("cover","OEBPS/cover.xhtml","application/xhtml+xml",None)) itemrefs.append("cover") # @@ -343,7 +345,8 @@ div { margin: 0pt; padding: 0pt; } # write content.opf to zip. contentxml = contentdom.toxml(encoding='utf-8') - # tweak for brain damaged Nook STR. + + # tweak for brain damaged Nook STR. Nook insists on name before content. contentxml = contentxml.replace('', '') outputepub.writestr("content.opf",contentxml)