Change origsrc attribute on img to longdesc to appease epubcheck. Clean up.

This commit is contained in:
Jim Miller 2012-02-27 12:25:03 -06:00
parent 4306bfc301
commit 6a525ca9fb
7 changed files with 24 additions and 114 deletions
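Why epubcheck cares: origsrc is not a legal attribute on img in the XHTML 1.1 DTD that epubcheck validates against, while longdesc is, so the original image URL can ride along in longdesc without tripping validation. A minimal sketch of the swap, assuming BeautifulSoup 3 (the parser the plugin uses elsewhere); the URLs and local filename are illustrative:

# Attribute-swap sketch; BeautifulSoup 3 as in the plugin, values illustrative.
from BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<img src="http://example.com/pic.jpg" alt="pic" />')
for img in soup.findAll('img'):
    img['longdesc'] = img['src']      # keep the original URL in a DTD-valid attribute
    img['src'] = 'images/ffdl-0.jpg'  # point src at the image bundled in the epub
print soup
# <img src="images/ffdl-0.jpg" alt="pic" longdesc="http://example.com/pic.jpg" />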

View file

@@ -35,7 +35,6 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
#from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount
from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values)
@@ -201,9 +200,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
shortcut_name='About FanFictionDownLoader',
triggered=self.about)
# self.update_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 )
# self.get_list_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 )
# Before we finalize, make sure we delete any actions for menus that are no longer displayed
for menu_id, unique_name in self.old_actions_unique_map.iteritems():
if menu_id not in self.actions_unique_map:
@@ -243,7 +239,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
if len(self.gui.library_view.get_selected_ids()) > 0 and \
(prefs['addtolists'] or prefs['addtoreadlists']) :
self._update_reading_lists(self.gui.library_view.get_selected_ids(),add)
#self.gui.library_view.model().refresh_ids(self.gui.library_view.get_selected_ids())
def get_list_urls(self):
if len(self.gui.library_view.get_selected_ids()) > 0:
@@ -524,15 +519,9 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
# 'book' can exist without epub. If there's no existing epub,
# let it go and it will download it.
if db.has_format(book_id,fileform,index_is_id=True):
#toupdateio = StringIO()
(epuburl,chaptercount) = get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB',
index_is_id=True)))
# (epuburl,chaptercount) = doMerge(toupdateio,
# [StringIO(db.format(book_id,'EPUB',
# index_is_id=True))],
# titlenavpoints=False,
# striptitletoc=True,
# forceunique=False)
(epuburl,chaptercount) = \
get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB',
index_is_id=True)))
urlchaptercount = int(story.getMetadata('numChapters'))
if chaptercount == urlchaptercount:
if collision == UPDATE:
@@ -919,13 +908,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['added'] = False
return book
# def _convert_calibre_ids_to_books(self, db, ids):
# books = []
# for book_id in ids:
# books.append(self._convert_calibre_id_to_book(db,book_id))
# return books
def _populate_book_from_calibre_id(self, book, db=None):
mi = db.get_metadata(book['calibre_id'], index_is_id=True)

View file

@@ -12,10 +12,7 @@ import time, os, traceback
from ConfigParser import SafeConfigParser
from StringIO import StringIO
#from itertools import izip
#from threading import Event
#from calibre.gui2.convert.single import sort_formats_by_preference
from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from calibre.utils.logging import Log
@@ -23,7 +20,6 @@ from calibre.utils.logging import Log
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
#from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data
# ------------------------------------------------------------------------------
@@ -136,6 +132,8 @@ def do_download_for_worker(book,options):
## checks were done earlier, just update it.
elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
# update now handled by pre-populating the old images and
# chapters in the adapter rather than merging epubs.
urlchaptercount = int(story.getMetadata('numChapters'))
(url,chaptercount,
adapter.oldchapters,
@@ -146,36 +144,6 @@ def do_download_for_worker(book,options):
writer.writeStory(outfilename=outfile, forceOverwrite=True)
## First, get existing epub with titlepage and tocpage stripped.
# updateio = StringIO()
# (epuburl,chaptercount) = doMerge(updateio,
# [book['epub_for_update']],
# titlenavpoints=False,
# striptitletoc=True,
# forceunique=False)
# ## Get updated title page/metadata by itself in an epub.
# ## Even if the title page isn't included, this carries the metadata.
# titleio = StringIO()
# writer.writeStory(outstream=titleio,metaonly=True)
# newchaptersio = None
# if urlchaptercount > chaptercount :
# ## Go get the new chapters
# newchaptersio = StringIO()
# adapter.setChaptersRange(chaptercount+1,urlchaptercount)
# adapter.config.set("overrides",'include_tocpage','false')
# adapter.config.set("overrides",'include_titlepage','false')
# writer.writeStory(outstream=newchaptersio)
# ## Merge the three epubs together.
# doMerge(outfile,
# [titleio,updateio,newchaptersio],
# fromfirst=True,
# titlenavpoints=False,
# striptitletoc=False,
# forceunique=False)
book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\
(options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
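The comment at the top of this hunk carries the design change: rather than merging a title-page epub, the stripped old epub, and a new-chapters epub, the worker now seeds the adapter with what the old file already contains. A minimal sketch of that contract, assuming only the four-tuple shape shown in the diff; FakeAdapter and the sample values are stand-ins:

# Pre-population sketch; the tuple shape comes from the hunk above,
# FakeAdapter and the sample values are stand-ins.
class FakeAdapter(object):
    oldchapters = None  # chapter xhtml blocks recovered from the old epub
    oldimgs = None      # original image URL (longdesc) -> image bytes

adapter = FakeAdapter()
(url, chaptercount,
 adapter.oldchapters,
 adapter.oldimgs) = ('http://example.com/s/1', 10, ['<p>...</p>'], {})
# The writer reuses oldchapters/oldimgs as-is and fetches only chapters
# chaptercount+1 .. urlchaptercount from the site.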

View file

@@ -113,12 +113,6 @@ def main():
try:
## Attempt to update an existing epub.
if options.update:
# updateio = StringIO()
# (url,chaptercount) = doMerge(updateio,
# args,
# titlenavpoints=False,
# striptitletoc=True,
# forceunique=False)
(url,chaptercount) = get_dcsource_chaptercount(args[0])
print "Updating %s, URL: %s" % (args[0],url)
output_filename = args[0]
@@ -163,46 +157,17 @@ def main():
print "%s contains %d chapters, more than source: %d." % (args[0],chaptercount,urlchaptercount)
else:
print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)
## Get updated title page/metadata by itself in an epub.
## Even if the title page isn't included, this carries the metadata.
# titleio = StringIO()
# writeStory(config,adapter,"epub",metaonly=True,outstream=titleio)
# newchaptersio = None
if not options.metaonly:
# update now handled by pre-populating the old
# images and chapters in the adapter rather than
# merging epubs.
(url,chaptercount,
adapter.oldchapters,
adapter.oldimgs) = get_update_data(args[0])
writeStory(config,adapter,"epub")
## Go get the new chapters only in another epub.
# newchaptersio = StringIO()
# adapter.setChaptersRange(chaptercount+1,urlchaptercount)
# config.set("overrides",'include_tocpage','false')
# config.set("overrides",'include_titlepage','false')
# writeStory(config,adapter,"epub",outstream=newchaptersio)
# out = open("testing/titleio.epub","wb")
# out.write(titleio.getvalue())
# out.close()
# out = open("testing/updateio.epub","wb")
# out.write(updateio.getvalue())
# out.close()
# out = open("testing/newchaptersio.epub","wb")
# out.write(newchaptersio.getvalue())
# out.close()
## Merge the three epubs together.
# doMerge(args[0],
# [titleio,updateio,newchaptersio],
# fromfirst=True,
# titlenavpoints=False,
# striptitletoc=False,
# forceunique=False)
else:
# regular download
if options.metaonly:
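The command-line path makes the same switch, and the decision itself is just a chapter-count comparison. A standalone sketch distilled from the prints above; the two messages shown in the diff are kept verbatim, while the equal-count message is illustrative because the hunk cuts that branch off:

# CLI update-decision sketch; only two of the messages are verbatim from
# the diff, the equal-count message is illustrative.
def update_decision(filename, chaptercount, urlchaptercount):
    if chaptercount == urlchaptercount:
        return "%s is already up to date (%d chapters)." % (filename, chaptercount)
    if chaptercount > urlchaptercount:
        return "%s contains %d chapters, more than source: %d." % \
            (filename, chaptercount, urlchaptercount)
    return "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)

print update_decision('story.epub', 10, 12)
# Do update - epub(10) vs url(12)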

View file

@@ -281,9 +281,9 @@ class BaseSiteAdapter(Configurable):
acceptable_attributes = ['href','name']
#print("include_images:"+self.getConfig('include_images'))
if self.getConfig('include_images'):
acceptable_attributes.extend(('src','alt','origsrc'))
acceptable_attributes.extend(('src','alt','longdesc'))
for img in soup.findAll('img'):
img['origsrc']=img['src']
img['longdesc']=img['src']
img['src']=self.story.addImgUrl(self,url,img['src'],fetch)
for attr in soup._getAttrMap().keys():
@@ -306,7 +306,9 @@ class BaseSiteAdapter(Configurable):
# removes paired, but empty tags.
if t.string != None and len(t.string.strip()) == 0 :
t.extract()
# Don't want body tags in chapter html--writers add them.
# This is primarily for epub updates.
return re.sub(r"</?body>\r?\n?","",soup.__str__('utf8').decode('utf-8'))
fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05",
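The body-strip regex added at the end of this hunk matters for updates: writers wrap every chapter in their own body element, so body tags left inside stored chapter html would nest when the chapters are reused. Run in isolation it behaves like this:

# The body-strip regex from the hunk above, in isolation.
import re
chapterhtml = '<body>\r\n<p>Chapter text.</p>\r\n</body>\r\n'
print re.sub(r"</?body>\r?\n?", "", chapterhtml)
# <p>Chapter text.</p>   (the \r\n following </p> survives)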

View file

@@ -42,7 +42,7 @@ def get_update_data(inputio,
filecount = 0
soups = [] # list of xhtml blocks
images = {} # dict() origsrc->data
images = {} # dict() longdesc->data
if getfilecount:
# spin through the manifest--only place there are item tags.
for item in contentdom.getElementsByTagName("item"):
@@ -61,12 +61,12 @@ def get_update_data(inputio,
# remove all .. and the path part above it, if present.
# Most for epubs edited by Sigil.
newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
origsrc=img['origsrc']
longdesc=img['longdesc']
data = epub.read(newsrc)
images[origsrc] = data
img['src'] = img['origsrc']
images[longdesc] = data
img['src'] = img['longdesc']
except Exception as e:
print("Image %s not found!\n(originally:%s)"%(newsrc,origsrc))
print("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
print("Exception: %s"%(unicode(e)))
traceback.print_exc()
soup = soup.find('body')
@@ -76,7 +76,7 @@ def get_update_data(inputio,
filecount+=1
for k in images.keys():
print("\torigsrc:%s\n\tData len:%s\n"%(k,len(images[k])))
print("\tlongdesc:%s\n\tData len:%s\n"%(k,len(images[k])))
return (source,filecount,soups,images)
def get_path_part(n):
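get_update_data is the read side of the longdesc convention: the URL addImgUrl originally saw comes back as the dict key, so images already in the old epub are never re-fetched. A sketch of the recovery loop under the same assumptions as the diff (BeautifulSoup 3, a zipfile-backed epub); the epub name and internal paths are hypothetical, and the real code also normalizes relative src paths, which this skips:

# Recovery-loop sketch; epub name and internal paths are hypothetical.
import zipfile
from BeautifulSoup import BeautifulSoup

epub = zipfile.ZipFile('old_story.epub')
soup = BeautifulSoup(epub.read('OEBPS/file0001.xhtml'))
images = {}  # longdesc (original URL) -> image bytes
for img in soup.findAll('img'):
    longdesc = img['longdesc']
    images[longdesc] = epub.read('OEBPS/' + img['src'])
    img['src'] = longdesc  # put the original URL back for the writer to re-map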

View file

@@ -301,17 +301,7 @@ class Story:
parsedUrl.path + url,
'','',''))
# This version, prefixing the images with the creation
# timestamp, still allows for dup images to be detected and
# not dup'ed in a single download. And it prevents 0.jpg from
# earlier update being overwritten by the first image in newer
# chapter. It does not, however, prevent dup copies of the
# same image being d/l'ed and saved in different updates. A
# bit of corner case inefficiency I can live with rather than
# scanning all the pre-existing files on update. oldsrc is
# being saved on img tags just in case, however.
prefix='ffdl' #self.getMetadataRaw('dateCreated').strftime("%Y%m%d%H%M%S")
prefix='ffdl'
if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl)
sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
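The long comment this hunk trims explains the trade-off; the mechanism itself reduces to a membership test against the URLs already seen in this download, with the fixed ffdl prefix keeping filenames from older updates distinct. An illustrative sketch: only prefix='ffdl' and the skip-already-seen idea come from the diff, the names here are hypothetical.

# Dedup sketch; names are hypothetical, the idea and prefix come from the diff.
imgurls = []   # image URLs already fetched in this download
imgfiles = []  # local filename assigned to each, same index as imgurls

def add_img_url(imgurl):
    prefix = 'ffdl'
    if imgurl not in imgurls:
        imgurls.append(imgurl)
        imgfiles.append('images/%s-%d.jpg' % (prefix, len(imgurls) - 1))
    return imgfiles[imgurls.index(imgurl)]

print add_img_url('http://example.com/a.jpg')  # images/ffdl-0.jpg
print add_img_url('http://example.com/a.jpg')  # images/ffdl-0.jpg (dup detected)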

View file

@@ -280,6 +280,8 @@ ${value}<br />
coverIO = None
if self.story.cover:
# Note that the id of the cover xhtml *must* be 'cover'
# for it to work on Nook.
items.append(("cover","OEBPS/cover.xhtml","application/xhtml+xml",None))
itemrefs.append("cover")
#
@@ -343,7 +345,8 @@ div { margin: 0pt; padding: 0pt; }
# write content.opf to zip.
contentxml = contentdom.toxml(encoding='utf-8')
# tweak for brain damaged Nook STR.
# tweak for brain damaged Nook STR. Nook insists on name before content.
contentxml = contentxml.replace('<meta content="image0000" name="cover"/>',
'<meta name="cover" content="image0000"/>')
outputepub.writestr("content.opf",contentxml)
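The "brain damaged Nook STR" tweak exists because xml.dom.minidom serializes attributes in sorted name order, so content comes out before name; the string replace flips the one tag the Nook looks at. The quirk is reproducible in isolation:

# minidom sorts attribute names on output, hence the Nook workaround above.
from xml.dom.minidom import parseString

dom = parseString('<package><meta name="cover" content="image0000"/></package>')
contentxml = dom.toxml(encoding='utf-8')
# contentxml now contains: <meta content="image0000" name="cover"/>
contentxml = contentxml.replace('<meta content="image0000" name="cover"/>',
                                '<meta name="cover" content="image0000"/>')
print contentxml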