Change origsrc attribute on img to longdesc to appease epubcheck. Clean up.

This commit is contained in:
Jim Miller 2012-02-27 12:25:03 -06:00
parent 4306bfc301
commit 6a525ca9fb
7 changed files with 24 additions and 114 deletions
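Why epubcheck cares: origsrc is not a legal attribute on img in the XHTML 1.1 DTD that epubcheck validates against, while longdesc is, so the original image URL can ride along in longdesc without tripping validation. A minimal sketch of the swap, assuming BeautifulSoup 3 (the parser the plugin uses elsewhere); the URLs and local filename are illustrative:

# Attribute-swap sketch; BeautifulSoup 3 as in the plugin, values illustrative.
from BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<img src="http://example.com/pic.jpg" alt="pic" />')
for img in soup.findAll('img'):
    img['longdesc'] = img['src']      # keep the original URL in a DTD-valid attribute
    img['src'] = 'images/ffdl-0.jpg'  # point src at the image bundled in the epub
print soup
# <img src="images/ffdl-0.jpg" alt="pic" longdesc="http://example.com/pic.jpg" />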

View file

@@ -35,7 +35,6 @@ from calibre_plugins.fanfictiondownloader_plugin.common_utils import (set_plugin
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.htmlcleanup import stripHTML
#from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_dcsource, get_dcsource_chaptercount
from calibre_plugins.fanfictiondownloader_plugin.config import (prefs, permitted_values)
@@ -201,9 +200,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
shortcut_name='About FanFictionDownLoader',
triggered=self.about)
# self.update_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 )
# self.get_list_action.setEnabled( len(self.gui.library_view.get_selected_ids()) > 0 )
# Before we finalize, make sure we delete any actions for menus that are no longer displayed
for menu_id, unique_name in self.old_actions_unique_map.iteritems():
if menu_id not in self.actions_unique_map:
@@ -243,7 +239,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
if len(self.gui.library_view.get_selected_ids()) > 0 and \
(prefs['addtolists'] or prefs['addtoreadlists']) :
self._update_reading_lists(self.gui.library_view.get_selected_ids(),add)
#self.gui.library_view.model().refresh_ids(self.gui.library_view.get_selected_ids())
def get_list_urls(self):
if len(self.gui.library_view.get_selected_ids()) > 0:
@@ -524,15 +519,9 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
# 'book' can exist without epub. If there's no existing epub,
# let it go and it will download it.
if db.has_format(book_id,fileform,index_is_id=True):
#toupdateio = StringIO()
(epuburl,chaptercount) = get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB',
index_is_id=True)))
# (epuburl,chaptercount) = doMerge(toupdateio,
# [StringIO(db.format(book_id,'EPUB',
# index_is_id=True))],
# titlenavpoints=False,
# striptitletoc=True,
# forceunique=False)
(epuburl,chaptercount) = \
get_dcsource_chaptercount(StringIO(db.format(book_id,'EPUB',
index_is_id=True)))
urlchaptercount = int(story.getMetadata('numChapters'))
if chaptercount == urlchaptercount:
if collision == UPDATE:
@@ -919,13 +908,6 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['added'] = False
return book
# def _convert_calibre_ids_to_books(self, db, ids):
# books = []
# for book_id in ids:
# books.append(self._convert_calibre_id_to_book(db,book_id))
# return books
def _populate_book_from_calibre_id(self, book, db=None):
mi = db.get_metadata(book['calibre_id'], index_is_id=True)

View file

@@ -12,10 +12,7 @@ import time, os, traceback
from ConfigParser import SafeConfigParser
from StringIO import StringIO
#from itertools import izip
#from threading import Event
#from calibre.gui2.convert.single import sort_formats_by_preference
from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
from calibre.utils.logging import Log
@@ -23,7 +20,6 @@ from calibre.utils.logging import Log
from calibre_plugins.fanfictiondownloader_plugin.dialogs import (NotGoingToDownload,
OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader import adapters, writers, exceptions
#from calibre_plugins.fanfictiondownloader_plugin.epubmerge import doMerge
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.epubutils import get_update_data
# ------------------------------------------------------------------------------
@@ -136,6 +132,8 @@ def do_download_for_worker(book,options):
## checks were done earlier, just update it.
elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS):
# update now handled by pre-populating the old images and
# chapters in the adapter rather than merging epubs.
urlchaptercount = int(story.getMetadata('numChapters'))
(url,chaptercount,
adapter.oldchapters,
@@ -146,36 +144,6 @@ def do_download_for_worker(book,options):
writer.writeStory(outfilename=outfile, forceOverwrite=True)
## First, get existing epub with titlepage and tocpage stripped.
# updateio = StringIO()
# (epuburl,chaptercount) = doMerge(updateio,
# [book['epub_for_update']],
# titlenavpoints=False,
# striptitletoc=True,
# forceunique=False)
# ## Get updated title page/metadata by itself in an epub.
# ## Even if the title page isn't included, this carries the metadata.
# titleio = StringIO()
# writer.writeStory(outstream=titleio,metaonly=True)
# newchaptersio = None
# if urlchaptercount > chaptercount :
# ## Go get the new chapters
# newchaptersio = StringIO()
# adapter.setChaptersRange(chaptercount+1,urlchaptercount)
# adapter.config.set("overrides",'include_tocpage','false')
# adapter.config.set("overrides",'include_titlepage','false')
# writer.writeStory(outstream=newchaptersio)
# ## Merge the three epubs together.
# doMerge(outfile,
# [titleio,updateio,newchaptersio],
# fromfirst=True,
# titlenavpoints=False,
# striptitletoc=False,
# forceunique=False)
book['comment'] = 'Update %s completed, added %s chapters for %s total.'%\
(options['fileform'],(urlchaptercount-chaptercount),urlchaptercount)
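The comment at the top of this hunk carries the design change: rather than merging a title-page epub, the stripped old epub, and a new-chapters epub, the worker now seeds the adapter with what the old file already contains. A minimal sketch of that contract, assuming only the four-tuple shape shown in the diff; FakeAdapter and the sample values are stand-ins:

# Pre-population sketch; the tuple shape comes from the hunk above,
# FakeAdapter and the sample values are stand-ins.
class FakeAdapter(object):
    oldchapters = None  # chapter xhtml blocks recovered from the old epub
    oldimgs = None      # original image URL (longdesc) -> image bytes

adapter = FakeAdapter()
(url, chaptercount,
 adapter.oldchapters,
 adapter.oldimgs) = ('http://example.com/s/1', 10, ['<p>...</p>'], {})
# The writer reuses oldchapters/oldimgs as-is and fetches only chapters
# chaptercount+1 .. urlchaptercount from the site.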

View file

@@ -113,12 +113,6 @@ def main():
try:
## Attempt to update an existing epub.
if options.update:
# updateio = StringIO()
# (url,chaptercount) = doMerge(updateio,
# args,
# titlenavpoints=False,
# striptitletoc=True,
# forceunique=False)
(url,chaptercount) = get_dcsource_chaptercount(args[0])
print "Updating %s, URL: %s" % (args[0],url)
output_filename = args[0]
@@ -163,46 +157,17 @@ def main():
print "%s contains %d chapters, more than source: %d." % (args[0],chaptercount,urlchaptercount)
else:
print "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)
## Get updated title page/metadata by itself in an epub.
## Even if the title page isn't included, this carries the metadata.
# titleio = StringIO()
# writeStory(config,adapter,"epub",metaonly=True,outstream=titleio)
# newchaptersio = None
if not options.metaonly:
# update now handled by pre-populating the old
# images and chapters in the adapter rather than
# merging epubs.
(url,chaptercount,
adapter.oldchapters,
adapter.oldimgs) = get_update_data(args[0])
writeStory(config,adapter,"epub")
## Go get the new chapters only in another epub.
# newchaptersio = StringIO()
# adapter.setChaptersRange(chaptercount+1,urlchaptercount)
# config.set("overrides",'include_tocpage','false')
# config.set("overrides",'include_titlepage','false')
# writeStory(config,adapter,"epub",outstream=newchaptersio)
# out = open("testing/titleio.epub","wb")
# out.write(titleio.getvalue())
# out.close()
# out = open("testing/updateio.epub","wb")
# out.write(updateio.getvalue())
# out.close()
# out = open("testing/newchaptersio.epub","wb")
# out.write(newchaptersio.getvalue())
# out.close()
## Merge the three epubs together.
# doMerge(args[0],
# [titleio,updateio,newchaptersio],
# fromfirst=True,
# titlenavpoints=False,
# striptitletoc=False,
# forceunique=False)
else:
# regular download
if options.metaonly:
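The command-line path makes the same switch, and the decision itself is just a chapter-count comparison. A standalone sketch distilled from the prints above; the two messages shown in the diff are kept verbatim, while the equal-count message is illustrative because the hunk cuts that branch off:

# CLI update-decision sketch; only two of the messages are verbatim from
# the diff, the equal-count message is illustrative.
def update_decision(filename, chaptercount, urlchaptercount):
    if chaptercount == urlchaptercount:
        return "%s is already up to date (%d chapters)." % (filename, chaptercount)
    if chaptercount > urlchaptercount:
        return "%s contains %d chapters, more than source: %d." % \
            (filename, chaptercount, urlchaptercount)
    return "Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)

print update_decision('story.epub', 10, 12)
# Do update - epub(10) vs url(12)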

View file

@@ -281,9 +281,9 @@ class BaseSiteAdapter(Configurable):
acceptable_attributes = ['href','name']
#print("include_images:"+self.getConfig('include_images'))
if self.getConfig('include_images'):
acceptable_attributes.extend(('src','alt','origsrc'))
acceptable_attributes.extend(('src','alt','longdesc'))
for img in soup.findAll('img'):
img['origsrc']=img['src']
img['longdesc']=img['src']
img['src']=self.story.addImgUrl(self,url,img['src'],fetch)
for attr in soup._getAttrMap().keys():
@@ -306,7 +306,9 @@ class BaseSiteAdapter(Configurable):
# removes paired, but empty tags.
if t.string != None and len(t.string.strip()) == 0 :
t.extract()
# Don't want body tags in chapter html--writers add them.
# This is primarily for epub updates.
return re.sub(r"</?body>\r?\n?","",soup.__str__('utf8').decode('utf-8'))
fullmon = {"January":"01", "February":"02", "March":"03", "April":"04", "May":"05",
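The body-strip regex added at the end of this hunk matters for updates: writers wrap every chapter in their own body element, so body tags left inside stored chapter html would nest when the chapters are reused. Run in isolation it behaves like this:

# The body-strip regex from the hunk above, in isolation.
import re
chapterhtml = '<body>\r\n<p>Chapter text.</p>\r\n</body>\r\n'
print re.sub(r"</?body>\r?\n?", "", chapterhtml)
# <p>Chapter text.</p>   (the \r\n following </p> survives)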

View file

@@ -42,7 +42,7 @@ def get_update_data(inputio,
filecount = 0
soups = [] # list of xhtml blocks
images = {} # dict() origsrc->data
images = {} # dict() longdesc->data
if getfilecount:
# spin through the manifest--only place there are item tags.
for item in contentdom.getElementsByTagName("item"):
@@ -61,12 +61,12 @@ def get_update_data(inputio,
# remove all .. and the path part above it, if present.
# Most for epubs edited by Sigil.
newsrc = re.sub(r"([^/]+/\.\./)","",newsrc)
origsrc=img['origsrc']
longdesc=img['longdesc']
data = epub.read(newsrc)
images[origsrc] = data
img['src'] = img['origsrc']
images[longdesc] = data
img['src'] = img['longdesc']
except Exception as e:
print("Image %s not found!\n(originally:%s)"%(newsrc,origsrc))
print("Image %s not found!\n(originally:%s)"%(newsrc,longdesc))
print("Exception: %s"%(unicode(e)))
traceback.print_exc()
soup = soup.find('body')
@@ -76,7 +76,7 @@ def get_update_data(inputio,
filecount+=1
for k in images.keys():
print("\torigsrc:%s\n\tData len:%s\n"%(k,len(images[k])))
print("\tlongdesc:%s\n\tData len:%s\n"%(k,len(images[k])))
return (source,filecount,soups,images)
def get_path_part(n):
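get_update_data is the read side of the longdesc convention: the URL addImgUrl originally saw comes back as the dict key, so images already in the old epub are never re-fetched. A sketch of the recovery loop under the same assumptions as the diff (BeautifulSoup 3, a zipfile-backed epub); the epub name and internal paths are hypothetical, and the real code also normalizes relative src paths, which this skips:

# Recovery-loop sketch; epub name and internal paths are hypothetical.
import zipfile
from BeautifulSoup import BeautifulSoup

epub = zipfile.ZipFile('old_story.epub')
soup = BeautifulSoup(epub.read('OEBPS/file0001.xhtml'))
images = {}  # longdesc (original URL) -> image bytes
for img in soup.findAll('img'):
    longdesc = img['longdesc']
    images[longdesc] = epub.read('OEBPS/' + img['src'])
    img['src'] = longdesc  # put the original URL back for the writer to re-map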

View file

@@ -301,17 +301,7 @@ class Story:
parsedUrl.path + url,
'','',''))
# This version, prefixing the images with the creation
# timestamp, still allows for dup images to be detected and
# not dup'ed in a single download. And it prevents 0.jpg from
# earlier update being overwritten by the first image in newer
# chapter. It does not, however, prevent dup copies of the
# same image being d/l'ed and saved in different updates. A
# bit of corner case inefficiency I can live with rather than
# scanning all the pre-existing files on update. oldsrc is
# being saved on img tags just in case, however.
prefix='ffdl' #self.getMetadataRaw('dateCreated').strftime("%Y%m%d%H%M%S")
prefix='ffdl'
if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl)
sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
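The long comment this hunk trims explains the trade-off; the mechanism itself reduces to a membership test against the URLs already seen in this download, with the fixed ffdl prefix keeping filenames from older updates distinct. An illustrative sketch: only prefix='ffdl' and the skip-already-seen idea come from the diff, the names here are hypothetical.

# Dedup sketch; names are hypothetical, the idea and prefix come from the diff.
imgurls = []   # image URLs already fetched in this download
imgfiles = []  # local filename assigned to each, same index as imgurls

def add_img_url(imgurl):
    prefix = 'ffdl'
    if imgurl not in imgurls:
        imgurls.append(imgurl)
        imgfiles.append('images/%s-%d.jpg' % (prefix, len(imgurls) - 1))
    return imgfiles[imgurls.index(imgurl)]

print add_img_url('http://example.com/a.jpg')  # images/ffdl-0.jpg
print add_img_url('http://example.com/a.jpg')  # images/ffdl-0.jpg (dup detected)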

View file

@@ -280,6 +280,8 @@ ${value}<br />
coverIO = None
if self.story.cover:
# Note that the id of the cover xhtml *must* be 'cover'
# for it to work on Nook.
items.append(("cover","OEBPS/cover.xhtml","application/xhtml+xml",None))
itemrefs.append("cover")
#
@@ -343,7 +345,8 @@ div { margin: 0pt; padding: 0pt; }
# write content.opf to zip.
contentxml = contentdom.toxml(encoding='utf-8')
# tweak for brain damaged Nook STR.
# tweak for brain damaged Nook STR. Nook insists on name before content.
contentxml = contentxml.replace('<meta content="image0000" name="cover"/>',
'<meta name="cover" content="image0000"/>')
outputepub.writestr("content.opf",contentxml)
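The "brain damaged Nook STR" tweak exists because xml.dom.minidom serializes attributes in sorted name order, so content comes out before name; the string replace flips the one tag the Nook looks at. The quirk is reproducible in isolation:

# minidom sorts attribute names on output, hence the Nook workaround above.
from xml.dom.minidom import parseString

dom = parseString('<package><meta name="cover" content="image0000"/></package>')
contentxml = dom.toxml(encoding='utf-8')
# contentxml now contains: <meta content="image0000" name="cover"/>
contentxml = contentxml.replace('<meta content="image0000" name="cover"/>',
                                '<meta name="cover" content="image0000"/>')
print contentxml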