diff --git a/recipes/nzherald.recipe b/recipes/nzherald.recipe
index b73fd8366e..46242b630a 100644
--- a/recipes/nzherald.recipe
+++ b/recipes/nzherald.recipe
@@ -25,7 +25,7 @@ class NewZealandHerald(BasicNewsRecipe):
'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000466.xml'),
('Life & Style',
'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000006.xml'),
- ('Technology'
+ ('Technology',
'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000005.xml'),
('Sport',
'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000004.xml'),
diff --git a/src/calibre/devices/smart_device_app/driver.py b/src/calibre/devices/smart_device_app/driver.py
index b8bf50aba7..da400370ea 100644
--- a/src/calibre/devices/smart_device_app/driver.py
+++ b/src/calibre/devices/smart_device_app/driver.py
@@ -52,9 +52,7 @@ def _synchronizer(self, *args, **kwargs):
return _synched
-class ConnectionListener (Thread):
-
- all_ip_addresses = dict()
+class ConnectionListener(Thread):
NOT_SERVICED_COUNT = 6
@@ -63,7 +61,7 @@ def __init__(self, driver):
self.daemon = True
self.driver = driver
self.keep_running = True
- all_ip_addresses = dict()
+ self.all_ip_addresses = dict()
def stop(self):
self.keep_running = False
diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py
index d2b6ac85b5..08fae838c1 100644
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@@ -8,7 +8,7 @@
__docformat__ = 'restructuredtext en'
import os, logging, sys, hashlib, uuid
-from urllib import unquote as urlunquote
+from urllib import unquote as urlunquote, quote as urlquote
from lxml import etree
@@ -22,8 +22,8 @@
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader.headers import MetadataHeader
from calibre.ebooks.mobi.tweak import set_cover
-from calibre.ebooks.oeb.base import (serialize, OEB_DOCS, _css_logger,
- OEB_STYLES, OPF2_NS)
+from calibre.ebooks.oeb.base import (
+ serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF)
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
@@ -34,9 +34,25 @@
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
OEB_FONTS = {guess_type('a.ttf')[0], guess_type('b.ttf')[0]}
+OPF_NAMESPACES = {'opf':OPF2_NS, 'dc':DC11_NS}
class Container(object):
+ '''
+ A container represents an Open EBook as a directory full of files and an
+ opf file. There are two important concepts:
+
+ * The root directory. This is the base of the ebook. All the ebook's
+ files are inside this directory or in its sub-directories.
+
+ * Names: These are paths to the book's files relative to the root
+ directory. They always contain POSIX separators and are unquoted. They
+ can be thought of as canonical identifiers for files in the book.
+ Most methods on the container object work with names.
+ '''
+
+ book_type = 'oeb'
+
def __init__(self, rootpath, opfpath, log):
self.root = os.path.abspath(rootpath)
self.log = log
@@ -54,7 +70,7 @@ def __init__(self, rootpath, opfpath, log):
for dirpath, _dirnames, filenames in os.walk(self.root):
for f in filenames:
path = join(dirpath, f)
- name = relpath(path, self.root).replace(os.sep, '/')
+ name = self.abspath_to_name(path)
self.name_path_map[name] = path
self.mime_map[name] = guess_type(path)[0]
# Special case if we have stumbled onto the opf
@@ -63,35 +79,52 @@ def __init__(self, rootpath, opfpath, log):
self.opf_dir = os.path.dirname(path)
self.mime_map[name] = guess_type('a.opf')[0]
+ if not hasattr(self, 'opf_name'):
+ raise InvalidBook('Book has no OPF file')
+
# Update mime map with data from the OPF
- for item in self.opf.xpath(
- '//opf:manifest/opf:item[@href and @media-type]',
- namespaces={'opf':OPF2_NS}):
+ for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):
href = item.get('href')
- self.mime_map[self.href_to_name(href)] = item.get('media-type')
+ self.mime_map[self.href_to_name(href, self.opf_name)] = item.get('media-type')
def abspath_to_name(self, fullpath):
return self.relpath(os.path.abspath(fullpath)).replace(os.sep, '/')
+    def name_to_abspath(self, name):
+        '''Convert a name (a '/' separated path relative to self.root) into
+        an absolute filesystem path.'''
+        components = name.split('/')
+        return os.path.abspath(join(self.root, *components))
+
def href_to_name(self, href, base=None):
'''
- Convert an href (relative to base) to a name (i.e. a path
- relative to self.root with POSIX separators).
-
- base must be an absolute path with OS separators or None, in which case
- the href is interpreted relative to the dir containing the OPF.
+ Convert an href (relative to base) to a name. base must be a name or
+ None, in which case self.root is used. Any fragment (the part of the
+ href after '#') is discarded and the remainder is URL-unquoted.
'''
if base is None:
- base = self.opf_dir
+ base = self.root
+ else:
+ # base names a file, the href is resolved against its directory
+ base = os.path.dirname(self.name_to_abspath(base))
href = urlunquote(href.partition('#')[0])
fullpath = os.path.join(base, *href.split('/'))
return self.abspath_to_name(fullpath)
+    def name_to_href(self, name, base=None):
+        '''Return a URL-quoted href pointing at name, relative to the
+        directory containing base. base must be a name, or None to make the
+        href relative to self.root.'''
+        fullpath = self.name_to_abspath(name)
+        if base is None:
+            basepath = self.root
+        else:
+            basepath = os.path.dirname(self.name_to_abspath(base))
+        posix_path = relpath(fullpath, basepath).replace(os.sep, '/')
+        return urlquote(posix_path)
+
+    def opf_xpath(self, expr):
+        '''Evaluate the XPath expression expr against self.opf with the
+        prefixes from OPF_NAMESPACES (opf and dc) available.'''
+        opf = self.opf
+        return opf.xpath(expr, namespaces=OPF_NAMESPACES)
+
def has_name(self, name):
return name in self.name_path_map
-    def relpath(self, path):
-        return relpath(path, self.root)
+    def relpath(self, path, base=None):
+        '''Return path (absolute, with OS separators) expressed relative to
+        base, falling back to self.root when no base is given. The result
+        still uses OS separators, so it is *not* a name; use
+        abspath_to_name() when a name is needed.'''
+        if not base:
+            base = self.root
+        return relpath(path, base)
def decode(self, data):
"""Automatically decode :param:`data` into a `unicode` object."""
@@ -173,13 +206,11 @@ def opf(self):
@property
def spine_items(self):
- manifest_id_map = {item.get('id'):self.href_to_name(item.get('href'))
- for item in self.opf.xpath('//opf:manifest/opf:item[@href and @id]',
- namespaces={'opf':OPF2_NS})}
+ manifest_id_map = {item.get('id'):self.href_to_name(item.get('href'), self.opf_name)
+ for item in self.opf_xpath('//opf:manifest/opf:item[@href and @id]')}
linear, non_linear = [], []
- for item in self.opf.xpath('//opf:spine/opf:itemref[@idref]',
- namespaces={'opf':OPF2_NS}):
+ for item in self.opf_xpath('//opf:spine/opf:itemref[@idref]'):
idref = item.get('idref')
name = manifest_id_map.get(idref, None)
path = self.name_path_map.get(name, None)
@@ -198,26 +229,23 @@ def remove_item(self, name):
any internal caches.
'''
removed = set()
- for elem in self.opf.xpath('//opf:manifest/opf:item[@href]',
- namespaces={'opf':OPF2_NS}):
- if self.href_to_name(elem.get('href')) == name:
+ for elem in self.opf_xpath('//opf:manifest/opf:item[@href]'):
+ if self.href_to_name(elem.get('href'), self.opf_name) == name:
id_ = elem.get('id', None)
if id_ is not None:
removed.add(id_)
- elem.getparent().remove(elem)
+ self.remove_from_xml(elem)
self.dirty(self.opf_name)
if removed:
- for item in self.opf.xpath('//opf:spine/opf:itemref[@idref]',
- namespaces={'opf':OPF2_NS}):
+ for item in self.opf_xpath('//opf:spine/opf:itemref[@idref]'):
idref = item.get('idref')
if idref in removed:
- item.getparent().remove(item)
+ self.remove_from_xml(item)
self.dirty(self.opf_name)
- for item in self.opf.xpath('//opf:guide/opf:reference[@href]',
- namespaces={'opf':OPF2_NS}):
- if self.href_to_name(item.get('href')) == name:
- item.getparent().remove(item)
+ for item in self.opf_xpath('//opf:guide/opf:reference[@href]'):
+ if self.href_to_name(item.get('href'), self.opf_name) == name:
+ self.remove_from_xml(item)
self.dirty(self.opf_name)
path = self.name_path_map.pop(name)
@@ -230,6 +258,76 @@ def remove_item(self, name):
def dirty(self, name):
self.dirtied.add(name)
+    def remove_from_xml(self, item):
+        '''Detach item from its parent while keeping the surrounding
+        indentation intact (works only with self closing items). Returns
+        the removed item.'''
+        parent = item.getparent()
+        position = parent.index(item)
+        if position > 0:
+            # The preceding sibling inherits the whitespace that followed
+            # the removed item
+            parent[position-1].tail = item.tail
+        elif len(parent) == 1:
+            # item was the only child, so its tail becomes the text of the
+            # now empty parent. When other children follow, nothing needs
+            # adjusting.
+            parent.text = item.tail
+        parent.remove(item)
+        return item
+
+ def insert_into_xml(self, parent, item, index=None):
+ '''Insert item into parent (or append if index is None), fixing
+ indentation. Only works with self closing items.
+
+ Indentation is fixed by copying the whitespace held in the text/tail
+ of neighbouring elements onto item. Returns None.'''
+ if index is None:
+ parent.append(item)
+ else:
+ parent.insert(index, item)
+ idx = parent.index(item)
+ if idx == 0:
+ # item is now the first child: whatever followed the opening tag of
+ # parent now follows item
+ item.tail = parent.text
+ # If this is the only child of this parent element, we need a
+ # little extra work as we have gone from a self-closing
+ # element (<parent/>) to one with content (<parent><item/></parent>)
+ if len(parent) == 1:
+ sibling = parent.getprevious()
+ if sibling is None:
+ # Give up!
+ return
+ # Borrow the whitespace used by the element preceding parent
+ parent.text = sibling.text
+ item.tail = sibling.tail
+ else:
+ # Take over the whitespace that followed the preceding sibling
+ item.tail = parent[idx-1].tail
+ if idx == len(parent)-1:
+ # item is the last child
+ parent[idx-1].tail = parent.text
+
+    def generate_item(self, name, id_prefix=None, media_type=None):
+        '''Add an item to the manifest with href derived from the given
+        name. Ensures uniqueness of href and id automatically. Returns
+        generated item.
+
+        id_prefix defaults to 'id' and media_type defaults to the type
+        guessed from name. The href that ends up in the manifest can differ
+        from the one derived from name if that href was already present, so
+        callers should read the href back from the returned item.'''
+        id_prefix = id_prefix or 'id'
+        media_type = media_type or guess_type(name)[0]
+        path = self.name_to_abspath(name)
+        # hrefs always use POSIX separators. Convert before quoting,
+        # otherwise on windows the backslashes get percent-encoded into
+        # the href.
+        rel = self.relpath(path, base=self.opf_dir).replace(os.sep, '/')
+        href = urlquote(rel)
+        base, dot, ext = href.rpartition('.')
+        if not dot or '/' in ext:
+            # The final path component has no extension
+            base, ext = href, ''
+        all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
+        c = 0
+        item_id = id_prefix
+        while item_id in all_ids:
+            c += 1
+            item_id = id_prefix + '%d'%c
+        all_names = {x.get('href') for x in self.opf_xpath(
+            '//opf:manifest/opf:item[@href]')}
+        c = 0
+        while href in all_names:
+            c += 1
+            if ext:
+                href = '%s_%d.%s'%(base, c, ext)
+            else:
+                href = '%s_%d'%(base, c)
+        manifest = self.opf_xpath('//opf:manifest')[0]
+        item = manifest.makeelement(OPF('item'), nsmap=OPF_NAMESPACES,
+                                    id=item_id, href=href)
+        item.set('media-type', media_type)
+        self.insert_into_xml(manifest, item)
+        self.dirty(self.opf_name)
+        return item
+
def commit(self, outpath=None):
for name in tuple(self.dirtied):
self.dirtied.remove(name)
@@ -257,6 +355,8 @@ class InvalidEpub(InvalidBook):
class EpubContainer(Container):
+ book_type = 'epub'
+
META_INF = {
'container.xml' : True,
'manifest.xml' : False,
@@ -314,7 +414,7 @@ def process_encryption(self):
if alg not in {ADOBE_OBFUSCATION, IDPF_OBFUSCATION}:
raise DRMError()
cr = em.getparent().xpath('descendant::*[local-name()="CipherReference" and @URI]')[0]
- name = self.href_to_name(cr.get('URI'), self.root)
+ name = self.href_to_name(cr.get('URI'))
path = self.name_path_map.get(name, None)
if path is not None:
fonts[name] = alg
@@ -327,14 +427,14 @@ def process_encryption(self):
package_id = val
break
if package_id is not None:
- for elem in self.opf.xpath('//*[@id=%r]'%package_id):
+ for elem in self.opf_xpath('//*[@id=%r]'%package_id):
if elem.text:
unique_identifier = elem.text.rpartition(':')[-1]
break
if unique_identifier is not None:
idpf_key = hashlib.sha1(unique_identifier).digest()
key = None
- for item in self.opf.xpath('//*[local-name()="metadata"]/*'
+ for item in self.opf_xpath('//*[local-name()="metadata"]/*'
'[local-name()="identifier"]'):
scheme = None
for xkey in item.attrib.keys():
@@ -397,6 +497,8 @@ def do_explode(path, dest):
class AZW3Container(Container):
+ book_type = 'azw3'
+
def __init__(self, pathtoazw3, log):
self.pathtoazw3 = pathtoazw3
tdir = self.root = PersistentTemporaryDirectory('_azw3_container')
diff --git a/src/calibre/ebooks/oeb/polish/cover.py b/src/calibre/ebooks/oeb/polish/cover.py
new file mode 100644
index 0000000000..2ad0e2bdfd
--- /dev/null
+++ b/src/calibre/ebooks/oeb/polish/cover.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+ print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import shutil
+
+from calibre.ebooks.oeb.base import OPF
+
+def set_azw3_cover(container, cover_path, report):
+    '''Make the image at cover_path the cover of an AZW3 container.
+
+    All guide references whose type contains "cover" are removed. If the
+    last of them pointed at a file present in the container, that file is
+    overwritten, otherwise a new manifest item is generated for cover.jpeg.
+    A new cover reference is then added to the guide, the OPF is marked
+    dirty and report() is called with a one line summary.'''
+    cover_refs = container.opf_xpath(
+        '//opf:guide/opf:reference[@href and contains(@type, "cover")]')
+    name = None
+    for ref in cover_refs:
+        name = container.href_to_name(ref.get('href'), container.opf_name)
+        container.remove_from_xml(ref)
+    found = name is not None and container.has_name(name)
+    if not found:
+        item = container.generate_item(name='cover.jpeg', id_prefix='cover')
+        name = container.href_to_name(item.get('href'), container.opf_name)
+    href = container.name_to_href(name, container.opf_name)
+    guide = container.opf_xpath('//opf:guide')[0]
+    reference = guide.makeelement(OPF('reference'), href=href, type='cover')
+    container.insert_into_xml(guide, reference)
+    shutil.copyfile(cover_path, container.name_to_abspath(name))
+    container.dirty(container.opf_name)
+    if found:
+        report('Cover updated')
+    else:
+        report('Cover inserted')
+
+def set_cover(container, cover_path, report):
+    '''Set the cover of the book in container to the image at cover_path.
+    Only containers whose book_type is 'azw3' are handled, for any other
+    type nothing is done.'''
+    if container.book_type != 'azw3':
+        return
+    set_azw3_cover(container, cover_path, report)
+
diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py
index cba24c2a07..3473caacdb 100644
--- a/src/calibre/ebooks/oeb/polish/main.py
+++ b/src/calibre/ebooks/oeb/polish/main.py
@@ -14,6 +14,7 @@
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.stats import StatsCollector
from calibre.ebooks.oeb.polish.subset import subset_all_fonts
+from calibre.ebooks.oeb.polish.cover import set_cover
from calibre.utils.logging import Log
ALL_OPTS = {
@@ -72,6 +73,7 @@ def hfix(name, raw):
# }}}
def polish(file_map, opts, log, report):
+ rt = lambda x: report('\n### ' + x)
for inbook, outbook in file_map.iteritems():
report('Polishing: %s'%(inbook.rpartition('.')[-1].upper()))
ebook = get_container(inbook, log)
@@ -80,10 +82,15 @@ def polish(file_map, opts, log, report):
stats = StatsCollector(ebook)
if opts.subset:
- report('\n### Subsetting embedded fonts')
+ rt('Subsetting embedded fonts')
subset_all_fonts(ebook, stats.font_stats, report)
report('')
+ if opts.cover:
+ rt('Setting cover')
+ set_cover(ebook, opts.cover, report)
+ report('')
+
ebook.commit(outbook)
def gui_polish(data):
@@ -105,13 +112,17 @@ def option_parser():
USAGE = '%prog [options] input_file [output_file]\n\n' + re.sub(
r'<.*?>', '', CLI_HELP['about'])
parser = OptionParser(usage=USAGE)
- o = partial(parser.add_option, default=False, action='store_true')
+ a = parser.add_option
+ o = partial(a, default=False, action='store_true')
o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset'])
+ a('--cover', help=_(
+ 'Path to a cover image. Changes the cover specified in the ebook. '
+ 'If no cover is present, inserts a new cover.'))
o('--verbose', help=_('Produce more verbose output, useful for debugging.'))
return parser
-def cli_polish():
+def main():
parser = option_parser()
opts, args = parser.parse_args()
log = Log(level=Log.DEBUG if opts.verbose else Log.INFO)
@@ -139,7 +150,7 @@ def cli_polish():
report = []
something = False
for name in ALL_OPTS:
- if name not in {'opf', 'cover'}:
+ if name not in {'opf', }:
if getattr(popts, name):
something = True
@@ -156,5 +167,5 @@ def cli_polish():
log('Output written to:', outbook)
if __name__ == '__main__':
- cli_polish()
+ main()
diff --git a/src/calibre/ebooks/oeb/polish/subset.py b/src/calibre/ebooks/oeb/polish/subset.py
index eb4f4c4b1f..5ab9db7f5c 100644
--- a/src/calibre/ebooks/oeb/polish/subset.py
+++ b/src/calibre/ebooks/oeb/polish/subset.py
@@ -15,7 +15,7 @@
from calibre.utils.fonts.sfnt.subset import subset
from calibre.utils.fonts.utils import get_font_names
-def remove_font_face_rules(container, sheet, remove_names):
+def remove_font_face_rules(container, sheet, remove_names, base):
changed = False
for rule in tuple(sheet.cssRules):
if rule.type != rule.FONT_FACE_RULE:
@@ -24,7 +24,7 @@ def remove_font_face_rules(container, sheet, remove_names):
uri = rule.style.getProperty('src').propertyValue[0].uri
except (IndexError, KeyError, AttributeError, TypeError, ValueError):
continue
- name = container.href_to_name(uri)
+ name = container.href_to_name(uri, base)
if name in remove_names:
sheet.deleteRule(rule)
changed = True
@@ -65,13 +65,13 @@ def subset_all_fonts(container, font_stats, report):
for name, mt in container.mime_map.iteritems():
if mt in OEB_STYLES:
sheet = container.parsed(name)
- if remove_font_face_rules(container, sheet, remove):
+ if remove_font_face_rules(container, sheet, remove, name):
container.dirty(name)
elif mt in OEB_DOCS:
for style in XPath('//h:style')(container.parsed(name)):
if style.get('type', 'text/css') == 'text/css' and style.text:
sheet = container.parse_css(style.text, name)
- if remove_font_face_rules(container, sheet, remove):
+ if remove_font_face_rules(container, sheet, remove, name):
style.text = sheet.cssText
container.dirty(name)
if total_old > 0:
diff --git a/src/calibre/gui2/store/loader.py b/src/calibre/gui2/store/loader.py
index 45c258a915..e4641da604 100644
--- a/src/calibre/gui2/store/loader.py
+++ b/src/calibre/gui2/store/loader.py
@@ -188,10 +188,12 @@ def load_object(self, src, key):
if __name__ == '__main__':
st = time.time()
+ count = 0
for name, code in download_updates():
+ count += 1
print(name)
print(code)
print('\n', '_'*80, '\n', sep='')
- print ('Time to download all plugins: %.2f'%( time.time() - st))
+ print ('Time to download all %d plugins: %.2f'%(count, time.time() - st))
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 0f3e2d1792..1ba7796714 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -19,6 +19,7 @@
'ebook-device = calibre.devices.cli:main',
'ebook-meta = calibre.ebooks.metadata.cli:main',
'ebook-convert = calibre.ebooks.conversion.cli:main',
+ 'ebook-polish = calibre.ebooks.oeb.polish.main:main',
'markdown-calibre = calibre.ebooks.markdown.markdown:main',
'web2disk = calibre.web.fetch.simple:main',
'calibre-server = calibre.library.server.main:main',
@@ -30,7 +31,6 @@
'calibre-customize = calibre.customize.ui:main',
'calibre-complete = calibre.utils.complete:main',
'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main',
- 'epub-fix = calibre.ebooks.epub.fix.main:main',
'calibre-smtp = calibre.utils.smtp:main',
],
'gui_scripts' : [
@@ -226,7 +226,7 @@ def setup_completion(self): # {{{
from calibre.gui2.main import option_parser as guiop
from calibre.utils.smtp import option_parser as smtp_op
from calibre.library.server.main import option_parser as serv_op
- from calibre.ebooks.epub.fix.main import option_parser as fix_op
+ from calibre.ebooks.oeb.polish.main import option_parser as polish_op, SUPPORTED
from calibre.ebooks import BOOK_EXTENSIONS
input_formats = sorted(all_input_formats())
bc = os.path.join(os.path.dirname(self.opts.staging_sharedir),
@@ -251,12 +251,12 @@ def setup_completion(self): # {{{
f.write(opts_and_exts('calibre', guiop, BOOK_EXTENSIONS))
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
+ f.write(opts_and_exts('ebook-polish', polish_op, [x.lower() for x in SUPPORTED]))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write(opts_and_exts('ebook-viewer', viewer_op, input_formats))
f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
f.write(opts_and_words('calibre-smtp', smtp_op, []))
f.write(opts_and_words('calibre-server', serv_op, []))
- f.write(opts_and_exts('epub-fix', fix_op, ['epub']))
f.write(textwrap.dedent('''
_ebook_device_ls()
{