Various stability fixes for any2lit and any2mobi

This commit is contained in:
Kovid Goyal 2009-01-26 09:37:57 -08:00
commit 2823867347
4 changed files with 38 additions and 4 deletions

View file

@ -11,6 +11,7 @@
import functools
import re
from urlparse import urldefrag
from urllib import unquote as urlunquote
from lxml import etree
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
@ -611,6 +612,8 @@ def _read_manifest(self):
offset, raw = u32(raw), raw[4:]
internal, raw = consume_sized_utf8_string(raw)
original, raw = consume_sized_utf8_string(raw)
# The path should be stored unquoted, but not always
original = urlunquote(original)
# Is this last one UTF-8 or ASCIIZ?
mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
self.manifest[internal] = ManifestItem(

View file

@ -331,6 +331,13 @@ def __repr__(self):
def _force_xhtml(self, data):
if self.oeb.encoding is not None:
data = data.decode(self.oeb.encoding, 'replace')
# Handle broken XHTML w/ SVG (ugh)
if 'svg:' in data and SVG_NS not in data:
data = data.replace(
'<html', '<html xmlns:svg="%s"' % SVG_NS, 1)
if 'xlink:' in data and XLINK_NS not in data:
data = data.replace(
'<html', '<html xmlns:xlink="%s"' % XLINK_NS, 1)
try:
data = etree.fromstring(data)
except etree.XMLSyntaxError:
@ -343,6 +350,24 @@ def _force_xhtml(self, data):
data = etree.fromstring(data)
for meta in self.META_XP(data):
meta.getparent().remove(meta)
head = xpath(data, '/h:html/h:head')
head = head[0] if head else None
if head is None:
self.oeb.logger.warn(
'File %r missing <head/> element' % self.href)
head = etree.Element(XHTML('head'))
data.insert(0, head)
title = etree.SubElement(head, XHTML('title'))
title.text = self.oeb.translate(__('Unknown'))
elif not xpath(data, '/h:html/h:head/h:title'):
self.oeb.logger.warn(
'File %r missing <title/> element' % self.href)
title = etree.SubElement(head, XHTML('title'))
title.text = self.oeb.translate(__('Unknown'))
if not xpath(data, '/h:html/h:body'):
self.oeb.logger.warn(
'File %r missing <body/> element' % self.href)
etree.SubElement(data, XHTML('body'))
return data
def data():

View file

@ -110,7 +110,8 @@ class Stylizer(object):
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
self.profile = profile
base = os.path.dirname(path)
self.logger = oeb.logger
item = oeb.manifest.hrefs[path]
basename = os.path.basename(path)
cssname = os.path.splitext(basename)[0] + '.css'
stylesheets = [HTML_CSS_STYLESHEET]
@ -128,8 +129,12 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
and elem.get('rel', 'stylesheet') == 'stylesheet' \
and elem.get('type', CSS_MIME) in OEB_STYLES:
href = urlnormalize(elem.attrib['href'])
path = os.path.join(base, href)
path = os.path.normpath(path).replace('\\', '/')
path = item.abshref(href)
if path not in oeb.manifest.hrefs:
self.logger.warn(
'Stylesheet %r referenced by file %r not in manifest' %
(path, item.href))
continue
if path in self.STYLESHEETS:
stylesheet = self.STYLESHEETS[path]
else:

View file

@ -13,6 +13,7 @@
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
from calibre.ebooks.oeb.base import urlnormalize
LINK_SELECTORS = []
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
@ -46,7 +47,7 @@ def transform(self, oeb, context):
item.data is not None:
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
for href in chain(*hrefs):
href = item.abshref(href)
href = item.abshref(urlnormalize(href))
if href in oeb.manifest.hrefs:
found = oeb.manifest.hrefs[href]
if found not in used: