This commit is contained in:
Kovid Goyal 2009-03-19 19:12:07 -07:00
commit d7257ad5f2
8 changed files with 76 additions and 61 deletions

View file

@ -129,8 +129,6 @@ def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.is_html = map is HTML_MAP
self.tag_atoms, self.attr_atoms = atoms
self.opf = map is OPF_MAP
self.bin = bin
self.dir = os.path.dirname(path)
buf = StringIO()
self.binary_to_text(bin, buf)
@ -210,7 +208,8 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
continue
if flags & FLAG_ATOM:
if not self.tag_atoms or tag not in self.tag_atoms:
raise LitError("atom tag %d not in atom tag list" % tag)
raise LitError(
"atom tag %d not in atom tag list" % tag)
tag_name = self.tag_atoms[tag]
current_map = self.attr_atoms
elif tag < len(self.tag_map):
@ -295,7 +294,7 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
c = '&quot;'
elif c == '<':
c = '&lt;'
self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
buf.write(c.encode('ascii', 'xmlcharrefreplace'))
count -= 1
if count == 0:
if not in_censorship:
@ -841,24 +840,7 @@ def get_atoms(self, entry):
if len(attrs) != nentries:
self._warn("damaged or invalid atoms attributes table")
return (tags, attrs)
# Return the content stored for manifest `entry`.
#
# Entries whose state contains 'spine' are read from
# '/data/<entry.internal>/content', converted to markup with UnBinary,
# prefixed with a declaration string, optionally pretty-printed, and
# returned encoded as UTF-8.  Every other entry is returned exactly as
# get_file() yields it for '/data/<entry.internal>'.
def get_entry_content(self, entry, pretty_print=False):
if 'spine' in entry.state:
name = '/'.join(('/data', entry.internal, 'content'))
path = entry.path
raw = self.get_file(name)
# NOTE(review): `name` was just built with a '/data' prefix, so the
# comparison with '/meta' below looks like it can never be true and the
# HTML declaration/map would always be chosen -- confirm intent.
decl, map = (OPF_DECL, OPF_MAP) \
if name == '/meta' else (HTML_DECL, HTML_MAP)
atoms = self.get_atoms(entry)
content = decl + unicode(UnBinary(raw, path, self.manifest, map, atoms))
if pretty_print:
content = self._pretty_print(content)
content = content.encode('utf-8')
else:
internal = '/'.join(('/data', entry.internal))
# NOTE(review): this branch reads through self._litfile, whereas the
# spine branch calls self.get_file() and self.manifest directly --
# verify which object actually owns get_file().
content = self._litfile.get_file(internal)
return content
class LitContainer(object):
"""Simple Container-interface, read-only accessor for LIT files."""
@ -879,9 +861,15 @@ def read(self, name):
elif 'spine' in entry.state:
internal = '/'.join(('/data', entry.internal, 'content'))
raw = self._litfile.get_file(internal)
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
manifest = self._litfile.manifest
atoms = self._litfile.get_atoms(entry)
unbin = UnBinary(raw, name, manifest, HTML_MAP, atoms)
content = HTML_DECL + str(unbin)
else:
internal = '/'.join(('/data', entry.internal))
content = self._litfile.get_file(internal)
return content
def _read_meta(self):
path = 'content.opf'
raw = self._litfile.get_file('/meta')

View file

@ -27,7 +27,7 @@
CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
urlnormalize, xpath
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
@ -732,7 +732,7 @@ def option_parser():
return parser
def oeb2lit(opts, inpath):
logger = Logger(logging.getLogger('oeb2lit'))
logger = logging.getLogger('oeb2lit')
logger.setup_cli_handler(opts.verbose)
outpath = opts.output
if outpath is None:

View file

@ -13,8 +13,11 @@
from itertools import count
from urlparse import urldefrag, urlparse, urlunparse
from urllib import unquote as urlunquote
import logging
from lxml import etree, html
import calibre
from cssutils import CSSParser
from cssutils.css import CSSStyleSheet
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -99,6 +102,8 @@ def LINK_SELECTORS():
SVG_MIME = types_map['.svg']
BINARY_MIME = 'application/octet-stream'
XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME,
'text/x-oeb-document'])
@ -565,7 +570,7 @@ def __repr__(self):
return 'Item(id=%r, href=%r, media_type=%r)' \
% (self.id, self.href, self.media_type)
def _force_xhtml(self, data):
def _parse_xhtml(self, data):
# Convert to Unicode and normalize line endings
data = self.oeb.decode(data)
data = XMLDECL_RE.sub('', data)
@ -645,6 +650,27 @@ def _force_xhtml(self, data):
'File %r missing <body/> element' % self.href)
etree.SubElement(data, XHTML('body'))
return data
def _parse_css(self, data):
    """Parse raw stylesheet data for this item.

    The data is decoded with ``self.oeb.decode``, prefixed with the
    XHTML ``@namespace`` declaration, and parsed by a ``CSSParser`` that
    logs to the book's logger and resolves fetches through
    ``self._fetch_css``.  ``self.href`` is passed as the sheet's href.
    The ``h`` namespace prefix is bound to the XHTML namespace on the
    parsed result before it is returned.
    """
    css_text = XHTML_CSS_NAMESPACE + self.oeb.decode(data)
    css_parser = CSSParser(
        log=self.oeb.logger,
        loglevel=logging.WARNING,
        fetcher=self._fetch_css,
    )
    sheet = css_parser.parseString(css_text, href=self.href)
    sheet.namespaces['h'] = XHTML_NS
    return sheet
def _fetch_css(self, path):
    """Fetcher callback given to the CSS parser by ``_parse_css``.

    Looks ``path`` up in the manifest's href table.  If the item exists
    and its media type is one of ``OEB_STYLES``, returns
    ``('utf-8', css_text)`` where ``css_text`` is the ``cssText`` of the
    item's data.  Otherwise a warning is logged and ``(None, None)`` is
    returned.
    """
    manifest_hrefs = self.oeb.manifest.hrefs
    try:
        target = manifest_hrefs[path]
    except KeyError:
        self.oeb.logger.warn('CSS import of missing file %r' % path)
        return (None, None)
    if target.media_type in OEB_STYLES:
        return ('utf-8', target.data.cssText)
    self.oeb.logger.warn('CSS import of non-CSS file %r' % path)
    return (None, None)
@dynamic_property
def data(self):
@ -661,15 +687,19 @@ def data(self):
special parsing.
"""
def fget(self):
if self._data is not None:
return self._data
data = self._loader(self.href)
if self.media_type in OEB_DOCS:
data = self._force_xhtml(data)
data = self._data
if data is None:
if self._loader is None:
return None
data = self._loader(self.href)
if not isinstance(data, basestring):
pass # already parsed
elif self.media_type in OEB_DOCS:
data = self._parse_xhtml(data)
elif self.media_type[-4:] in ('+xml', '/xml'):
data = etree.fromstring(data)
elif self.media_type in OEB_STYLES:
data = self.oeb.decode(data)
data = self._parse_css(data)
self._data = data
return data
def fset(self, value):
@ -677,7 +707,7 @@ def fset(self, value):
def fdel(self):
self._data = None
return property(fget, fset, fdel, doc=doc)
def __str__(self):
data = self.data
if isinstance(data, etree._Element):
@ -726,7 +756,7 @@ def relhref(self, href):
if frag:
relhref = '#'.join((relhref, frag))
return relhref
def abshref(self, href):
"""Convert the URL provided in :param:`href` from a reference
relative to this manifest item to a book-absolute reference.
@ -748,7 +778,7 @@ def __init__(self, oeb):
self.items = set()
self.ids = {}
self.hrefs = {}
def add(self, id, href, media_type, fallback=None, loader=None, data=None):
"""Add a new item to the book manifest.
@ -765,7 +795,7 @@ def add(self, id, href, media_type, fallback=None, loader=None, data=None):
self.ids[item.id] = item
self.hrefs[item.href] = item
return item
def remove(self, item):
"""Removes :param:`item` from the manifest."""
if item in self.ids:
@ -775,7 +805,7 @@ def remove(self, item):
self.items.remove(item)
if item in self.oeb.spine:
self.oeb.spine.remove(item)
def generate(self, id=None, href=None):
"""Generate a new unique identifier and/or internal path for use in
creating a new manifest item, using the provided :param:`id` and/or
@ -803,13 +833,13 @@ def generate(self, id=None, href=None):
def __iter__(self):
for item in self.items:
yield item
def values(self):
return list(self.items)
def __contains__(self, item):
return item in self.items
def to_opf1(self, parent=None):
elem = element(parent, 'manifest')
for item in self.items:

View file

@ -8,6 +8,7 @@
import sys, os, logging
from itertools import chain
import calibre
from calibre.ebooks.oeb.base import OEBError
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks.oeb.writer import OEBWriter
@ -15,7 +16,7 @@
from calibre.ebooks.lit.writer import LitWriter
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.mobi.writer import MobiWriter
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.utils.config import Config
@ -77,8 +78,8 @@ def main(argv=sys.argv):
if len(args) != 0:
parser.print_help()
return 1
logger = Logger(logging.getLogger('ebook-convert'))
logger.setup_cli_handler(opts.verbose)
logger = logging.getLogger('ebook-convert')
calibre.setup_cli_handlers(logger, logging.DEBUG)
encoding = opts.encoding
pretty_print = opts.pretty_print
oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)

View file

@ -181,7 +181,7 @@ def _manifest_add_missing(self):
if not scheme and href not in known:
new.add(href)
elif item.media_type in OEB_STYLES:
for match in CSSURL_RE.finditer(item.data):
for match in CSSURL_RE.finditer(item.data.cssText):
href, _ = urldefrag(match.group('url'))
href = item.abshref(urlnormalize(href))
scheme = urlparse(href).scheme

View file

@ -115,8 +115,7 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
cssname = os.path.splitext(basename)[0] + '.css'
stylesheets = [HTML_CSS_STYLESHEET]
head = xpath(tree, '/h:html/h:head')[0]
parser = cssutils.CSSParser()
parser.setFetcher(self._fetch_css_file)
parser = cssutils.CSSParser(fetcher=self._fetch_css_file)
for elem in head:
if elem.tag == XHTML('style') and elem.text \
and elem.get('type', CSS_MIME) in OEB_STYLES:
@ -135,14 +134,7 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
'Stylesheet %r referenced by file %r not in manifest' %
(path, item.href))
continue
if sitem in self.STYLESHEETS:
stylesheet = self.STYLESHEETS[sitem]
else:
data = self._fetch_css_file(path)[1]
stylesheet = parser.parseString(data, href=path)
stylesheet.namespaces['h'] = XHTML_NS
self.STYLESHEETS[sitem] = stylesheet
stylesheets.append(stylesheet)
stylesheets.append(sitem.data)
rules = []
index = 0
self.stylesheets = set()
@ -159,9 +151,9 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
for _, _, cssdict, text, _ in rules:
try:
selector = CSSSelector(text)
except (AssertionError, ExpressionError, etree.XPathSyntaxError,\
NameError, # gets thrown on OS X instead of SelectorSyntaxError
SelectorSyntaxError):
except (AssertionError, ExpressionError, etree.XPathSyntaxError,
NameError, # thrown on OS X instead of SelectorSyntaxError
SelectorSyntaxError):
continue
for elem in selector(tree):
self.style(elem)._update_cssdict(cssdict)
@ -171,9 +163,13 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
def _fetch_css_file(self, path):
hrefs = self.oeb.manifest.hrefs
if path not in hrefs:
self.logger.warn('CSS import of missing file %r' % path)
return (None, None)
data = hrefs[path].data
data = XHTML_CSS_NAMESPACE + data
item = hrefs[path]
if item.media_type not in OEB_STYLES:
self.logger.warn('CSS import of non-CSS file %r' % path)
return (None, None)
data = item.data.cssText
return ('utf-8', data)
def flatten_rule(self, rule, href, index):

View file

@ -53,7 +53,7 @@ def __call__(self, oeb, context):
if found not in used:
new.add(found)
elif item.media_type == CSS_MIME:
for match in CSSURL_RE.finditer(item.data):
for match in CSSURL_RE.finditer(item.data.cssText):
href = match.group('url')
href = item.abshref(urlnormalize(href))
if href in oeb.manifest.hrefs:

View file

@ -8,7 +8,7 @@
import sys, os, logging
from calibre.ebooks.oeb.base import OPF_MIME, xml2str
from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook
from calibre.ebooks.oeb.base import DirContainer, OEBBook
__all__ = ['OEBWriter']