IGN:...

2026-04-29 17:53:15 +02:00 · 2009-03-19 19:12:07 -07:00 · 2009-03-19 19:12:07 -07:00 · d7257ad5f2
commit d7257ad5f2
parent fe918ab068 29486d653e
8 changed files with 76 additions and 61 deletions
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -129,8 +129,6 @@ def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
        self.tag_map, self.attr_map, self.tag_to_attr_map = map
        self.is_html = map is HTML_MAP
        self.tag_atoms, self.attr_atoms = atoms
-        self.opf = map is OPF_MAP
-        self.bin = bin
        self.dir = os.path.dirname(path)
        buf = StringIO()
        self.binary_to_text(bin, buf)
@ -210,7 +208,8 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
                        continue
                    if flags & FLAG_ATOM:
                        if not self.tag_atoms or tag not in self.tag_atoms:
-                            raise LitError("atom tag %d not in atom tag list" % tag)
+                            raise LitError(
+                                "atom tag %d not in atom tag list" % tag)
                        tag_name = self.tag_atoms[tag]
                        current_map = self.attr_atoms
                    elif tag < len(self.tag_map):
@ -295,7 +294,7 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
                            c = '&quot;'
                        elif c == '<':
                            c = '&lt;'
-                        self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
+                        buf.write(c.encode('ascii', 'xmlcharrefreplace'))
                    count -= 1
                if count == 0:
                    if not in_censorship:
@ -841,24 +840,7 @@ def get_atoms(self, entry):
        if len(attrs) != nentries:
            self._warn("damaged or invalid atoms attributes table")
        return (tags, attrs)
-    
-    def get_entry_content(self, entry, pretty_print=False):
-        if 'spine' in entry.state:
-            name = '/'.join(('/data', entry.internal, 'content'))
-            path = entry.path
-            raw = self.get_file(name)
-            decl, map = (OPF_DECL, OPF_MAP) \
-                if name == '/meta' else (HTML_DECL, HTML_MAP)
-            atoms = self.get_atoms(entry)
-            content = decl + unicode(UnBinary(raw, path, self.manifest, map, atoms))
-            if pretty_print:
-                content = self._pretty_print(content)
-            content = content.encode('utf-8')
-        else:
-            internal = '/'.join(('/data', entry.internal))
-            content = self._litfile.get_file(internal)
-        return content
- 
+

 class LitContainer(object):
    """Simple Container-interface, read-only accessor for LIT files."""
@ -879,9 +861,15 @@ def read(self, name):
        elif 'spine' in entry.state:
            internal = '/'.join(('/data', entry.internal, 'content'))
            raw = self._litfile.get_file(internal)
-            unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
+            manifest = self._litfile.manifest
+            atoms = self._litfile.get_atoms(entry)
+            unbin = UnBinary(raw, name, manifest, HTML_MAP, atoms)
            content = HTML_DECL + str(unbin)
-   
+        else:
+            internal = '/'.join(('/data', entry.internal))
+            content = self._litfile.get_file(internal)
+        return content
+    
    def _read_meta(self):
        path = 'content.opf'
        raw = self._litfile.get_file('/meta')
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -27,7 +27,7 @@
    CSS_MIME, OPF_MIME, XML_NS, XML
 from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
    urlnormalize, xpath
-from calibre.ebooks.oeb.base import Logger, OEBBook
+from calibre.ebooks.oeb.base import OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
@ -732,7 +732,7 @@ def option_parser():
    return parser

 def oeb2lit(opts, inpath):
-    logger = Logger(logging.getLogger('oeb2lit'))
+    logger = logging.getLogger('oeb2lit')
    logger.setup_cli_handler(opts.verbose)
    outpath = opts.output
    if outpath is None:
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -13,8 +13,11 @@
 from itertools import count
 from urlparse import urldefrag, urlparse, urlunparse
 from urllib import unquote as urlunquote
+import logging
 from lxml import etree, html
 import calibre
+from cssutils import CSSParser
+from cssutils.css import CSSStyleSheet
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -99,6 +102,8 @@ def LINK_SELECTORS():
 SVG_MIME       = types_map['.svg']
 BINARY_MIME    = 'application/octet-stream'

+XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS
+
 OEB_STYLES        = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
 OEB_DOCS          = set([XHTML_MIME, 'text/html', OEB_DOC_MIME,
                         'text/x-oeb-document'])
@ -565,7 +570,7 @@ def __repr__(self):
            return 'Item(id=%r, href=%r, media_type=%r)' \
                % (self.id, self.href, self.media_type)

-        def _force_xhtml(self, data):
+        def _parse_xhtml(self, data):
            # Convert to Unicode and normalize line endings
            data = self.oeb.decode(data)
            data = XMLDECL_RE.sub('', data)
@ -645,6 +650,27 @@ def _force_xhtml(self, data):
                    'File %r missing <body/> element' % self.href)
                etree.SubElement(data, XHTML('body'))
            return data
+
+        def _parse_css(self, data):
+            data = self.oeb.decode(data)
+            data = XHTML_CSS_NAMESPACE + data
+            parser = CSSParser(log=self.oeb.logger, loglevel=logging.WARNING,
+                               fetcher=self._fetch_css)
+            data = parser.parseString(data, href=self.href)
+            data.namespaces['h'] = XHTML_NS
+            return data
+        
+        def _fetch_css(self, path):
+            hrefs = self.oeb.manifest.hrefs
+            if path not in hrefs:
+                self.oeb.logger.warn('CSS import of missing file %r' % path)
+                return (None, None)
+            item = hrefs[path]
+            if item.media_type not in OEB_STYLES:
+                self.oeb.logger.warn('CSS import of non-CSS file %r' % path)
+                return (None, None)
+            data = item.data.cssText
+            return ('utf-8', data)
        
        @dynamic_property
        def data(self):
@ -661,15 +687,19 @@ def data(self):
              special parsing.
            """
            def fget(self):
-                if self._data is not None:
-                    return self._data
-                data = self._loader(self.href)
-                if self.media_type in OEB_DOCS:
-                    data = self._force_xhtml(data)
+                data = self._data
+                if data is None:
+                    if self._loader is None:
+                        return None
+                    data = self._loader(self.href)
+                if not isinstance(data, basestring):
+                    pass # already parsed
+                elif self.media_type in OEB_DOCS:
+                    data = self._parse_xhtml(data)
                elif self.media_type[-4:] in ('+xml', '/xml'):
                    data = etree.fromstring(data)
                elif self.media_type in OEB_STYLES:
-                    data = self.oeb.decode(data)
+                    data = self._parse_css(data)
                self._data = data
                return data
            def fset(self, value):
@ -677,7 +707,7 @@ def fset(self, value):
            def fdel(self):
                self._data = None
            return property(fget, fset, fdel, doc=doc)
-                
+        
        def __str__(self):
            data = self.data
            if isinstance(data, etree._Element):
@ -726,7 +756,7 @@ def relhref(self, href):
            if frag:
                relhref = '#'.join((relhref, frag))
            return relhref
-
+        
        def abshref(self, href):
            """Convert the URL provided in :param:`href` from a reference
            relative to this manifest item to a book-absolute reference.
@ -748,7 +778,7 @@ def __init__(self, oeb):
        self.items = set()
        self.ids = {}
        self.hrefs = {}
-
+    
    def add(self, id, href, media_type, fallback=None, loader=None, data=None):
        """Add a new item to the book manifest.

@ -765,7 +795,7 @@ def add(self, id, href, media_type, fallback=None, loader=None, data=None):
        self.ids[item.id] = item
        self.hrefs[item.href] = item
        return item
-
+    
    def remove(self, item):
        """Removes :param:`item` from the manifest."""
        if item in self.ids:
@ -775,7 +805,7 @@ def remove(self, item):
        self.items.remove(item)
        if item in self.oeb.spine:
            self.oeb.spine.remove(item)
-
+    
    def generate(self, id=None, href=None):
        """Generate a new unique identifier and/or internal path for use in
        creating a new manifest item, using the provided :param:`id` and/or
@ -803,13 +833,13 @@ def generate(self, id=None, href=None):
    def __iter__(self):
        for item in self.items:
            yield item
-
+    
    def values(self):
        return list(self.items)
    
    def __contains__(self, item):
        return item in self.items
-
+    
    def to_opf1(self, parent=None):
        elem = element(parent, 'manifest')
        for item in self.items:
--- a/src/calibre/ebooks/oeb/factory.py
+++ b/src/calibre/ebooks/oeb/factory.py
@ -8,6 +8,7 @@

 import sys, os, logging
 from itertools import chain
+import calibre
 from calibre.ebooks.oeb.base import OEBError
 from calibre.ebooks.oeb.reader import OEBReader
 from calibre.ebooks.oeb.writer import OEBWriter
@ -15,7 +16,7 @@
 from calibre.ebooks.lit.writer import LitWriter
 from calibre.ebooks.mobi.reader import MobiReader
 from calibre.ebooks.mobi.writer import MobiWriter
-from calibre.ebooks.oeb.base import Logger, OEBBook
+from calibre.ebooks.oeb.base import OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.utils.config import Config

@ -77,8 +78,8 @@ def main(argv=sys.argv):
    if len(args) != 0:
        parser.print_help()
        return 1
-    logger = Logger(logging.getLogger('ebook-convert'))
-    logger.setup_cli_handler(opts.verbose)
+    logger = logging.getLogger('ebook-convert')
+    calibre.setup_cli_handlers(logger, logging.DEBUG)
    encoding = opts.encoding
    pretty_print = opts.pretty_print
    oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -181,7 +181,7 @@ def _manifest_add_missing(self):
                        if not scheme and href not in known:
                            new.add(href)
                elif item.media_type in OEB_STYLES:
-                    for match in CSSURL_RE.finditer(item.data):
+                    for match in CSSURL_RE.finditer(item.data.cssText):
                        href, _ = urldefrag(match.group('url'))
                        href = item.abshref(urlnormalize(href))
                        scheme = urlparse(href).scheme
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -115,8 +115,7 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [HTML_CSS_STYLESHEET]
        head = xpath(tree, '/h:html/h:head')[0]
-        parser = cssutils.CSSParser()
-        parser.setFetcher(self._fetch_css_file)
+        parser = cssutils.CSSParser(fetcher=self._fetch_css_file)
        for elem in head:
            if elem.tag == XHTML('style') and elem.text \
               and elem.get('type', CSS_MIME) in OEB_STYLES:
@ -135,14 +134,7 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
-                if sitem in self.STYLESHEETS:
-                    stylesheet = self.STYLESHEETS[sitem]
-                else:
-                    data = self._fetch_css_file(path)[1]
-                    stylesheet = parser.parseString(data, href=path)
-                    stylesheet.namespaces['h'] = XHTML_NS
-                    self.STYLESHEETS[sitem] = stylesheet
-                stylesheets.append(stylesheet)
+                stylesheets.append(sitem.data)
        rules = []
        index = 0
        self.stylesheets = set()
@ -159,9 +151,9 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
        for _, _, cssdict, text, _ in rules:
            try:
                selector = CSSSelector(text)
-            except (AssertionError, ExpressionError, etree.XPathSyntaxError,\
-                NameError, # gets thrown on OS X instead of SelectorSyntaxError
-                SelectorSyntaxError):
+            except (AssertionError, ExpressionError, etree.XPathSyntaxError,
+                    NameError, # thrown on OS X instead of SelectorSyntaxError
+                    SelectorSyntaxError):
                continue
            for elem in selector(tree):
                self.style(elem)._update_cssdict(cssdict)
@ -171,9 +163,13 @@ def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
    def _fetch_css_file(self, path):
        hrefs = self.oeb.manifest.hrefs
        if path not in hrefs:
+            self.logger.warn('CSS import of missing file %r' % path)
            return (None, None)
-        data = hrefs[path].data
-        data = XHTML_CSS_NAMESPACE + data
+        item = hrefs[path]
+        if item.media_type not in OEB_STYLES:
+            self.logger.warn('CSS import of non-CSS file %r' % path)
+            return (None, None)
+        data = item.data.cssText
        return ('utf-8', data)
    
    def flatten_rule(self, rule, href, index):
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@ -53,7 +53,7 @@ def __call__(self, oeb, context):
                            if found not in used:
                                new.add(found)
                elif item.media_type == CSS_MIME:
-                    for match in CSSURL_RE.finditer(item.data):
+                    for match in CSSURL_RE.finditer(item.data.cssText):
                        href = match.group('url')
                        href = item.abshref(urlnormalize(href))
                        if href in oeb.manifest.hrefs:
--- a/src/calibre/ebooks/oeb/writer.py
+++ b/src/calibre/ebooks/oeb/writer.py
@ -8,7 +8,7 @@

 import sys, os, logging
 from calibre.ebooks.oeb.base import OPF_MIME, xml2str
-from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook
+from calibre.ebooks.oeb.base import DirContainer, OEBBook

 __all__ = ['OEBWriter']