Halve the startup time of worker processes by delay loading cssutils, oeb.stylizer and oeb.base

This commit is contained in:
Kovid Goyal 2011-04-19 12:24:41 -06:00
parent 6b52f4ad89
commit e835131c82
23 changed files with 75 additions and 52 deletions

View file

@ -33,9 +33,6 @@
fcntl, win32event, isfrozen, __author__, terminal_controller
winerror, win32api, isfreebsd, guess_type
import cssutils
cssutils.log.setLevel(logging.WARN)
def to_unicode(raw, encoding='utf-8', errors='strict'):
if isinstance(raw, unicode):
return raw
@ -679,4 +676,3 @@ def main():
ipshell()
sys.argv = old_argv

View file

@ -9,7 +9,6 @@
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.utils.config import test_eight_code
# To archive plugins {{{
@ -98,6 +97,8 @@ class TXT2TXTZ(FileTypePlugin):
on_import = True
def _get_image_references(self, txt, base_dir):
from calibre.ebooks.oeb.base import OEB_IMAGES
images = []
# Textile

View file

@ -18,9 +18,6 @@
from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES, OPF
from calibre.utils.magick import Image
class FB2MLizer(object):
@ -71,7 +68,7 @@ def fb2mlize_spine(self):
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
def clean_text(self, text):
# Condense empty paragraphs into a line break.
# Condense empty paragraphs into a line break.
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line />', text)
# Remove empty paragraphs.
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
@ -100,6 +97,7 @@ def clean_text(self, text):
return text
def fb2_header(self):
from calibre.ebooks.oeb.base import OPF
metadata = {}
metadata['title'] = self.oeb_book.metadata.title[0].value
metadata['appname'] = __appname__
@ -180,6 +178,8 @@ def fb2_footer(self):
return u'</FictionBook>'
def get_cover(self):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
cover_href = None
# Get the raster cover if it's available.
@ -213,6 +213,8 @@ def get_cover(self):
return u''
def get_text(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
text = ['<body>']
# Create main section if there are no others to create
@ -248,6 +250,8 @@ def fb2mlize_images(self):
'''
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
'''
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
images = []
for item in self.oeb_book.manifest:
# Don't write the image if it's not referenced in the document's text.
@ -344,6 +348,8 @@ def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
@return: List of strings representing the XHTML converted to FB2 markup.
'''
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
# Ensure what we are converting is not a string and that the first tag is part of the XHTML namespace.
if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS:
return []

View file

@ -315,7 +315,8 @@ def create_oebbook(self, htmlpath, basedir, opts, log, mi):
from calibre import guess_type
from calibre.ebooks.oeb.transforms.metadata import \
meta_info_to_oeb_metadata
import cssutils
import cssutils, logging
cssutils.log.setLevel(logging.WARN)
self.OEB_STYLES = OEB_STYLES
oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False)

View file

@ -4,7 +4,6 @@
__docformat__ = 'restructuredtext en'
from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
class EasyMeta(object):
@ -12,6 +11,7 @@ def __init__(self, meta):
self.meta = meta
def __iter__(self):
from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
meta = self.meta
for item_name in meta.items:
for item in meta[item_name]:

View file

@ -12,7 +12,6 @@
from lxml import etree
from templite import Templite
from calibre.ebooks.oeb.base import element
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
@ -51,6 +50,7 @@ def generate_toc(self, oeb_book, ref_url, output_dir):
'''
Generate table of contents
'''
from calibre.ebooks.oeb.base import element
with CurrentDir(output_dir):
def build_node(current_node, parent=None):
if parent is None:

View file

@ -12,7 +12,6 @@
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
@ -42,6 +41,8 @@ class HTMLZOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
# HTML
if opts.htmlz_css_type == 'inline':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
@ -72,7 +73,7 @@ def convert(self, oeb_book, output_path, input_plugin, opts, log):
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES and item.href in images:
if item.media_type == SVG_MIME:
data = unicode(etree.tostring(item.data, encoding=unicode))
data = unicode(etree.tostring(item.data, encoding=unicode))
else:
data = item.data
fname = os.path.join(tdir, 'images', images[item.href])

View file

@ -15,11 +15,7 @@
from urllib import unquote as urlunquote
from lxml import etree, html
from cssutils import CSSParser, parseString, parseStyle, replaceUrls
from cssutils.css import CSSRule
import calibre
from calibre.constants import filesystem_encoding
from calibre.constants import filesystem_encoding, __version__
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -179,6 +175,9 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
If the ``link_repl_func`` returns None, the attribute or
tag text will be removed completely.
'''
from cssutils import parseString, parseStyle, replaceUrls, log
log.setLevel(logging.WARN)
if resolve_base_href:
resolve_base_href(root)
for el, attrib, link, pos in iterlinks(root, find_links_in_css=False):
@ -1075,7 +1074,9 @@ def _parse_txt(self, data):
def _parse_css(self, data):
from cssutils.css import CSSRule
from cssutils import CSSParser, log
log.setLevel(logging.WARN)
def get_style_rules_from_import(import_rule):
ans = []
if not import_rule.styleSheet:
@ -2011,7 +2012,7 @@ def _to_ncx(self):
name='dtb:uid', content=unicode(self.uid))
etree.SubElement(head, NCX('meta'),
name='dtb:depth', content=str(self.toc.depth()))
generator = ''.join(['calibre (', calibre.__version__, ')'])
generator = ''.join(['calibre (', __version__, ')'])
etree.SubElement(head, NCX('meta'),
name='dtb:generator', content=generator)
etree.SubElement(head, NCX('meta'),

View file

@ -14,7 +14,6 @@
from collections import defaultdict
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
DC_NSES, OPF, xml2text
@ -172,6 +171,7 @@ def _manifest_prune_invalid(self):
return bad
def _manifest_add_missing(self, invalid):
import cssutils
manifest = self.oeb.manifest
known = set(manifest.hrefs)
unchecked = set(manifest.values())

View file

@ -12,17 +12,18 @@
from weakref import WeakKeyDictionary
from xml.dom import SyntaxErr as CSSSyntaxError
import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, CSSFontFaceRule, cssproperties
from cssutils.css import (CSSStyleRule, CSSPageRule, CSSStyleDeclaration,
CSSValueList, CSSFontFaceRule, cssproperties)
from cssutils import profile as cssprofiles
from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
from calibre import force_unicode
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
from calibre.ebooks.oeb.profile import PROFILES
cssutils.log.setLevel(logging.WARN)
_html_css_stylesheet = None
def html_css_stylesheet():

View file

@ -9,7 +9,6 @@
from urlparse import urldefrag, urlparse
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
@ -25,6 +24,7 @@ def __init__(self, rename_map, renamed_items_map = None):
self.renamed_items_map = renamed_items_map
def __call__(self, oeb, opts):
import cssutils
self.log = oeb.logger
self.opts = opts
self.oeb = oeb

View file

@ -8,8 +8,6 @@
from urlparse import urldefrag
import cssutils
from calibre.ebooks.oeb.base import CSS_MIME, OEB_DOCS
from calibre.ebooks.oeb.base import urlnormalize, iterlinks
@ -23,6 +21,7 @@ def generate(cls, opts):
return cls()
def __call__(self, oeb, context):
import cssutils
oeb.logger.info('Trimming unused files from manifest...')
self.opts = context
used = set()

View file

@ -21,7 +21,6 @@
import cStringIO
from calibre.ebooks.pdb.formatwriter import FormatWriter
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.pdb.header import PdbHeaderBuilder
from calibre.ebooks.pml.pmlml import PMLMLizer
@ -135,6 +134,7 @@ def _images(self, manifest, image_hrefs):
62-...: Raw image data in 8 bit PNG format.
'''
images = []
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
for item in manifest:
if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():

View file

@ -18,7 +18,6 @@
from calibre.customize.conversion import OptionRecommendation
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.pml.pmlml import PMLMLizer
class PMLOutput(OutputFormatPlugin):
@ -60,6 +59,7 @@ def convert(self, oeb_book, output_path, input_plugin, opts, log):
pmlz.add_dir(tdir)
def write_images(self, manifest, image_hrefs, out_dir, opts):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
for item in manifest:
if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
if opts.full_image_depth:

View file

@ -12,8 +12,6 @@
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.pdb.ereader import image_name
from calibre.ebooks.pml import unipmlcode
@ -110,6 +108,9 @@ def pmlmlize_spine(self):
return output
def get_cover_page(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
output = u''
if 'cover' in self.oeb_book.guide:
output += '\\m="cover.png"\n'
@ -125,6 +126,9 @@ def get_cover_page(self):
return output
def get_text(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
text = [u'']
for item in self.oeb_book.spine:
self.log.debug('Converting %s to PML markup...' % item.href)
@ -180,7 +184,7 @@ def clean_text(self, text):
links = set(re.findall(r'(?<=\\q="#).+?(?=")', text))
for unused in anchors.difference(links):
text = text.replace('\\Q="%s"' % unused, '')
# Remove \Cn tags that are within \x and \Xn tags
text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text)
@ -214,6 +218,8 @@ def clean_text(self, text):
return text
def dump_text(self, elem, stylizer, page, tag_stack=[]):
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
return []

View file

@ -11,8 +11,6 @@
import re
from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.rb import unique_name
TAGS = [
@ -81,6 +79,8 @@ def mlize_spine(self):
return output
def get_cover_page(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
output = u''
if 'cover' in self.oeb_book.guide:
if self.name_map.get(self.oeb_book.guide['cover'].href, None):
@ -109,6 +109,9 @@ def get_toc(self):
return ''.join(toc)
def get_text(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
output = [u'']
for item in self.oeb_book.spine:
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
@ -137,6 +140,8 @@ def clean_text(self, text):
return text
def dump_text(self, elem, stylizer, page, tag_stack=[]):
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
return [u'']

View file

@ -18,7 +18,6 @@
from calibre.ebooks.rb.rbml import RBMLizer
from calibre.ebooks.rb import HEADER
from calibre.ebooks.rb import unique_name
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.constants import __appname__, __version__
TEXT_RECORD_SIZE = 4096
@ -111,6 +110,7 @@ def _text(self, oeb_book):
return (size, pages)
def _images(self, manifest):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
images = []
used_names = []

View file

@ -14,9 +14,6 @@
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
OEB_RASTER_IMAGES
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.filenames import ascii_text
from calibre.utils.magick.draw import save_cover_data_to, identify_data
@ -100,6 +97,8 @@ def extract_content(self, oeb_book, opts):
return self.mlize_spine()
def mlize_spine(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
output = self.header()
if 'titlepage' in self.oeb_book.guide:
href = self.oeb_book.guide['titlepage'].href
@ -154,6 +153,8 @@ def footer(self):
return ' }'
def insert_images(self, text):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
for item in self.oeb_book.manifest:
if item.media_type in OEB_RASTER_IMAGES:
src = os.path.basename(item.href)
@ -201,6 +202,8 @@ def clean_text(self, text):
return text
def dump_text(self, elem, stylizer, tag_stack=[]):
from calibre.ebooks.oeb.base import XHTML_NS, namespace, barename
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
p = elem.getparent()

View file

@ -7,7 +7,6 @@
import os, uuid
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.oeb.base import DirContainer
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.filenames import ascii_filename
@ -30,6 +29,7 @@ class SNBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.oeb.base import DirContainer
log.debug("Parsing SNB file...")
snbFile = SNBFile()
try:

View file

@ -13,8 +13,6 @@
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
def ProcessFileName(fileName):
# Flatten the path
@ -81,6 +79,8 @@ def merge_content(self, old_tree, oeb_book, item, subitems, opts):
body.append(entity)
def mlize(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
output = [ u'' ]
stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
@ -208,6 +208,7 @@ def cleanup_text(self, text):
return text
def dump_text(self, subitems, elem, stylizer, end='', pre=False, li = ''):
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:

View file

@ -11,7 +11,6 @@
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ebooks.txt.txtml import TXTMLizer
from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
from calibre.ptempfile import TemporaryDirectory, TemporaryFile
@ -103,12 +102,13 @@ def convert(self, oeb_book, output_path, input_plugin, opts, log):
class TXTZOutput(TXTOutput):
name = 'TXTZ Output'
author = 'John Schember'
file_type = 'txtz'
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.oeb.base import OEB_IMAGES
with TemporaryDirectory('_txtz_output') as tdir:
# TXT
with TemporaryFile('index.txt') as tf:
@ -123,10 +123,10 @@ def convert(self, oeb_book, output_path, input_plugin, opts, log):
os.makedirs(path)
with open(os.path.join(tdir, item.href), 'wb') as imgf:
imgf.write(item.data)
# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
txtz = ZipFile(output_path, 'w')
txtz.add_dir(tdir)

View file

@ -12,8 +12,6 @@
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
BLOCK_TAGS = [
'div',
@ -58,12 +56,14 @@ def extract_content(self, oeb_book, opts):
self.toc_titles = []
self.toc_ids = []
self.last_was_heading = False
self.create_flat_toc(self.oeb_book.toc)
return self.mlize_spine()
def mlize_spine(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
output = [u'']
output.append(self.get_toc())
for item in self.oeb_book.spine:
@ -139,7 +139,7 @@ def cleanup_text(self, text):
# when remove paragraph spacing is enabled.
text = re.sub('(?imu)^[ ]+', '', text)
text = re.sub('(?imu)[ ]+$', '', text)
# Remove empty space and newlines at the beginning of the document.
text = re.sub(r'(?u)^[ \n]+', '', text)
@ -185,6 +185,7 @@ def dump_text(self, elem, stylizer, page):
@stylizer: The style information attached to the element.
@page: OEB page used to determine absolute urls.
'''
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:

View file

@ -15,7 +15,6 @@
from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites
from calibre.ebooks.oeb.base import XHTML_NS
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.config import config_dir
from calibre.utils.date import format_date, isoformat, is_date_undefined, now as nowf
@ -4322,6 +4321,8 @@ def generateHTMLDescriptionHeader(self, book):
'''
Generate description header from template
'''
from calibre.ebooks.oeb.base import XHTML_NS
def generate_html():
args = dict(
author=author,