Added LIT input plugin. Ported splitting code now works (at least on the handful of files I've tested)

This commit is contained in:
Kovid Goyal 2009-04-18 01:01:18 -07:00
parent b9f80aa229
commit 3e29dfbe56
13 changed files with 209 additions and 137 deletions

View file

@ -263,14 +263,14 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.mobi import set_metadata
set_metadata(stream, mi)
class PDFMetadataWriter(MetadataWriterPlugin):
    '''
    Metadata writer plugin registered for the ``pdf`` file type.
    '''

    name = 'Set PDF metadata'
    file_types = set(['pdf'])
    description = _('Set metadata in %s files') % 'PDF'
    author = 'John Schember'

    def set_metadata(self, stream, mi, type):
        '''
        Hand `stream` and `mi` to the PDF metadata module's
        ``set_metadata``. The `type` argument is accepted but not used.
        '''
        # Imported inside the method (as the original does) so the PDF
        # metadata module is only loaded when this plugin is invoked.
        from calibre.ebooks.metadata.pdf import \
                set_metadata as write_pdf_metadata
        write_pdf_metadata(stream, mi)
@ -280,6 +280,7 @@ def set_metadata(self, stream, mi, type):
from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput
@ -287,7 +288,7 @@ def set_metadata(self, stream, mi, type):
from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput]
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View file

@ -41,6 +41,11 @@ def __hash__(self):
def __eq__(self, other):
return hash(self) == hash(other)
def clone(self):
    '''
    Return a new :class:`ConversionOption` built from this option's
    name, help text, long/short switches and choices. The attribute
    values are passed to the constructor unchanged (not copied).
    '''
    fields = dict(name=self.name, help=self.help,
                  long_switch=self.long_switch,
                  short_switch=self.short_switch,
                  choices=self.choices)
    return ConversionOption(**fields)
class OptionRecommendation(object):
LOW = 1
MED = 2
@ -59,6 +64,10 @@ def __init__(self, recommended_value=None, level=LOW, **kwargs):
self.validate_parameters()
def clone(self):
    '''
    Return a new :class:`OptionRecommendation` with the same recommended
    value and level, wrapping a clone of the underlying option so the
    two recommendations do not share one option object.
    '''
    option_copy = self.option.clone()
    return OptionRecommendation(
        recommended_value=self.recommended_value,
        level=self.level,
        option=option_copy)
def validate_parameters(self):
if self.option.choices and self.recommended_value not in \
self.option.choices:
@ -170,8 +179,14 @@ def __call__(self, stream, options, file_ext, log,
options.debug_input = os.path.abspath(options.debug_input)
if not os.path.exists(options.debug_input):
os.makedirs(options.debug_input)
shutil.rmtree(options.debug_input)
shutil.copytree(output_dir, options.debug_input)
if isinstance(ret, basestring):
shutil.rmtree(options.debug_input)
shutil.copytree(output_dir, options.debug_input)
else:
from calibre.ebooks.oeb.writer import OEBWriter
w = OEBWriter(pretty_print=options.pretty_print)
w(ret, options.debug_input)
log.info('Input debug saved to:', options.debug_input)
return ret

View file

@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log):
raise SystemExit(1)
output = args[2]
if output.startswith('.'):
if output.startswith('.') and output != '.':
output = os.path.splitext(os.path.basename(input))[0]+output
output = os.path.abspath(output)
@ -171,7 +171,8 @@ def main(args=sys.argv):
plumber.run()
log(_('Output saved to'), ' ', plumber.output)
if plumber.opts.debug_input is None:
log(_('Output saved to'), ' ', plumber.output)
return 0

View file

@ -32,8 +32,8 @@ def __init__(self, input, output, log):
:param input: Path to input file.
:param output: Path to output file/directory
'''
self.input = input
self.output = output
self.input = os.path.abspath(input)
self.output = os.path.abspath(output)
self.log = log
# Initialize the conversion options that are independent of input and
@ -188,15 +188,15 @@ def __init__(self, input, output, log):
]
input_fmt = os.path.splitext(input)[1]
input_fmt = os.path.splitext(self.input)[1]
if not input_fmt:
raise ValueError('Input file must have an extension')
input_fmt = input_fmt[1:].lower()
if os.path.exists(output) and os.path.isdir(output):
if os.path.exists(self.output) and os.path.isdir(self.output):
output_fmt = 'oeb'
else:
output_fmt = os.path.splitext(output)[1]
output_fmt = os.path.splitext(self.output)[1]
if not output_fmt:
output_fmt = '.oeb'
output_fmt = output_fmt[1:].lower()
@ -323,6 +323,9 @@ def run(self):
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
self.input_fmt, self.log,
accelerators, tdir)
if self.opts.debug_input is not None:
self.log('Debug input called, aborting the rest of the pipeline.')
return
if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
@ -365,18 +368,20 @@ def run(self):
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log)
def create_oebbook(log, opfpath, opts):
def create_oebbook(log, path_or_stream, opts, reader=None):
'''
Create an OEBBook from an OPF file.
Create an OEBBook.
'''
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks.oeb.base import OEBBook
html_preprocessor = HTMLPreProcessor()
reader = OEBReader()
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
pretty_print=opts.pretty_print)
# Read OEB Book into OEBBook
log.info('Parsing all content...')
reader(oeb, opfpath)
log('Parsing all content...')
if reader is None:
from calibre.ebooks.oeb.reader import OEBReader
reader = OEBReader
reader()(oeb, path_or_stream)
return oeb

View file

@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin):
)
),
# Off by default. When set, convert() returns the path of the generated
# OPF instead of building a packaged book from it.
OptionRecommendation(name='dont_package',
    recommended_value=False, level=OptionRecommendation.LOW,
    help=_('Normally this input plugin re-arranges all the input '
        'files into a standard folder hierarchy. Only use this option '
        'if you know what you are doing as it can result in various '
        'nasty side effects in the rest of the conversion pipeline.'
    )
),
])
def convert(self, stream, opts, file_ext, log,
@ -276,6 +284,9 @@ def convert(self, stream, opts, file_ext, log,
mi.render(open('metadata.opf', 'wb'))
opfpath = os.path.abspath('metadata.opf')
if opts.dont_package:
return opfpath
from calibre.ebooks.conversion.plumber import create_oebbook
oeb = create_oebbook(log, opfpath, opts)

View file

@ -0,0 +1,24 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import InputFormatPlugin
class LITInput(InputFormatPlugin):
    '''
    Input plugin registered for the ``lit`` file type. Conversion is
    delegated to ``create_oebbook`` with ``LitReader`` as the reader.
    '''

    name        = 'LIT Input'
    author      = 'Marshall T. Vandegrift'
    description = 'Convert LIT files to HTML'
    file_types  = set(['lit'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Return the result of ``create_oebbook`` for `stream`, using
        `log` and `options`. `file_ext` and `accelerators` are accepted
        but not used here.
        '''
        # Imports stay local to the call, in the original order.
        from calibre.ebooks.lit.reader import LitReader
        from calibre.ebooks.conversion.plumber import create_oebbook
        book = create_oebbook(log, stream, options, reader=LitReader)
        return book

View file

@ -7,13 +7,12 @@
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
import sys, struct, os
import struct, os
import functools
import re
from urlparse import urldefrag
from cStringIO import StringIO
from urllib import unquote as urlunquote
from lxml import etree
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1
@ -29,12 +28,12 @@
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
"""
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
"""
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC
<!DOCTYPE html PUBLIC
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
"""
@ -73,7 +72,7 @@ def encint(bytes, remaining):
val <<= 7
val |= (b & 0x7f)
if b & 0x80 == 0: break
return val, bytes[pos:], remaining
return val, bytes[pos:], remaining
def msguid(bytes):
values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
@ -123,7 +122,7 @@ class UnBinary(object):
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
EMPTY_ATOMS = ({},{})
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map
@ -143,7 +142,7 @@ def escape_reserved(self):
raw = self.CLOSE_ANGLE_RE.sub(r'&gt;', raw)
raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw)
self.raw = raw
def item_path(self, internal_id):
try:
target = self.manifest[internal_id].path
@ -159,7 +158,7 @@ def item_path(self, internal_id):
index += 1
relpath = (['..'] * (len(base) - index)) + target[index:]
return '/'.join(relpath)
def __unicode__(self):
return self.raw.decode('utf-8')
@ -172,11 +171,11 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
in_censorship = is_goingdown = False
state = 'text'
flags = 0
while index < len(bin):
c, index = read_utf8_char(bin, index)
oc = ord(c)
if state == 'text':
if oc == 0:
state = 'get flags'
@ -188,14 +187,14 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
elif c == '<':
c = '<<'
buf.write(encode(c))
elif state == 'get flags':
if oc == 0:
state = 'text'
continue
flags = oc
state = 'get tag'
elif state == 'get tag':
state = 'text' if oc == 0 else 'get attr'
if flags & FLAG_OPENING:
@ -226,7 +225,7 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
if depth == 0:
raise LitError('Extra closing tag')
return index
elif state == 'get attr':
in_censorship = False
if oc == 0:
@ -265,7 +264,7 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
state = 'get href length'
else:
state = 'get value length'
elif state == 'get value length':
if not in_censorship:
buf.write('"')
@ -281,7 +280,7 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
continue
if count < 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count)
elif state == 'get value':
if count == 0xfffe:
if not in_censorship:
@ -301,7 +300,7 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
buf.write('"')
in_censorship = False
state = 'get attr'
elif state == 'get custom length':
count = oc - 1
if count <= 0 or count > len(bin)-index:
@ -309,21 +308,21 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
dynamic_tag += 1
state = 'get custom'
tag_name = ''
elif state == 'get custom':
tag_name += c
count -= 1
if count == 0:
buf.write(encode(tag_name))
state = 'get attr'
elif state == 'get attr length':
count = oc - 1
if count <= 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count)
buf.write(' ')
state = 'get custom attr'
elif state == 'get custom attr':
buf.write(encode(c))
count -= 1
@ -337,7 +336,7 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
raise LitError('Invalid character count %d' % count)
href = ''
state = 'get href'
elif state == 'get href':
href += c
count -= 1
@ -350,7 +349,7 @@ def binary_to_text(self, bin, buf, index=0, depth=0):
buf.write(encode(u'"%s"' % path))
state = 'get attr'
return index
class DirectoryEntry(object):
def __init__(self, name, section, offset, size):
@ -358,11 +357,11 @@ def __init__(self, name, section, offset, size):
self.section = section
self.offset = offset
self.size = size
def __repr__(self):
return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
% (repr(self.name), self.section, self.offset, self.size)
def __str__(self):
return repr(self)
@ -382,12 +381,12 @@ def __init__(self, original, internal, mime_type, offset, root, state):
path = os.path.normpath(path).replace('\\', '/')
while path.startswith('../'): path = path[3:]
self.path = path
def __eq__(self, other):
if hasattr(other, 'internal'):
return self.internal == other.internal
return self.internal == other
def __repr__(self):
return "ManifestItem(internal=%r, path=%r, mime_type=%r, " \
"offset=%d, root=%r, state=%r)" \
@ -404,7 +403,7 @@ def wrapper(self, *args, **kwargs):
self.stream.seek(opos)
functools.update_wrapper(wrapper, function)
return wrapper
class LitFile(object):
PIECE_SIZE = 16
@ -438,14 +437,14 @@ def fget(self):
return self.stream.read(8)
return property(fget=fget)
magic = magic()
def version():
def fget(self):
self.stream.seek(8)
return u32(self.stream.read(4))
return property(fget=fget)
version = version()
def hdr_len():
@preserve
def fget(self):
@ -453,7 +452,7 @@ def fget(self):
return int32(self.stream.read(4))
return property(fget=fget)
hdr_len = hdr_len()
def num_pieces():
@preserve
def fget(self):
@ -461,7 +460,7 @@ def fget(self):
return int32(self.stream.read(4))
return property(fget=fget)
num_pieces = num_pieces()
def sec_hdr_len():
@preserve
def fget(self):
@ -469,7 +468,7 @@ def fget(self):
return int32(self.stream.read(4))
return property(fget=fget)
sec_hdr_len = sec_hdr_len()
def guid():
@preserve
def fget(self):
@ -477,7 +476,7 @@ def fget(self):
return self.stream.read(16)
return property(fget=fget)
guid = guid()
def header():
@preserve
def fget(self):
@ -488,7 +487,7 @@ def fget(self):
return self.stream.read(size)
return property(fget=fget)
header = header()
@preserve
def __len__(self):
self.stream.seek(0, 2)
@ -501,7 +500,7 @@ def read_raw(self, offset, size):
def read_content(self, offset, size):
return self.read_raw(self.content_offset + offset, size)
def read_secondary_header(self):
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
bytes = self.read_raw(offset, self.sec_hdr_len)
@ -526,12 +525,12 @@ def read_secondary_header(self):
if u32(bytes[offset+4+16:]):
raise LitError('This file has a 64bit content offset')
self.content_offset = u32(bytes[offset+16:])
self.timestamp = u32(bytes[offset+24:])
self.timestamp = u32(bytes[offset+24:])
self.language_id = u32(bytes[offset+28:])
offset += 48
if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset')
def read_header_pieces(self):
src = self.header[self.hdr_len:]
for i in xrange(self.num_pieces):
@ -556,7 +555,7 @@ def read_header_pieces(self):
self.piece3_guid = piece
elif i == 4:
self.piece4_guid = piece
def read_directory(self, piece):
if not piece.startswith('IFCM'):
raise LitError('Header piece #1 is not main directory.')
@ -760,9 +759,9 @@ def decompress(self, content, control, reset_table):
raise LitError("Reset table is too short")
if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
raise LitError("Reset table has 64bit value for UCLENGTH")
result = []
window_size = 14
u = u32(control[CONTROL_WINDOW_SIZE:])
while u > 0:
@ -847,13 +846,13 @@ class LitContainer(object):
def __init__(self, filename_or_stream):
self._litfile = LitFile(filename_or_stream)
def namelist(self):
return self._litfile.paths.keys()
def exists(self, name):
return urlunquote(name) in self._litfile.paths
def read(self, name):
entry = self._litfile.paths[urlunquote(name)] if name else None
if entry is None:
@ -869,7 +868,7 @@ def read(self, name):
internal = '/'.join(('/data', entry.internal))
content = self._litfile.get_file(internal)
return content
def _read_meta(self):
path = 'content.opf'
raw = self._litfile.get_file('/meta')

View file

@ -272,11 +272,7 @@ def XPath(expr):
def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP)
def _prepare_xml_for_serialization(root):
pass
def xml2str(root, pretty_print=False, strip_comments=False):
_prepare_xml_for_serialization(root)
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
pretty_print=pretty_print)
@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False):
def xml2unicode(root, pretty_print=False):
_prepare_xml_for_serialization(root)
return etree.tostring(root, pretty_print=pretty_print)
ASCII_CHARS = set(chr(x) for x in xrange(128))
@ -321,6 +316,25 @@ def urlnormalize(href):
parts = (urlquote(part) for part in parts)
return urlunparse(parts)
class DummyHandler(logging.Handler):
    '''
    Logging handler that forwards formatted records to whatever object
    is assigned to :attr:`log`. While :attr:`log` is None, records are
    dropped.
    '''

    def __init__(self):
        logging.Handler.__init__(self, logging.WARNING)
        self.setFormatter(logging.Formatter('%(message)s'))
        # Target log object; set by whoever wants to receive the messages.
        self.log = None

    def emit(self, record):
        '''
        Send `record` to ``self.log.error`` when its level is ERROR or
        higher, otherwise to ``self.log.warn``.
        '''
        sink = self.log
        if sink is None:
            return
        text = self.format(record)
        if record.levelno >= logging.ERROR:
            report = sink.error
        else:
            report = sink.warn
        report(text)
# Wire the 'calibre.css' logger to a DummyHandler. The handler drops
# records until its ``log`` attribute is set to a target log object.
_css_log_handler = DummyHandler()
_css_logger = logging.getLogger('calibre.css')
_css_logger.setLevel(logging.WARNING)
_css_logger.addHandler(_css_log_handler)
class OEBError(Exception):
"""Generic OEB-processing error."""
@ -778,7 +792,8 @@ def _parse_css(self, data):
data = self.oeb.css_preprocessor(data)
data = XHTML_CSS_NAMESPACE + data
parser = CSSParser(loglevel=logging.WARNING,
fetcher=self._fetch_css)
fetcher=self._fetch_css,
log=_css_logger)
data = parser.parseString(data, href=self.href)
data.namespaces['h'] = XHTML_NS
return data
@ -1435,7 +1450,7 @@ def __init__(self, logger,
:attr:`pages`: List of "pages," such as indexed to a print edition of
the same text.
"""
_css_log_handler.log = logger
self.encoding = encoding
self.html_preprocessor = html_preprocessor
self.css_preprocessor = css_preprocessor
@ -1450,6 +1465,7 @@ def __init__(self, logger,
self.guide = Guide(self)
self.toc = TOC()
self.pages = PageList()
self.auto_generated_toc = True
@classmethod
def generate(cls, opts):

View file

@ -13,13 +13,12 @@
from calibre.customize.ui import available_input_formats
from calibre.ebooks.epub.from_html import TITLEPAGE
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import safe_replace, ZipFile
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import CurrentDir
def character_count(html):
'''
@ -57,31 +56,21 @@ class FakeOpts(object):
max_levels = 5
input_encoding = None
def html2opf(path, tdir, log):
from calibre.ebooks.html.input import get_filelist
from calibre.ebooks.metadata.meta import get_metadata
with CurrentDir(tdir):
fl = get_filelist(path, tdir, FakeOpts(), log)
mi = get_metadata(open(path, 'rb'), 'html')
mi = OPFCreator(os.getcwdu(), mi)
mi.guide = None
entries = [(f.path, 'application/xhtml+xml') for f in fl]
mi.create_manifest(entries)
mi.create_spine([f.path for f in fl])
mi.render(open('metadata.opf', 'wb'))
opfpath = os.path.abspath('metadata.opf')
return opfpath
def opf2opf(path, tdir, opts):
return path
def is_supported(path):
    '''
    Report whether the extension of `path` is one of the formats returned
    by available_input_formats().

    The extension is lower-cased and stripped of dots, and any
    ``htm``/``html``/``xhtm``/``xhtml`` run in it is rewritten to
    ``html`` before the lookup.
    '''
    extension = os.path.splitext(path)[1]
    extension = extension.replace('.', '').lower()
    extension = re.sub(r'(x{0,1})htm(l{0,1})', 'html', extension)
    return extension in available_input_formats()
def write_oebbook(oeb, path):
    '''
    Write `oeb` to `path` with a default-constructed OEBWriter, then
    return the first entry yielded by walk(path) whose name ends in
    ``.opf``. Returns None when no such entry is found.
    '''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    OEBWriter()(oeb, path)
    # Lazy search: stops walking at the first OPF, as the original loop did.
    return next((entry for entry in walk(path) if entry.endswith('.opf')),
                None)
class EbookIterator(object):
CHARACTERS_PER_PAGE = 1000
@ -131,17 +120,16 @@ def find_embedded_fonts(self):
def __enter__(self):
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
if self.ebook_ext == 'opf':
self.pathtoopf = self.pathtoebook
elif self.ebook_ext == 'html':
self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log)
else:
from calibre.ebooks.conversion.plumber import Plumber
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
from calibre.ebooks.conversion.plumber import Plumber
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
if hasattr(plumber.opts, 'dont_package'):
plumber.opts.dont_package = True
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir)
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))

View file

@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin):
author = 'Kovid Goyal'
file_type = 'oeb'
def convert(self, oeb_book, output_path, input_plugin, opts, log):
self.log, self.opts = log, opts
if not os.path.exists(output_path):

View file

@ -349,6 +349,7 @@ def _toc_from_navpoint(self, item, toc, navpoint):
def _toc_from_ncx(self, item):
if item is None:
return False
self.log.debug('Reading TOC from NCX...')
ncx = item.data
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
title = COLLAPSE_RE.sub(' ', title.strip())
@ -364,6 +365,7 @@ def _toc_from_tour(self, opf):
result = xpath(opf, 'o2:tours/o2:tour')
if not result:
return False
self.log.debug('Reading TOC from tour...')
tour = result[0]
toc = self.oeb.toc
toc.title = tour.get('title')
@ -384,6 +386,7 @@ def _toc_from_tour(self, opf):
def _toc_from_html(self, opf):
if 'toc' not in self.oeb.guide:
return False
self.log.debug('Reading TOC from HTML...')
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
item = self.oeb.manifest.hrefs[itempath]
html = item.data
@ -414,6 +417,7 @@ def _toc_from_html(self, opf):
return True
def _toc_from_spine(self, opf):
self.log.warn('Generating default TOC from spine...')
toc = self.oeb.toc
titles = []
headers = []
@ -441,11 +445,14 @@ def _toc_from_spine(self, opf):
return True
def _toc_from_opf(self, opf, item):
self.oeb.auto_generated_toc = False
if self._toc_from_ncx(item): return
if self._toc_from_tour(opf): return
self.logger.warn('No metadata table of contents found')
# Prefer HTML to tour based TOC, since several LIT files
# have good HTML TOCs but bad tour based TOCs
if self._toc_from_html(opf): return
if self._toc_from_tour(opf): return
self._toc_from_spine(opf)
self.oeb.auto_generated_toc = True
def _pages_from_ncx(self, opf, item):
if item is None:

View file

@ -51,8 +51,8 @@ def __call__(self, oeb, context):
self.log = oeb.log
self.map = {}
self.page_break_selectors = None
for item in self.oeb.manifest.items:
if etree.iselement(item.data):
for item in list(self.oeb.manifest.items):
if item.spine_position is not None and etree.iselement(item.data):
self.split_item(item)
self.fix_links()
@ -74,31 +74,34 @@ def find_page_breaks(self, item):
self.page_break_selectors = set([])
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
OEB_STYLES]
page_break_selectors = set([])
for rule in rules(stylesheets):
before = getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower()
after = getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower()
try:
if before and before != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText),
True))
except:
pass
try:
if after and after != 'avoid':
page_break_selectors.add((CSSSelector(rule.selectorText),
False))
except:
pass
for rule in rules(stylesheets):
before = getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower()
after = getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower()
try:
if before and before != 'avoid':
self.page_break_selectors.add((CSSSelector(rule.selectorText),
True))
except:
pass
try:
if after and after != 'avoid':
self.page_break_selectors.add((CSSSelector(rule.selectorText),
False))
except:
pass
page_breaks = set([])
for selector, before in page_break_selectors:
for elem in selector(item.data):
if before:
elem.set('pb_before', '1')
page_breaks.add(elem)
for selector, before in self.page_break_selectors:
body = item.data.xpath('//h:body', namespaces=NAMESPACES)
if not body:
continue
for elem in selector(body[0]):
if elem not in body:
if before:
elem.set('pb_before', '1')
page_breaks.add(elem)
for i, elem in enumerate(item.data.iter()):
elem.set('pb_order', str(i))
@ -136,8 +139,10 @@ def rewrite_links(self, url):
if href in self.map:
anchor_map = self.map[href]
nhref = anchor_map[frag if frag else None]
nhref = self.current_item.relhref(nhref)
if frag:
nhref = '#'.join(href, frag)
nhref = '#'.join((nhref, frag))
return nhref
return url
@ -153,7 +158,7 @@ def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb):
self.page_breaks = page_breaks
self.page_break_ids = page_break_ids
self.max_flow_size = max_flow_size
self.base = item.abshref(item.href)
self.base = item.href
base, ext = os.path.splitext(self.base)
self.base = base.replace('%', '%%')+'_split_%d'+ext
@ -192,9 +197,9 @@ def split_on_page_breaks(self, orig_tree):
self.trees = []
tree = orig_tree
for pattern, before in ordered_ids:
self.log.debug('\t\tSplitting on page-break')
elem = pattern(tree)
if elem:
self.log.debug('\t\tSplitting on page-break')
before, after = self.do_split(tree, elem[0], before)
self.trees.append(before)
tree = after
@ -414,13 +419,14 @@ def commit(self):
elem.attrib.pop(SPLIT_ATTR, None)
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
spine_pos = self.item.spine_pos
for current, tree in zip(map(reversed, (self.files, self.trees))):
spine_pos = self.item.spine_position
for current, tree in zip(*map(reversed, (self.files, self.trees))):
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
href = a.get('href').strip()
if href.startswith('#'):
anchor = href[1:]
file = self.anchor_map[anchor]
file = self.item.relhref(file)
if file != current:
a.set('href', file+href)
@ -430,12 +436,12 @@ def commit(self):
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
if self.oeb.guide:
for ref in self.oeb.guide:
for ref in self.oeb.guide.values():
href, frag = urldefrag(ref.href)
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
nhref = '#'.join(nhref, frag)
nhref = '#'.join((nhref, frag))
ref.href = nhref
def fix_toc_entry(toc):
@ -444,7 +450,7 @@ def fix_toc_entry(toc):
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
nhref = '#'.join(nhref, frag)
nhref = '#'.join((nhref, frag))
toc.href = nhref
for x in toc:
fix_toc_entry(x)

View file

@ -49,7 +49,7 @@ def generate(cls, opts):
def __call__(self, oeb, path):
"""
Read the book in the :class:`OEBBook` object :param:`oeb` to a file
Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
at :param:`path`.
"""
version = int(self.version[0])