mirror of
git://github.com/kovidgoyal/calibre.git
synced 2025-12-23 04:25:28 +01:00
HTML Input: Handle HTML fragments more gracefully. Fixes #4854 (Imported HTML fragments get converted to ZIPs containing no HTML)
This commit is contained in:
parent
a0671a64d4
commit
b33bfe2e43
2 changed files with 5 additions and 3 deletions
|
|
@@ -111,7 +111,7 @@ def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
|
|||
raise IOError(msg)
|
||||
raise IgnoreFile(msg, err.errno)
|
||||
|
||||
self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
|
||||
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
|
||||
if not self.is_binary:
|
||||
if encoding is None:
|
||||
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
||||
|
|
|
|||
|
|
@@ -851,8 +851,10 @@ def first_pass(data):
|
|||
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
|
||||
nroot = etree.fromstring('<html><body/></html>')
|
||||
parent = nroot[0]
|
||||
for child in list(data):
|
||||
child.getparent().remove(child)
|
||||
for child in list(data.iter()):
|
||||
oparent = child.getparent()
|
||||
if oparent is not None:
|
||||
oparent.remove(child)
|
||||
parent.append(child)
|
||||
data = nroot
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue