mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-03 00:43:03 +02:00
EPUB Output: Be less aggressive when stripping invalid HTML constructs inserted by MS Word. Fixes regression in the Time recipe.
This commit is contained in:
parent
e6473047fe
commit
70b99872c6
1 changed file with 2 additions and 1 deletion
|
|
@@ -331,7 +331,8 @@ class PreProcessor(object):
|
|||
# Convert all entities, since lxml doesn't handle them well
|
||||
(re.compile(r'&(\S+?);'), convert_entities),
|
||||
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
||||
(re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
|
||||
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
]
|
||||
|
||||
# Fix pdftohtml markup
|
||||
|
|
|
|||
Loading…
Reference in a new issue