mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-04-30 11:13:02 +02:00
pdftohtml preprocess rules work
This commit is contained in:
parent
aafc6d9764
commit
575b021f48
1 changed file with 1 addition and 1 deletion
|
|
@@ -73,7 +73,7 @@ class HTMLPreProcessor(object):
|
|||
(re.compile(r'<br.*?>'), lambda match : '<p>'),
|
||||
|
||||
# Unwrap lines
|
||||
(re.compile(r'(?<=\w)\s*</i>\s*<p.*?>\s*<i>\s*(?=\w)'), lambda match: ' '),
|
||||
(re.compile(r'(?<=\w)\s*</(i|b|u)>\s*<p.*?>\s*<(i|b|u)>\s*(?=\w)'), lambda match: ' '),
|
||||
(re.compile(r'(?<=\w)\s*<p.*?>\s*(?=\w)', re.UNICODE), lambda match: ' '),
|
||||
# Clean up spaces
|
||||
(re.compile(u'(?<=\.|,|:|;|\?|!|”|"|\')[\s^ ]*(?=<)'), lambda match: ' '),
|
||||
|
|
|
|||
Loading…
Reference in a new issue