mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-09 01:52:31 +02:00
Conversion: Fix heuristics processing incorrectly removing some <br> tags. Fixes #1205637 [break line/italic - wrong conversion](https://bugs.launchpad.net/calibre/+bug/1205637)
This commit is contained in:
parent
6ae397126b
commit
ade57f1447
1 changed file with 6 additions and 6 deletions
|
|
@@ -436,12 +436,12 @@ def cleanup_markup(self, html):
|
|||
# Re-open self closing paragraph tags
|
||||
html = re.sub('<p[^>/]*/>', '<p> </p>', html)
|
||||
# Get rid of empty span, bold, font, em, & italics tags
|
||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||
html = re.sub(
|
||||
r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||
html = re.sub(
|
||||
r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
||||
fmt_tags = 'font|[ibu]|em|strong'
|
||||
open_fmt_pat, close_fmt_pat = r'<(?:{})(?:\s[^>]*)?>'.format(fmt_tags), '</(?:{})>'.format(fmt_tags)
|
||||
for i in range(2):
|
||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||
html = re.sub(
|
||||
r"\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}".format(open=open_fmt_pat, close=close_fmt_pat) , " ", html)
|
||||
# delete surrounding divs from empty paragraphs
|
||||
html = re.sub('<div[^>]*>\s*<p[^>]*>\s*</p>\s*</div>', '<p> </p>', html)
|
||||
# Empty heading tags
|
||||
|
|
|
|||
Loading…
Reference in a new issue