mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-08 21:11:59 +02:00
TtH.org sometimes places a div tag around hr tags; this caused the heuristics to generate faulty HTML.
Also, TtH adds an empty div at the end; when that div ends up inside the last paragraph, the ePub will fail.
This commit is contained in:
parent
f046605517
commit
3b04b6ad61
1 changed files with 6 additions and 0 deletions
|
|
@ -35,6 +35,9 @@ def replace_br_with_p(body):
|
|||
# last tags.
|
||||
body = u'<p>'+body[body.index('>')+1:body.rindex("<")]+u'</p>'
|
||||
|
||||
# Nuke div tags surrounding a HR tag.
|
||||
body = re.sub(r'<div[^>]+>\s*<hr[^>]+>\s*</div>', r'\n<hr />\n', body)
|
||||
|
||||
# So many people add formatting to their HR tags, and ePub does not allow that; we are supposed to use CSS.
|
||||
# This nukes the hr tag attributes.
|
||||
body = re.sub(r'\s*<hr[^>]+>\s*', r'\n<hr />\n', body)
|
||||
|
|
@ -120,6 +123,9 @@ def replace_br_with_p(body):
|
|||
# Superfluous cleaning: remove whitespace preceding closing p tags. This does not affect formatting.
|
||||
body = re.sub(r'\s*</p>', r'</p>', body)
|
||||
|
||||
# Remove empty tag pairs
|
||||
body = re.sub(r'\s*<(\S+)[^>]*>\s*</\1>', r'', body)
|
||||
|
||||
# re-wrap in div tag.
|
||||
body = u'<div>\n' + body + u'\n</div>'
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue