diff --git a/fanficdownloader/htmlheuristics.py b/fanficdownloader/htmlheuristics.py index 44c68fa2..14dea96a 100644 --- a/fanficdownloader/htmlheuristics.py +++ b/fanficdownloader/htmlheuristics.py @@ -35,6 +35,9 @@ def replace_br_with_p(body): # last tags. body = u'

'+body[body.index('>')+1:body.rindex("<")]+u'

' + # Nuke div tags surrounding a HR tag. + body = re.sub(r']+>\s*]+>\s*', r'\n
\n', body) + # So many people add formatting to their HR tags, and ePub does not allow those, we are supposed to use css. # This nukes the hr tag attributes. body = re.sub(r'\s*]+>\s*', r'\n
\n', body) @@ -120,6 +123,9 @@ def replace_br_with_p(body): # superfluous cleaning, remove whitespace preceding closing p tags. This does not affect formatting. body = re.sub(r'\s*

', r'

', body) + # Remove empty tag pairs + body = re.sub(r'\s*<(\S+)[^>]*>\s*', r'', body) + # re-wrap in div tag. body = u'
\n' + body + u'\n
'