diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index 612af196..e2e87b8e 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -372,7 +372,7 @@ class BaseSiteAdapter(Configurable):
if self.getConfig("replace_br_with_p"):
# Apply heuristic processing to replace <br> paragraph
# breaks with <p>
tags. - retval = replace_br_with_p(self,retval) + retval = replace_br_with_p(retval) if self.getConfig('replace_hr'): # replacing a self-closing tag with a container tag in the diff --git a/fanficdownloader/htmlheuristics.py b/fanficdownloader/htmlheuristics.py index fd9fe737..0d89c4e0 100644 --- a/fanficdownloader/htmlheuristics.py +++ b/fanficdownloader/htmlheuristics.py @@ -23,6 +23,17 @@ from . import exceptions as exceptions def replace_br_with_p(body): + logger.debug('Body Length.: %d' % len(body)) + logger.debug('Body First >: %d' % body.find('>')) + logger.debug('Body Last <.: %d' % body.rfind("<")) + + # Latin-1 (and Unicode as well) character xA0 is a non-breaking space, code point 160. + # However, Python Regex does not recognize it as whitespace, so we'll be changing it to a regular space. + body = body.replace(u'\xa0', u' ') + + if body.find('>') == -1 or body.rfind("<") == -1: + return body + # change surrounding div to a p and remove attrs Top surrounding # tag in all cases now should be div, to just strip the first and # last tags.