From e8e418062196abdd056e4fe23bb2bbef76478b31 Mon Sep 17 00:00:00 2001
From: asbjorn grandt '+body[body.index('>')+1:body.rindex("<")]+u' ' + body + u' \s*
).
+ body = re.sub(r'\s*
]*>\s*', r'
', body)
+
+ # Find all existing blocks with p, pre and blockquote tags; we need to leave those alone.
+ blocksRegex = re.compile(r'(\s*
\s*)*\s*<(pre|p|blockquote)([^>]*)>(.+?)\2>\s*(\s*
\s*)*', re.DOTALL)
+ body = blocksRegex.sub(r'\n<\2\3>\4\2>\n', body)
+
+ blocks = blocksRegex.finditer(body)
+ # For our replacements to work, we need to work backwards, so we reverse the iterator.
+ blocksList = []
+ for match in blocks:
+ blocksList.insert(0, match)
+
+ for match in blocksList:
+ group4 = match.group(4).replace(u'
', u'{br /}')
+ body = body[:match.start(4)] + group4 + body[match.end(4):]
+
+ # Change surrounding div to a p and remove attrs. Top surrounding
+ # tag in all cases now should be div, so just strip the first and
+ # last tags.
+ body = u'
]+>\s*
\n', body)
@@ -46,9 +68,6 @@ def replace_br_with_p(body):
# This nukes the hr tag attributes.
body = re.sub(r'\s*
]+>\s*', r'\n
\n', body)
- # Need to look at BeautifulSoup to see if it'll even return breaks that aren't properly formatted (
).
- body = re.sub(r'\s*
]*>\s*', r'
', body)
-
# Remove leading and trailing breaks from HR tags
body = re.sub(r'\s*(
)*\s*
\s*(
)*\s*', r'\n
\n', body)
# Nuking breaks leading paragraps that may be in the body. They are eventually treated as
\s*(
\n', body)
+
# Clean up hr tags, and add inverted p tag pairs
body = re.sub(r'\s*
\s*', r'
', body)
@@ -180,6 +201,8 @@ def replace_br_with_p(body):
# Remove empty tag pairs
body = re.sub(r'\s*<(\S+)[^>]*>\s*\1>', r'', body)
+ body = body.replace(u'{br /}', u'
')
+
# re-wrap in div tag.
body = u'