mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-28 11:45:19 +01:00
Add markers and check to prevent replace_br_with_p running more than once on the same text.
This commit is contained in:
parent
c243b1db3e
commit
b3a32ae240
1 changed file with 13 additions and 11 deletions
|
|
@ -36,7 +36,13 @@ def replace_br_with_p(body):
|
|||
logger.debug("replace_br_with_p time:%s"%(datetime.now() - start))
|
||||
return retval
|
||||
|
||||
was_run_marker=u'FFF_replace_br_with_p_has_been_run'
|
||||
def _replace_br_with_p(body):
|
||||
|
||||
if was_run_marker in body:
|
||||
logger.debug("replace_br_with_p previously applied, skipping.")
|
||||
return body
|
||||
|
||||
# Ascii character (and Unicode as well) xA0 is a non-breaking space, ascii code 160.
|
||||
# However, Python Regex does not recognize it as a whitespace, so we'll be changing it to a regular space.
|
||||
# .strip() so "\n<div>" at beginning is also recognized.
|
||||
|
|
@ -256,23 +262,19 @@ def _replace_br_with_p(body):
|
|||
body = re.sub(r'XAMP;(.+?);', r'&\1;', body)
|
||||
body = body.strip()
|
||||
|
||||
## strip off extra <div> nestings that have built up over time.
|
||||
# b='<div>'
|
||||
# e='</div>'
|
||||
# while body.startswith(b) and body.endswith(e):
|
||||
# body = body[len(b):-len(e)].strip()
|
||||
|
||||
# re-wrap in div tag.
|
||||
body = u'<div>\n' + body + u'</div>\n'
|
||||
|
||||
# return body
|
||||
return tag_sanitizer(body)
|
||||
body = u'<div id="' +was_run_marker+ u'">\n' + body + u'</div>\n'
|
||||
# return body after tag_sanitizer with 'replace_br_with_p done' marker.
|
||||
## marker included twice because the comment & id could each be
|
||||
## removed by different 'clean ups'. I hope it's less likely both
|
||||
## will be.
|
||||
return u'<!-- ' +was_run_marker+ u' -->\n' + tag_sanitizer(body)
|
||||
|
||||
def is_valid_block(block):
    """Tell whether the text form of *block* opens with a regular tag.

    The block is converted to text once; the result is True only when
    that text begins with '<' and does not begin with '<!' (which would
    be a comment or declaration rather than an element).
    """
    text = unicode(block)
    if not text.startswith('<'):
        return False
    return not text.startswith('<!')
|
||||
|
||||
def soup_up_div(body):
|
||||
blockTags = ['address', 'blockquote', 'del', 'div', 'dl', 'fieldset', 'form', 'ins', 'noscript', 'ol', 'p', 'pre', 'table', 'ul']
|
||||
blockTags = ['address', 'aside', 'blockquote', 'del', 'div', 'dl', 'fieldset', 'form', 'ins', 'noscript', 'ol', 'p', 'pre', 'table', 'ul']
|
||||
recurseTags = ['blockquote', 'div', 'noscript']
|
||||
|
||||
tag = body[:body.index('>')+1]
|
||||
|
|
|
|||
Loading…
Reference in a new issue