Need to reverse-sort the entities list so that entities ending in ';' are processed ahead of the versions without it, like '&quot;' and '&quot'. Otherwise '&quot;' becomes '";'.

This commit is contained in:
retiefjimm 2010-11-09 23:02:33 -06:00
parent d67668de23
commit c326aa47b0
2 changed files with 2 additions and 2 deletions

View file

@ -157,7 +157,6 @@ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
acceptable_attributes = ['href']
# entity list from http://code.google.com/p/doctype/wiki/CharacterEntitiesConsistent
# when version without ; is allowed, make sure to put the version with first.
entities = { '&aacute;' : 'á',
'&Aacute;' : 'Á',
'&Aacute' : 'Á',

View file

@ -339,7 +339,8 @@ def removeEntities(text):
# replace several named entities with their character, such as &mdash; -> -
# see constants.py for the list.
for e in entities:
# reverse sort will put entities with ; before the same one without, when valid.
for e in reversed(sorted(entities.keys())):
v = entities[e]
try:
text = text.replace(e, v)