mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-01-07 01:56:18 +01:00
Replace non-breaking spaces with spaces.
This commit is contained in:
parent
68e3acd43a
commit
66f7802f9e
1 changed file with 4 additions and 0 deletions
|
|
@@ -153,6 +153,10 @@ def clean_text(self, text):
|
|||
for unused in anchors.difference(links):
|
||||
text = text.replace('\\Q="%s"' % unused, '')
|
||||
|
||||
# Replace bad characters.
|
||||
text = text.replace(u'\xc2', '')
|
||||
text = text.replace(u'\xa0', ' ')
|
||||
|
||||
# Turn all html entities into unicode. This should not be necessary as
|
||||
# lxml should have already done this but we want to be sure it happens.
|
||||
for entity in set(re.findall('&.+?;', text)):
|
||||
|
|
|
|||
Loading…
Reference in a new issue