diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py
index a59cecfe..bd75cf1c 100644
--- a/calibre-plugin/__init__.py
+++ b/calibre-plugin/__init__.py
@@ -27,7 +27,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = 'UI plugin to download FanFiction stories from various sites.'
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
- version = (1, 5, 15)
+ version = (1, 5, 16)
minimum_calibre_version = (0, 8, 30)
#: This field defines the GUI plugin class that contains all the code
diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
index 0496227b..c6557370 100644
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -184,6 +184,7 @@ br breaks
Puella Magi Madoka Magica/魔法少女まどか★マギカ

br breaks
+Don't—e;ver—d;o—that—a;gain, 法 é
horizontal rules
diff --git a/fanficdownloader/htmlcleanup.py b/fanficdownloader/htmlcleanup.py
index 2bf42803..4dfb306c 100644
--- a/fanficdownloader/htmlcleanup.py
+++ b/fanficdownloader/htmlcleanup.py
@@ -19,12 +19,21 @@ import re
def _unirepl(match):
"Return the unicode string for a decimal number"
- if match.group(1)=='x':
+ if match.group(1).startswith('x'):
radix=16
+ s = match.group(1)[1:]
else:
radix=10
- value = int(match.group(2), radix)
- return "%s%s"%(unichr(value),match.group(3))
+ s = match.group(1)
+ try:
+ value = int(s, radix)
+ retval = "%s%s"%(unichr(value),match.group(2))
+ except:
+ # This way, if there are more entities out there that fail,
+ # at least it doesn't blow up the entire download.
+ print "Numeric entity translation failed, skipping: %s%s"%(match.group(1),match.group(2))
+ retval = ""
+ return retval
def _replaceNumberEntities(data):
# The same brokenish entity parsing in SGMLParser that inserts ';'
@@ -33,7 +42,8 @@ def _replaceNumberEntities(data):
# "Don't&#8212;ever&#8212;do&#8212;that&#8212;again," becomes
# "Don't&#8212e;ver&#8212d;o&#8212;that&#8212a;gain,"
# Also need to allow for 5 digit decimal entities &#27861;
- p = re.compile(r'(x?)([0-9]{,5}|[0-9a-fA-F]{,4})([0-9a-fA-F]*?);')
+ # Last expression didn't allow for 2 digit hex correctly: &#xE9;
+ p = re.compile(r'(x[0-9a-fA-F]{,4}|[0-9]{,5})([0-9a-fA-F]*?);')
return p.sub(_unirepl, data)
def _replaceNotEntities(data):