diff --git a/lib/request/basic.py b/lib/request/basic.py
index 0fbf1418d..6f3f3f620 100644
--- a/lib/request/basic.py
+++ b/lib/request/basic.py
@@ -254,7 +254,17 @@ def decodePage(page, contentEncoding, contentType):
 
             # e.g. ’…™
             if "&#" in page:
-                page = re.sub(r"&#(\d+);", lambda _: unichr(int(_.group(1))), page)
+                def _(match):
+                    # Fall back to the original text when the code point is not
+                    # convertible: unichr() raises ValueError if it is out of
+                    # range and OverflowError if it does not fit a C integer
+                    retVal = match.group(0)
+                    try:
+                        retVal = unichr(int(match.group(1)))
+                    except (ValueError, OverflowError):
+                        pass
+                    return retVal
+                page = re.sub(r"&#(\d+);", _, page)
 
             # e.g. ζ
             page = re.sub(r"&([^;]+);", lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)