diff --git a/lib/request/basic.py b/lib/request/basic.py index 12c64b92c..b8ee69347 100644 --- a/lib/request/basic.py +++ b/lib/request/basic.py @@ -213,7 +213,7 @@ def decodePage(page, contentEncoding, contentType): if contentType and not isinstance(page, unicode) and any(map(lambda x: x in contentType.lower(), ("text/txt", "text/raw", "text/html", "text/xml"))): # e.g. &#195;&#235;&#224;&#226;&#224; if "&#" in page: - page = re.sub('&#(\d+);', lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page) + page = re.sub('&#(\d{1,3});', lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page) kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page)) page = getUnicode(page, kb.pageEncoding)