Update html5lib from 0.99999 (0.9x5) to 0.9999999 (0.9x7).

This commit is contained in:
Jim Miller 2015-12-11 10:28:14 -06:00
parent 7a42c84f51
commit bddd02ce1c
3 changed files with 11 additions and 5 deletions

View file

@@ -20,4 +20,6 @@ from .serializer import serialize
 __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
 "getTreeWalker", "serialize"]
-__version__ = "0.99999"
+# this has to be at the top level, see how setup.py parses this
+__version__ = "0.9999999"

View file

@@ -207,8 +207,12 @@ class HTMLSanitizerMixin(object):
 unescape(attrs[attr])).lower()
 # remove replacement characters from unescaped characters
 val_unescaped = val_unescaped.replace("\ufffd", "")
-uri = urlparse.urlparse(val_unescaped)
-if uri:
+try:
+uri = urlparse.urlparse(val_unescaped)
+except ValueError:
+uri = None
+del attrs[attr]
+if uri and uri.scheme:
 if uri.scheme not in self.allowed_protocols:
 del attrs[attr]
 if uri.scheme == 'data':

View file

@@ -10,7 +10,7 @@ except ImportError:
 import re
-from six import text_type
+from six import string_types
 from . import _base
 from ..utils import moduleFactoryFactory
@@ -58,7 +58,7 @@ def getETreeBuilder(ElementTreeImplementation):
 return _base.COMMENT, node.text
 else:
-assert type(node.tag) == text_type, type(node.tag)
+assert isinstance(node.tag, string_types), type(node.tag)
 # This is assumed to be an ordinary element
 match = tag_regexp.match(node.tag)
 if match: