Roll included soupsieve back -- the newest version isn't Python 2 compatible.

2025-12-06 08:52:55 +01:00 · 2020-12-22 14:03:03 -06:00 · 2020-12-22 14:03:03 -06:00 · 9112346f41
commit 9112346f41
parent 34dc2e14b2
6 changed files with 366 additions and 253 deletions
--- a/included_dependencies/soupsieve/__init__.py
+++ b/included_dependencies/soupsieve/__init__.py
@ -25,16 +25,17 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
 from __future__ import unicode_literals
 from .__meta__ import __version__, __version_info__  # noqa: F401
 from . import css_parser as cp
 from . import css_match as cm
 from . import css_types as ct
-from .util import DEBUG, SelectorSyntaxError  # noqa: F401
+from .util import DEBUG, _QUIRKS, deprecated, SelectorSyntaxError  # noqa: F401
 __all__ = (
-    'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
+    'DEBUG', "_QUIRKS", 'SelectorSyntaxError', 'SoupSieve',
-    'closest', 'compile', 'filter', 'iselect',
+    'closest', 'comments', 'compile', 'filter', 'icomments',
-    'match', 'select', 'select_one'
+    'iselect', 'match', 'select', 'select_one'
 )
 SoupSieve = cm.SoupSieve
@ -86,6 +87,21 @@ def filter(select, iterable, namespaces=None, flags=0, **kwargs):  # noqa: A001
    return compile(select, namespaces, flags, **kwargs).filter(iterable)
@deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
 def comments(tag, limit=0, flags=0, **kwargs):
    """Get comments only."""
    return [comment for comment in cm.CommentsMatch(tag).get_comments(limit)]
@deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
 def icomments(tag, limit=0, flags=0, **kwargs):
    """Iterate comments only."""
    for comment in cm.CommentsMatch(tag).get_comments(limit):
        yield comment
 def select_one(select, tag, namespaces=None, flags=0, **kwargs):
    """Select a single tag."""
--- a/included_dependencies/soupsieve/__meta__.py
+++ b/included_dependencies/soupsieve/__meta__.py
@ -1,4 +1,5 @@
 """Meta related things."""
 from __future__ import unicode_literals
 from collections import namedtuple
 import re
@ -185,5 +186,5 @@ def parse_version(ver, pre=False):
    return Version(major, minor, micro, release, pre, post, dev)
-__version_info__ = Version(2, 1, 0, "final")
+__version_info__ = Version(1, 9, 1, "final")
 __version__ = __version_info__._get_canonical()
--- a/included_dependencies/soupsieve/css_match.py
+++ b/included_dependencies/soupsieve/css_match.py
@ -1,12 +1,11 @@
 """CSS matcher."""
 from __future__ import unicode_literals
 from datetime import datetime
 from . import util
 import re
 from .import css_types as ct
 import unicodedata
 import bs4
 # Empty tag pattern (whitespace okay)
 RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -44,7 +43,6 @@ RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2}
 RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
 )
 RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')
 MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
 FEB = 2
@ -55,7 +53,7 @@ FEB_LEAP_MONTH = 29
 DAYS_IN_WEEK = 7
-class _FakeParent(object):
+class FakeParent(object):
    """
    Fake parent class.
@ -75,7 +73,7 @@ class _FakeParent(object):
        return len(self.contents)
-class _DocumentNav(object):
+class Document(object):
    """Navigate a Beautiful Soup document."""
    @classmethod
@ -89,37 +87,58 @@ class _DocumentNav(object):
    @staticmethod
    def is_doc(obj):
        """Is `BeautifulSoup` object."""
        import bs4
        return isinstance(obj, bs4.BeautifulSoup)
    @staticmethod
    def is_tag(obj):
        """Is tag."""
        import bs4
        return isinstance(obj, bs4.Tag)
    @staticmethod
    def is_comment(obj):
        """Is comment."""
        import bs4
        return isinstance(obj, bs4.Comment)
    @staticmethod
    def is_declaration(obj):  # pragma: no cover
        """Is declaration."""
        import bs4
        return isinstance(obj, bs4.Declaration)
    @staticmethod
-    def is_cdata(obj):
+    def is_cdata(obj):  # pragma: no cover
        """Is CDATA."""
-        return isinstance(obj, bs4.CData)
+
        import bs4
        return isinstance(obj, bs4.Declaration)
    @staticmethod
    def is_processing_instruction(obj):  # pragma: no cover
        """Is processing instruction."""
        import bs4
        return isinstance(obj, bs4.ProcessingInstruction)
    @staticmethod
    def is_navigable_string(obj):
        """Is navigable string."""
        import bs4
        return isinstance(obj, bs4.NavigableString)
    @staticmethod
    def is_special_string(obj):
        """Is special string."""
-        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
+
        import bs4
        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction))
    @classmethod
    def is_content_string(cls, obj):
@ -131,7 +150,7 @@ class _DocumentNav(object):
    def create_fake_parent(el):
        """Create fake parent for a given element."""
-        return _FakeParent(el)
+        return FakeParent(el)
    @staticmethod
    def is_xml_tree(el):
@ -198,13 +217,10 @@ class _DocumentNav(object):
                is_tag = self.is_tag(child)
                if no_iframe and is_tag and self.is_iframe(child):
-                    if child.next_sibling is not None:
+                    last_child = child
-                        next_good = child.next_sibling
+                    while self.is_tag(last_child) and last_child.contents:
-                    else:
+                        last_child = last_child.contents[-1]
-                        last_child = child
+                    next_good = last_child.next_element
                        while self.is_tag(last_child) and last_child.contents:
                            last_child = last_child.contents[-1]
                        next_good = last_child.next_element
                    yield child
                    if next_good is None:
                        break
@ -234,27 +250,21 @@ class _DocumentNav(object):
        return el.prefix
    @staticmethod
    def get_uri(el):
        """Get namespace `URI`."""
        return el.namespace
    @classmethod
-    def get_next(cls, el, tags=True):
+    def get_next_tag(cls, el):
        """Get next sibling tag."""
        sibling = el.next_sibling
-        while tags and not cls.is_tag(sibling) and sibling is not None:
+        while not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.next_sibling
        return sibling
    @classmethod
-    def get_previous(cls, el, tags=True):
+    def get_previous_tag(cls, el):
        """Get previous sibling tag."""
        sibling = el.previous_sibling
-        while tags and not cls.is_tag(sibling) and sibling is not None:
+        while not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.previous_sibling
        return sibling
@ -305,7 +315,7 @@ class _DocumentNav(object):
        """Get classes."""
        classes = cls.get_attribute_by_name(el, 'class', [])
-        if isinstance(classes, str):
+        if isinstance(classes, util.ustr):
            classes = RE_NOT_WS.findall(classes)
        return classes
@ -316,11 +326,6 @@ class _DocumentNav(object):
            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
        )
    def get_own_text(self, el, no_iframe=False):
        """Get Own Text."""
        return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
 class Inputs(object):
    """Class for parsing and validating input items."""
@ -423,7 +428,7 @@ class Inputs(object):
        return parsed
-class _Match(object):
+class CSSMatch(Document, object):
    """Perform CSS matching."""
    def __init__(self, selectors, scope, namespaces, flags):
@ -471,7 +476,7 @@ class _Match(object):
        if self.supports_namespaces():
            namespace = ''
-            ns = self.get_uri(el)
+            ns = el.namespace
            if ns:
                namespace = ns
        else:
@ -531,57 +536,6 @@ class _Match(object):
                    return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
        return None
    def extended_language_filter(self, lang_range, lang_tag):
        """Filter the language tags."""
        match = True
        lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
        ranges = lang_range.split('-')
        subtags = lang_tag.lower().split('-')
        length = len(ranges)
        rindex = 0
        sindex = 0
        r = ranges[rindex]
        s = subtags[sindex]
        # Primary tag needs to match
        if r != '*' and r != s:
            match = False
        rindex += 1
        sindex += 1
        # Match until we run out of ranges
        while match and rindex < length:
            r = ranges[rindex]
            try:
                s = subtags[sindex]
            except IndexError:
                # Ran out of subtags,
                # but we still have ranges
                match = False
                continue
            # Empty range
            if not r:
                match = False
                continue
            # Matched range
            elif s == r:
                rindex += 1
            # Implicit wildcard cannot match
            # singletons
            elif len(s) == 1:
                match = False
                continue
            # Implicitly matched, so grab next subtag
            sindex += 1
        return match
    def match_attribute_name(self, el, attr, prefix):
        """Match attribute name and return value if it exists."""
@ -706,12 +660,12 @@ class _Match(object):
            if parent:
                found = self.match_selectors(parent, relation)
        elif relation[0].rel_type == REL_SIBLING:
-            sibling = self.get_previous(el)
+            sibling = self.get_previous_tag(el)
            while not found and sibling:
                found = self.match_selectors(sibling, relation)
-                sibling = self.get_previous(sibling)
+                sibling = self.get_previous_tag(sibling)
        elif relation[0].rel_type == REL_CLOSE_SIBLING:
-            sibling = self.get_previous(el)
+            sibling = self.get_previous_tag(el)
            if sibling and self.is_tag(sibling):
                found = self.match_selectors(sibling, relation)
        return found
@ -736,12 +690,12 @@ class _Match(object):
        elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
            found = self.match_future_child(el, relation)
        elif relation[0].rel_type == REL_HAS_SIBLING:
-            sibling = self.get_next(el)
+            sibling = self.get_next_tag(el)
            while not found and sibling:
                found = self.match_selectors(sibling, relation)
-                sibling = self.get_next(sibling)
+                sibling = self.get_next_tag(sibling)
        elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
-            sibling = self.get_next(el)
+            sibling = self.get_next_tag(el)
            if sibling and self.is_tag(sibling):
                found = self.match_selectors(sibling, relation)
        return found
@ -782,28 +736,7 @@ class _Match(object):
    def match_root(self, el):
        """Match element as root."""
-        is_root = self.is_root(el)
+        return self.is_root(el)
        if is_root:
            sibling = self.get_previous(el, tags=False)
            while is_root and sibling is not None:
                if (
                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
                    self.is_cdata(sibling)
                ):
                    is_root = False
                else:
                    sibling = self.get_previous(sibling, tags=False)
        if is_root:
            sibling = self.get_next(el, tags=False)
            while is_root and sibling is not None:
                if (
                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
                    self.is_cdata(sibling)
                ):
                    is_root = False
                else:
                    sibling = self.get_next(sibling, tags=False)
        return is_root
    def match_scope(self, el):
        """Match element as scope."""
@ -948,23 +881,12 @@ class _Match(object):
        content = None
        for contain_list in contains:
            if content is None:
-                if contain_list.own:
+                content = self.get_text(el, no_iframe=self.is_html)
                    content = self.get_own_text(el, no_iframe=self.is_html)
                else:
                    content = self.get_text(el, no_iframe=self.is_html)
            found = False
            for text in contain_list.text:
-                if contain_list.own:
+                if text in content:
-                    for c in content:
+                    found = True
-                        if text in c:
+                    break
                            found = True
                            break
                    if found:
                        break
                else:
                    if text in content:
                        found = True
                        break
            if not found:
                match = False
        return match
@ -1148,7 +1070,7 @@ class _Match(object):
            for patterns in langs:
                match = False
                for pattern in patterns:
-                    if self.extended_language_filter(pattern, found_lang):
+                    if pattern.match(found_lang):
                        match = True
                if not match:
                    break
@ -1230,7 +1152,7 @@ class _Match(object):
        out_of_range = False
-        itype = util.lower(self.get_attribute_by_name(el, 'type'))
+        itype = self.get_attribute_by_name(el, 'type').lower()
        mn = self.get_attribute_by_name(el, 'min', None)
        if mn is not None:
            mn = Inputs.parse_value(itype, mn)
@ -1285,21 +1207,6 @@ class _Match(object):
            self.get_prefix(el) is not None
        )
    def match_placeholder_shown(self, el):
        """
        Match placeholder shown according to HTML spec.
        - text area should be checked if they have content. A single newline does not count as content.
        """
        match = False
        content = self.get_text(el)
        if content in ('', '\n'):
            match = True
        return match
    def match_selectors(self, el, selectors):
        """Check if element matches one of the selectors."""
@ -1332,9 +1239,6 @@ class _Match(object):
                # Verify element is scope
                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
                    continue
                # Verify element has placeholder shown
                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
                    continue
                # Verify `nth` matches
                if not self.match_nth(el, selector.nth):
                    continue
@ -1421,8 +1325,28 @@ class _Match(object):
        return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
-class CSSMatch(_DocumentNav, _Match):
+class CommentsMatch(Document, object):
-    """The Beautiful Soup CSS match class."""
+    """Comments matcher."""
    def __init__(self, el):
        """Initialize."""
        self.assert_valid_input(el)
        self.tag = el
    def get_comments(self, limit=0):
        """Get comments."""
        if limit < 1:
            limit = None
        for child in self.get_descendants(self.tag, tags=False):
            if self.is_comment(child):
                yield child
                if limit is not None:
                    limit -= 1
                    if limit < 1:
                        break
 class SoupSieve(ct.Immutable):
@ -1468,6 +1392,19 @@ class SoupSieve(ct.Immutable):
        else:
            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
    @util.deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
    def comments(self, tag, limit=0):
        """Get comments only."""
        return [comment for comment in CommentsMatch(tag).get_comments(limit)]
    @util.deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
    def icomments(self, tag, limit=0):
        """Iterate comments only."""
        for comment in CommentsMatch(tag).get_comments(limit):
            yield comment
    def select_one(self, tag):
        """Select a single tag."""
--- a/included_dependencies/soupsieve/css_parser.py
+++ b/included_dependencies/soupsieve/css_parser.py
@ -1,11 +1,10 @@
 """CSS selector parser."""
 from __future__ import unicode_literals
 import re
 from functools import lru_cache
 from . import util
 from . import css_match as cm
 from . import css_types as ct
 from .util import SelectorSyntaxError
 import warnings
 UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -60,8 +59,6 @@ PSEUDO_SIMPLE_NO_MATCH = {
 # Complex pseudo classes that take selector lists
 PSEUDO_COMPLEX = {
    ':contains',
    ':-soup-contains',
    ':-soup-contains-own',
    ':has',
    ':is',
    ':matches',
@ -113,6 +110,11 @@ VALUE = r'''
 ATTR = r'''
 (?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
 '''.format(ws=WSC, value=VALUE)
 # Definitions for quirks mode
 QUIRKS_ATTR_IDENTIFIER = r'(?:(?:{esc}|(?!/\*)[^"\] \t\r\n\f])+?)'.format(esc=CSS_ESCAPES)
 QUIRKS_ATTR = r'''
 (?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
 '''.format(ws=WSC, value=QUIRKS_ATTR_IDENTIFIER)
 # Selector patterns
 # IDs (`#id`)
@ -120,11 +122,13 @@ PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
 # Classes (`.class`)
 PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
 # Prefix:Tag (`prefix|tag`)
-PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
+PAT_TAG = r'(?:(?:{ident}|\*)?\|)?(?:{ident}|\*)'.format(ident=IDENTIFIER)
 # Attributes (`[attr]`, `[attr=value]`, etc.)
-PAT_ATTR = r'''
+PAT_ATTR = r'\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}'.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
-\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
+# Quirks attributes, like real attributes, but unquoted values can contain anything but whitespace and closing `]`.
-'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
+PAT_QUIRKS_ATTR = r'''
 \[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}
 '''.format(ws=WSC, ident=IDENTIFIER, attr=QUIRKS_ATTR)
 # Pseudo class (`:pseudo-class`, `:pseudo-class(`)
 PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
 # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
@ -195,13 +199,12 @@ FLG_INDETERMINATE = 0x20
 FLG_OPEN = 0x40
 FLG_IN_RANGE = 0x80
 FLG_OUT_OF_RANGE = 0x100
 FLG_PLACEHOLDER_SHOWN = 0x200
 # Maximum cached patterns to store
 _MAXCACHE = 500
-@lru_cache(maxsize=_MAXCACHE)
+@util.lru_cache(maxsize=_MAXCACHE)
 def _cached_css_compile(pattern, namespaces, custom, flags):
    """Cached CSS compile."""
@ -250,7 +253,7 @@ def css_unescape(content, string=False):
            codepoint = int(m.group(1)[1:], 16)
            if codepoint == 0:
                codepoint = UNICODE_REPLACEMENT_CHAR
-            value = chr(codepoint)
+            value = util.uchr(codepoint)
        elif m.group(2):
            value = m.group(2)[1:]
        elif m.group(3):
@ -274,7 +277,7 @@ def escape(ident):
        string.append('\\{}'.format(ident))
    else:
        for index, c in enumerate(ident):
-            codepoint = ord(c)
+            codepoint = util.uord(c)
            if codepoint == 0x00:
                string.append('\ufffd')
            elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
@ -305,7 +308,12 @@ class SelectorPattern(object):
        return self.name
-    def match(self, selector, index, flags):
+    def enabled(self, flags):
        """Enabled."""
        return True
    def match(self, selector, index):
        """Match the selector."""
        return self.re_pattern.match(selector, index)
@ -320,7 +328,7 @@ class SpecialPseudoPattern(SelectorPattern):
        self.patterns = {}
        for p in patterns:
            name = p[0]
-            pattern = p[3](name, p[2])
+            pattern = SelectorPattern(name, p[2])
            for pseudo in p[1]:
                self.patterns[pseudo] = pattern
@ -332,7 +340,12 @@ class SpecialPseudoPattern(SelectorPattern):
        return self.matched_name.get_name()
-    def match(self, selector, index, flags):
+    def enabled(self, flags):
        """Enabled."""
        return True
    def match(self, selector, index):
        """Match the selector."""
        pseudo = None
@ -341,13 +354,22 @@ class SpecialPseudoPattern(SelectorPattern):
            name = util.lower(css_unescape(m.group('name')))
            pattern = self.patterns.get(name)
            if pattern:
-                pseudo = pattern.match(selector, index, flags)
+                pseudo = pattern.match(selector, index)
                if pseudo:
                    self.matched_name = pattern
        return pseudo
 class QuirkPattern(SelectorPattern):
    """Selector pattern for quirk mode."""
    def enabled(self, flags):
        """Enabled if quirks flag is present."""
        return flags & util._QUIRKS
 class _Selector(object):
    """
    Intermediate selector class.
@ -424,16 +446,11 @@ class CSSParser(object):
        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
        SpecialPseudoPattern(
            (
-                (
+                ("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS),
-                    "pseudo_contains",
+                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD),
-                    (':contains', ':-soup-contains', ':-soup-contains-own'),
+                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE),
-                    PAT_PSEUDO_CONTAINS,
+                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG),
-                    SelectorPattern
+                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR)
                ),
                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
            )
        ),
        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
@ -444,6 +461,7 @@ class CSSParser(object):
        SelectorPattern("class", PAT_CLASS),
        SelectorPattern("tag", PAT_TAG),
        SelectorPattern("attribute", PAT_ATTR),
        QuirkPattern("quirks_attribute", PAT_QUIRKS_ATTR),
        SelectorPattern("combine", PAT_COMBINE)
    )
@ -453,19 +471,24 @@ class CSSParser(object):
        self.pattern = selector.replace('\x00', '\ufffd')
        self.flags = flags
        self.debug = self.flags & util.DEBUG
        self.quirks = self.flags & util._QUIRKS
        self.custom = {} if custom is None else custom
-    def parse_attribute_selector(self, sel, m, has_selector):
+    def parse_attribute_selector(self, sel, m, has_selector, quirks):
        """Create attribute selector from the returned regex match."""
        inverse = False
        op = m.group('cmp')
        case = util.lower(m.group('case')) if m.group('case') else None
-        ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
+        parts = [css_unescape(a) for a in m.group('ns_attr').split('|')]
-        attr = css_unescape(m.group('attr_name'))
+        ns = ''
        is_type = False
        pattern2 = None
-
+        if len(parts) > 1:
            ns = parts[0]
            attr = parts[1]
        else:
            attr = parts[0]
        if case:
            flags = re.I if case == 'i' else 0
        elif util.lower(attr) == 'type':
@ -475,7 +498,7 @@ class CSSParser(object):
            flags = 0
        if op:
-            if m.group('value').startswith(('"', "'")):
+            if m.group('value').startswith(('"', "'")) and not quirks:
                value = css_unescape(m.group('value')[1:-1], True)
            else:
                value = css_unescape(m.group('value'))
@ -502,12 +525,13 @@ class CSSParser(object):
        elif op.startswith('|'):
            # Value starts with word in dash separated list
            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
        elif op.startswith('!'):
            # Equivalent to `:not([attr=value])`
            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
            inverse = True
        else:
            # Value matches
            pattern = re.compile(r'^%s$' % re.escape(value), flags)
            if op.startswith('!'):
                # Equivalent to `:not([attr=value])`
                inverse = True
        if is_type and pattern:
            pattern2 = re.compile(pattern.pattern)
@ -528,8 +552,13 @@ class CSSParser(object):
    def parse_tag_pattern(self, sel, m, has_selector):
        """Parse tag pattern from regex match."""
-        prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
+        parts = [css_unescape(x) for x in m.group(0).split('|')]
-        tag = css_unescape(m.group('tag_name'))
+        if len(parts) > 1:
            prefix = parts[0]
            tag = parts[1]
        else:
            tag = parts[0]
            prefix = None
        sel.tag = ct.SelectorTag(tag, prefix)
        has_selector = True
        return has_selector
@ -771,11 +800,21 @@ class CSSParser(object):
        if not combinator:
            combinator = WS_COMBINATOR
        if not has_selector:
-            raise SelectorSyntaxError(
+            # The only way we don't fail is if we are at the root level and quirks mode is enabled,
-                "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
+            # and we've found no other selectors yet in this compound selector.
            if (not self.quirks or is_pseudo or combinator == COMMA_COMBINATOR or relations):
                raise SelectorSyntaxError(
                    "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
                    self.pattern,
                    index
                )
            util.warn_quirks(
                'You have attempted to use a combinator without a selector before it at position {}.'.format(index),
                'the :scope pseudo class (or another appropriate selector) should be placed before the combinator.',
                self.pattern,
                index
            )
            sel.flags |= ct.SEL_SCOPE
        if combinator == COMMA_COMBINATOR:
            if not sel.tag and not is_pseudo:
@ -808,14 +847,7 @@ class CSSParser(object):
    def parse_pseudo_contains(self, sel, m, has_selector):
        """Parse contains."""
-        pseudo = util.lower(css_unescape(m.group('name')))
+        values = m.group('values')
        if pseudo == ":contains":
            warnings.warn(
                "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
                FutureWarning
            )
        contains_own = pseudo == ":-soup-contains-own"
        values = css_unescape(m.group('values'))
        patterns = []
        for token in RE_VALUES.finditer(values):
            if token.group('split'):
@ -826,7 +858,7 @@ class CSSParser(object):
            else:
                value = css_unescape(value)
            patterns.append(value)
-        sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own))
+        sel.contains.append(ct.SelectorContains(tuple(patterns)))
        has_selector = True
        return has_selector
@ -840,12 +872,20 @@ class CSSParser(object):
                continue
            value = token.group('value')
            if value.startswith(('"', "'")):
-                value = css_unescape(value[1:-1], True)
+                parts = css_unescape(value[1:-1], True).split('-')
            else:
-                value = css_unescape(value)
+                parts = css_unescape(value).split('-')
            patterns.append(value)
            new_parts = []
            first = True
            for part in parts:
                if part == '*' and first:
                    new_parts.append('(?!x\b)[a-z0-9]+?')
                elif part != '*':
                    new_parts.append(('' if first else '(-(?!x\b)[a-z0-9]+)*?\\-') + re.escape(part))
                if first:
                    first = False
            patterns.append(re.compile(r'^{}(?:-.*)?$'.format(''.join(new_parts)), re.I))
        sel.lang.append(ct.SelectorLang(patterns))
        has_selector = True
@ -877,7 +917,6 @@ class CSSParser(object):
        is_indeterminate = bool(flags & FLG_INDETERMINATE)
        is_in_range = bool(flags & FLG_IN_RANGE)
        is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
        is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
        if self.debug:  # pragma: no cover
            if is_pseudo:
@ -898,8 +937,6 @@ class CSSParser(object):
                print('    is_in_range: True')
            if is_out_of_range:
                print('    is_out_of_range: True')
            if is_placeholder_shown:
                print('    is_placeholder_shown: True')
        if is_relative:
            selectors.append(_Selector())
@ -916,7 +953,7 @@ class CSSParser(object):
                elif key == 'pseudo_class':
                    has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
                elif key == 'pseudo_element':
-                    raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
+                    raise NotImplementedError("Psuedo-element found at position {}".format(m.start(0)))
                elif key == 'pseudo_contains':
                    has_selector = self.parse_pseudo_contains(sel, m, has_selector)
                elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -952,8 +989,18 @@ class CSSParser(object):
                        has_selector, sel = self.parse_combinator(
                            sel, m, has_selector, selectors, relations, is_pseudo, index
                        )
-                elif key == 'attribute':
+                elif key in ('attribute', 'quirks_attribute'):
-                    has_selector = self.parse_attribute_selector(sel, m, has_selector)
+                    quirks = key == 'quirks_attribute'
                    if quirks:
                        temp_index = index + m.group(0).find('=') + 1
                        util.warn_quirks(
                            "You have attempted to use an attribute " +
                            "value that should have been quoted at position {}.".format(temp_index),
                            "the attribute value should be quoted.",
                            self.pattern,
                            temp_index
                        )
                    has_selector = self.parse_attribute_selector(sel, m, has_selector, quirks)
                elif key == 'tag':
                    if has_selector:
                        raise SelectorSyntaxError(
@ -1006,8 +1053,6 @@ class CSSParser(object):
            selectors[-1].flags = ct.SEL_IN_RANGE
        if is_out_of_range:
            selectors[-1].flags = ct.SEL_OUT_OF_RANGE
        if is_placeholder_shown:
            selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
        return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
@ -1021,11 +1066,15 @@ class CSSParser(object):
        end = (m.start(0) - 1) if m else (len(pattern) - 1)
        if self.debug:  # pragma: no cover
            if self.quirks:
                print('## QUIRKS MODE: Throwing out the spec!')
            print('## PARSING: {!r}'.format(pattern))
        while index <= end:
            m = None
            for v in self.css_tokens:
-                m = v.match(pattern, index, self.flags)
+                if not v.enabled(self.flags):  # pragma: no cover
                    continue
                m = v.match(pattern, index)
                if m:
                    name = v.get_name()
                    if self.debug:  # pragma: no cover
@ -1053,7 +1102,13 @@ class CSSParser(object):
            print('## END PARSING')
    def process_selectors(self, index=0, flags=0):
-        """Process selectors."""
+        """
        Process selectors.
        We do our own selectors as BeautifulSoup4 has some annoying quirks,
        and we don't really need to do nth selectors or siblings or
        descendants etc.
        """
        return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
@ -1068,7 +1123,8 @@ CSS_LINK = CSSParser(
 # CSS pattern for `:checked`
 CSS_CHECKED = CSSParser(
    '''
-    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
+    html|*:is(input[type=checkbox], input[type=radio])[checked],
    html|select > html|option[selected]
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:default` (must compile CSS_CHECKED first)
@ -1094,23 +1150,23 @@ CSS_INDETERMINATE = CSSParser(
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
-    html|input[type="radio"][name]:not([name='']):not([checked])
+    html|input[type="radio"][name][name!='']:not([checked])
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
 # CSS pattern for `:disabled`
 CSS_DISABLED = CSSParser(
    '''
-    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
+    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
-    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
+    html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
    html|fieldset[disabled] >
-        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
+        html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:enabled`
 CSS_ENABLED = CSSParser(
    '''
-    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
+    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:required`
@ -1124,20 +1180,22 @@ CSS_OPTIONAL = CSSParser(
 # CSS pattern for `:placeholder-shown`
 CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
-    html|input:is(
+    html|*:is(
-        :not([type]),
+        input:is(
-        [type=""],
+            :not([type]),
-        [type=text],
+            [type=""],
-        [type=search],
+            [type=text],
-        [type=url],
+            [type=search],
-        [type=tel],
+            [type=url],
-        [type=email],
+            [type=tel],
-        [type=password],
+            [type=email],
-        [type=number]
+            [type=password],
-    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
+            [type=number]
-    html|textarea[placeholder]:not([placeholder=''])
+        ),
        textarea
    )[placeholder][placeholder!='']
    '''
-).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern default for `:nth-child` "of S" feature
 CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
--- a/included_dependencies/soupsieve/css_types.py
+++ b/included_dependencies/soupsieve/css_types.py
@ -1,6 +1,6 @@
 """CSS selector structure items."""
-import copyreg
+from __future__ import unicode_literals
-from collections.abc import Hashable, Mapping
+from . import util
 __all__ = (
    'Selector',
@ -26,7 +26,6 @@ SEL_DIR_RTL = 0x40
 SEL_IN_RANGE = 0x80
 SEL_OUT_OF_RANGE = 0x100
 SEL_DEFINED = 0x200
 SEL_PLACEHOLDER_SHOWN = 0x400
 class Immutable(object):
@ -86,7 +85,7 @@ class Immutable(object):
    __str__ = __repr__
-class ImmutableDict(Mapping):
+class ImmutableDict(util.Mapping):
    """Hashable, immutable dictionary."""
    def __init__(self, *args, **kwargs):
@ -95,8 +94,8 @@ class ImmutableDict(Mapping):
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
        if (
-            is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
+            is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or
-            not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
+            not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg])
        ):
            raise TypeError('All values must be hashable')
@ -141,9 +140,9 @@ class Namespaces(ImmutableDict):
        # so don't bother checking that.
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
-        if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
+        if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
            raise TypeError('Namespace keys and values must be Unicode strings')
-        elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
+        elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
            raise TypeError('Namespace keys and values must be Unicode strings')
        super(Namespaces, self).__init__(*args, **kwargs)
@ -160,9 +159,9 @@ class CustomSelectors(ImmutableDict):
        # so don't bother checking that.
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
-        if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
+        if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')
-        elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
+        elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')
        super(CustomSelectors, self).__init__(*args, **kwargs)
@ -239,14 +238,13 @@ class SelectorAttribute(Immutable):
 class SelectorContains(Immutable):
    """Selector contains rule."""
-    __slots__ = ("text", "own", "_hash")
+    __slots__ = ("text", "_hash")
-    def __init__(self, text, own):
+    def __init__(self, text):
        """Initialize."""
        super(SelectorContains, self).__init__(
-            text=text,
+            text=text
            own=own
        )
@ -333,7 +331,7 @@ def _pickle(p):
 def pickle_register(obj):
    """Allow object to be pickled."""
-    copyreg.pickle(obj, _pickle)
+    util.copyreg.pickle(obj, _pickle)
 pickle_register(Selector)
--- a/included_dependencies/soupsieve/util.py
+++ b/included_dependencies/soupsieve/util.py
@ -1,17 +1,47 @@
 """Utility."""
-from functools import wraps, lru_cache
+from __future__ import unicode_literals
 from functools import wraps
 import warnings
 import sys
 import struct
 import os
 import re
 MODULE = os.path.dirname(__file__)
 PY3 = sys.version_info >= (3, 0)
 PY35 = sys.version_info >= (3, 5)
 PY37 = sys.version_info >= (3, 7)
 if PY3:
    from functools import lru_cache  # noqa F401
    import copyreg  # noqa F401
    from collections.abc import Hashable, Mapping  # noqa F401
    ustr = str
    bstr = bytes
    unichar = chr
    string = str
 else:
    from backports.functools_lru_cache import lru_cache  # noqa F401
    import copy_reg as copyreg  # noqa F401
    from collections import Hashable, Mapping  # noqa F401
    ustr = unicode  # noqa: F821
    bstr = str
    unichar = unichr  # noqa: F821
    string = basestring  # noqa: F821
 # Flag bits exported through the package's `__all__`
 # (presumably OR-ed into the `flags` arguments -- confirm against callers).
 DEBUG = 0x00001
 _QUIRKS = 0x10000
 # Matches a single line ending (`\r\n`, `\n` or `\r`) or the end of the string.
 RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
 # Code points bounding the ASCII lower-case and upper-case letter ranges.
 LC_A = ord('a')
 LC_Z = ord('z')
 UC_A = ord('A')
 UC_Z = ord('Z')
@lru_cache(maxsize=512)
 def lower(string):
    """Lower."""
@ -22,7 +52,38 @@ def lower(string):
    return ''.join(new_string)
-class SelectorSyntaxError(Exception):
+def upper(string):  # pragma: no cover
    """
    Upper.

    Only the ASCII letters `a`-`z` are converted to `A`-`Z`; every other
    character is passed through unchanged.
    """
    new_string = []
    for c in string:
        o = ord(c)
        # ASCII lower-case and upper-case letters are exactly 32 code points apart.
        new_string.append(chr(o - 32) if LC_A <= o <= LC_Z else c)
    return ''.join(new_string)
 def uchr(i):
    """
    Allow getting Unicode character on narrow python builds.

    `i` is the integer code point to convert.
    """
    try:
        return unichar(i)
    except ValueError:  # pragma: no cover
        # `unichar` rejected the code point (the narrow-build case named in the
        # docstring): pack it as an integer and decode those bytes as UTF-32.
        return struct.pack('i', i).decode('utf-32')
 def uord(c):
    """
    Get Unicode ordinal.

    `c` is either a single character or a two-unit surrogate pair.
    """
    if len(c) == 2:  # pragma: no cover
        # Two code units: treat them as a (high, low) surrogate pair and
        # combine them into the single code point they encode.
        high, low = [ord(p) for p in c]
        ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
    else:
        ordinal = ord(c)
    return ordinal
 class SelectorSyntaxError(SyntaxError):
    """Syntax error in a CSS selector."""
    def __init__(self, msg, pattern=None, index=None):
@ -108,3 +169,45 @@ def get_pattern_context(pattern, index):
        last = m.end(0)
    return ''.join(text), line, col
 class QuirksWarning(UserWarning):  # pragma: no cover
    """Warning for quirks mode; the category `warn_quirks` emits."""
 def warn_quirks(message, recommend, pattern, index):
    """
    Warn quirks.

    Emits a `QuirksWarning` that is attributed to the source line which
    called into this package (or into `bs4`) rather than to this module.

    `message` describes what was found in the selector, `recommend` is the
    suggested correction, `pattern` is the selector text and `index` is the
    position within `pattern` that the warning refers to.
    """
    import traceback
    import bs4  # noqa: F401
    # Acquire source code line context
    paths = (MODULE, sys.modules['bs4'].__path__[0])
    tb = traceback.extract_stack()
    previous = None
    filename = None
    lineno = None
    # `extract_stack` lists the oldest frame first: stop at the first frame
    # that lives inside this package or `bs4`, so that `previous` ends up
    # being the frame that called into them.
    for entry in tb:
        # Stack entries expose named attributes only when `PY35` is set;
        # otherwise they are indexed as plain tuples.
        if (PY35 and entry.filename.startswith(paths)) or (not PY35 and entry[0].startswith(paths)):
            break
        previous = entry
    if previous:
        filename = previous.filename if PY35 else previous[0]
        lineno = previous.lineno if PY35 else previous[1]
    # Format pattern to show line and column position
    context, line = get_pattern_context(pattern, index)[0:2]
    # Display warning
    warnings.warn_explicit(
        "\nCSS selector pattern:\n" +
        "    {}\n".format(message) +
        "    This behavior is only allowed temporarily for Beautiful Soup's transition to Soup Sieve.\n" +
        "    In order to conform to the CSS spec, {}\n".format(recommend) +
        "    It is strongly recommended the selector be altered to conform to the CSS spec " +
        "as an exception will be raised for this case in the future.\n" +
        "pattern line {}:\n{}".format(line, context),
        QuirksWarning,
        filename,
        lineno
    )