Roll included soupsieve back--newest isn't py2 compat.

2026-05-08 12:36:11 +02:00 · 2020-12-22 14:03:03 -06:00 · 2020-12-22 14:03:03 -06:00 · 9112346f41
commit 9112346f41
parent 34dc2e14b2
6 changed files with 366 additions and 253 deletions
--- a/included_dependencies/soupsieve/__init__.py
+++ b/included_dependencies/soupsieve/__init__.py
@ -25,16 +25,17 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
+from __future__ import unicode_literals
 from .__meta__ import __version__, __version_info__  # noqa: F401
 from . import css_parser as cp
 from . import css_match as cm
 from . import css_types as ct
-from .util import DEBUG, SelectorSyntaxError  # noqa: F401
+from .util import DEBUG, _QUIRKS, deprecated, SelectorSyntaxError  # noqa: F401

 __all__ = (
-    'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
-    'closest', 'compile', 'filter', 'iselect',
-    'match', 'select', 'select_one'
+    'DEBUG', "_QUIRKS", 'SelectorSyntaxError', 'SoupSieve',
+    'closest', 'comments', 'compile', 'filter', 'icomments',
+    'iselect', 'match', 'select', 'select_one'
 )

 SoupSieve = cm.SoupSieve
@ -86,6 +87,21 @@ def filter(select, iterable, namespaces=None, flags=0, **kwargs):  # noqa: A001
    return compile(select, namespaces, flags, **kwargs).filter(iterable)


+@deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
+def comments(tag, limit=0, flags=0, **kwargs):
+    """Return the comments beneath `tag` as a list; `flags` and `kwargs` are accepted but unused."""
+
+    return list(cm.CommentsMatch(tag).get_comments(limit))
+
+
+@deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
+def icomments(tag, limit=0, flags=0, **kwargs):
+    """Lazily yield the comments beneath `tag`; `flags` and `kwargs` are accepted but unused."""
+
+    for node in cm.CommentsMatch(tag).get_comments(limit):
+        yield node
+
+
 def select_one(select, tag, namespaces=None, flags=0, **kwargs):
    """Select a single tag."""

--- a/included_dependencies/soupsieve/__meta__.py
+++ b/included_dependencies/soupsieve/__meta__.py
@ -1,4 +1,5 @@
 """Meta related things."""
+from __future__ import unicode_literals
 from collections import namedtuple
 import re

@ -185,5 +186,5 @@ def parse_version(ver, pre=False):
    return Version(major, minor, micro, release, pre, post, dev)


-__version_info__ = Version(2, 1, 0, "final")
+__version_info__ = Version(1, 9, 1, "final")
 __version__ = __version_info__._get_canonical()
--- a/included_dependencies/soupsieve/css_match.py
+++ b/included_dependencies/soupsieve/css_match.py
@ -1,12 +1,11 @@
 """CSS matcher."""
+from __future__ import unicode_literals
 from datetime import datetime
 from . import util
 import re
 from .import css_types as ct
 import unicodedata

-import bs4
-
 # Empty tag pattern (whitespace okay)
 RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')

@ -44,7 +43,6 @@ RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2}
 RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
 )
-RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')

 MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
 FEB = 2
@ -55,7 +53,7 @@ FEB_LEAP_MONTH = 29
 DAYS_IN_WEEK = 7


-class _FakeParent(object):
+class FakeParent(object):
    """
    Fake parent class.

@ -75,7 +73,7 @@ class _FakeParent(object):
        return len(self.contents)


-class _DocumentNav(object):
+class Document(object):
    """Navigate a Beautiful Soup document."""

    @classmethod
@ -89,37 +87,58 @@ class _DocumentNav(object):
    @staticmethod
    def is_doc(obj):
        """Is `BeautifulSoup` object."""
+
+        import bs4
        return isinstance(obj, bs4.BeautifulSoup)

    @staticmethod
    def is_tag(obj):
        """Is tag."""
+
+        import bs4
        return isinstance(obj, bs4.Tag)

+    @staticmethod
+    def is_comment(obj):
+        """Report whether `obj` is a `bs4.Comment` instance."""
+
+        from bs4 import Comment  # deferred import: bs4 is only loaded when the check runs
+        return isinstance(obj, Comment)
+
    @staticmethod
    def is_declaration(obj):  # pragma: no cover
        """Is declaration."""
+
+        import bs4
        return isinstance(obj, bs4.Declaration)

    @staticmethod
-    def is_cdata(obj):
+    def is_cdata(obj):  # pragma: no cover
        """Is CDATA."""
-        return isinstance(obj, bs4.CData)
+
+        import bs4
+        return isinstance(obj, bs4.CData)

    @staticmethod
    def is_processing_instruction(obj):  # pragma: no cover
        """Is processing instruction."""
+
+        import bs4
        return isinstance(obj, bs4.ProcessingInstruction)

    @staticmethod
    def is_navigable_string(obj):
        """Is navigable string."""
+
+        import bs4
        return isinstance(obj, bs4.NavigableString)

    @staticmethod
    def is_special_string(obj):
        """Is special string."""
-        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
+
+        import bs4
+        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction))

    @classmethod
    def is_content_string(cls, obj):
@ -131,7 +150,7 @@ class _DocumentNav(object):
    def create_fake_parent(el):
        """Create fake parent for a given element."""

-        return _FakeParent(el)
+        return FakeParent(el)

    @staticmethod
    def is_xml_tree(el):
@ -198,13 +217,10 @@ class _DocumentNav(object):
                is_tag = self.is_tag(child)

                if no_iframe and is_tag and self.is_iframe(child):
-                    if child.next_sibling is not None:
-                        next_good = child.next_sibling
-                    else:
-                        last_child = child
-                        while self.is_tag(last_child) and last_child.contents:
-                            last_child = last_child.contents[-1]
-                        next_good = last_child.next_element
+                    last_child = child
+                    while self.is_tag(last_child) and last_child.contents:
+                        last_child = last_child.contents[-1]
+                    next_good = last_child.next_element
                    yield child
                    if next_good is None:
                        break
@ -234,27 +250,21 @@ class _DocumentNav(object):

        return el.prefix

-    @staticmethod
-    def get_uri(el):
-        """Get namespace `URI`."""
-
-        return el.namespace
-
    @classmethod
-    def get_next(cls, el, tags=True):
+    def get_next_tag(cls, el):
        """Get next sibling tag."""

        sibling = el.next_sibling
-        while tags and not cls.is_tag(sibling) and sibling is not None:
+        while not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.next_sibling
        return sibling

    @classmethod
-    def get_previous(cls, el, tags=True):
+    def get_previous_tag(cls, el):
        """Get previous sibling tag."""

        sibling = el.previous_sibling
-        while tags and not cls.is_tag(sibling) and sibling is not None:
+        while not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.previous_sibling
        return sibling

@ -305,7 +315,7 @@ class _DocumentNav(object):
        """Get classes."""

        classes = cls.get_attribute_by_name(el, 'class', [])
-        if isinstance(classes, str):
+        if isinstance(classes, util.ustr):
            classes = RE_NOT_WS.findall(classes)
        return classes

@ -316,11 +326,6 @@ class _DocumentNav(object):
            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
        )

-    def get_own_text(self, el, no_iframe=False):
-        """Get Own Text."""
-
-        return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
-

 class Inputs(object):
    """Class for parsing and validating input items."""
@ -423,7 +428,7 @@ class Inputs(object):
        return parsed


-class _Match(object):
+class CSSMatch(Document, object):
    """Perform CSS matching."""

    def __init__(self, selectors, scope, namespaces, flags):
@ -471,7 +476,7 @@ class _Match(object):

        if self.supports_namespaces():
            namespace = ''
-            ns = self.get_uri(el)
+            ns = el.namespace
            if ns:
                namespace = ns
        else:
@ -531,57 +536,6 @@ class _Match(object):
                    return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
        return None

-    def extended_language_filter(self, lang_range, lang_tag):
-        """Filter the language tags."""
-
-        match = True
-        lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
-        ranges = lang_range.split('-')
-        subtags = lang_tag.lower().split('-')
-        length = len(ranges)
-        rindex = 0
-        sindex = 0
-        r = ranges[rindex]
-        s = subtags[sindex]
-
-        # Primary tag needs to match
-        if r != '*' and r != s:
-            match = False
-
-        rindex += 1
-        sindex += 1
-
-        # Match until we run out of ranges
-        while match and rindex < length:
-            r = ranges[rindex]
-            try:
-                s = subtags[sindex]
-            except IndexError:
-                # Ran out of subtags,
-                # but we still have ranges
-                match = False
-                continue
-
-            # Empty range
-            if not r:
-                match = False
-                continue
-
-            # Matched range
-            elif s == r:
-                rindex += 1
-
-            # Implicit wildcard cannot match
-            # singletons
-            elif len(s) == 1:
-                match = False
-                continue
-
-            # Implicitly matched, so grab next subtag
-            sindex += 1
-
-        return match
-
    def match_attribute_name(self, el, attr, prefix):
        """Match attribute name and return value if it exists."""

@ -706,12 +660,12 @@ class _Match(object):
            if parent:
                found = self.match_selectors(parent, relation)
        elif relation[0].rel_type == REL_SIBLING:
-            sibling = self.get_previous(el)
+            sibling = self.get_previous_tag(el)
            while not found and sibling:
                found = self.match_selectors(sibling, relation)
-                sibling = self.get_previous(sibling)
+                sibling = self.get_previous_tag(sibling)
        elif relation[0].rel_type == REL_CLOSE_SIBLING:
-            sibling = self.get_previous(el)
+            sibling = self.get_previous_tag(el)
            if sibling and self.is_tag(sibling):
                found = self.match_selectors(sibling, relation)
        return found
@ -736,12 +690,12 @@ class _Match(object):
        elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
            found = self.match_future_child(el, relation)
        elif relation[0].rel_type == REL_HAS_SIBLING:
-            sibling = self.get_next(el)
+            sibling = self.get_next_tag(el)
            while not found and sibling:
                found = self.match_selectors(sibling, relation)
-                sibling = self.get_next(sibling)
+                sibling = self.get_next_tag(sibling)
        elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
-            sibling = self.get_next(el)
+            sibling = self.get_next_tag(el)
            if sibling and self.is_tag(sibling):
                found = self.match_selectors(sibling, relation)
        return found
@ -782,28 +736,7 @@ class _Match(object):
    def match_root(self, el):
        """Match element as root."""

-        is_root = self.is_root(el)
-        if is_root:
-            sibling = self.get_previous(el, tags=False)
-            while is_root and sibling is not None:
-                if (
-                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
-                    self.is_cdata(sibling)
-                ):
-                    is_root = False
-                else:
-                    sibling = self.get_previous(sibling, tags=False)
-        if is_root:
-            sibling = self.get_next(el, tags=False)
-            while is_root and sibling is not None:
-                if (
-                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
-                    self.is_cdata(sibling)
-                ):
-                    is_root = False
-                else:
-                    sibling = self.get_next(sibling, tags=False)
-        return is_root
+        return self.is_root(el)

    def match_scope(self, el):
        """Match element as scope."""
@ -948,23 +881,12 @@ class _Match(object):
        content = None
        for contain_list in contains:
            if content is None:
-                if contain_list.own:
-                    content = self.get_own_text(el, no_iframe=self.is_html)
-                else:
-                    content = self.get_text(el, no_iframe=self.is_html)
+                content = self.get_text(el, no_iframe=self.is_html)
            found = False
            for text in contain_list.text:
-                if contain_list.own:
-                    for c in content:
-                        if text in c:
-                            found = True
-                            break
-                    if found:
-                        break
-                else:
-                    if text in content:
-                        found = True
-                        break
+                if text in content:
+                    found = True
+                    break
            if not found:
                match = False
        return match
@ -1148,7 +1070,7 @@ class _Match(object):
            for patterns in langs:
                match = False
                for pattern in patterns:
-                    if self.extended_language_filter(pattern, found_lang):
+                    if pattern.match(found_lang):
                        match = True
                if not match:
                    break
@ -1230,7 +1152,7 @@ class _Match(object):

        out_of_range = False

-        itype = util.lower(self.get_attribute_by_name(el, 'type'))
+        itype = self.get_attribute_by_name(el, 'type').lower()
        mn = self.get_attribute_by_name(el, 'min', None)
        if mn is not None:
            mn = Inputs.parse_value(itype, mn)
@ -1285,21 +1207,6 @@ class _Match(object):
            self.get_prefix(el) is not None
        )

-    def match_placeholder_shown(self, el):
-        """
-        Match placeholder shown according to HTML spec.
-
-        - text area should be checked if they have content. A single newline does not count as content.
-
-        """
-
-        match = False
-        content = self.get_text(el)
-        if content in ('', '\n'):
-            match = True
-
-        return match
-
    def match_selectors(self, el, selectors):
        """Check if element matches one of the selectors."""

@ -1332,9 +1239,6 @@ class _Match(object):
                # Verify element is scope
                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
                    continue
-                # Verify element has placeholder shown
-                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
-                    continue
                # Verify `nth` matches
                if not self.match_nth(el, selector.nth):
                    continue
@ -1421,8 +1325,28 @@ class _Match(object):
        return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)


-class CSSMatch(_DocumentNav, _Match):
-    """The Beautiful Soup CSS match class."""
+class CommentsMatch(Document, object):
+    """Collect the comment nodes found beneath an element."""
+
+    def __init__(self, el):
+        """Run `assert_valid_input` on `el` and keep it as `self.tag`."""
+
+        self.assert_valid_input(el)
+        self.tag = el
+
+    def get_comments(self, limit=0):
+        """Yield comments among the descendants of `self.tag`; a `limit` below 1 means no cap."""
+
+        remaining = None if limit < 1 else limit
+
+        for node in self.get_descendants(self.tag, tags=False):
+            if not self.is_comment(node):
+                continue
+            yield node
+            if remaining is not None:
+                remaining -= 1
+                if remaining < 1:
+                    break


 class SoupSieve(ct.Immutable):
@ -1468,6 +1392,19 @@ class SoupSieve(ct.Immutable):
        else:
            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]

+    @util.deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
+    def comments(self, tag, limit=0):
+        """Return the comments beneath `tag` as a list."""
+
+        return list(CommentsMatch(tag).get_comments(limit))
+
+    @util.deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
+    def icomments(self, tag, limit=0):
+        """Lazily yield the comments beneath `tag`."""
+
+        for node in CommentsMatch(tag).get_comments(limit):
+            yield node
+
    def select_one(self, tag):
        """Select a single tag."""

--- a/included_dependencies/soupsieve/css_parser.py
+++ b/included_dependencies/soupsieve/css_parser.py
@ -1,11 +1,10 @@
 """CSS selector parser."""
+from __future__ import unicode_literals
 import re
-from functools import lru_cache
 from . import util
 from . import css_match as cm
 from . import css_types as ct
 from .util import SelectorSyntaxError
-import warnings

 UNICODE_REPLACEMENT_CHAR = 0xFFFD

@ -60,8 +59,6 @@ PSEUDO_SIMPLE_NO_MATCH = {
 # Complex pseudo classes that take selector lists
 PSEUDO_COMPLEX = {
    ':contains',
-    ':-soup-contains',
-    ':-soup-contains-own',
    ':has',
    ':is',
    ':matches',
@ -113,6 +110,11 @@ VALUE = r'''
 ATTR = r'''
 (?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
 '''.format(ws=WSC, value=VALUE)
+# Definitions for quirks mode
+QUIRKS_ATTR_IDENTIFIER = r'(?:(?:{esc}|(?!/\*)[^"\] \t\r\n\f])+?)'.format(esc=CSS_ESCAPES)
+QUIRKS_ATTR = r'''
+(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
+'''.format(ws=WSC, value=QUIRKS_ATTR_IDENTIFIER)

 # Selector patterns
 # IDs (`#id`)
@ -120,11 +122,13 @@ PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
 # Classes (`.class`)
 PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
 # Prefix:Tag (`prefix|tag`)
-PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
+PAT_TAG = r'(?:(?:{ident}|\*)?\|)?(?:{ident}|\*)'.format(ident=IDENTIFIER)
 # Attributes (`[attr]`, `[attr=value]`, etc.)
-PAT_ATTR = r'''
-\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
-'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
+PAT_ATTR = r'\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}'.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
+# Quirks attributes, like real attributes, but unquoted values can contain anything but whitespace and closing `]`.
+PAT_QUIRKS_ATTR = r'''
+\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}
+'''.format(ws=WSC, ident=IDENTIFIER, attr=QUIRKS_ATTR)
 # Pseudo class (`:pseudo-class`, `:pseudo-class(`)
 PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
 # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
@ -195,13 +199,12 @@ FLG_INDETERMINATE = 0x20
 FLG_OPEN = 0x40
 FLG_IN_RANGE = 0x80
 FLG_OUT_OF_RANGE = 0x100
-FLG_PLACEHOLDER_SHOWN = 0x200

 # Maximum cached patterns to store
 _MAXCACHE = 500


-@lru_cache(maxsize=_MAXCACHE)
+@util.lru_cache(maxsize=_MAXCACHE)
 def _cached_css_compile(pattern, namespaces, custom, flags):
    """Cached CSS compile."""

@ -250,7 +253,7 @@ def css_unescape(content, string=False):
            codepoint = int(m.group(1)[1:], 16)
            if codepoint == 0:
                codepoint = UNICODE_REPLACEMENT_CHAR
-            value = chr(codepoint)
+            value = util.uchr(codepoint)
        elif m.group(2):
            value = m.group(2)[1:]
        elif m.group(3):
@ -274,7 +277,7 @@ def escape(ident):
        string.append('\\{}'.format(ident))
    else:
        for index, c in enumerate(ident):
-            codepoint = ord(c)
+            codepoint = util.uord(c)
            if codepoint == 0x00:
                string.append('\ufffd')
            elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
@ -305,7 +308,12 @@ class SelectorPattern(object):

        return self.name

-    def match(self, selector, index, flags):
+    def enabled(self, flags):
+        """Report this pattern as enabled regardless of `flags` (always `True`)."""
+
+        return True
+
+    def match(self, selector, index):
        """Match the selector."""

        return self.re_pattern.match(selector, index)
@ -320,7 +328,7 @@ class SpecialPseudoPattern(SelectorPattern):
        self.patterns = {}
        for p in patterns:
            name = p[0]
-            pattern = p[3](name, p[2])
+            pattern = SelectorPattern(name, p[2])
            for pseudo in p[1]:
                self.patterns[pseudo] = pattern

@ -332,7 +340,12 @@ class SpecialPseudoPattern(SelectorPattern):

        return self.matched_name.get_name()

-    def match(self, selector, index, flags):
+    def enabled(self, flags):
+        """Report this pattern as enabled regardless of `flags` (always `True`)."""
+
+        return True
+
+    def match(self, selector, index):
        """Match the selector."""

        pseudo = None
@ -341,13 +354,22 @@ class SpecialPseudoPattern(SelectorPattern):
            name = util.lower(css_unescape(m.group('name')))
            pattern = self.patterns.get(name)
            if pattern:
-                pseudo = pattern.match(selector, index, flags)
+                pseudo = pattern.match(selector, index)
                if pseudo:
                    self.matched_name = pattern

        return pseudo


+class QuirkPattern(SelectorPattern):
+    """Selector pattern whose `enabled` check depends on the quirks flag."""
+
+    def enabled(self, flags):
+        """Return `flags & util._QUIRKS`, which is truthy only when the quirks bit is set."""
+
+        return flags & util._QUIRKS
+
+
 class _Selector(object):
    """
    Intermediate selector class.
@ -424,16 +446,11 @@ class CSSParser(object):
        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
        SpecialPseudoPattern(
            (
-                (
-                    "pseudo_contains",
-                    (':contains', ':-soup-contains', ':-soup-contains-own'),
-                    PAT_PSEUDO_CONTAINS,
-                    SelectorPattern
-                ),
-                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
-                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
-                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
-                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
+                ("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS),
+                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD),
+                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE),
+                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG),
+                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR)
            )
        ),
        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
@ -444,6 +461,7 @@ class CSSParser(object):
        SelectorPattern("class", PAT_CLASS),
        SelectorPattern("tag", PAT_TAG),
        SelectorPattern("attribute", PAT_ATTR),
+        QuirkPattern("quirks_attribute", PAT_QUIRKS_ATTR),
        SelectorPattern("combine", PAT_COMBINE)
    )

@ -453,19 +471,24 @@ class CSSParser(object):
        self.pattern = selector.replace('\x00', '\ufffd')
        self.flags = flags
        self.debug = self.flags & util.DEBUG
+        self.quirks = self.flags & util._QUIRKS
        self.custom = {} if custom is None else custom

-    def parse_attribute_selector(self, sel, m, has_selector):
+    def parse_attribute_selector(self, sel, m, has_selector, quirks):
        """Create attribute selector from the returned regex match."""

        inverse = False
        op = m.group('cmp')
        case = util.lower(m.group('case')) if m.group('case') else None
-        ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
-        attr = css_unescape(m.group('attr_name'))
+        parts = [css_unescape(a) for a in m.group('ns_attr').split('|')]
+        ns = ''
        is_type = False
        pattern2 = None
-
+        if len(parts) > 1:
+            ns = parts[0]
+            attr = parts[1]
+        else:
+            attr = parts[0]
        if case:
            flags = re.I if case == 'i' else 0
        elif util.lower(attr) == 'type':
@ -475,7 +498,7 @@ class CSSParser(object):
            flags = 0

        if op:
-            if m.group('value').startswith(('"', "'")):
+            if m.group('value').startswith(('"', "'")) and not quirks:
                value = css_unescape(m.group('value')[1:-1], True)
            else:
                value = css_unescape(m.group('value'))
@ -502,12 +525,13 @@ class CSSParser(object):
        elif op.startswith('|'):
            # Value starts with word in dash separated list
            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
+        elif op.startswith('!'):
+            # Equivalent to `:not([attr=value])`
+            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
+            inverse = True
        else:
            # Value matches
            pattern = re.compile(r'^%s$' % re.escape(value), flags)
-            if op.startswith('!'):
-                # Equivalent to `:not([attr=value])`
-                inverse = True
        if is_type and pattern:
            pattern2 = re.compile(pattern.pattern)

@ -528,8 +552,13 @@ class CSSParser(object):
    def parse_tag_pattern(self, sel, m, has_selector):
        """Parse tag pattern from regex match."""

-        prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
-        tag = css_unescape(m.group('tag_name'))
+        parts = [css_unescape(x) for x in m.group(0).split('|')]
+        if len(parts) > 1:
+            prefix = parts[0]
+            tag = parts[1]
+        else:
+            tag = parts[0]
+            prefix = None
        sel.tag = ct.SelectorTag(tag, prefix)
        has_selector = True
        return has_selector
@ -771,11 +800,21 @@ class CSSParser(object):
        if not combinator:
            combinator = WS_COMBINATOR
        if not has_selector:
-            raise SelectorSyntaxError(
-                "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
+            # The only way we don't fail is if we are at the root level and quirks mode is enabled,
+            # and we've found no other selectors yet in this compound selector.
+            if (not self.quirks or is_pseudo or combinator == COMMA_COMBINATOR or relations):
+                raise SelectorSyntaxError(
+                    "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
+                    self.pattern,
+                    index
+                )
+            util.warn_quirks(
+                'You have attempted to use a combinator without a selector before it at position {}.'.format(index),
+                'the :scope pseudo class (or another appropriate selector) should be placed before the combinator.',
                self.pattern,
                index
            )
+            sel.flags |= ct.SEL_SCOPE

        if combinator == COMMA_COMBINATOR:
            if not sel.tag and not is_pseudo:
@ -808,14 +847,7 @@ class CSSParser(object):
    def parse_pseudo_contains(self, sel, m, has_selector):
        """Parse contains."""

-        pseudo = util.lower(css_unescape(m.group('name')))
-        if pseudo == ":contains":
-            warnings.warn(
-                "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
-                FutureWarning
-            )
-        contains_own = pseudo == ":-soup-contains-own"
-        values = css_unescape(m.group('values'))
+        values = m.group('values')
        patterns = []
        for token in RE_VALUES.finditer(values):
            if token.group('split'):
@ -826,7 +858,7 @@ class CSSParser(object):
            else:
                value = css_unescape(value)
            patterns.append(value)
-        sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own))
+        sel.contains.append(ct.SelectorContains(tuple(patterns)))
        has_selector = True
        return has_selector

@ -840,12 +872,20 @@ class CSSParser(object):
                continue
            value = token.group('value')
            if value.startswith(('"', "'")):
-                value = css_unescape(value[1:-1], True)
+                parts = css_unescape(value[1:-1], True).split('-')
            else:
-                value = css_unescape(value)
-
-            patterns.append(value)
+                parts = css_unescape(value).split('-')

+            new_parts = []
+            first = True
+            for part in parts:
+                if part == '*' and first:
+                    new_parts.append('(?!x\b)[a-z0-9]+?')
+                elif part != '*':
+                    new_parts.append(('' if first else '(-(?!x\b)[a-z0-9]+)*?\\-') + re.escape(part))
+                if first:
+                    first = False
+            patterns.append(re.compile(r'^{}(?:-.*)?$'.format(''.join(new_parts)), re.I))
        sel.lang.append(ct.SelectorLang(patterns))
        has_selector = True

@ -877,7 +917,6 @@ class CSSParser(object):
        is_indeterminate = bool(flags & FLG_INDETERMINATE)
        is_in_range = bool(flags & FLG_IN_RANGE)
        is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
-        is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)

        if self.debug:  # pragma: no cover
            if is_pseudo:
@ -898,8 +937,6 @@ class CSSParser(object):
                print('    is_in_range: True')
            if is_out_of_range:
                print('    is_out_of_range: True')
-            if is_placeholder_shown:
-                print('    is_placeholder_shown: True')

        if is_relative:
            selectors.append(_Selector())
@ -916,7 +953,7 @@ class CSSParser(object):
                elif key == 'pseudo_class':
                    has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
                elif key == 'pseudo_element':
-                    raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
+                    raise NotImplementedError("Psuedo-element found at position {}".format(m.start(0)))
                elif key == 'pseudo_contains':
                    has_selector = self.parse_pseudo_contains(sel, m, has_selector)
                elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -952,8 +989,18 @@ class CSSParser(object):
                        has_selector, sel = self.parse_combinator(
                            sel, m, has_selector, selectors, relations, is_pseudo, index
                        )
-                elif key == 'attribute':
-                    has_selector = self.parse_attribute_selector(sel, m, has_selector)
+                elif key in ('attribute', 'quirks_attribute'):
+                    quirks = key == 'quirks_attribute'
+                    if quirks:
+                        temp_index = index + m.group(0).find('=') + 1
+                        util.warn_quirks(
+                            "You have attempted to use an attribute " +
+                            "value that should have been quoted at position {}.".format(temp_index),
+                            "the attribute value should be quoted.",
+                            self.pattern,
+                            temp_index
+                        )
+                    has_selector = self.parse_attribute_selector(sel, m, has_selector, quirks)
                elif key == 'tag':
                    if has_selector:
                        raise SelectorSyntaxError(
@ -1006,8 +1053,6 @@ class CSSParser(object):
            selectors[-1].flags = ct.SEL_IN_RANGE
        if is_out_of_range:
            selectors[-1].flags = ct.SEL_OUT_OF_RANGE
-        if is_placeholder_shown:
-            selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

        return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)

@ -1021,11 +1066,15 @@ class CSSParser(object):
        end = (m.start(0) - 1) if m else (len(pattern) - 1)

        if self.debug:  # pragma: no cover
+            if self.quirks:
+                print('## QUIRKS MODE: Throwing out the spec!')
            print('## PARSING: {!r}'.format(pattern))
        while index <= end:
            m = None
            for v in self.css_tokens:
-                m = v.match(pattern, index, self.flags)
+                if not v.enabled(self.flags):  # pragma: no cover
+                    continue
+                m = v.match(pattern, index)
                if m:
                    name = v.get_name()
                    if self.debug:  # pragma: no cover
@ -1053,7 +1102,13 @@ class CSSParser(object):
            print('## END PARSING')

    def process_selectors(self, index=0, flags=0):
-        """Process selectors."""
+        """
+        Process selectors.
+
+        Tokenize `self.pattern` with `selector_iter` and pass the tokens,
+        together with `index` and `flags`, to `parse_selectors`; whatever
+        `parse_selectors` returns is returned unchanged.
+        """

        return self.parse_selectors(self.selector_iter(self.pattern), index, flags)

@ -1068,7 +1123,8 @@ CSS_LINK = CSSParser(
 # CSS pattern for `:checked`
 CSS_CHECKED = CSSParser(
    '''
-    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
+    html|*:is(input[type=checkbox], input[type=radio])[checked],
+    html|select > html|option[selected]
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:default` (must compile CSS_CHECKED first)
@ -1094,23 +1150,23 @@ CSS_INDETERMINATE = CSSParser(
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
-    html|input[type="radio"][name]:not([name='']):not([checked])
+    html|input[type="radio"][name][name!='']:not([checked])
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
 # CSS pattern for `:disabled`
 CSS_DISABLED = CSSParser(
    '''
-    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
+    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
-    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
+    html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
    html|fieldset[disabled] >
-        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
+        html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:enabled`
 CSS_ENABLED = CSSParser(
    '''
-    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
+    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:required`
@ -1124,20 +1180,22 @@ CSS_OPTIONAL = CSSParser(
 # CSS pattern for `:placeholder-shown`
 CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
-    html|input:is(
-        :not([type]),
-        [type=""],
-        [type=text],
-        [type=search],
-        [type=url],
-        [type=tel],
-        [type=email],
-        [type=password],
-        [type=number]
-    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
-    html|textarea[placeholder]:not([placeholder=''])
+    html|*:is(
+        input:is(
+            :not([type]),
+            [type=""],
+            [type=text],
+            [type=search],
+            [type=url],
+            [type=tel],
+            [type=email],
+            [type=password],
+            [type=number]
+        ),
+        textarea
+    )[placeholder][placeholder!='']
    '''
-).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern default for `:nth-child` "of S" feature
 CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
--- a/included_dependencies/soupsieve/css_types.py
+++ b/included_dependencies/soupsieve/css_types.py
@ -1,6 +1,6 @@
 """CSS selector structure items."""
-import copyreg
-from collections.abc import Hashable, Mapping
+from __future__ import unicode_literals
+from . import util

 __all__ = (
    'Selector',
@ -26,7 +26,6 @@ SEL_DIR_RTL = 0x40
 SEL_IN_RANGE = 0x80
 SEL_OUT_OF_RANGE = 0x100
 SEL_DEFINED = 0x200
-SEL_PLACEHOLDER_SHOWN = 0x400


 class Immutable(object):
@ -86,7 +85,7 @@ class Immutable(object):
    __str__ = __repr__


-class ImmutableDict(Mapping):
+class ImmutableDict(util.Mapping):
    """Hashable, immutable dictionary."""

    def __init__(self, *args, **kwargs):
@ -95,8 +94,8 @@ class ImmutableDict(Mapping):
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
        if (
-            is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
-            not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
+            is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or
+            not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg])
        ):
            raise TypeError('All values must be hashable')

@ -141,9 +140,9 @@ class Namespaces(ImmutableDict):
        # so don't bother checking that.
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
-        if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
+        if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
            raise TypeError('Namespace keys and values must be Unicode strings')
-        elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
+        elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
            raise TypeError('Namespace keys and values must be Unicode strings')

        super(Namespaces, self).__init__(*args, **kwargs)
@ -160,9 +159,9 @@ class CustomSelectors(ImmutableDict):
        # so don't bother checking that.
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
-        if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
+        if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')
-        elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
+        elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')

        super(CustomSelectors, self).__init__(*args, **kwargs)
@ -239,14 +238,13 @@ class SelectorAttribute(Immutable):
 class SelectorContains(Immutable):
    """Selector contains rule."""

-    __slots__ = ("text", "own", "_hash")
+    __slots__ = ("text", "_hash")

-    def __init__(self, text, own):
+    def __init__(self, text):
        """Initialize."""

        super(SelectorContains, self).__init__(
-            text=text,
-            own=own
+            text=text
        )


@ -333,7 +331,7 @@ def _pickle(p):
 def pickle_register(obj):
    """Allow object to be pickled."""

-    copyreg.pickle(obj, _pickle)
+    util.copyreg.pickle(obj, _pickle)


 pickle_register(Selector)
--- a/included_dependencies/soupsieve/util.py
+++ b/included_dependencies/soupsieve/util.py
@ -1,17 +1,47 @@
 """Utility."""
-from functools import wraps, lru_cache
+from __future__ import unicode_literals
+from functools import wraps
 import warnings
+import sys
+import struct
+import os
 import re
+MODULE = os.path.dirname(__file__)
+
+PY3 = sys.version_info >= (3, 0)
+PY35 = sys.version_info >= (3, 5)
+PY37 = sys.version_info >= (3, 7)
+
+if PY3:
+    from functools import lru_cache  # noqa F401
+    import copyreg  # noqa F401
+    from collections.abc import Hashable, Mapping  # noqa F401
+
+    ustr = str
+    bstr = bytes
+    unichar = chr
+    string = str
+else:
+    from backports.functools_lru_cache import lru_cache  # noqa F401
+    import copy_reg as copyreg  # noqa F401
+    from collections import Hashable, Mapping  # noqa F401
+
+    ustr = unicode  # noqa: F821
+    bstr = str
+    unichar = unichr  # noqa: F821
+    string = basestring  # noqa: F821

 DEBUG = 0x00001
+_QUIRKS = 0x10000

 RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')

+LC_A = ord('a')
+LC_Z = ord('z')
 UC_A = ord('A')
 UC_Z = ord('Z')


-@lru_cache(maxsize=512)
 def lower(string):
    """Lower."""

@ -22,7 +52,38 @@ def lower(string):
    return ''.join(new_string)


-class SelectorSyntaxError(Exception):
+def upper(string):  # pragma: no cover
+    """Upper: convert ASCII `a`-`z` to upper case and leave every other character unchanged."""
+
+    new_string = []
+    for c in string:
+        o = ord(c)
+        new_string.append(chr(o - 32) if LC_A <= o <= LC_Z else c)
+    return ''.join(new_string)
+
+
+def uchr(i):
+    """Return the Unicode character for code point `i`, including on narrow Python builds."""
+
+    try:
+        return unichar(i)
+    except ValueError:  # pragma: no cover
+        return struct.pack('i', i).decode('utf-32')  # fallback: decode the packed int as UTF-32
+
+
+def uord(c):
+    """Get the Unicode ordinal of `c`; a two-unit surrogate pair is combined into one code point."""
+
+    if len(c) == 2:  # pragma: no cover
+        high, low = [ord(p) for p in c]
+        ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
+    else:
+        ordinal = ord(c)
+
+    return ordinal
+
+
+class SelectorSyntaxError(SyntaxError):
    """Syntax error in a CSS selector."""

    def __init__(self, msg, pattern=None, index=None):
@ -108,3 +169,45 @@ def get_pattern_context(pattern, index):
        last = m.end(0)

    return ''.join(text), line, col
+
+
+class QuirksWarning(UserWarning):  # pragma: no cover
+    """Warning category that `warn_quirks` passes to `warnings.warn_explicit` (quirks mode)."""
+
+
+def warn_quirks(message, recommend, pattern, index):
+    """Emit a `QuirksWarning` about `pattern` at `index`, attributed to the caller's source line."""
+
+    import traceback
+    import bs4  # noqa: F401
+
+    # Find the last stack frame before execution entered soupsieve or bs4 (the user's call site)
+    paths = (MODULE, sys.modules['bs4'].__path__[0])
+    tb = traceback.extract_stack()
+    previous = None
+    filename = None
+    lineno = None
+    for entry in tb:
+        if (PY35 and entry.filename.startswith(paths)) or (not PY35 and entry[0].startswith(paths)):
+            break
+        previous = entry
+    if previous:
+        filename = previous.filename if PY35 else previous[0]
+        lineno = previous.lineno if PY35 else previous[1]
+
+    # Format pattern to show line and column position
+    context, line = get_pattern_context(pattern, index)[0:2]
+
+    # Display warning
+    warnings.warn_explicit(
+        "\nCSS selector pattern:\n" +
+        "    {}\n".format(message) +
+        "    This behavior is only allowed temporarily for Beautiful Soup's transition to Soup Sieve.\n" +
+        "    In order to conform to the CSS spec, {}\n".format(recommend) +
+        "    It is strongly recommended the selector be altered to conform to the CSS spec " +
+        "as an exception will be raised for this case in the future.\n" +
+        "pattern line {}:\n{}".format(line, context),
+        QuirksWarning,
+        filename,
+        lineno
+    )