Update old included_dependencies to current versions.

2025-12-06 08:52:55 +01:00 · 2020-12-22 13:29:20 -06:00 · 2020-12-22 13:29:20 -06:00 · 7b951d7f4d
commit 7b951d7f4d
parent d33decd8f5
23 changed files with 33216 additions and 1655 deletions
--- a/included_dependencies/chardet/init.py
+++ b/included_dependencies/chardet/init.py
@ -16,11 +16,14 @@
 ######################### END LICENSE BLOCK #########################


-from .compat import PY2, PY3
 from .universaldetector import UniversalDetector
+from .enums import InputState
 from .version import __version__, VERSION


+__all__ = ['UniversalDetector', 'detect', 'detect_all', '__version__', 'VERSION']
+
+
 def detect(byte_str):
    """
    Detect the encoding of the given byte string.
@ -31,9 +34,50 @@ def detect(byte_str):
    if not isinstance(byte_str, bytearray):
        if not isinstance(byte_str, bytes):
            raise TypeError('Expected object of type bytes or bytearray, got: '
-                            '{0}'.format(type(byte_str)))
+                            '{}'.format(type(byte_str)))
        else:
            byte_str = bytearray(byte_str)
    detector = UniversalDetector()
    detector.feed(byte_str)
    return detector.close()
+
+
+def detect_all(byte_str):
+    """
+    Detect all the possible encodings of the given byte string.
+
+    :param byte_str:     The byte sequence to examine.
+    :type byte_str:      ``bytes`` or ``bytearray``
+    :returns:            A list of dicts with ``encoding``, ``confidence``
+                         and ``language`` keys, one per prober whose
+                         confidence exceeds ``MINIMUM_THRESHOLD``, ordered
+                         from highest to lowest confidence. If the input
+                         state is not ``InputState.HIGH_BYTE`` or no prober
+                         qualifies, a one-element list holding
+                         ``detector.result`` is returned instead.
+    :raises TypeError:   If ``byte_str`` is neither ``bytes`` nor
+                         ``bytearray``.
+    """
+    # Accept bytes or bytearray only; bytes are copied into a bytearray
+    # before being fed to the detector.
+    if not isinstance(byte_str, bytearray):
+        if not isinstance(byte_str, bytes):
+            raise TypeError('Expected object of type bytes or bytearray, got: '
+                            '{}'.format(type(byte_str)))
+        else:
+            byte_str = bytearray(byte_str)
+
+    detector = UniversalDetector()
+    detector.feed(byte_str)
+    # The return value of close() is not used here; detector.result is read
+    # at the end of the function instead.
+    detector.close()
+
+    # NOTE: this branch reads private UniversalDetector attributes
+    # (_input_state, _charset_probers, _has_win_bytes).
+    if detector._input_state == InputState.HIGH_BYTE:
+        results = []
+        for prober in detector._charset_probers:
+            if prober.get_confidence() > detector.MINIMUM_THRESHOLD:
+                charset_name = prober.charset_name
+                lower_charset_name = prober.charset_name.lower()
+                # Use Windows encoding name instead of ISO-8859 if we saw any
+                # extra Windows-specific bytes
+                if lower_charset_name.startswith('iso-8859'):
+                    if detector._has_win_bytes:
+                        # Fall back to the prober's own name when the map has
+                        # no entry for this ISO-8859 variant.
+                        charset_name = detector.ISO_WIN_MAP.get(lower_charset_name,
+                                                            charset_name)
+                results.append({
+                    'encoding': charset_name,
+                    'confidence': prober.get_confidence(),
+                    'language': prober.language,
+                })
+        if len(results) > 0:
+            # Sorting on the negated confidence puts the best guess first.
+            return sorted(results, key=lambda result: -result['confidence'])
+
+    # Nothing qualified above: wrap the detector's single result in a list so
+    # the return type is always a list.
+    return [detector.result]
--- a/included_dependencies/chardet/charsetgroupprober.py
+++ b/included_dependencies/chardet/charsetgroupprober.py
@ -73,6 +73,7 @@ class CharSetGroupProber(CharSetProber):
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
+                self._state = ProbingState.FOUND_IT
                return self.state
            elif state == ProbingState.NOT_ME:
                prober.active = False
--- a/included_dependencies/chardet/cli/chardetect.py
+++ b/included_dependencies/chardet/cli/chardetect.py
@ -1,4 +1,3 @@
-#!/usr/bin/env python
 """
 Script which takes one or more file paths and reports on their detected
 encodings
@ -45,10 +44,10 @@ def description_of(lines, name='stdin'):
    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')
    if result['encoding']:
-        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
+        return '{}: {} with confidence {}'.format(name, result['encoding'],
                                                     result['confidence'])
    else:
-        return '{0}: no result'.format(name)
+        return '{}: no result'.format(name)


 def main(argv=None):
@ -69,7 +68,7 @@ def main(argv=None):
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin if PY2 else sys.stdin.buffer])
    parser.add_argument('--version', action='version',
-                        version='%(prog)s {0}'.format(__version__))
+                        version='%(prog)s {}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
--- a/included_dependencies/chardet/compat.py
+++ b/included_dependencies/chardet/compat.py
@ -25,10 +25,12 @@ import sys
 if sys.version_info < (3, 0):
    PY2 = True
    PY3 = False
-    base_str = (str, unicode)
+    string_types = (str, unicode)
    text_type = unicode
+    iteritems = dict.iteritems
 else:
    PY2 = False
    PY3 = True
-    base_str = (bytes, str)
+    string_types = (bytes, str)
    text_type = str
+    iteritems = dict.items
--- a/included_dependencies/chardet/langbulgarianmodel.py
+++ b/included_dependencies/chardet/langbulgarianmodel.py
--- a/included_dependencies/chardet/langgreekmodel.py
+++ b/included_dependencies/chardet/langgreekmodel.py
--- a/included_dependencies/chardet/langhebrewmodel.py
+++ b/included_dependencies/chardet/langhebrewmodel.py
--- a/included_dependencies/chardet/langhungarianmodel.py
+++ b/included_dependencies/chardet/langhungarianmodel.py
--- a/included_dependencies/chardet/langrussianmodel.py
+++ b/included_dependencies/chardet/langrussianmodel.py
--- a/included_dependencies/chardet/langthaimodel.py
+++ b/included_dependencies/chardet/langthaimodel.py
--- a/included_dependencies/chardet/langturkishmodel.py
+++ b/included_dependencies/chardet/langturkishmodel.py
--- a/included_dependencies/chardet/metadata/init.py
+++ b/included_dependencies/chardet/metadata/init.py
--- a/included_dependencies/chardet/metadata/languages.py
+++ b/included_dependencies/chardet/metadata/languages.py
@ -0,0 +1,310 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Metadata about languages used by our model training code for our
+SingleByteCharSetProbers.  Could be used for other things in the future.
+
+This code is based on the language metadata from the uchardet project.
+"""
+from __future__ import absolute_import, print_function
+
+from string import ascii_letters
+
+
+# TODO: Add Ukrainian (KOI8-U)
+
+class Language(object):
+    """Metadata about a language useful for training models
+
+    :ivar name: The human name for the language, in English.
+    :type name: str
+    :ivar iso_code: 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise,
+                    or use another catalog as a last resort.
+    :type iso_code: str
+    :ivar use_ascii: Whether or not ASCII letters should be included in trained
+                     models.
+    :type use_ascii: bool
+    :ivar charsets: The charsets we want to support and create data for.
+    :type charsets: list of str
+    :ivar alphabet: The characters in the language's alphabet. If `use_ascii` is
+                    `True`, you only need to add those not in the ASCII set.
+    :type alphabet: str
+    :ivar wiki_start_pages: The Wikipedia pages to start from if we're crawling
+                            Wikipedia for training data.
+    :type wiki_start_pages: list of str
+    """
+    def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None,
+                 alphabet=None, wiki_start_pages=None):
+        """Store the metadata and normalise ``alphabet``.
+
+        When ``use_ascii`` is true the ASCII letters are appended to (or used
+        as) the alphabet. The stored alphabet is the sorted set of its unique
+        characters joined back into one string.
+
+        :raises ValueError: If ``use_ascii`` is false and no ``alphabet`` is
+                            supplied.
+        """
+        super(Language, self).__init__()
+        self.name = name
+        self.iso_code = iso_code
+        self.use_ascii = use_ascii
+        self.charsets = charsets
+        if self.use_ascii:
+            if alphabet:
+                alphabet += ascii_letters
+            else:
+                alphabet = ascii_letters
+        elif not alphabet:
+            raise ValueError('Must supply alphabet if use_ascii is False')
+        # Deduplicate and sort so repeated characters in the input do not
+        # matter. At this point alphabet is always non-empty (ASCII letters
+        # were added or ValueError was raised), so the None branch is
+        # defensive only.
+        self.alphabet = ''.join(sorted(set(alphabet))) if alphabet else None
+        self.wiki_start_pages = wiki_start_pages
+
+    def __repr__(self):
+        """Return ``ClassName(attr=value, ...)`` for every instance attribute
+        whose name does not start with an underscore."""
+        return '{}({})'.format(self.__class__.__name__,
+                               ', '.join('{}={!r}'.format(k, v)
+                                         for k, v in self.__dict__.items()
+                                         if not k.startswith('_')))
+
+
+LANGUAGES = {'Arabic': Language(name='Arabic',
+                                iso_code='ar',
+                                use_ascii=False,
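+                                # We only support encodings that use isolated
+                                # forms, because the current recommendation is
+                                # that the rendering system handles presentation
+                                # forms. This means we purposefully skip IBM864.
+                                # NOTE(review): 'CP864' in the charsets list
+                                # below is the same code page as IBM864, which
+                                # contradicts the sentence above -- confirm
+                                # which one is intended.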
+                                charsets=['ISO-8859-6', 'WINDOWS-1256',
+                                          'CP720', 'CP864'],
+                                alphabet=u'ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ',
+                                wiki_start_pages=[u'الصفحة_الرئيسية']),
+             'Belarusian': Language(name='Belarusian',
+                                    iso_code='be',
+                                    use_ascii=False,
+                                    charsets=['ISO-8859-5', 'WINDOWS-1251',
+                                              'IBM866', 'MacCyrillic'],
+                                    alphabet=(u'АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ'
+                                              u'абвгдеёжзійклмнопрстуўфхцчшыьэюяʼ'),
+                                    wiki_start_pages=[u'Галоўная_старонка']),
+             'Bulgarian': Language(name='Bulgarian',
+                                   iso_code='bg',
+                                   use_ascii=False,
+                                   charsets=['ISO-8859-5', 'WINDOWS-1251',
+                                             'IBM855'],
+                                   alphabet=(u'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯ'
+                                             u'абвгдежзийклмнопрстуфхцчшщъьюя'),
+                                   wiki_start_pages=[u'Начална_страница']),
+             'Czech': Language(name='Czech',
+                               iso_code='cz',
+                               use_ascii=True,
+                               charsets=['ISO-8859-2', 'WINDOWS-1250'],
+                               alphabet=u'áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ',
+                               wiki_start_pages=[u'Hlavní_strana']),
+             'Danish': Language(name='Danish',
+                                iso_code='da',
+                                use_ascii=True,
+                                charsets=['ISO-8859-1', 'ISO-8859-15',
+                                          'WINDOWS-1252'],
+                                alphabet=u'æøåÆØÅ',
+                                wiki_start_pages=[u'Forside']),
+             'German': Language(name='German',
+                                iso_code='de',
+                                use_ascii=True,
+                                charsets=['ISO-8859-1', 'WINDOWS-1252'],
+                                alphabet=u'äöüßÄÖÜ',
+                                wiki_start_pages=[u'Wikipedia:Hauptseite']),
+             'Greek': Language(name='Greek',
+                               iso_code='el',
+                               use_ascii=False,
+                               charsets=['ISO-8859-7', 'WINDOWS-1253'],
+                               alphabet=(u'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ'
+                                         u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ'),
+                               wiki_start_pages=[u'Πύλη:Κύρια']),
+             'English': Language(name='English',
+                                 iso_code='en',
+                                 use_ascii=True,
+                                 charsets=['ISO-8859-1', 'WINDOWS-1252'],
+                                 wiki_start_pages=[u'Main_Page']),
+             'Esperanto': Language(name='Esperanto',
+                                   iso_code='eo',
+                                   # Q, W, X, and Y not used at all
+                                   use_ascii=False,
+                                   charsets=['ISO-8859-3'],
+                                   alphabet=(u'abcĉdefgĝhĥijĵklmnoprsŝtuŭvz'
+                                             u'ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ'),
+                                   wiki_start_pages=[u'Vikipedio:Ĉefpaĝo']),
+             'Spanish': Language(name='Spanish',
+                                 iso_code='es',
+                                 use_ascii=True,
+                                 charsets=['ISO-8859-1', 'ISO-8859-15',
+                                           'WINDOWS-1252'],
+                                 alphabet=u'ñáéíóúüÑÁÉÍÓÚÜ',
+                                 wiki_start_pages=[u'Wikipedia:Portada']),
+             'Estonian': Language(name='Estonian',
+                                  iso_code='et',
+                                  use_ascii=False,
+                                  charsets=['ISO-8859-4', 'ISO-8859-13',
+                                            'WINDOWS-1257'],
+                                  # C, F, Š, Q, W, X, Y, Z, Ž are only for
+                                  # loanwords
+                                  alphabet=(u'ABDEGHIJKLMNOPRSTUVÕÄÖÜ'
+                                            u'abdeghijklmnoprstuvõäöü'),
+                                  wiki_start_pages=[u'Esileht']),
+             'Finnish': Language(name='Finnish',
+                                 iso_code='fi',
+                                 use_ascii=True,
+                                 charsets=['ISO-8859-1', 'ISO-8859-15',
+                                           'WINDOWS-1252'],
+                                 alphabet=u'ÅÄÖŠŽåäöšž',
+                                 wiki_start_pages=[u'Wikipedia:Etusivu']),
+             'French': Language(name='French',
+                                iso_code='fr',
+                                use_ascii=True,
+                                charsets=['ISO-8859-1', 'ISO-8859-15',
+                                          'WINDOWS-1252'],
+                                alphabet=u'œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ',
+                                wiki_start_pages=[u'Wikipédia:Accueil_principal',
+                                                  u'Bœuf (animal)']),
+             'Hebrew': Language(name='Hebrew',
+                                iso_code='he',
+                                use_ascii=False,
+                                charsets=['ISO-8859-8', 'WINDOWS-1255'],
+                                alphabet=u'אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ',
+                                wiki_start_pages=[u'עמוד_ראשי']),
+             'Croatian': Language(name='Croatian',
+                                  iso_code='hr',
+                                  # Q, W, X, Y are only used for foreign words.
+                                  use_ascii=False,
+                                  charsets=['ISO-8859-2', 'WINDOWS-1250'],
+                                  alphabet=(u'abcčćdđefghijklmnoprsštuvzž'
+                                            u'ABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ'),
+                                  wiki_start_pages=[u'Glavna_stranica']),
+             'Hungarian': Language(name='Hungarian',
+                                   iso_code='hu',
+                                   # Q, W, X, Y are only used for foreign words.
+                                   use_ascii=False,
+                                   charsets=['ISO-8859-2', 'WINDOWS-1250'],
+                                   alphabet=(u'abcdefghijklmnoprstuvzáéíóöőúüű'
+                                             u'ABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ'),
+                                   wiki_start_pages=[u'Kezdőlap']),
+             'Italian': Language(name='Italian',
+                                 iso_code='it',
+                                 use_ascii=True,
+                                 charsets=['ISO-8859-1', 'ISO-8859-15',
+                                           'WINDOWS-1252'],
+                                 alphabet=u'ÀÈÉÌÒÓÙàèéìòóù',
+                                 wiki_start_pages=[u'Pagina_principale']),
+             'Lithuanian': Language(name='Lithuanian',
+                                    iso_code='lt',
+                                    use_ascii=False,
+                                    charsets=['ISO-8859-13', 'WINDOWS-1257',
+                                              'ISO-8859-4'],
+                                    # Q, W, and X not used at all
+                                    alphabet=(u'AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽ'
+                                              u'aąbcčdeęėfghiįyjklmnoprsštuųūvzž'),
+                                    wiki_start_pages=[u'Pagrindinis_puslapis']),
+             'Latvian': Language(name='Latvian',
+                                 iso_code='lv',
+                                 use_ascii=False,
+                                 charsets=['ISO-8859-13', 'WINDOWS-1257',
+                                           'ISO-8859-4'],
+                                 # Q, W, X, Y are only for loanwords
+                                 alphabet=(u'AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽ'
+                                           u'aābcčdeēfgģhiījkķlļmnņoprsštuūvzž'),
+                                 wiki_start_pages=[u'Sākumlapa']),
+             'Macedonian': Language(name='Macedonian',
+                                    iso_code='mk',
+                                    use_ascii=False,
+                                    charsets=['ISO-8859-5', 'WINDOWS-1251',
+                                              'MacCyrillic', 'IBM855'],
+                                    alphabet=(u'АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ'
+                                              u'абвгдѓежзѕијклљмнњопрстќуфхцчџш'),
+                                    wiki_start_pages=[u'Главна_страница']),
+             'Dutch': Language(name='Dutch',
+                               iso_code='nl',
+                               use_ascii=True,
+                               charsets=['ISO-8859-1', 'WINDOWS-1252'],
+                               wiki_start_pages=[u'Hoofdpagina']),
+             'Polish': Language(name='Polish',
+                                iso_code='pl',
+                                # Q and X are only used for foreign words.
+                                use_ascii=False,
+                                charsets=['ISO-8859-2', 'WINDOWS-1250'],
+                                alphabet=(u'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ'
+                                          u'aąbcćdeęfghijklłmnńoóprsśtuwyzźż'),
+                                wiki_start_pages=[u'Wikipedia:Strona_główna']),
+             'Portuguese': Language(name='Portuguese',
+                                 iso_code='pt',
+                                 use_ascii=True,
+                                 charsets=['ISO-8859-1', 'ISO-8859-15',
+                                           'WINDOWS-1252'],
+                                 alphabet=u'ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú',
+                                 wiki_start_pages=[u'Wikipédia:Página_principal']),
+             'Romanian': Language(name='Romanian',
+                                  iso_code='ro',
+                                  use_ascii=True,
+                                  charsets=['ISO-8859-2', 'WINDOWS-1250'],
+                                  alphabet=u'ăâîșțĂÂÎȘȚ',
+                                  wiki_start_pages=[u'Pagina_principală']),
+             'Russian': Language(name='Russian',
+                                 iso_code='ru',
+                                 use_ascii=False,
+                                 charsets=['ISO-8859-5', 'WINDOWS-1251',
+                                           'KOI8-R', 'MacCyrillic', 'IBM866',
+                                           'IBM855'],
+                                 alphabet=(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
+                                           u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'),
+                                 wiki_start_pages=[u'Заглавная_страница']),
+             'Slovak': Language(name='Slovak',
+                                iso_code='sk',
+                                use_ascii=True,
+                                charsets=['ISO-8859-2', 'WINDOWS-1250'],
+                                alphabet=u'áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ',
+                                wiki_start_pages=[u'Hlavná_stránka']),
+             'Slovene': Language(name='Slovene',
+                                 iso_code='sl',
+                                 # Q, W, X, Y are only used for foreign words.
+                                 use_ascii=False,
+                                 charsets=['ISO-8859-2', 'WINDOWS-1250'],
+                                 alphabet=(u'abcčdefghijklmnoprsštuvzž'
+                                           u'ABCČDEFGHIJKLMNOPRSŠTUVZŽ'),
+                                 wiki_start_pages=[u'Glavna_stran']),
+             # Serbian can be written in both Latin and Cyrillic, but there's no
+             # simple way to get the Latin alphabet pages from Wikipedia through
+             # the API, so for now we just support Cyrillic.
+             'Serbian': Language(name='Serbian',
+                                 iso_code='sr',
+                                 alphabet=(u'АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ'
+                                           u'абвгдђежзијклљмнњопрстћуфхцчџш'),
+                                 charsets=['ISO-8859-5', 'WINDOWS-1251',
+                                           'MacCyrillic', 'IBM855'],
+                                 wiki_start_pages=[u'Главна_страна']),
+             'Thai': Language(name='Thai',
+                              iso_code='th',
+                              use_ascii=False,
+                              charsets=['ISO-8859-11', 'TIS-620', 'CP874'],
+                              alphabet=u'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛',
+                              wiki_start_pages=[u'หน้าหลัก']),
+             'Turkish': Language(name='Turkish',
+                                 iso_code='tr',
+                                 # Q, W, and X are not used by Turkish
+                                 use_ascii=False,
+                                 charsets=['ISO-8859-3', 'ISO-8859-9',
+                                           'WINDOWS-1254'],
+                                 alphabet=(u'abcçdefgğhıijklmnoöprsştuüvyzâîû'
+                                           u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ'),
+                                 wiki_start_pages=[u'Ana_Sayfa']),
+             'Vietnamese': Language(name='Vietnamese',
+                                    iso_code='vi',
+                                    use_ascii=False,
+                                    # Windows-1258 is the only common 8-bit
+                                    # Vietnamese encoding supported by Python.
+                                    # From Wikipedia:
+                                    # For systems that lack support for Unicode,
+                                    # dozens of 8-bit Vietnamese code pages are
+                                    # available.[1] The most common are VISCII
+                                    # (TCVN 5712:1993), VPS, and Windows-1258.[3]
+                                    # Where ASCII is required, such as when
+                                    # ensuring readability in plain text e-mail,
+                                    # Vietnamese letters are often encoded
+                                    # according to Vietnamese Quoted-Readable
+                                    # (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4]
+                                    # though usage of either variable-width
+                                    # scheme has declined dramatically following
+                                    # the adoption of Unicode on the World Wide
+                                    # Web.
+                                    charsets=['WINDOWS-1258'],
+                                    alphabet=(u'aăâbcdđeêghiklmnoôơpqrstuưvxy'
+                                              u'AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY'),
+                                    wiki_start_pages=[u'Chữ_Quốc_ngữ']),
+            }
--- a/included_dependencies/chardet/sbcharsetprober.py
+++ b/included_dependencies/chardet/sbcharsetprober.py
@ -26,10 +26,22 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################

+from collections import namedtuple
+
 from .charsetprober import CharSetProber
 from .enums import CharacterCategory, ProbingState, SequenceLikelihood


+# Record type for a single-byte charset model. SingleByteCharSetProber reads
+# the fields as follows:
+#   charset_name / language - returned by the properties of the same name
+#                             when no name prober is set
+#   char_to_order_map       - queried with .get() for each element of the
+#                             fed byte string
+#   language_model          - indexed as [previous order][current order]
+#                             (indices swapped when the prober is reversed)
+#   typical_positive_ratio  - divisor applied to the positive-sequence ratio
+#   keep_ascii_letters      - when false, feed() first passes the input
+#                             through filter_international_words()
+#   alphabet                - not read by the prober code visible in this
+#                             change
+SingleByteCharSetModel = namedtuple('SingleByteCharSetModel',
+                                    ['charset_name',
+                                     'language',
+                                     'char_to_order_map',
+                                     'language_model',
+                                     'typical_positive_ratio',
+                                     'keep_ascii_letters',
+                                     'alphabet'])
+
+
 class SingleByteCharSetProber(CharSetProber):
    SAMPLE_SIZE = 64
    SB_ENOUGH_REL_THRESHOLD = 1024  #  0.25 * SAMPLE_SIZE^2
@ -65,25 +77,25 @@ class SingleByteCharSetProber(CharSetProber):
        if self._name_prober:
            return self._name_prober.charset_name
        else:
-            return self._model['charset_name']
+            return self._model.charset_name

    @property
    def language(self):
        if self._name_prober:
            return self._name_prober.language
        else:
-            return self._model.get('language')
+            return self._model.language

    def feed(self, byte_str):
-        if not self._model['keep_english_letter']:
+        # TODO: Make filter_international_words keep things in self.alphabet
+        if not self._model.keep_ascii_letters:
            byte_str = self.filter_international_words(byte_str)
        if not byte_str:
            return self.state
-        char_to_order_map = self._model['char_to_order_map']
-        for i, c in enumerate(byte_str):
-            # XXX: Order is in range 1-64, so one would think we want 0-63 here,
-            #      but that leads to 27 more test failures than before.
-            order = char_to_order_map[c]
+        char_to_order_map = self._model.char_to_order_map
+        language_model = self._model.language_model
+        for char in byte_str:
+            order = char_to_order_map.get(char, CharacterCategory.UNDEFINED)
            # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
            #      CharacterCategory.SYMBOL is actually 253, so we use CONTROL
            #      to make it closer to the original intent. The only difference
@ -91,20 +103,21 @@ class SingleByteCharSetProber(CharSetProber):
            #      _total_char purposes.
            if order < CharacterCategory.CONTROL:
                self._total_char += 1
+            # TODO: Follow uchardet's lead and discount confidence for frequent
+            #       control characters.
+            #       See https://github.com/BYVoid/uchardet/commit/55b4f23971db61
            if order < self.SAMPLE_SIZE:
                self._freq_char += 1
                if self._last_order < self.SAMPLE_SIZE:
                    self._total_seqs += 1
                    if not self._reversed:
-                        i = (self._last_order * self.SAMPLE_SIZE) + order
-                        model = self._model['precedence_matrix'][i]
-                    else:  # reverse the order of the letters in the lookup
-                        i = (order * self.SAMPLE_SIZE) + self._last_order
-                        model = self._model['precedence_matrix'][i]
-                    self._seq_counters[model] += 1
+                        lm_cat = language_model[self._last_order][order]
+                    else:
+                        lm_cat = language_model[order][self._last_order]
+                    self._seq_counters[lm_cat] += 1
            self._last_order = order

-        charset_name = self._model['charset_name']
+        charset_name = self._model.charset_name
        if self.state == ProbingState.DETECTING:
            if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
                confidence = self.get_confidence()
@ -125,7 +138,7 @@ class SingleByteCharSetProber(CharSetProber):
        r = 0.01
        if self._total_seqs > 0:
            r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
-                 self._total_seqs / self._model['typical_positive_ratio'])
+                 self._total_seqs / self._model.typical_positive_ratio)
            r = r * self._freq_char / self._total_char
            if r >= 1.0:
                r = 0.99
--- a/included_dependencies/chardet/sbcsgroupprober.py
+++ b/included_dependencies/chardet/sbcsgroupprober.py
@ -27,47 +27,57 @@
 ######################### END LICENSE BLOCK #########################

 from .charsetgroupprober import CharSetGroupProber
-from .sbcharsetprober import SingleByteCharSetProber
-from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
-                                Latin5CyrillicModel, MacCyrillicModel,
-                                Ibm866Model, Ibm855Model)
-from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
-from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
-# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
-from .langthaimodel import TIS620ThaiModel
-from .langhebrewmodel import Win1255HebrewModel
 from .hebrewprober import HebrewProber
-from .langturkishmodel import Latin5TurkishModel
+from .langbulgarianmodel import (ISO_8859_5_BULGARIAN_MODEL,
+                                 WINDOWS_1251_BULGARIAN_MODEL)
+from .langgreekmodel import ISO_8859_7_GREEK_MODEL, WINDOWS_1253_GREEK_MODEL
+from .langhebrewmodel import WINDOWS_1255_HEBREW_MODEL
+# from .langhungarianmodel import (ISO_8859_2_HUNGARIAN_MODEL,
+#                                  WINDOWS_1250_HUNGARIAN_MODEL)
+from .langrussianmodel import (IBM855_RUSSIAN_MODEL, IBM866_RUSSIAN_MODEL,
+                               ISO_8859_5_RUSSIAN_MODEL, KOI8_R_RUSSIAN_MODEL,
+                               MACCYRILLIC_RUSSIAN_MODEL,
+                               WINDOWS_1251_RUSSIAN_MODEL)
+from .langthaimodel import TIS_620_THAI_MODEL
+from .langturkishmodel import ISO_8859_9_TURKISH_MODEL
+from .sbcharsetprober import SingleByteCharSetProber


 class SBCSGroupProber(CharSetGroupProber):
    def __init__(self):
        super(SBCSGroupProber, self).__init__()
+        hebrew_prober = HebrewProber()
+        logical_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL,
+                                                        False, hebrew_prober)
+        # TODO: See if using ISO-8859-8 Hebrew model works better here, since
+        #       it's actually the visual one
+        visual_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL,
+                                                       True, hebrew_prober)
+        hebrew_prober.set_model_probers(logical_hebrew_prober,
+                                        visual_hebrew_prober)
+        # TODO: ORDER MATTERS HERE. I changed the order vs what was in master
+        #       and several tests failed that did not before. Some thought
+        #       should be put into the ordering, and we should consider making
+        #       order not matter here, because that is very counter-intuitive.
        self.probers = [
-            SingleByteCharSetProber(Win1251CyrillicModel),
-            SingleByteCharSetProber(Koi8rModel),
-            SingleByteCharSetProber(Latin5CyrillicModel),
-            SingleByteCharSetProber(MacCyrillicModel),
-            SingleByteCharSetProber(Ibm866Model),
-            SingleByteCharSetProber(Ibm855Model),
-            SingleByteCharSetProber(Latin7GreekModel),
-            SingleByteCharSetProber(Win1253GreekModel),
-            SingleByteCharSetProber(Latin5BulgarianModel),
-            SingleByteCharSetProber(Win1251BulgarianModel),
+            SingleByteCharSetProber(WINDOWS_1251_RUSSIAN_MODEL),
+            SingleByteCharSetProber(KOI8_R_RUSSIAN_MODEL),
+            SingleByteCharSetProber(ISO_8859_5_RUSSIAN_MODEL),
+            SingleByteCharSetProber(MACCYRILLIC_RUSSIAN_MODEL),
+            SingleByteCharSetProber(IBM866_RUSSIAN_MODEL),
+            SingleByteCharSetProber(IBM855_RUSSIAN_MODEL),
+            SingleByteCharSetProber(ISO_8859_7_GREEK_MODEL),
+            SingleByteCharSetProber(WINDOWS_1253_GREEK_MODEL),
+            SingleByteCharSetProber(ISO_8859_5_BULGARIAN_MODEL),
+            SingleByteCharSetProber(WINDOWS_1251_BULGARIAN_MODEL),
            # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
            #       after we retrain model.
-            # SingleByteCharSetProber(Latin2HungarianModel),
-            # SingleByteCharSetProber(Win1250HungarianModel),
-            SingleByteCharSetProber(TIS620ThaiModel),
-            SingleByteCharSetProber(Latin5TurkishModel),
+            # SingleByteCharSetProber(ISO_8859_2_HUNGARIAN_MODEL),
+            # SingleByteCharSetProber(WINDOWS_1250_HUNGARIAN_MODEL),
+            SingleByteCharSetProber(TIS_620_THAI_MODEL),
+            SingleByteCharSetProber(ISO_8859_9_TURKISH_MODEL),
+            hebrew_prober,
+            logical_hebrew_prober,
+            visual_hebrew_prober,
        ]
-        hebrew_prober = HebrewProber()
-        logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
-                                                        False, hebrew_prober)
-        visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
-                                                       hebrew_prober)
-        hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
-        self.probers.extend([hebrew_prober, logical_hebrew_prober,
-                             visual_hebrew_prober])
-
        self.reset()
--- a/included_dependencies/chardet/universaldetector.py
+++ b/included_dependencies/chardet/universaldetector.py
@ -266,7 +266,7 @@ class UniversalDetector(object):
                               'language': max_prober.language}

        # Log all prober confidences if none met MINIMUM_THRESHOLD
-        if self.logger.getEffectiveLevel() == logging.DEBUG:
+        if self.logger.getEffectiveLevel() <= logging.DEBUG:
            if self.result['encoding'] is None:
                self.logger.debug('no probers hit minimum threshold')
                for group_prober in self._charset_probers:
@ -280,7 +280,7 @@ class UniversalDetector(object):
                                              prober.get_confidence())
                    else:
                        self.logger.debug('%s %s confidence = %s',
-                                          prober.charset_name,
-                                          prober.language,
-                                          prober.get_confidence())
+                                          group_prober.charset_name,
+                                          group_prober.language,
+                                          group_prober.get_confidence())
        return self.result
--- a/included_dependencies/chardet/version.py
+++ b/included_dependencies/chardet/version.py
@ -5,5 +5,5 @@ from within setup.py and from chardet subpackages.
 :author: Dan Blanchard (dan.blanchard@gmail.com)
 """

-__version__ = "3.0.4"
+__version__ = "4.0.0"
 VERSION = __version__.split('.')
--- a/included_dependencies/soupsieve/init.py
+++ b/included_dependencies/soupsieve/init.py
@ -25,17 +25,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 """
-from __future__ import unicode_literals
 from .__meta__ import __version__, __version_info__  # noqa: F401
 from . import css_parser as cp
 from . import css_match as cm
 from . import css_types as ct
-from .util import DEBUG, _QUIRKS, deprecated, SelectorSyntaxError  # noqa: F401
+from .util import DEBUG, SelectorSyntaxError  # noqa: F401

 __all__ = (
-    'DEBUG', "_QUIRKS", 'SelectorSyntaxError', 'SoupSieve',
-    'closest', 'comments', 'compile', 'filter', 'icomments',
-    'iselect', 'match', 'select', 'select_one'
+    'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
+    'closest', 'compile', 'filter', 'iselect',
+    'match', 'select', 'select_one'
 )

 SoupSieve = cm.SoupSieve
@ -87,21 +86,6 @@ def filter(select, iterable, namespaces=None, flags=0, **kwargs):  # noqa: A001
    return compile(select, namespaces, flags, **kwargs).filter(iterable)


-@deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
-def comments(tag, limit=0, flags=0, **kwargs):
-    """Get comments only."""
-
-    return [comment for comment in cm.CommentsMatch(tag).get_comments(limit)]
-
-
-@deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
-def icomments(tag, limit=0, flags=0, **kwargs):
-    """Iterate comments only."""
-
-    for comment in cm.CommentsMatch(tag).get_comments(limit):
-        yield comment
-
-
 def select_one(select, tag, namespaces=None, flags=0, **kwargs):
    """Select a single tag."""

--- a/included_dependencies/soupsieve/meta.py
+++ b/included_dependencies/soupsieve/meta.py
@ -1,5 +1,4 @@
 """Meta related things."""
-from __future__ import unicode_literals
 from collections import namedtuple
 import re

@ -186,5 +185,5 @@ def parse_version(ver, pre=False):
    return Version(major, minor, micro, release, pre, post, dev)


-__version_info__ = Version(1, 9, 1, "final")
+__version_info__ = Version(2, 1, 0, "final")
 __version__ = __version_info__._get_canonical()
--- a/included_dependencies/soupsieve/css_match.py
+++ b/included_dependencies/soupsieve/css_match.py
@ -1,11 +1,12 @@
 """CSS matcher."""
-from __future__ import unicode_literals
 from datetime import datetime
 from . import util
 import re
 from .import css_types as ct
 import unicodedata

+import bs4
+
 # Empty tag pattern (whitespace okay)
 RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')

@ -43,6 +44,7 @@ RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2}
 RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
 )
+RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')

 MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
 FEB = 2
@ -53,7 +55,7 @@ FEB_LEAP_MONTH = 29
 DAYS_IN_WEEK = 7


-class FakeParent(object):
+class _FakeParent(object):
    """
    Fake parent class.

@ -73,7 +75,7 @@ class FakeParent(object):
        return len(self.contents)


-class Document(object):
+class _DocumentNav(object):
    """Navigate a Beautiful Soup document."""

    @classmethod
@ -87,58 +89,37 @@ class Document(object):
    @staticmethod
    def is_doc(obj):
        """Is `BeautifulSoup` object."""
-
-        import bs4
        return isinstance(obj, bs4.BeautifulSoup)

    @staticmethod
    def is_tag(obj):
        """Is tag."""
-
-        import bs4
        return isinstance(obj, bs4.Tag)

-    @staticmethod
-    def is_comment(obj):
-        """Is comment."""
-
-        import bs4
-        return isinstance(obj, bs4.Comment)
-
    @staticmethod
    def is_declaration(obj):  # pragma: no cover
        """Is declaration."""
-
-        import bs4
        return isinstance(obj, bs4.Declaration)

    @staticmethod
-    def is_cdata(obj):  # pragma: no cover
+    def is_cdata(obj):
        """Is CDATA."""
-
-        import bs4
-        return isinstance(obj, bs4.Declaration)
+        return isinstance(obj, bs4.CData)

    @staticmethod
    def is_processing_instruction(obj):  # pragma: no cover
        """Is processing instruction."""
-
-        import bs4
        return isinstance(obj, bs4.ProcessingInstruction)

    @staticmethod
    def is_navigable_string(obj):
        """Is navigable string."""
-
-        import bs4
        return isinstance(obj, bs4.NavigableString)

    @staticmethod
    def is_special_string(obj):
        """Is special string."""
-
-        import bs4
-        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction))
+        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))

    @classmethod
    def is_content_string(cls, obj):
@ -150,7 +131,7 @@ class Document(object):
    def create_fake_parent(el):
        """Create fake parent for a given element."""

-        return FakeParent(el)
+        return _FakeParent(el)

    @staticmethod
    def is_xml_tree(el):
@ -217,10 +198,13 @@ class Document(object):
                is_tag = self.is_tag(child)

                if no_iframe and is_tag and self.is_iframe(child):
-                    last_child = child
-                    while self.is_tag(last_child) and last_child.contents:
-                        last_child = last_child.contents[-1]
-                    next_good = last_child.next_element
+                    if child.next_sibling is not None:
+                        next_good = child.next_sibling
+                    else:
+                        last_child = child
+                        while self.is_tag(last_child) and last_child.contents:
+                            last_child = last_child.contents[-1]
+                        next_good = last_child.next_element
                    yield child
                    if next_good is None:
                        break
@ -250,21 +234,27 @@ class Document(object):

        return el.prefix

+    @staticmethod
+    def get_uri(el):
+        """Get namespace `URI`."""
+
+        return el.namespace
+
    @classmethod
-    def get_next_tag(cls, el):
+    def get_next(cls, el, tags=True):
        """Get next sibling tag."""

        sibling = el.next_sibling
-        while not cls.is_tag(sibling) and sibling is not None:
+        while tags and not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.next_sibling
        return sibling

    @classmethod
-    def get_previous_tag(cls, el):
+    def get_previous(cls, el, tags=True):
        """Get previous sibling tag."""

        sibling = el.previous_sibling
-        while not cls.is_tag(sibling) and sibling is not None:
+        while tags and not cls.is_tag(sibling) and sibling is not None:
            sibling = sibling.previous_sibling
        return sibling

@ -315,7 +305,7 @@ class Document(object):
        """Get classes."""

        classes = cls.get_attribute_by_name(el, 'class', [])
-        if isinstance(classes, util.ustr):
+        if isinstance(classes, str):
            classes = RE_NOT_WS.findall(classes)
        return classes

@ -326,6 +316,11 @@ class Document(object):
            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
        )

+    def get_own_text(self, el, no_iframe=False):
+        """Get Own Text."""
+
+        return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
+

 class Inputs(object):
    """Class for parsing and validating input items."""
@ -428,7 +423,7 @@ class Inputs(object):
        return parsed


-class CSSMatch(Document, object):
+class _Match(object):
    """Perform CSS matching."""

    def __init__(self, selectors, scope, namespaces, flags):
@ -476,7 +471,7 @@ class CSSMatch(Document, object):

        if self.supports_namespaces():
            namespace = ''
-            ns = el.namespace
+            ns = self.get_uri(el)
            if ns:
                namespace = ns
        else:
@ -536,6 +531,57 @@ class CSSMatch(Document, object):
                    return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
        return None

+    def extended_language_filter(self, lang_range, lang_tag):
+        """Filter the language tags."""
+
+        match = True
+        lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
+        ranges = lang_range.split('-')
+        subtags = lang_tag.lower().split('-')
+        length = len(ranges)
+        rindex = 0
+        sindex = 0
+        r = ranges[rindex]
+        s = subtags[sindex]
+
+        # Primary tag needs to match
+        if r != '*' and r != s:
+            match = False
+
+        rindex += 1
+        sindex += 1
+
+        # Match until we run out of ranges
+        while match and rindex < length:
+            r = ranges[rindex]
+            try:
+                s = subtags[sindex]
+            except IndexError:
+                # Ran out of subtags,
+                # but we still have ranges
+                match = False
+                continue
+
+            # Empty range
+            if not r:
+                match = False
+                continue
+
+            # Matched range
+            elif s == r:
+                rindex += 1
+
+            # Implicit wildcard cannot match
+            # singletons
+            elif len(s) == 1:
+                match = False
+                continue
+
+            # Implicitly matched, so grab next subtag
+            sindex += 1
+
+        return match
+
    def match_attribute_name(self, el, attr, prefix):
        """Match attribute name and return value if it exists."""

@ -660,12 +706,12 @@ class CSSMatch(Document, object):
            if parent:
                found = self.match_selectors(parent, relation)
        elif relation[0].rel_type == REL_SIBLING:
-            sibling = self.get_previous_tag(el)
+            sibling = self.get_previous(el)
            while not found and sibling:
                found = self.match_selectors(sibling, relation)
-                sibling = self.get_previous_tag(sibling)
+                sibling = self.get_previous(sibling)
        elif relation[0].rel_type == REL_CLOSE_SIBLING:
-            sibling = self.get_previous_tag(el)
+            sibling = self.get_previous(el)
            if sibling and self.is_tag(sibling):
                found = self.match_selectors(sibling, relation)
        return found
@ -690,12 +736,12 @@ class CSSMatch(Document, object):
        elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
            found = self.match_future_child(el, relation)
        elif relation[0].rel_type == REL_HAS_SIBLING:
-            sibling = self.get_next_tag(el)
+            sibling = self.get_next(el)
            while not found and sibling:
                found = self.match_selectors(sibling, relation)
-                sibling = self.get_next_tag(sibling)
+                sibling = self.get_next(sibling)
        elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
-            sibling = self.get_next_tag(el)
+            sibling = self.get_next(el)
            if sibling and self.is_tag(sibling):
                found = self.match_selectors(sibling, relation)
        return found
@ -736,7 +782,28 @@ class CSSMatch(Document, object):
    def match_root(self, el):
        """Match element as root."""

-        return self.is_root(el)
+        is_root = self.is_root(el)
+        if is_root:
+            sibling = self.get_previous(el, tags=False)
+            while is_root and sibling is not None:
+                if (
+                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
+                    self.is_cdata(sibling)
+                ):
+                    is_root = False
+                else:
+                    sibling = self.get_previous(sibling, tags=False)
+        if is_root:
+            sibling = self.get_next(el, tags=False)
+            while is_root and sibling is not None:
+                if (
+                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
+                    self.is_cdata(sibling)
+                ):
+                    is_root = False
+                else:
+                    sibling = self.get_next(sibling, tags=False)
+        return is_root

    def match_scope(self, el):
        """Match element as scope."""
@ -881,12 +948,23 @@ class CSSMatch(Document, object):
        content = None
        for contain_list in contains:
            if content is None:
-                content = self.get_text(el, no_iframe=self.is_html)
+                if contain_list.own:
+                    content = self.get_own_text(el, no_iframe=self.is_html)
+                else:
+                    content = self.get_text(el, no_iframe=self.is_html)
            found = False
            for text in contain_list.text:
-                if text in content:
-                    found = True
-                    break
+                if contain_list.own:
+                    for c in content:
+                        if text in c:
+                            found = True
+                            break
+                    if found:
+                        break
+                else:
+                    if text in content:
+                        found = True
+                        break
            if not found:
                match = False
        return match
@ -1070,7 +1148,7 @@ class CSSMatch(Document, object):
            for patterns in langs:
                match = False
                for pattern in patterns:
-                    if pattern.match(found_lang):
+                    if self.extended_language_filter(pattern, found_lang):
                        match = True
                if not match:
                    break
@ -1152,7 +1230,7 @@ class CSSMatch(Document, object):

        out_of_range = False

-        itype = self.get_attribute_by_name(el, 'type').lower()
+        itype = util.lower(self.get_attribute_by_name(el, 'type'))
        mn = self.get_attribute_by_name(el, 'min', None)
        if mn is not None:
            mn = Inputs.parse_value(itype, mn)
@ -1207,6 +1285,21 @@ class CSSMatch(Document, object):
            self.get_prefix(el) is not None
        )

+    def match_placeholder_shown(self, el):
+        """
+        Match placeholder shown according to HTML spec.
+
+        - text area should be checked if they have content. A single newline does not count as content.
+
+        """
+
+        match = False
+        content = self.get_text(el)
+        if content in ('', '\n'):
+            match = True
+
+        return match
+
    def match_selectors(self, el, selectors):
        """Check if element matches one of the selectors."""

@ -1239,6 +1332,9 @@ class CSSMatch(Document, object):
                # Verify element is scope
                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
                    continue
+                # Verify element has placeholder shown
+                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
+                    continue
                # Verify `nth` matches
                if not self.match_nth(el, selector.nth):
                    continue
@ -1325,28 +1421,8 @@ class CSSMatch(Document, object):
        return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)


-class CommentsMatch(Document, object):
-    """Comments matcher."""
-
-    def __init__(self, el):
-        """Initialize."""
-
-        self.assert_valid_input(el)
-        self.tag = el
-
-    def get_comments(self, limit=0):
-        """Get comments."""
-
-        if limit < 1:
-            limit = None
-
-        for child in self.get_descendants(self.tag, tags=False):
-            if self.is_comment(child):
-                yield child
-                if limit is not None:
-                    limit -= 1
-                    if limit < 1:
-                        break
+class CSSMatch(_DocumentNav, _Match):
+    """The Beautiful Soup CSS match class."""


 class SoupSieve(ct.Immutable):
@ -1392,19 +1468,6 @@ class SoupSieve(ct.Immutable):
        else:
            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]

-    @util.deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
-    def comments(self, tag, limit=0):
-        """Get comments only."""
-
-        return [comment for comment in CommentsMatch(tag).get_comments(limit)]
-
-    @util.deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
-    def icomments(self, tag, limit=0):
-        """Iterate comments only."""
-
-        for comment in CommentsMatch(tag).get_comments(limit):
-            yield comment
-
    def select_one(self, tag):
        """Select a single tag."""

--- a/included_dependencies/soupsieve/css_parser.py
+++ b/included_dependencies/soupsieve/css_parser.py
@ -1,10 +1,11 @@
 """CSS selector parser."""
-from __future__ import unicode_literals
 import re
+from functools import lru_cache
 from . import util
 from . import css_match as cm
 from . import css_types as ct
 from .util import SelectorSyntaxError
+import warnings

 UNICODE_REPLACEMENT_CHAR = 0xFFFD

@ -59,6 +60,8 @@ PSEUDO_SIMPLE_NO_MATCH = {
 # Complex pseudo classes that take selector lists
 PSEUDO_COMPLEX = {
    ':contains',
+    ':-soup-contains',
+    ':-soup-contains-own',
    ':has',
    ':is',
    ':matches',
@ -110,11 +113,6 @@ VALUE = r'''
 ATTR = r'''
 (?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
 '''.format(ws=WSC, value=VALUE)
-# Definitions for quirks mode
-QUIRKS_ATTR_IDENTIFIER = r'(?:(?:{esc}|(?!/\*)[^"\] \t\r\n\f])+?)'.format(esc=CSS_ESCAPES)
-QUIRKS_ATTR = r'''
-(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
-'''.format(ws=WSC, value=QUIRKS_ATTR_IDENTIFIER)

 # Selector patterns
 # IDs (`#id`)
@ -122,13 +120,11 @@ PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
 # Classes (`.class`)
 PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
 # Prefix:Tag (`prefix|tag`)
-PAT_TAG = r'(?:(?:{ident}|\*)?\|)?(?:{ident}|\*)'.format(ident=IDENTIFIER)
+PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
 # Attributes (`[attr]`, `[attr=value]`, etc.)
-PAT_ATTR = r'\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}'.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
-# Quirks attributes, like real attributes, but unquoted values can contain anything but whitespace and closing `]`.
-PAT_QUIRKS_ATTR = r'''
-\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}
-'''.format(ws=WSC, ident=IDENTIFIER, attr=QUIRKS_ATTR)
+PAT_ATTR = r'''
+\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
+'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
 # Pseudo class (`:pseudo-class`, `:pseudo-class(`)
 PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
 # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
@ -199,12 +195,13 @@ FLG_INDETERMINATE = 0x20
 FLG_OPEN = 0x40
 FLG_IN_RANGE = 0x80
 FLG_OUT_OF_RANGE = 0x100
+FLG_PLACEHOLDER_SHOWN = 0x200

 # Maximum cached patterns to store
 _MAXCACHE = 500


-@util.lru_cache(maxsize=_MAXCACHE)
+@lru_cache(maxsize=_MAXCACHE)
 def _cached_css_compile(pattern, namespaces, custom, flags):
    """Cached CSS compile."""

@ -253,7 +250,7 @@ def css_unescape(content, string=False):
            codepoint = int(m.group(1)[1:], 16)
            if codepoint == 0:
                codepoint = UNICODE_REPLACEMENT_CHAR
-            value = util.uchr(codepoint)
+            value = chr(codepoint)
        elif m.group(2):
            value = m.group(2)[1:]
        elif m.group(3):
@ -277,7 +274,7 @@ def escape(ident):
        string.append('\\{}'.format(ident))
    else:
        for index, c in enumerate(ident):
-            codepoint = util.uord(c)
+            codepoint = ord(c)
            if codepoint == 0x00:
                string.append('\ufffd')
            elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
@ -308,12 +305,7 @@ class SelectorPattern(object):

        return self.name

-    def enabled(self, flags):
-        """Enabled."""
-
-        return True
-
-    def match(self, selector, index):
+    def match(self, selector, index, flags):
        """Match the selector."""

        return self.re_pattern.match(selector, index)
@ -328,7 +320,7 @@ class SpecialPseudoPattern(SelectorPattern):
        self.patterns = {}
        for p in patterns:
            name = p[0]
-            pattern = SelectorPattern(name, p[2])
+            pattern = p[3](name, p[2])
            for pseudo in p[1]:
                self.patterns[pseudo] = pattern

@ -340,12 +332,7 @@ class SpecialPseudoPattern(SelectorPattern):

        return self.matched_name.get_name()

-    def enabled(self, flags):
-        """Enabled."""
-
-        return True
-
-    def match(self, selector, index):
+    def match(self, selector, index, flags):
        """Match the selector."""

        pseudo = None
@ -354,22 +341,13 @@ class SpecialPseudoPattern(SelectorPattern):
            name = util.lower(css_unescape(m.group('name')))
            pattern = self.patterns.get(name)
            if pattern:
-                pseudo = pattern.match(selector, index)
+                pseudo = pattern.match(selector, index, flags)
                if pseudo:
                    self.matched_name = pattern

        return pseudo


-class QuirkPattern(SelectorPattern):
-    """Selector pattern for quirk mode."""
-
-    def enabled(self, flags):
-        """Enabled if quirks flag is present."""
-
-        return flags & util._QUIRKS
-
-
 class _Selector(object):
    """
    Intermediate selector class.
@ -446,11 +424,16 @@ class CSSParser(object):
        SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
        SpecialPseudoPattern(
            (
-                ("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS),
-                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD),
-                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE),
-                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG),
-                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR)
+                (
+                    "pseudo_contains",
+                    (':contains', ':-soup-contains', ':-soup-contains-own'),
+                    PAT_PSEUDO_CONTAINS,
+                    SelectorPattern
+                ),
+                ("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
+                ("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
+                ("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
+                ("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
            )
        ),
        SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
@ -461,7 +444,6 @@ class CSSParser(object):
        SelectorPattern("class", PAT_CLASS),
        SelectorPattern("tag", PAT_TAG),
        SelectorPattern("attribute", PAT_ATTR),
-        QuirkPattern("quirks_attribute", PAT_QUIRKS_ATTR),
        SelectorPattern("combine", PAT_COMBINE)
    )

@ -471,24 +453,19 @@ class CSSParser(object):
        self.pattern = selector.replace('\x00', '\ufffd')
        self.flags = flags
        self.debug = self.flags & util.DEBUG
-        self.quirks = self.flags & util._QUIRKS
        self.custom = {} if custom is None else custom

-    def parse_attribute_selector(self, sel, m, has_selector, quirks):
+    def parse_attribute_selector(self, sel, m, has_selector):
        """Create attribute selector from the returned regex match."""

        inverse = False
        op = m.group('cmp')
        case = util.lower(m.group('case')) if m.group('case') else None
-        parts = [css_unescape(a) for a in m.group('ns_attr').split('|')]
-        ns = ''
+        ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
+        attr = css_unescape(m.group('attr_name'))
        is_type = False
        pattern2 = None
-        if len(parts) > 1:
-            ns = parts[0]
-            attr = parts[1]
-        else:
-            attr = parts[0]
+
        if case:
            flags = re.I if case == 'i' else 0
        elif util.lower(attr) == 'type':
@ -498,7 +475,7 @@ class CSSParser(object):
            flags = 0

        if op:
-            if m.group('value').startswith(('"', "'")) and not quirks:
+            if m.group('value').startswith(('"', "'")):
                value = css_unescape(m.group('value')[1:-1], True)
            else:
                value = css_unescape(m.group('value'))
@ -525,13 +502,12 @@ class CSSParser(object):
        elif op.startswith('|'):
            # Value starts with word in dash separated list
            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
-        elif op.startswith('!'):
-            # Equivalent to `:not([attr=value])`
-            pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
-            inverse = True
        else:
            # Value matches
            pattern = re.compile(r'^%s$' % re.escape(value), flags)
+            if op.startswith('!'):
+                # Equivalent to `:not([attr=value])`
+                inverse = True
        if is_type and pattern:
            pattern2 = re.compile(pattern.pattern)

@ -552,13 +528,8 @@ class CSSParser(object):
    def parse_tag_pattern(self, sel, m, has_selector):
        """Parse tag pattern from regex match."""

-        parts = [css_unescape(x) for x in m.group(0).split('|')]
-        if len(parts) > 1:
-            prefix = parts[0]
-            tag = parts[1]
-        else:
-            tag = parts[0]
-            prefix = None
+        prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
+        tag = css_unescape(m.group('tag_name'))
        sel.tag = ct.SelectorTag(tag, prefix)
        has_selector = True
        return has_selector
@ -800,21 +771,11 @@ class CSSParser(object):
        if not combinator:
            combinator = WS_COMBINATOR
        if not has_selector:
-            # The only way we don't fail is if we are at the root level and quirks mode is enabled,
-            # and we've found no other selectors yet in this compound selector.
-            if (not self.quirks or is_pseudo or combinator == COMMA_COMBINATOR or relations):
-                raise SelectorSyntaxError(
-                    "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
-                    self.pattern,
-                    index
-                )
-            util.warn_quirks(
-                'You have attempted to use a combinator without a selector before it at position {}.'.format(index),
-                'the :scope pseudo class (or another appropriate selector) should be placed before the combinator.',
+            raise SelectorSyntaxError(
+                "The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
                self.pattern,
                index
            )
-            sel.flags |= ct.SEL_SCOPE

        if combinator == COMMA_COMBINATOR:
            if not sel.tag and not is_pseudo:
@ -847,7 +808,14 @@ class CSSParser(object):
    def parse_pseudo_contains(self, sel, m, has_selector):
        """Parse contains."""

-        values = m.group('values')
+        pseudo = util.lower(css_unescape(m.group('name')))
+        if pseudo == ":contains":
+            warnings.warn(
+                "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
+                FutureWarning
+            )
+        contains_own = pseudo == ":-soup-contains-own"
+        values = css_unescape(m.group('values'))
        patterns = []
        for token in RE_VALUES.finditer(values):
            if token.group('split'):
@ -858,7 +826,7 @@ class CSSParser(object):
            else:
                value = css_unescape(value)
            patterns.append(value)
-        sel.contains.append(ct.SelectorContains(tuple(patterns)))
+        sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own))
        has_selector = True
        return has_selector

@ -872,20 +840,12 @@ class CSSParser(object):
                continue
            value = token.group('value')
            if value.startswith(('"', "'")):
-                parts = css_unescape(value[1:-1], True).split('-')
+                value = css_unescape(value[1:-1], True)
            else:
-                parts = css_unescape(value).split('-')
+                value = css_unescape(value)
+
+            patterns.append(value)

-            new_parts = []
-            first = True
-            for part in parts:
-                if part == '*' and first:
-                    new_parts.append('(?!x\b)[a-z0-9]+?')
-                elif part != '*':
-                    new_parts.append(('' if first else '(-(?!x\b)[a-z0-9]+)*?\\-') + re.escape(part))
-                if first:
-                    first = False
-            patterns.append(re.compile(r'^{}(?:-.*)?$'.format(''.join(new_parts)), re.I))
        sel.lang.append(ct.SelectorLang(patterns))
        has_selector = True

@ -917,6 +877,7 @@ class CSSParser(object):
        is_indeterminate = bool(flags & FLG_INDETERMINATE)
        is_in_range = bool(flags & FLG_IN_RANGE)
        is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
+        is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)

        if self.debug:  # pragma: no cover
            if is_pseudo:
@ -937,6 +898,8 @@ class CSSParser(object):
                print('    is_in_range: True')
            if is_out_of_range:
                print('    is_out_of_range: True')
+            if is_placeholder_shown:
+                print('    is_placeholder_shown: True')

        if is_relative:
            selectors.append(_Selector())
@ -953,7 +916,7 @@ class CSSParser(object):
                elif key == 'pseudo_class':
                    has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
                elif key == 'pseudo_element':
-                    raise NotImplementedError("Psuedo-element found at position {}".format(m.start(0)))
+                    raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
                elif key == 'pseudo_contains':
                    has_selector = self.parse_pseudo_contains(sel, m, has_selector)
                elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -989,18 +952,8 @@ class CSSParser(object):
                        has_selector, sel = self.parse_combinator(
                            sel, m, has_selector, selectors, relations, is_pseudo, index
                        )
-                elif key in ('attribute', 'quirks_attribute'):
-                    quirks = key == 'quirks_attribute'
-                    if quirks:
-                        temp_index = index + m.group(0).find('=') + 1
-                        util.warn_quirks(
-                            "You have attempted to use an attribute " +
-                            "value that should have been quoted at position {}.".format(temp_index),
-                            "the attribute value should be quoted.",
-                            self.pattern,
-                            temp_index
-                        )
-                    has_selector = self.parse_attribute_selector(sel, m, has_selector, quirks)
+                elif key == 'attribute':
+                    has_selector = self.parse_attribute_selector(sel, m, has_selector)
                elif key == 'tag':
                    if has_selector:
                        raise SelectorSyntaxError(
@ -1053,6 +1006,8 @@ class CSSParser(object):
            selectors[-1].flags = ct.SEL_IN_RANGE
        if is_out_of_range:
            selectors[-1].flags = ct.SEL_OUT_OF_RANGE
+        if is_placeholder_shown:
+            selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

        return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)

@ -1066,15 +1021,11 @@ class CSSParser(object):
        end = (m.start(0) - 1) if m else (len(pattern) - 1)

        if self.debug:  # pragma: no cover
-            if self.quirks:
-                print('## QUIRKS MODE: Throwing out the spec!')
            print('## PARSING: {!r}'.format(pattern))
        while index <= end:
            m = None
            for v in self.css_tokens:
-                if not v.enabled(self.flags):  # pragma: no cover
-                    continue
-                m = v.match(pattern, index)
+                m = v.match(pattern, index, self.flags)
                if m:
                    name = v.get_name()
                    if self.debug:  # pragma: no cover
@ -1102,13 +1053,7 @@ class CSSParser(object):
            print('## END PARSING')

    def process_selectors(self, index=0, flags=0):
-        """
-        Process selectors.
-
-        We do our own selectors as BeautifulSoup4 has some annoying quirks,
-        and we don't really need to do nth selectors or siblings or
-        descendants etc.
-        """
+        """Process selectors."""

        return self.parse_selectors(self.selector_iter(self.pattern), index, flags)

@ -1123,8 +1068,7 @@ CSS_LINK = CSSParser(
 # CSS pattern for `:checked`
 CSS_CHECKED = CSSParser(
    '''
-    html|*:is(input[type=checkbox], input[type=radio])[checked],
-    html|select > html|option[selected]
+    html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:default` (must compile CSS_CHECKED first)
@ -1150,23 +1094,23 @@ CSS_INDETERMINATE = CSSParser(
    This pattern must be at the end.
    Special logic is applied to the last selector.
    */
-    html|input[type="radio"][name][name!='']:not([checked])
+    html|input[type="radio"][name]:not([name='']):not([checked])
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
 # CSS pattern for `:disabled`
 CSS_DISABLED = CSSParser(
    '''
-    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
+    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
    html|optgroup[disabled] > html|option,
-    html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
+    html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
    html|fieldset[disabled] >
-        html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
+        html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:enabled`
 CSS_ENABLED = CSSParser(
    '''
-    html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
+    html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
    '''
 ).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
 # CSS pattern for `:required`
@ -1180,22 +1124,20 @@ CSS_OPTIONAL = CSSParser(
 # CSS pattern for `:placeholder-shown`
 CSS_PLACEHOLDER_SHOWN = CSSParser(
    '''
-    html|*:is(
-        input:is(
-            :not([type]),
-            [type=""],
-            [type=text],
-            [type=search],
-            [type=url],
-            [type=tel],
-            [type=email],
-            [type=password],
-            [type=number]
-        ),
-        textarea
-    )[placeholder][placeholder!='']
+    html|input:is(
+        :not([type]),
+        [type=""],
+        [type=text],
+        [type=search],
+        [type=url],
+        [type=tel],
+        [type=email],
+        [type=password],
+        [type=number]
+    )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
+    html|textarea[placeholder]:not([placeholder=''])
    '''
-).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
+).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
 # CSS pattern default for `:nth-child` "of S" feature
 CSS_NTH_OF_S_DEFAULT = CSSParser(
    '*|*'
--- a/included_dependencies/soupsieve/css_types.py
+++ b/included_dependencies/soupsieve/css_types.py
@ -1,6 +1,6 @@
 """CSS selector structure items."""
-from __future__ import unicode_literals
-from . import util
+import copyreg
+from collections.abc import Hashable, Mapping

 __all__ = (
    'Selector',
@ -26,6 +26,7 @@ SEL_DIR_RTL = 0x40
 SEL_IN_RANGE = 0x80
 SEL_OUT_OF_RANGE = 0x100
 SEL_DEFINED = 0x200
+SEL_PLACEHOLDER_SHOWN = 0x400


 class Immutable(object):
@ -85,7 +86,7 @@ class Immutable(object):
    __str__ = __repr__


-class ImmutableDict(util.Mapping):
+class ImmutableDict(Mapping):
    """Hashable, immutable dictionary."""

    def __init__(self, *args, **kwargs):
@ -94,8 +95,8 @@ class ImmutableDict(util.Mapping):
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
        if (
-            is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or
-            not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg])
+            is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
+            not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
        ):
            raise TypeError('All values must be hashable')

@ -140,9 +141,9 @@ class Namespaces(ImmutableDict):
        # so don't bother checking that.
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
-        if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
+        if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
            raise TypeError('Namespace keys and values must be Unicode strings')
-        elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
+        elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
            raise TypeError('Namespace keys and values must be Unicode strings')

        super(Namespaces, self).__init__(*args, **kwargs)
@ -159,9 +160,9 @@ class CustomSelectors(ImmutableDict):
        # so don't bother checking that.
        arg = args[0] if args else kwargs
        is_dict = isinstance(arg, dict)
-        if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
+        if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')
-        elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
+        elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')

        super(CustomSelectors, self).__init__(*args, **kwargs)
@ -238,13 +239,14 @@ class SelectorAttribute(Immutable):
 class SelectorContains(Immutable):
    """Selector contains rule."""

-    __slots__ = ("text", "_hash")
+    __slots__ = ("text", "own", "_hash")

-    def __init__(self, text):
+    def __init__(self, text, own):
        """Initialize."""

        super(SelectorContains, self).__init__(
-            text=text
+            text=text,
+            own=own
        )


@ -331,7 +333,7 @@ def _pickle(p):
 def pickle_register(obj):
    """Allow object to be pickled."""

-    util.copyreg.pickle(obj, _pickle)
+    copyreg.pickle(obj, _pickle)


 pickle_register(Selector)
--- a/included_dependencies/soupsieve/util.py
+++ b/included_dependencies/soupsieve/util.py
@ -1,47 +1,17 @@
 """Utility."""
-from __future__ import unicode_literals
-from functools import wraps
+from functools import wraps, lru_cache
 import warnings
-import sys
-import struct
-import os
 import re
-MODULE = os.path.dirname(__file__)
-
-PY3 = sys.version_info >= (3, 0)
-PY35 = sys.version_info >= (3, 5)
-PY37 = sys.version_info >= (3, 7)
-
-if PY3:
-    from functools import lru_cache  # noqa F401
-    import copyreg  # noqa F401
-    from collections.abc import Hashable, Mapping  # noqa F401
-
-    ustr = str
-    bstr = bytes
-    unichar = chr
-    string = str
-else:
-    from backports.functools_lru_cache import lru_cache  # noqa F401
-    import copy_reg as copyreg  # noqa F401
-    from collections import Hashable, Mapping  # noqa F401
-
-    ustr = unicode  # noqa: F821
-    bstr = str
-    unichar = unichr  # noqa: F821
-    string = basestring  # noqa: F821

 DEBUG = 0x00001
-_QUIRKS = 0x10000

 RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')

-LC_A = ord('a')
-LC_Z = ord('z')
 UC_A = ord('A')
 UC_Z = ord('Z')


+@lru_cache(maxsize=512)
 def lower(string):
    """Lower."""

@ -52,38 +22,7 @@ def lower(string):
    return ''.join(new_string)


-def upper(string):  # pragma: no cover
-    """Lower."""
-
-    new_string = []
-    for c in string:
-        o = ord(c)
-        new_string.append(chr(o - 32) if LC_A <= o <= LC_Z else c)
-    return ''.join(new_string)
-
-
-def uchr(i):
-    """Allow getting Unicode character on narrow python builds."""
-
-    try:
-        return unichar(i)
-    except ValueError:  # pragma: no cover
-        return struct.pack('i', i).decode('utf-32')
-
-
-def uord(c):
-    """Get Unicode ordinal."""
-
-    if len(c) == 2:  # pragma: no cover
-        high, low = [ord(p) for p in c]
-        ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
-    else:
-        ordinal = ord(c)
-
-    return ordinal
-
-
-class SelectorSyntaxError(SyntaxError):
+class SelectorSyntaxError(Exception):
    """Syntax error in a CSS selector."""

    def __init__(self, msg, pattern=None, index=None):
@ -169,45 +108,3 @@ def get_pattern_context(pattern, index):
        last = m.end(0)

    return ''.join(text), line, col
-
-
-class QuirksWarning(UserWarning):  # pragma: no cover
-    """Warning for quirks mode."""
-
-
-def warn_quirks(message, recommend, pattern, index):
-    """Warn quirks."""
-
-    import traceback
-    import bs4  # noqa: F401
-
-    # Acquire source code line context
-    paths = (MODULE, sys.modules['bs4'].__path__[0])
-    tb = traceback.extract_stack()
-    previous = None
-    filename = None
-    lineno = None
-    for entry in tb:
-        if (PY35 and entry.filename.startswith(paths)) or (not PY35 and entry[0].startswith(paths)):
-            break
-        previous = entry
-    if previous:
-        filename = previous.filename if PY35 else previous[0]
-        lineno = previous.lineno if PY35 else previous[1]
-
-    # Format pattern to show line and column position
-    context, line = get_pattern_context(pattern, index)[0:2]
-
-    # Display warning
-    warnings.warn_explicit(
-        "\nCSS selector pattern:\n" +
-        "    {}\n".format(message) +
-        "    This behavior is only allowed temporarily for Beautiful Soup's transition to Soup Sieve.\n" +
-        "    In order to confrom to the CSS spec, {}\n".format(recommend) +
-        "    It is strongly recommended the selector be altered to conform to the CSS spec " +
-        "as an exception will be raised for this case in the future.\n" +
-        "pattern line {}:\n{}".format(line, context),
-        QuirksWarning,
-        filename,
-        lineno
-    )