mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
Roll included soupsieve back--newest isn't py2 compat.
This commit is contained in:
parent
34dc2e14b2
commit
9112346f41
6 changed files with 366 additions and 253 deletions
|
|
@ -25,16 +25,17 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
"""
|
"""
|
||||||
|
from __future__ import unicode_literals
|
||||||
from .__meta__ import __version__, __version_info__ # noqa: F401
|
from .__meta__ import __version__, __version_info__ # noqa: F401
|
||||||
from . import css_parser as cp
|
from . import css_parser as cp
|
||||||
from . import css_match as cm
|
from . import css_match as cm
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
from .util import DEBUG, _QUIRKS, deprecated, SelectorSyntaxError # noqa: F401
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
'DEBUG', "_QUIRKS", 'SelectorSyntaxError', 'SoupSieve',
|
||||||
'closest', 'compile', 'filter', 'iselect',
|
'closest', 'comments', 'compile', 'filter', 'icomments',
|
||||||
'match', 'select', 'select_one'
|
'iselect', 'match', 'select', 'select_one'
|
||||||
)
|
)
|
||||||
|
|
||||||
SoupSieve = cm.SoupSieve
|
SoupSieve = cm.SoupSieve
|
||||||
|
|
@ -86,6 +87,21 @@ def filter(select, iterable, namespaces=None, flags=0, **kwargs): # noqa: A001
|
||||||
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
|
||||||
|
def comments(tag, limit=0, flags=0, **kwargs):
|
||||||
|
"""Get comments only."""
|
||||||
|
|
||||||
|
return [comment for comment in cm.CommentsMatch(tag).get_comments(limit)]
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
|
||||||
|
def icomments(tag, limit=0, flags=0, **kwargs):
|
||||||
|
"""Iterate comments only."""
|
||||||
|
|
||||||
|
for comment in cm.CommentsMatch(tag).get_comments(limit):
|
||||||
|
yield comment
|
||||||
|
|
||||||
|
|
||||||
def select_one(select, tag, namespaces=None, flags=0, **kwargs):
|
def select_one(select, tag, namespaces=None, flags=0, **kwargs):
|
||||||
"""Select a single tag."""
|
"""Select a single tag."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
"""Meta related things."""
|
"""Meta related things."""
|
||||||
|
from __future__ import unicode_literals
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
@ -185,5 +186,5 @@ def parse_version(ver, pre=False):
|
||||||
return Version(major, minor, micro, release, pre, post, dev)
|
return Version(major, minor, micro, release, pre, post, dev)
|
||||||
|
|
||||||
|
|
||||||
__version_info__ = Version(2, 1, 0, "final")
|
__version_info__ = Version(1, 9, 1, "final")
|
||||||
__version__ = __version_info__._get_canonical()
|
__version__ = __version_info__._get_canonical()
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,11 @@
|
||||||
"""CSS matcher."""
|
"""CSS matcher."""
|
||||||
|
from __future__ import unicode_literals
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from . import util
|
from . import util
|
||||||
import re
|
import re
|
||||||
from .import css_types as ct
|
from .import css_types as ct
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
import bs4
|
|
||||||
|
|
||||||
# Empty tag pattern (whitespace okay)
|
# Empty tag pattern (whitespace okay)
|
||||||
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
||||||
|
|
||||||
|
|
@ -44,7 +43,6 @@ RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2}
|
||||||
RE_DATETIME = re.compile(
|
RE_DATETIME = re.compile(
|
||||||
r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
|
r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
|
||||||
)
|
)
|
||||||
RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')
|
|
||||||
|
|
||||||
MONTHS_30 = (4, 6, 9, 11) # April, June, September, and November
|
MONTHS_30 = (4, 6, 9, 11) # April, June, September, and November
|
||||||
FEB = 2
|
FEB = 2
|
||||||
|
|
@ -55,7 +53,7 @@ FEB_LEAP_MONTH = 29
|
||||||
DAYS_IN_WEEK = 7
|
DAYS_IN_WEEK = 7
|
||||||
|
|
||||||
|
|
||||||
class _FakeParent(object):
|
class FakeParent(object):
|
||||||
"""
|
"""
|
||||||
Fake parent class.
|
Fake parent class.
|
||||||
|
|
||||||
|
|
@ -75,7 +73,7 @@ class _FakeParent(object):
|
||||||
return len(self.contents)
|
return len(self.contents)
|
||||||
|
|
||||||
|
|
||||||
class _DocumentNav(object):
|
class Document(object):
|
||||||
"""Navigate a Beautiful Soup document."""
|
"""Navigate a Beautiful Soup document."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -89,37 +87,58 @@ class _DocumentNav(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_doc(obj):
|
def is_doc(obj):
|
||||||
"""Is `BeautifulSoup` object."""
|
"""Is `BeautifulSoup` object."""
|
||||||
|
|
||||||
|
import bs4
|
||||||
return isinstance(obj, bs4.BeautifulSoup)
|
return isinstance(obj, bs4.BeautifulSoup)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_tag(obj):
|
def is_tag(obj):
|
||||||
"""Is tag."""
|
"""Is tag."""
|
||||||
|
|
||||||
|
import bs4
|
||||||
return isinstance(obj, bs4.Tag)
|
return isinstance(obj, bs4.Tag)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_comment(obj):
|
||||||
|
"""Is comment."""
|
||||||
|
|
||||||
|
import bs4
|
||||||
|
return isinstance(obj, bs4.Comment)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_declaration(obj): # pragma: no cover
|
def is_declaration(obj): # pragma: no cover
|
||||||
"""Is declaration."""
|
"""Is declaration."""
|
||||||
|
|
||||||
|
import bs4
|
||||||
return isinstance(obj, bs4.Declaration)
|
return isinstance(obj, bs4.Declaration)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_cdata(obj):
|
def is_cdata(obj): # pragma: no cover
|
||||||
"""Is CDATA."""
|
"""Is CDATA."""
|
||||||
return isinstance(obj, bs4.CData)
|
|
||||||
|
import bs4
|
||||||
|
return isinstance(obj, bs4.Declaration)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_processing_instruction(obj): # pragma: no cover
|
def is_processing_instruction(obj): # pragma: no cover
|
||||||
"""Is processing instruction."""
|
"""Is processing instruction."""
|
||||||
|
|
||||||
|
import bs4
|
||||||
return isinstance(obj, bs4.ProcessingInstruction)
|
return isinstance(obj, bs4.ProcessingInstruction)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_navigable_string(obj):
|
def is_navigable_string(obj):
|
||||||
"""Is navigable string."""
|
"""Is navigable string."""
|
||||||
|
|
||||||
|
import bs4
|
||||||
return isinstance(obj, bs4.NavigableString)
|
return isinstance(obj, bs4.NavigableString)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_special_string(obj):
|
def is_special_string(obj):
|
||||||
"""Is special string."""
|
"""Is special string."""
|
||||||
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
|
|
||||||
|
import bs4
|
||||||
|
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_content_string(cls, obj):
|
def is_content_string(cls, obj):
|
||||||
|
|
@ -131,7 +150,7 @@ class _DocumentNav(object):
|
||||||
def create_fake_parent(el):
|
def create_fake_parent(el):
|
||||||
"""Create fake parent for a given element."""
|
"""Create fake parent for a given element."""
|
||||||
|
|
||||||
return _FakeParent(el)
|
return FakeParent(el)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_xml_tree(el):
|
def is_xml_tree(el):
|
||||||
|
|
@ -198,13 +217,10 @@ class _DocumentNav(object):
|
||||||
is_tag = self.is_tag(child)
|
is_tag = self.is_tag(child)
|
||||||
|
|
||||||
if no_iframe and is_tag and self.is_iframe(child):
|
if no_iframe and is_tag and self.is_iframe(child):
|
||||||
if child.next_sibling is not None:
|
last_child = child
|
||||||
next_good = child.next_sibling
|
while self.is_tag(last_child) and last_child.contents:
|
||||||
else:
|
last_child = last_child.contents[-1]
|
||||||
last_child = child
|
next_good = last_child.next_element
|
||||||
while self.is_tag(last_child) and last_child.contents:
|
|
||||||
last_child = last_child.contents[-1]
|
|
||||||
next_good = last_child.next_element
|
|
||||||
yield child
|
yield child
|
||||||
if next_good is None:
|
if next_good is None:
|
||||||
break
|
break
|
||||||
|
|
@ -234,27 +250,21 @@ class _DocumentNav(object):
|
||||||
|
|
||||||
return el.prefix
|
return el.prefix
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_uri(el):
|
|
||||||
"""Get namespace `URI`."""
|
|
||||||
|
|
||||||
return el.namespace
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_next(cls, el, tags=True):
|
def get_next_tag(cls, el):
|
||||||
"""Get next sibling tag."""
|
"""Get next sibling tag."""
|
||||||
|
|
||||||
sibling = el.next_sibling
|
sibling = el.next_sibling
|
||||||
while tags and not cls.is_tag(sibling) and sibling is not None:
|
while not cls.is_tag(sibling) and sibling is not None:
|
||||||
sibling = sibling.next_sibling
|
sibling = sibling.next_sibling
|
||||||
return sibling
|
return sibling
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_previous(cls, el, tags=True):
|
def get_previous_tag(cls, el):
|
||||||
"""Get previous sibling tag."""
|
"""Get previous sibling tag."""
|
||||||
|
|
||||||
sibling = el.previous_sibling
|
sibling = el.previous_sibling
|
||||||
while tags and not cls.is_tag(sibling) and sibling is not None:
|
while not cls.is_tag(sibling) and sibling is not None:
|
||||||
sibling = sibling.previous_sibling
|
sibling = sibling.previous_sibling
|
||||||
return sibling
|
return sibling
|
||||||
|
|
||||||
|
|
@ -305,7 +315,7 @@ class _DocumentNav(object):
|
||||||
"""Get classes."""
|
"""Get classes."""
|
||||||
|
|
||||||
classes = cls.get_attribute_by_name(el, 'class', [])
|
classes = cls.get_attribute_by_name(el, 'class', [])
|
||||||
if isinstance(classes, str):
|
if isinstance(classes, util.ustr):
|
||||||
classes = RE_NOT_WS.findall(classes)
|
classes = RE_NOT_WS.findall(classes)
|
||||||
return classes
|
return classes
|
||||||
|
|
||||||
|
|
@ -316,11 +326,6 @@ class _DocumentNav(object):
|
||||||
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
|
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_own_text(self, el, no_iframe=False):
|
|
||||||
"""Get Own Text."""
|
|
||||||
|
|
||||||
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
|
|
||||||
|
|
||||||
|
|
||||||
class Inputs(object):
|
class Inputs(object):
|
||||||
"""Class for parsing and validating input items."""
|
"""Class for parsing and validating input items."""
|
||||||
|
|
@ -423,7 +428,7 @@ class Inputs(object):
|
||||||
return parsed
|
return parsed
|
||||||
|
|
||||||
|
|
||||||
class _Match(object):
|
class CSSMatch(Document, object):
|
||||||
"""Perform CSS matching."""
|
"""Perform CSS matching."""
|
||||||
|
|
||||||
def __init__(self, selectors, scope, namespaces, flags):
|
def __init__(self, selectors, scope, namespaces, flags):
|
||||||
|
|
@ -471,7 +476,7 @@ class _Match(object):
|
||||||
|
|
||||||
if self.supports_namespaces():
|
if self.supports_namespaces():
|
||||||
namespace = ''
|
namespace = ''
|
||||||
ns = self.get_uri(el)
|
ns = el.namespace
|
||||||
if ns:
|
if ns:
|
||||||
namespace = ns
|
namespace = ns
|
||||||
else:
|
else:
|
||||||
|
|
@ -531,57 +536,6 @@ class _Match(object):
|
||||||
return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
|
return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def extended_language_filter(self, lang_range, lang_tag):
|
|
||||||
"""Filter the language tags."""
|
|
||||||
|
|
||||||
match = True
|
|
||||||
lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
|
|
||||||
ranges = lang_range.split('-')
|
|
||||||
subtags = lang_tag.lower().split('-')
|
|
||||||
length = len(ranges)
|
|
||||||
rindex = 0
|
|
||||||
sindex = 0
|
|
||||||
r = ranges[rindex]
|
|
||||||
s = subtags[sindex]
|
|
||||||
|
|
||||||
# Primary tag needs to match
|
|
||||||
if r != '*' and r != s:
|
|
||||||
match = False
|
|
||||||
|
|
||||||
rindex += 1
|
|
||||||
sindex += 1
|
|
||||||
|
|
||||||
# Match until we run out of ranges
|
|
||||||
while match and rindex < length:
|
|
||||||
r = ranges[rindex]
|
|
||||||
try:
|
|
||||||
s = subtags[sindex]
|
|
||||||
except IndexError:
|
|
||||||
# Ran out of subtags,
|
|
||||||
# but we still have ranges
|
|
||||||
match = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Empty range
|
|
||||||
if not r:
|
|
||||||
match = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Matched range
|
|
||||||
elif s == r:
|
|
||||||
rindex += 1
|
|
||||||
|
|
||||||
# Implicit wildcard cannot match
|
|
||||||
# singletons
|
|
||||||
elif len(s) == 1:
|
|
||||||
match = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Implicitly matched, so grab next subtag
|
|
||||||
sindex += 1
|
|
||||||
|
|
||||||
return match
|
|
||||||
|
|
||||||
def match_attribute_name(self, el, attr, prefix):
|
def match_attribute_name(self, el, attr, prefix):
|
||||||
"""Match attribute name and return value if it exists."""
|
"""Match attribute name and return value if it exists."""
|
||||||
|
|
||||||
|
|
@ -706,12 +660,12 @@ class _Match(object):
|
||||||
if parent:
|
if parent:
|
||||||
found = self.match_selectors(parent, relation)
|
found = self.match_selectors(parent, relation)
|
||||||
elif relation[0].rel_type == REL_SIBLING:
|
elif relation[0].rel_type == REL_SIBLING:
|
||||||
sibling = self.get_previous(el)
|
sibling = self.get_previous_tag(el)
|
||||||
while not found and sibling:
|
while not found and sibling:
|
||||||
found = self.match_selectors(sibling, relation)
|
found = self.match_selectors(sibling, relation)
|
||||||
sibling = self.get_previous(sibling)
|
sibling = self.get_previous_tag(sibling)
|
||||||
elif relation[0].rel_type == REL_CLOSE_SIBLING:
|
elif relation[0].rel_type == REL_CLOSE_SIBLING:
|
||||||
sibling = self.get_previous(el)
|
sibling = self.get_previous_tag(el)
|
||||||
if sibling and self.is_tag(sibling):
|
if sibling and self.is_tag(sibling):
|
||||||
found = self.match_selectors(sibling, relation)
|
found = self.match_selectors(sibling, relation)
|
||||||
return found
|
return found
|
||||||
|
|
@ -736,12 +690,12 @@ class _Match(object):
|
||||||
elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
|
elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
|
||||||
found = self.match_future_child(el, relation)
|
found = self.match_future_child(el, relation)
|
||||||
elif relation[0].rel_type == REL_HAS_SIBLING:
|
elif relation[0].rel_type == REL_HAS_SIBLING:
|
||||||
sibling = self.get_next(el)
|
sibling = self.get_next_tag(el)
|
||||||
while not found and sibling:
|
while not found and sibling:
|
||||||
found = self.match_selectors(sibling, relation)
|
found = self.match_selectors(sibling, relation)
|
||||||
sibling = self.get_next(sibling)
|
sibling = self.get_next_tag(sibling)
|
||||||
elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
|
elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
|
||||||
sibling = self.get_next(el)
|
sibling = self.get_next_tag(el)
|
||||||
if sibling and self.is_tag(sibling):
|
if sibling and self.is_tag(sibling):
|
||||||
found = self.match_selectors(sibling, relation)
|
found = self.match_selectors(sibling, relation)
|
||||||
return found
|
return found
|
||||||
|
|
@ -782,28 +736,7 @@ class _Match(object):
|
||||||
def match_root(self, el):
|
def match_root(self, el):
|
||||||
"""Match element as root."""
|
"""Match element as root."""
|
||||||
|
|
||||||
is_root = self.is_root(el)
|
return self.is_root(el)
|
||||||
if is_root:
|
|
||||||
sibling = self.get_previous(el, tags=False)
|
|
||||||
while is_root and sibling is not None:
|
|
||||||
if (
|
|
||||||
self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
|
|
||||||
self.is_cdata(sibling)
|
|
||||||
):
|
|
||||||
is_root = False
|
|
||||||
else:
|
|
||||||
sibling = self.get_previous(sibling, tags=False)
|
|
||||||
if is_root:
|
|
||||||
sibling = self.get_next(el, tags=False)
|
|
||||||
while is_root and sibling is not None:
|
|
||||||
if (
|
|
||||||
self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
|
|
||||||
self.is_cdata(sibling)
|
|
||||||
):
|
|
||||||
is_root = False
|
|
||||||
else:
|
|
||||||
sibling = self.get_next(sibling, tags=False)
|
|
||||||
return is_root
|
|
||||||
|
|
||||||
def match_scope(self, el):
|
def match_scope(self, el):
|
||||||
"""Match element as scope."""
|
"""Match element as scope."""
|
||||||
|
|
@ -948,23 +881,12 @@ class _Match(object):
|
||||||
content = None
|
content = None
|
||||||
for contain_list in contains:
|
for contain_list in contains:
|
||||||
if content is None:
|
if content is None:
|
||||||
if contain_list.own:
|
content = self.get_text(el, no_iframe=self.is_html)
|
||||||
content = self.get_own_text(el, no_iframe=self.is_html)
|
|
||||||
else:
|
|
||||||
content = self.get_text(el, no_iframe=self.is_html)
|
|
||||||
found = False
|
found = False
|
||||||
for text in contain_list.text:
|
for text in contain_list.text:
|
||||||
if contain_list.own:
|
if text in content:
|
||||||
for c in content:
|
found = True
|
||||||
if text in c:
|
break
|
||||||
found = True
|
|
||||||
break
|
|
||||||
if found:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
if text in content:
|
|
||||||
found = True
|
|
||||||
break
|
|
||||||
if not found:
|
if not found:
|
||||||
match = False
|
match = False
|
||||||
return match
|
return match
|
||||||
|
|
@ -1148,7 +1070,7 @@ class _Match(object):
|
||||||
for patterns in langs:
|
for patterns in langs:
|
||||||
match = False
|
match = False
|
||||||
for pattern in patterns:
|
for pattern in patterns:
|
||||||
if self.extended_language_filter(pattern, found_lang):
|
if pattern.match(found_lang):
|
||||||
match = True
|
match = True
|
||||||
if not match:
|
if not match:
|
||||||
break
|
break
|
||||||
|
|
@ -1230,7 +1152,7 @@ class _Match(object):
|
||||||
|
|
||||||
out_of_range = False
|
out_of_range = False
|
||||||
|
|
||||||
itype = util.lower(self.get_attribute_by_name(el, 'type'))
|
itype = self.get_attribute_by_name(el, 'type').lower()
|
||||||
mn = self.get_attribute_by_name(el, 'min', None)
|
mn = self.get_attribute_by_name(el, 'min', None)
|
||||||
if mn is not None:
|
if mn is not None:
|
||||||
mn = Inputs.parse_value(itype, mn)
|
mn = Inputs.parse_value(itype, mn)
|
||||||
|
|
@ -1285,21 +1207,6 @@ class _Match(object):
|
||||||
self.get_prefix(el) is not None
|
self.get_prefix(el) is not None
|
||||||
)
|
)
|
||||||
|
|
||||||
def match_placeholder_shown(self, el):
|
|
||||||
"""
|
|
||||||
Match placeholder shown according to HTML spec.
|
|
||||||
|
|
||||||
- text area should be checked if they have content. A single newline does not count as content.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
match = False
|
|
||||||
content = self.get_text(el)
|
|
||||||
if content in ('', '\n'):
|
|
||||||
match = True
|
|
||||||
|
|
||||||
return match
|
|
||||||
|
|
||||||
def match_selectors(self, el, selectors):
|
def match_selectors(self, el, selectors):
|
||||||
"""Check if element matches one of the selectors."""
|
"""Check if element matches one of the selectors."""
|
||||||
|
|
||||||
|
|
@ -1332,9 +1239,6 @@ class _Match(object):
|
||||||
# Verify element is scope
|
# Verify element is scope
|
||||||
if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
|
if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
|
||||||
continue
|
continue
|
||||||
# Verify element has placeholder shown
|
|
||||||
if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
|
|
||||||
continue
|
|
||||||
# Verify `nth` matches
|
# Verify `nth` matches
|
||||||
if not self.match_nth(el, selector.nth):
|
if not self.match_nth(el, selector.nth):
|
||||||
continue
|
continue
|
||||||
|
|
@ -1421,8 +1325,28 @@ class _Match(object):
|
||||||
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
|
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
|
||||||
|
|
||||||
|
|
||||||
class CSSMatch(_DocumentNav, _Match):
|
class CommentsMatch(Document, object):
|
||||||
"""The Beautiful Soup CSS match class."""
|
"""Comments matcher."""
|
||||||
|
|
||||||
|
def __init__(self, el):
|
||||||
|
"""Initialize."""
|
||||||
|
|
||||||
|
self.assert_valid_input(el)
|
||||||
|
self.tag = el
|
||||||
|
|
||||||
|
def get_comments(self, limit=0):
|
||||||
|
"""Get comments."""
|
||||||
|
|
||||||
|
if limit < 1:
|
||||||
|
limit = None
|
||||||
|
|
||||||
|
for child in self.get_descendants(self.tag, tags=False):
|
||||||
|
if self.is_comment(child):
|
||||||
|
yield child
|
||||||
|
if limit is not None:
|
||||||
|
limit -= 1
|
||||||
|
if limit < 1:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
class SoupSieve(ct.Immutable):
|
class SoupSieve(ct.Immutable):
|
||||||
|
|
@ -1468,6 +1392,19 @@ class SoupSieve(ct.Immutable):
|
||||||
else:
|
else:
|
||||||
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
|
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
|
||||||
|
|
||||||
|
@util.deprecated("'comments' is not related to CSS selectors and will be removed in the future.")
|
||||||
|
def comments(self, tag, limit=0):
|
||||||
|
"""Get comments only."""
|
||||||
|
|
||||||
|
return [comment for comment in CommentsMatch(tag).get_comments(limit)]
|
||||||
|
|
||||||
|
@util.deprecated("'icomments' is not related to CSS selectors and will be removed in the future.")
|
||||||
|
def icomments(self, tag, limit=0):
|
||||||
|
"""Iterate comments only."""
|
||||||
|
|
||||||
|
for comment in CommentsMatch(tag).get_comments(limit):
|
||||||
|
yield comment
|
||||||
|
|
||||||
def select_one(self, tag):
|
def select_one(self, tag):
|
||||||
"""Select a single tag."""
|
"""Select a single tag."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,10 @@
|
||||||
"""CSS selector parser."""
|
"""CSS selector parser."""
|
||||||
|
from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
from functools import lru_cache
|
|
||||||
from . import util
|
from . import util
|
||||||
from . import css_match as cm
|
from . import css_match as cm
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
from .util import SelectorSyntaxError
|
from .util import SelectorSyntaxError
|
||||||
import warnings
|
|
||||||
|
|
||||||
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
||||||
|
|
||||||
|
|
@ -60,8 +59,6 @@ PSEUDO_SIMPLE_NO_MATCH = {
|
||||||
# Complex pseudo classes that take selector lists
|
# Complex pseudo classes that take selector lists
|
||||||
PSEUDO_COMPLEX = {
|
PSEUDO_COMPLEX = {
|
||||||
':contains',
|
':contains',
|
||||||
':-soup-contains',
|
|
||||||
':-soup-contains-own',
|
|
||||||
':has',
|
':has',
|
||||||
':is',
|
':is',
|
||||||
':matches',
|
':matches',
|
||||||
|
|
@ -113,6 +110,11 @@ VALUE = r'''
|
||||||
ATTR = r'''
|
ATTR = r'''
|
||||||
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
|
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
|
||||||
'''.format(ws=WSC, value=VALUE)
|
'''.format(ws=WSC, value=VALUE)
|
||||||
|
# Definitions for quirks mode
|
||||||
|
QUIRKS_ATTR_IDENTIFIER = r'(?:(?:{esc}|(?!/\*)[^"\] \t\r\n\f])+?)'.format(esc=CSS_ESCAPES)
|
||||||
|
QUIRKS_ATTR = r'''
|
||||||
|
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
|
||||||
|
'''.format(ws=WSC, value=QUIRKS_ATTR_IDENTIFIER)
|
||||||
|
|
||||||
# Selector patterns
|
# Selector patterns
|
||||||
# IDs (`#id`)
|
# IDs (`#id`)
|
||||||
|
|
@ -120,11 +122,13 @@ PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
|
||||||
# Classes (`.class`)
|
# Classes (`.class`)
|
||||||
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
|
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
|
||||||
# Prefix:Tag (`prefix|tag`)
|
# Prefix:Tag (`prefix|tag`)
|
||||||
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
|
PAT_TAG = r'(?:(?:{ident}|\*)?\|)?(?:{ident}|\*)'.format(ident=IDENTIFIER)
|
||||||
# Attributes (`[attr]`, `[attr=value]`, etc.)
|
# Attributes (`[attr]`, `[attr=value]`, etc.)
|
||||||
PAT_ATTR = r'''
|
PAT_ATTR = r'\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}'.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
|
||||||
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
|
# Quirks attributes, like real attributes, but unquoted values can contain anything but whitespace and closing `]`.
|
||||||
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
|
PAT_QUIRKS_ATTR = r'''
|
||||||
|
\[{ws}*(?P<ns_attr>(?:(?:{ident}|\*)?\|)?{ident}){attr}
|
||||||
|
'''.format(ws=WSC, ident=IDENTIFIER, attr=QUIRKS_ATTR)
|
||||||
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
|
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
|
||||||
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
|
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
|
||||||
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
|
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
|
||||||
|
|
@ -195,13 +199,12 @@ FLG_INDETERMINATE = 0x20
|
||||||
FLG_OPEN = 0x40
|
FLG_OPEN = 0x40
|
||||||
FLG_IN_RANGE = 0x80
|
FLG_IN_RANGE = 0x80
|
||||||
FLG_OUT_OF_RANGE = 0x100
|
FLG_OUT_OF_RANGE = 0x100
|
||||||
FLG_PLACEHOLDER_SHOWN = 0x200
|
|
||||||
|
|
||||||
# Maximum cached patterns to store
|
# Maximum cached patterns to store
|
||||||
_MAXCACHE = 500
|
_MAXCACHE = 500
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=_MAXCACHE)
|
@util.lru_cache(maxsize=_MAXCACHE)
|
||||||
def _cached_css_compile(pattern, namespaces, custom, flags):
|
def _cached_css_compile(pattern, namespaces, custom, flags):
|
||||||
"""Cached CSS compile."""
|
"""Cached CSS compile."""
|
||||||
|
|
||||||
|
|
@ -250,7 +253,7 @@ def css_unescape(content, string=False):
|
||||||
codepoint = int(m.group(1)[1:], 16)
|
codepoint = int(m.group(1)[1:], 16)
|
||||||
if codepoint == 0:
|
if codepoint == 0:
|
||||||
codepoint = UNICODE_REPLACEMENT_CHAR
|
codepoint = UNICODE_REPLACEMENT_CHAR
|
||||||
value = chr(codepoint)
|
value = util.uchr(codepoint)
|
||||||
elif m.group(2):
|
elif m.group(2):
|
||||||
value = m.group(2)[1:]
|
value = m.group(2)[1:]
|
||||||
elif m.group(3):
|
elif m.group(3):
|
||||||
|
|
@ -274,7 +277,7 @@ def escape(ident):
|
||||||
string.append('\\{}'.format(ident))
|
string.append('\\{}'.format(ident))
|
||||||
else:
|
else:
|
||||||
for index, c in enumerate(ident):
|
for index, c in enumerate(ident):
|
||||||
codepoint = ord(c)
|
codepoint = util.uord(c)
|
||||||
if codepoint == 0x00:
|
if codepoint == 0x00:
|
||||||
string.append('\ufffd')
|
string.append('\ufffd')
|
||||||
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
|
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
|
||||||
|
|
@ -305,7 +308,12 @@ class SelectorPattern(object):
|
||||||
|
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
def match(self, selector, index, flags):
|
def enabled(self, flags):
|
||||||
|
"""Enabled."""
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def match(self, selector, index):
|
||||||
"""Match the selector."""
|
"""Match the selector."""
|
||||||
|
|
||||||
return self.re_pattern.match(selector, index)
|
return self.re_pattern.match(selector, index)
|
||||||
|
|
@ -320,7 +328,7 @@ class SpecialPseudoPattern(SelectorPattern):
|
||||||
self.patterns = {}
|
self.patterns = {}
|
||||||
for p in patterns:
|
for p in patterns:
|
||||||
name = p[0]
|
name = p[0]
|
||||||
pattern = p[3](name, p[2])
|
pattern = SelectorPattern(name, p[2])
|
||||||
for pseudo in p[1]:
|
for pseudo in p[1]:
|
||||||
self.patterns[pseudo] = pattern
|
self.patterns[pseudo] = pattern
|
||||||
|
|
||||||
|
|
@ -332,7 +340,12 @@ class SpecialPseudoPattern(SelectorPattern):
|
||||||
|
|
||||||
return self.matched_name.get_name()
|
return self.matched_name.get_name()
|
||||||
|
|
||||||
def match(self, selector, index, flags):
|
def enabled(self, flags):
|
||||||
|
"""Enabled."""
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def match(self, selector, index):
|
||||||
"""Match the selector."""
|
"""Match the selector."""
|
||||||
|
|
||||||
pseudo = None
|
pseudo = None
|
||||||
|
|
@ -341,13 +354,22 @@ class SpecialPseudoPattern(SelectorPattern):
|
||||||
name = util.lower(css_unescape(m.group('name')))
|
name = util.lower(css_unescape(m.group('name')))
|
||||||
pattern = self.patterns.get(name)
|
pattern = self.patterns.get(name)
|
||||||
if pattern:
|
if pattern:
|
||||||
pseudo = pattern.match(selector, index, flags)
|
pseudo = pattern.match(selector, index)
|
||||||
if pseudo:
|
if pseudo:
|
||||||
self.matched_name = pattern
|
self.matched_name = pattern
|
||||||
|
|
||||||
return pseudo
|
return pseudo
|
||||||
|
|
||||||
|
|
||||||
|
class QuirkPattern(SelectorPattern):
|
||||||
|
"""Selector pattern for quirk mode."""
|
||||||
|
|
||||||
|
def enabled(self, flags):
|
||||||
|
"""Enabled if quirks flag is present."""
|
||||||
|
|
||||||
|
return flags & util._QUIRKS
|
||||||
|
|
||||||
|
|
||||||
class _Selector(object):
|
class _Selector(object):
|
||||||
"""
|
"""
|
||||||
Intermediate selector class.
|
Intermediate selector class.
|
||||||
|
|
@ -424,16 +446,11 @@ class CSSParser(object):
|
||||||
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
|
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
|
||||||
SpecialPseudoPattern(
|
SpecialPseudoPattern(
|
||||||
(
|
(
|
||||||
(
|
("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS),
|
||||||
"pseudo_contains",
|
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD),
|
||||||
(':contains', ':-soup-contains', ':-soup-contains-own'),
|
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE),
|
||||||
PAT_PSEUDO_CONTAINS,
|
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG),
|
||||||
SelectorPattern
|
("pseudo_dir", (':dir',), PAT_PSEUDO_DIR)
|
||||||
),
|
|
||||||
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
|
|
||||||
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
|
|
||||||
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
|
|
||||||
("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
|
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
|
SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
|
||||||
|
|
@ -444,6 +461,7 @@ class CSSParser(object):
|
||||||
SelectorPattern("class", PAT_CLASS),
|
SelectorPattern("class", PAT_CLASS),
|
||||||
SelectorPattern("tag", PAT_TAG),
|
SelectorPattern("tag", PAT_TAG),
|
||||||
SelectorPattern("attribute", PAT_ATTR),
|
SelectorPattern("attribute", PAT_ATTR),
|
||||||
|
QuirkPattern("quirks_attribute", PAT_QUIRKS_ATTR),
|
||||||
SelectorPattern("combine", PAT_COMBINE)
|
SelectorPattern("combine", PAT_COMBINE)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -453,19 +471,24 @@ class CSSParser(object):
|
||||||
self.pattern = selector.replace('\x00', '\ufffd')
|
self.pattern = selector.replace('\x00', '\ufffd')
|
||||||
self.flags = flags
|
self.flags = flags
|
||||||
self.debug = self.flags & util.DEBUG
|
self.debug = self.flags & util.DEBUG
|
||||||
|
self.quirks = self.flags & util._QUIRKS
|
||||||
self.custom = {} if custom is None else custom
|
self.custom = {} if custom is None else custom
|
||||||
|
|
||||||
def parse_attribute_selector(self, sel, m, has_selector):
|
def parse_attribute_selector(self, sel, m, has_selector, quirks):
|
||||||
"""Create attribute selector from the returned regex match."""
|
"""Create attribute selector from the returned regex match."""
|
||||||
|
|
||||||
inverse = False
|
inverse = False
|
||||||
op = m.group('cmp')
|
op = m.group('cmp')
|
||||||
case = util.lower(m.group('case')) if m.group('case') else None
|
case = util.lower(m.group('case')) if m.group('case') else None
|
||||||
ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
|
parts = [css_unescape(a) for a in m.group('ns_attr').split('|')]
|
||||||
attr = css_unescape(m.group('attr_name'))
|
ns = ''
|
||||||
is_type = False
|
is_type = False
|
||||||
pattern2 = None
|
pattern2 = None
|
||||||
|
if len(parts) > 1:
|
||||||
|
ns = parts[0]
|
||||||
|
attr = parts[1]
|
||||||
|
else:
|
||||||
|
attr = parts[0]
|
||||||
if case:
|
if case:
|
||||||
flags = re.I if case == 'i' else 0
|
flags = re.I if case == 'i' else 0
|
||||||
elif util.lower(attr) == 'type':
|
elif util.lower(attr) == 'type':
|
||||||
|
|
@ -475,7 +498,7 @@ class CSSParser(object):
|
||||||
flags = 0
|
flags = 0
|
||||||
|
|
||||||
if op:
|
if op:
|
||||||
if m.group('value').startswith(('"', "'")):
|
if m.group('value').startswith(('"', "'")) and not quirks:
|
||||||
value = css_unescape(m.group('value')[1:-1], True)
|
value = css_unescape(m.group('value')[1:-1], True)
|
||||||
else:
|
else:
|
||||||
value = css_unescape(m.group('value'))
|
value = css_unescape(m.group('value'))
|
||||||
|
|
@ -502,12 +525,13 @@ class CSSParser(object):
|
||||||
elif op.startswith('|'):
|
elif op.startswith('|'):
|
||||||
# Value starts with word in dash separated list
|
# Value starts with word in dash separated list
|
||||||
pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
|
pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
|
||||||
|
elif op.startswith('!'):
|
||||||
|
# Equivalent to `:not([attr=value])`
|
||||||
|
pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
|
||||||
|
inverse = True
|
||||||
else:
|
else:
|
||||||
# Value matches
|
# Value matches
|
||||||
pattern = re.compile(r'^%s$' % re.escape(value), flags)
|
pattern = re.compile(r'^%s$' % re.escape(value), flags)
|
||||||
if op.startswith('!'):
|
|
||||||
# Equivalent to `:not([attr=value])`
|
|
||||||
inverse = True
|
|
||||||
if is_type and pattern:
|
if is_type and pattern:
|
||||||
pattern2 = re.compile(pattern.pattern)
|
pattern2 = re.compile(pattern.pattern)
|
||||||
|
|
||||||
|
|
@ -528,8 +552,13 @@ class CSSParser(object):
|
||||||
def parse_tag_pattern(self, sel, m, has_selector):
|
def parse_tag_pattern(self, sel, m, has_selector):
|
||||||
"""Parse tag pattern from regex match."""
|
"""Parse tag pattern from regex match."""
|
||||||
|
|
||||||
prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
|
parts = [css_unescape(x) for x in m.group(0).split('|')]
|
||||||
tag = css_unescape(m.group('tag_name'))
|
if len(parts) > 1:
|
||||||
|
prefix = parts[0]
|
||||||
|
tag = parts[1]
|
||||||
|
else:
|
||||||
|
tag = parts[0]
|
||||||
|
prefix = None
|
||||||
sel.tag = ct.SelectorTag(tag, prefix)
|
sel.tag = ct.SelectorTag(tag, prefix)
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
@ -771,11 +800,21 @@ class CSSParser(object):
|
||||||
if not combinator:
|
if not combinator:
|
||||||
combinator = WS_COMBINATOR
|
combinator = WS_COMBINATOR
|
||||||
if not has_selector:
|
if not has_selector:
|
||||||
raise SelectorSyntaxError(
|
# The only way we don't fail is if we are at the root level and quirks mode is enabled,
|
||||||
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
|
# and we've found no other selectors yet in this compound selector.
|
||||||
|
if (not self.quirks or is_pseudo or combinator == COMMA_COMBINATOR or relations):
|
||||||
|
raise SelectorSyntaxError(
|
||||||
|
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
|
||||||
|
self.pattern,
|
||||||
|
index
|
||||||
|
)
|
||||||
|
util.warn_quirks(
|
||||||
|
'You have attempted to use a combinator without a selector before it at position {}.'.format(index),
|
||||||
|
'the :scope pseudo class (or another appropriate selector) should be placed before the combinator.',
|
||||||
self.pattern,
|
self.pattern,
|
||||||
index
|
index
|
||||||
)
|
)
|
||||||
|
sel.flags |= ct.SEL_SCOPE
|
||||||
|
|
||||||
if combinator == COMMA_COMBINATOR:
|
if combinator == COMMA_COMBINATOR:
|
||||||
if not sel.tag and not is_pseudo:
|
if not sel.tag and not is_pseudo:
|
||||||
|
|
@ -808,14 +847,7 @@ class CSSParser(object):
|
||||||
def parse_pseudo_contains(self, sel, m, has_selector):
|
def parse_pseudo_contains(self, sel, m, has_selector):
|
||||||
"""Parse contains."""
|
"""Parse contains."""
|
||||||
|
|
||||||
pseudo = util.lower(css_unescape(m.group('name')))
|
values = m.group('values')
|
||||||
if pseudo == ":contains":
|
|
||||||
warnings.warn(
|
|
||||||
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
|
|
||||||
FutureWarning
|
|
||||||
)
|
|
||||||
contains_own = pseudo == ":-soup-contains-own"
|
|
||||||
values = css_unescape(m.group('values'))
|
|
||||||
patterns = []
|
patterns = []
|
||||||
for token in RE_VALUES.finditer(values):
|
for token in RE_VALUES.finditer(values):
|
||||||
if token.group('split'):
|
if token.group('split'):
|
||||||
|
|
@ -826,7 +858,7 @@ class CSSParser(object):
|
||||||
else:
|
else:
|
||||||
value = css_unescape(value)
|
value = css_unescape(value)
|
||||||
patterns.append(value)
|
patterns.append(value)
|
||||||
sel.contains.append(ct.SelectorContains(tuple(patterns), contains_own))
|
sel.contains.append(ct.SelectorContains(tuple(patterns)))
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
|
|
@ -840,12 +872,20 @@ class CSSParser(object):
|
||||||
continue
|
continue
|
||||||
value = token.group('value')
|
value = token.group('value')
|
||||||
if value.startswith(('"', "'")):
|
if value.startswith(('"', "'")):
|
||||||
value = css_unescape(value[1:-1], True)
|
parts = css_unescape(value[1:-1], True).split('-')
|
||||||
else:
|
else:
|
||||||
value = css_unescape(value)
|
parts = css_unescape(value).split('-')
|
||||||
|
|
||||||
patterns.append(value)
|
|
||||||
|
|
||||||
|
new_parts = []
|
||||||
|
first = True
|
||||||
|
for part in parts:
|
||||||
|
if part == '*' and first:
|
||||||
|
new_parts.append('(?!x\b)[a-z0-9]+?')
|
||||||
|
elif part != '*':
|
||||||
|
new_parts.append(('' if first else '(-(?!x\b)[a-z0-9]+)*?\\-') + re.escape(part))
|
||||||
|
if first:
|
||||||
|
first = False
|
||||||
|
patterns.append(re.compile(r'^{}(?:-.*)?$'.format(''.join(new_parts)), re.I))
|
||||||
sel.lang.append(ct.SelectorLang(patterns))
|
sel.lang.append(ct.SelectorLang(patterns))
|
||||||
has_selector = True
|
has_selector = True
|
||||||
|
|
||||||
|
|
@ -877,7 +917,6 @@ class CSSParser(object):
|
||||||
is_indeterminate = bool(flags & FLG_INDETERMINATE)
|
is_indeterminate = bool(flags & FLG_INDETERMINATE)
|
||||||
is_in_range = bool(flags & FLG_IN_RANGE)
|
is_in_range = bool(flags & FLG_IN_RANGE)
|
||||||
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
|
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
|
||||||
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
|
|
||||||
|
|
||||||
if self.debug: # pragma: no cover
|
if self.debug: # pragma: no cover
|
||||||
if is_pseudo:
|
if is_pseudo:
|
||||||
|
|
@ -898,8 +937,6 @@ class CSSParser(object):
|
||||||
print(' is_in_range: True')
|
print(' is_in_range: True')
|
||||||
if is_out_of_range:
|
if is_out_of_range:
|
||||||
print(' is_out_of_range: True')
|
print(' is_out_of_range: True')
|
||||||
if is_placeholder_shown:
|
|
||||||
print(' is_placeholder_shown: True')
|
|
||||||
|
|
||||||
if is_relative:
|
if is_relative:
|
||||||
selectors.append(_Selector())
|
selectors.append(_Selector())
|
||||||
|
|
@ -916,7 +953,7 @@ class CSSParser(object):
|
||||||
elif key == 'pseudo_class':
|
elif key == 'pseudo_class':
|
||||||
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
|
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
|
||||||
elif key == 'pseudo_element':
|
elif key == 'pseudo_element':
|
||||||
raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
|
raise NotImplementedError("Psuedo-element found at position {}".format(m.start(0)))
|
||||||
elif key == 'pseudo_contains':
|
elif key == 'pseudo_contains':
|
||||||
has_selector = self.parse_pseudo_contains(sel, m, has_selector)
|
has_selector = self.parse_pseudo_contains(sel, m, has_selector)
|
||||||
elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
|
elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
|
||||||
|
|
@ -952,8 +989,18 @@ class CSSParser(object):
|
||||||
has_selector, sel = self.parse_combinator(
|
has_selector, sel = self.parse_combinator(
|
||||||
sel, m, has_selector, selectors, relations, is_pseudo, index
|
sel, m, has_selector, selectors, relations, is_pseudo, index
|
||||||
)
|
)
|
||||||
elif key == 'attribute':
|
elif key in ('attribute', 'quirks_attribute'):
|
||||||
has_selector = self.parse_attribute_selector(sel, m, has_selector)
|
quirks = key == 'quirks_attribute'
|
||||||
|
if quirks:
|
||||||
|
temp_index = index + m.group(0).find('=') + 1
|
||||||
|
util.warn_quirks(
|
||||||
|
"You have attempted to use an attribute " +
|
||||||
|
"value that should have been quoted at position {}.".format(temp_index),
|
||||||
|
"the attribute value should be quoted.",
|
||||||
|
self.pattern,
|
||||||
|
temp_index
|
||||||
|
)
|
||||||
|
has_selector = self.parse_attribute_selector(sel, m, has_selector, quirks)
|
||||||
elif key == 'tag':
|
elif key == 'tag':
|
||||||
if has_selector:
|
if has_selector:
|
||||||
raise SelectorSyntaxError(
|
raise SelectorSyntaxError(
|
||||||
|
|
@ -1006,8 +1053,6 @@ class CSSParser(object):
|
||||||
selectors[-1].flags = ct.SEL_IN_RANGE
|
selectors[-1].flags = ct.SEL_IN_RANGE
|
||||||
if is_out_of_range:
|
if is_out_of_range:
|
||||||
selectors[-1].flags = ct.SEL_OUT_OF_RANGE
|
selectors[-1].flags = ct.SEL_OUT_OF_RANGE
|
||||||
if is_placeholder_shown:
|
|
||||||
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
|
|
||||||
|
|
||||||
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
|
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
|
||||||
|
|
||||||
|
|
@ -1021,11 +1066,15 @@ class CSSParser(object):
|
||||||
end = (m.start(0) - 1) if m else (len(pattern) - 1)
|
end = (m.start(0) - 1) if m else (len(pattern) - 1)
|
||||||
|
|
||||||
if self.debug: # pragma: no cover
|
if self.debug: # pragma: no cover
|
||||||
|
if self.quirks:
|
||||||
|
print('## QUIRKS MODE: Throwing out the spec!')
|
||||||
print('## PARSING: {!r}'.format(pattern))
|
print('## PARSING: {!r}'.format(pattern))
|
||||||
while index <= end:
|
while index <= end:
|
||||||
m = None
|
m = None
|
||||||
for v in self.css_tokens:
|
for v in self.css_tokens:
|
||||||
m = v.match(pattern, index, self.flags)
|
if not v.enabled(self.flags): # pragma: no cover
|
||||||
|
continue
|
||||||
|
m = v.match(pattern, index)
|
||||||
if m:
|
if m:
|
||||||
name = v.get_name()
|
name = v.get_name()
|
||||||
if self.debug: # pragma: no cover
|
if self.debug: # pragma: no cover
|
||||||
|
|
@ -1053,7 +1102,13 @@ class CSSParser(object):
|
||||||
print('## END PARSING')
|
print('## END PARSING')
|
||||||
|
|
||||||
def process_selectors(self, index=0, flags=0):
|
def process_selectors(self, index=0, flags=0):
|
||||||
"""Process selectors."""
|
"""
|
||||||
|
Process selectors.
|
||||||
|
|
||||||
|
We do our own selectors as BeautifulSoup4 has some annoying quirks,
|
||||||
|
and we don't really need to do nth selectors or siblings or
|
||||||
|
descendants etc.
|
||||||
|
"""
|
||||||
|
|
||||||
return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
|
return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
|
||||||
|
|
||||||
|
|
@ -1068,7 +1123,8 @@ CSS_LINK = CSSParser(
|
||||||
# CSS pattern for `:checked`
|
# CSS pattern for `:checked`
|
||||||
CSS_CHECKED = CSSParser(
|
CSS_CHECKED = CSSParser(
|
||||||
'''
|
'''
|
||||||
html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
|
html|*:is(input[type=checkbox], input[type=radio])[checked],
|
||||||
|
html|select > html|option[selected]
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:default` (must compile CSS_CHECKED first)
|
# CSS pattern for `:default` (must compile CSS_CHECKED first)
|
||||||
|
|
@ -1094,23 +1150,23 @@ CSS_INDETERMINATE = CSSParser(
|
||||||
This pattern must be at the end.
|
This pattern must be at the end.
|
||||||
Special logic is applied to the last selector.
|
Special logic is applied to the last selector.
|
||||||
*/
|
*/
|
||||||
html|input[type="radio"][name]:not([name='']):not([checked])
|
html|input[type="radio"][name][name!='']:not([checked])
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
|
||||||
# CSS pattern for `:disabled`
|
# CSS pattern for `:disabled`
|
||||||
CSS_DISABLED = CSSParser(
|
CSS_DISABLED = CSSParser(
|
||||||
'''
|
'''
|
||||||
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
|
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
|
||||||
html|optgroup[disabled] > html|option,
|
html|optgroup[disabled] > html|option,
|
||||||
html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
|
html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
|
||||||
html|fieldset[disabled] >
|
html|fieldset[disabled] >
|
||||||
html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
|
html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:enabled`
|
# CSS pattern for `:enabled`
|
||||||
CSS_ENABLED = CSSParser(
|
CSS_ENABLED = CSSParser(
|
||||||
'''
|
'''
|
||||||
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
|
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:required`
|
# CSS pattern for `:required`
|
||||||
|
|
@ -1124,20 +1180,22 @@ CSS_OPTIONAL = CSSParser(
|
||||||
# CSS pattern for `:placeholder-shown`
|
# CSS pattern for `:placeholder-shown`
|
||||||
CSS_PLACEHOLDER_SHOWN = CSSParser(
|
CSS_PLACEHOLDER_SHOWN = CSSParser(
|
||||||
'''
|
'''
|
||||||
html|input:is(
|
html|*:is(
|
||||||
:not([type]),
|
input:is(
|
||||||
[type=""],
|
:not([type]),
|
||||||
[type=text],
|
[type=""],
|
||||||
[type=search],
|
[type=text],
|
||||||
[type=url],
|
[type=search],
|
||||||
[type=tel],
|
[type=url],
|
||||||
[type=email],
|
[type=tel],
|
||||||
[type=password],
|
[type=email],
|
||||||
[type=number]
|
[type=password],
|
||||||
)[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
|
[type=number]
|
||||||
html|textarea[placeholder]:not([placeholder=''])
|
),
|
||||||
|
textarea
|
||||||
|
)[placeholder][placeholder!='']
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern default for `:nth-child` "of S" feature
|
# CSS pattern default for `:nth-child` "of S" feature
|
||||||
CSS_NTH_OF_S_DEFAULT = CSSParser(
|
CSS_NTH_OF_S_DEFAULT = CSSParser(
|
||||||
'*|*'
|
'*|*'
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
"""CSS selector structure items."""
|
"""CSS selector structure items."""
|
||||||
import copyreg
|
from __future__ import unicode_literals
|
||||||
from collections.abc import Hashable, Mapping
|
from . import util
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
'Selector',
|
'Selector',
|
||||||
|
|
@ -26,7 +26,6 @@ SEL_DIR_RTL = 0x40
|
||||||
SEL_IN_RANGE = 0x80
|
SEL_IN_RANGE = 0x80
|
||||||
SEL_OUT_OF_RANGE = 0x100
|
SEL_OUT_OF_RANGE = 0x100
|
||||||
SEL_DEFINED = 0x200
|
SEL_DEFINED = 0x200
|
||||||
SEL_PLACEHOLDER_SHOWN = 0x400
|
|
||||||
|
|
||||||
|
|
||||||
class Immutable(object):
|
class Immutable(object):
|
||||||
|
|
@ -86,7 +85,7 @@ class Immutable(object):
|
||||||
__str__ = __repr__
|
__str__ = __repr__
|
||||||
|
|
||||||
|
|
||||||
class ImmutableDict(Mapping):
|
class ImmutableDict(util.Mapping):
|
||||||
"""Hashable, immutable dictionary."""
|
"""Hashable, immutable dictionary."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
@ -95,8 +94,8 @@ class ImmutableDict(Mapping):
|
||||||
arg = args[0] if args else kwargs
|
arg = args[0] if args else kwargs
|
||||||
is_dict = isinstance(arg, dict)
|
is_dict = isinstance(arg, dict)
|
||||||
if (
|
if (
|
||||||
is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
|
is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or
|
||||||
not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
|
not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg])
|
||||||
):
|
):
|
||||||
raise TypeError('All values must be hashable')
|
raise TypeError('All values must be hashable')
|
||||||
|
|
||||||
|
|
@ -141,9 +140,9 @@ class Namespaces(ImmutableDict):
|
||||||
# so don't bother checking that.
|
# so don't bother checking that.
|
||||||
arg = args[0] if args else kwargs
|
arg = args[0] if args else kwargs
|
||||||
is_dict = isinstance(arg, dict)
|
is_dict = isinstance(arg, dict)
|
||||||
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
|
if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
|
||||||
raise TypeError('Namespace keys and values must be Unicode strings')
|
raise TypeError('Namespace keys and values must be Unicode strings')
|
||||||
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
|
||||||
raise TypeError('Namespace keys and values must be Unicode strings')
|
raise TypeError('Namespace keys and values must be Unicode strings')
|
||||||
|
|
||||||
super(Namespaces, self).__init__(*args, **kwargs)
|
super(Namespaces, self).__init__(*args, **kwargs)
|
||||||
|
|
@ -160,9 +159,9 @@ class CustomSelectors(ImmutableDict):
|
||||||
# so don't bother checking that.
|
# so don't bother checking that.
|
||||||
arg = args[0] if args else kwargs
|
arg = args[0] if args else kwargs
|
||||||
is_dict = isinstance(arg, dict)
|
is_dict = isinstance(arg, dict)
|
||||||
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
|
if is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg.items()]):
|
||||||
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
||||||
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
elif not is_dict and not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in arg]):
|
||||||
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
||||||
|
|
||||||
super(CustomSelectors, self).__init__(*args, **kwargs)
|
super(CustomSelectors, self).__init__(*args, **kwargs)
|
||||||
|
|
@ -239,14 +238,13 @@ class SelectorAttribute(Immutable):
|
||||||
class SelectorContains(Immutable):
|
class SelectorContains(Immutable):
|
||||||
"""Selector contains rule."""
|
"""Selector contains rule."""
|
||||||
|
|
||||||
__slots__ = ("text", "own", "_hash")
|
__slots__ = ("text", "_hash")
|
||||||
|
|
||||||
def __init__(self, text, own):
|
def __init__(self, text):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorContains, self).__init__(
|
super(SelectorContains, self).__init__(
|
||||||
text=text,
|
text=text
|
||||||
own=own
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -333,7 +331,7 @@ def _pickle(p):
|
||||||
def pickle_register(obj):
|
def pickle_register(obj):
|
||||||
"""Allow object to be pickled."""
|
"""Allow object to be pickled."""
|
||||||
|
|
||||||
copyreg.pickle(obj, _pickle)
|
util.copyreg.pickle(obj, _pickle)
|
||||||
|
|
||||||
|
|
||||||
pickle_register(Selector)
|
pickle_register(Selector)
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,47 @@
|
||||||
"""Utility."""
|
"""Utility."""
|
||||||
from functools import wraps, lru_cache
|
from __future__ import unicode_literals
|
||||||
|
from functools import wraps
|
||||||
import warnings
|
import warnings
|
||||||
|
import sys
|
||||||
|
import struct
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
MODULE = os.path.dirname(__file__)
|
||||||
|
|
||||||
|
PY3 = sys.version_info >= (3, 0)
|
||||||
|
PY35 = sys.version_info >= (3, 5)
|
||||||
|
PY37 = sys.version_info >= (3, 7)
|
||||||
|
|
||||||
|
if PY3:
|
||||||
|
from functools import lru_cache # noqa F401
|
||||||
|
import copyreg # noqa F401
|
||||||
|
from collections.abc import Hashable, Mapping # noqa F401
|
||||||
|
|
||||||
|
ustr = str
|
||||||
|
bstr = bytes
|
||||||
|
unichar = chr
|
||||||
|
string = str
|
||||||
|
else:
|
||||||
|
from backports.functools_lru_cache import lru_cache # noqa F401
|
||||||
|
import copy_reg as copyreg # noqa F401
|
||||||
|
from collections import Hashable, Mapping # noqa F401
|
||||||
|
|
||||||
|
ustr = unicode # noqa: F821
|
||||||
|
bstr = str
|
||||||
|
unichar = unichr # noqa: F821
|
||||||
|
string = basestring # noqa: F821
|
||||||
|
|
||||||
DEBUG = 0x00001
|
DEBUG = 0x00001
|
||||||
|
_QUIRKS = 0x10000
|
||||||
|
|
||||||
RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
|
RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$')
|
||||||
|
|
||||||
|
LC_A = ord('a')
|
||||||
|
LC_Z = ord('z')
|
||||||
UC_A = ord('A')
|
UC_A = ord('A')
|
||||||
UC_Z = ord('Z')
|
UC_Z = ord('Z')
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=512)
|
|
||||||
def lower(string):
|
def lower(string):
|
||||||
"""Lower."""
|
"""Lower."""
|
||||||
|
|
||||||
|
|
@ -22,7 +52,38 @@ def lower(string):
|
||||||
return ''.join(new_string)
|
return ''.join(new_string)
|
||||||
|
|
||||||
|
|
||||||
class SelectorSyntaxError(Exception):
|
def upper(string): # pragma: no cover
|
||||||
|
"""Lower."""
|
||||||
|
|
||||||
|
new_string = []
|
||||||
|
for c in string:
|
||||||
|
o = ord(c)
|
||||||
|
new_string.append(chr(o - 32) if LC_A <= o <= LC_Z else c)
|
||||||
|
return ''.join(new_string)
|
||||||
|
|
||||||
|
|
||||||
|
def uchr(i):
|
||||||
|
"""Allow getting Unicode character on narrow python builds."""
|
||||||
|
|
||||||
|
try:
|
||||||
|
return unichar(i)
|
||||||
|
except ValueError: # pragma: no cover
|
||||||
|
return struct.pack('i', i).decode('utf-32')
|
||||||
|
|
||||||
|
|
||||||
|
def uord(c):
|
||||||
|
"""Get Unicode ordinal."""
|
||||||
|
|
||||||
|
if len(c) == 2: # pragma: no cover
|
||||||
|
high, low = [ord(p) for p in c]
|
||||||
|
ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
|
||||||
|
else:
|
||||||
|
ordinal = ord(c)
|
||||||
|
|
||||||
|
return ordinal
|
||||||
|
|
||||||
|
|
||||||
|
class SelectorSyntaxError(SyntaxError):
|
||||||
"""Syntax error in a CSS selector."""
|
"""Syntax error in a CSS selector."""
|
||||||
|
|
||||||
def __init__(self, msg, pattern=None, index=None):
|
def __init__(self, msg, pattern=None, index=None):
|
||||||
|
|
@ -108,3 +169,45 @@ def get_pattern_context(pattern, index):
|
||||||
last = m.end(0)
|
last = m.end(0)
|
||||||
|
|
||||||
return ''.join(text), line, col
|
return ''.join(text), line, col
|
||||||
|
|
||||||
|
|
||||||
|
class QuirksWarning(UserWarning): # pragma: no cover
|
||||||
|
"""Warning for quirks mode."""
|
||||||
|
|
||||||
|
|
||||||
|
def warn_quirks(message, recommend, pattern, index):
|
||||||
|
"""Warn quirks."""
|
||||||
|
|
||||||
|
import traceback
|
||||||
|
import bs4 # noqa: F401
|
||||||
|
|
||||||
|
# Acquire source code line context
|
||||||
|
paths = (MODULE, sys.modules['bs4'].__path__[0])
|
||||||
|
tb = traceback.extract_stack()
|
||||||
|
previous = None
|
||||||
|
filename = None
|
||||||
|
lineno = None
|
||||||
|
for entry in tb:
|
||||||
|
if (PY35 and entry.filename.startswith(paths)) or (not PY35 and entry[0].startswith(paths)):
|
||||||
|
break
|
||||||
|
previous = entry
|
||||||
|
if previous:
|
||||||
|
filename = previous.filename if PY35 else previous[0]
|
||||||
|
lineno = previous.lineno if PY35 else previous[1]
|
||||||
|
|
||||||
|
# Format pattern to show line and column position
|
||||||
|
context, line = get_pattern_context(pattern, index)[0:2]
|
||||||
|
|
||||||
|
# Display warning
|
||||||
|
warnings.warn_explicit(
|
||||||
|
"\nCSS selector pattern:\n" +
|
||||||
|
" {}\n".format(message) +
|
||||||
|
" This behavior is only allowed temporarily for Beautiful Soup's transition to Soup Sieve.\n" +
|
||||||
|
" In order to confrom to the CSS spec, {}\n".format(recommend) +
|
||||||
|
" It is strongly recommended the selector be altered to conform to the CSS spec " +
|
||||||
|
"as an exception will be raised for this case in the future.\n" +
|
||||||
|
"pattern line {}:\n{}".format(line, context),
|
||||||
|
QuirksWarning,
|
||||||
|
filename,
|
||||||
|
lineno
|
||||||
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue