Fix for SB using an attr on noscript tags now.

This commit is contained in:
Jim Miller 2022-04-29 10:31:19 -05:00
parent 6c3a133ccd
commit 6e3055e753

View file

@@ -770,9 +770,12 @@ class BaseSiteAdapter(Requestable):
'''
## html5lib handles <noscript> oddly. See:
-## https://bugs.launchpad.net/beautifulsoup/+bug/1277464
-## This should 'hide' and restore <noscript> tags.
-data = data.replace("noscript>","fff_hide_noscript>")
+## https://bugs.launchpad.net/beautifulsoup/+bug/1277464 This
+## should 'hide' and restore <noscript> tags. Need to do
+## </?noscript instead of noscript> as of Apr2022 when SB
+## added a class attr to noscript. 2x replace() faster than
+## re.sub() in simple test
+data = data.replace("<noscript","<fff_hide_noscript").replace("</noscript","</fff_hide_noscript")
## soup and re-soup because BS4/html5lib is more forgiving of
## incorrectly nested tags that way.