Merge pull request #3790 from AnonTester/master

lyrics: Strip \u2005 (four-per-em space) in lyrics (Issue 3789)
This commit is contained in:
Adrian Sampson 2020-11-08 09:14:11 -05:00 committed by GitHub
commit 78722b079e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -424,6 +424,7 @@ def _scrape_strip_cruft(html, plain_text_out=False):
html = re.sub(r' +', ' ', html) # Whitespaces collapse.
html = BREAK_RE.sub('\n', html) # <br> eats up surrounding '\n'.
html = re.sub(r'(?s)<(script).*?</\1>', '', html) # Strip script tags.
html = re.sub(u'\u2005', " ", html) # replace unicode with regular space
if plain_text_out: # Strip remaining HTML tags
html = COMMENT_RE.sub('', html)