From 0325fe2225f05fe37cb53f7e7c080ec0eb7be05f Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Fri, 24 Oct 2014 17:33:11 -0700 Subject: [PATCH] lyrics: Remove script tags (fix #1034) --- beetsplug/lyrics.py | 6 +++--- test/test_lyrics.py | 10 +++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 47e299f82..7dea01a32 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -329,10 +329,10 @@ def _scrape_strip_cruft(html, plain_text_out=False): """ html = unescape(html) - # Normalize EOL - html = html.replace('\r', '\n') + html = html.replace('\r', '\n') # Normalize EOL. html = re.sub(r' +', ' ', html) # Whitespaces collapse. - html = BREAK_RE.sub('\n', html) #
<br> eats up surrounding '\n' + html = BREAK_RE.sub('\n', html) #
<br> eats up surrounding '\n'. + html = re.sub(r'<(script).*?</\1>(?s)', '', html) # Strip script tags. if plain_text_out: # Strip remaining HTML tags html = TAG_RE.sub('', html) diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 6b2929565..c4876c003 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2014, Fabrice Laporte. # @@ -131,7 +130,7 @@ class LyricsPluginTest(unittest.TestCase): self.assertFalse(lyrics.is_lyrics(t)) def test_slugify(self): - text = u"http://site.com/çafe-au_lait(boisson)" + text = u"http://site.com/\xe7afe-au_lait(boisson)" self.assertEqual(lyrics.slugify(text), 'http://site.com/cafe_au_lait') def test_scrape_strip_cruft(self): @@ -144,6 +143,11 @@ class LyricsPluginTest(unittest.TestCase): self.assertEqual(lyrics._scrape_strip_cruft(text, True), "one\ntwo !\n\nfour") + def test_scrape_strip_scripts(self): + text = u"""foo<script>bar</script>baz""" + self.assertEqual(lyrics._scrape_strip_cruft(text, True), + "foobaz") + def test_scrape_merge_paragraphs(self): text = u"one

two

three" self.assertEqual(lyrics._scrape_merge_paragraphs(text), @@ -263,7 +267,7 @@ class LyricsGooglePluginTest(unittest.TestCase): except ImportError: self.skipTest('Beautiful Soup 4 not available') if sys.version_info[:3] < (2, 7, 3): - self.skipTest("Python’s built-in HTML parser is not good enough") + self.skipTest("Python's built-in HTML parser is not good enough") lyrics.LyricsPlugin() lyrics.fetch_url = MockFetchUrl()