From 0325fe2225f05fe37cb53f7e7c080ec0eb7be05f Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Fri, 24 Oct 2014 17:33:11 -0700
Subject: [PATCH] lyrics: Remove script tags (fix #1034)

---
 beetsplug/lyrics.py |  6 +++---
 test/test_lyrics.py | 10 +++++++---
 2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 47e299f82..7dea01a32 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -329,10 +329,10 @@ def _scrape_strip_cruft(html, plain_text_out=False):
     """
     html = unescape(html)
 
-    # Normalize EOL
-    html = html.replace('\r', '\n')
+    html = html.replace('\r', '\n')  # Normalize EOL.
     html = re.sub(r' +', ' ', html)  # Whitespaces collapse.
-    html = BREAK_RE.sub('\n', html)  # <br> eats up surrounding '\n'
+    html = BREAK_RE.sub('\n', html)  # <br> eats up surrounding '\n'.
+    html = re.sub(r'(?s)<(script).*?</\1>', '', html)  # Strip script tags.
 
     if plain_text_out:  # Strip remaining HTML tags
         html = TAG_RE.sub('', html)
diff --git a/test/test_lyrics.py b/test/test_lyrics.py
index 6b2929565..c4876c003 100644
--- a/test/test_lyrics.py
+++ b/test/test_lyrics.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # This file is part of beets.
 # Copyright 2014, Fabrice Laporte.
 #
@@ -131,7 +130,7 @@ class LyricsPluginTest(unittest.TestCase):
             self.assertFalse(lyrics.is_lyrics(t))
 
     def test_slugify(self):
-        text = u"http://site.com/çafe-au_lait(boisson)"
+        text = u"http://site.com/\xe7afe-au_lait(boisson)"
         self.assertEqual(lyrics.slugify(text), 'http://site.com/cafe_au_lait')
 
     def test_scrape_strip_cruft(self):
@@ -144,6 +143,11 @@ class LyricsPluginTest(unittest.TestCase):
         self.assertEqual(lyrics._scrape_strip_cruft(text, True),
                          "one\ntwo !\n\nfour")
 
+    def test_scrape_strip_scripts(self):
+        text = u"""foo<script>bar</script>baz"""
+        self.assertEqual(lyrics._scrape_strip_cruft(text, True),
+                         "foobaz")
+
     def test_scrape_merge_paragraphs(self):
         text = u"one</p>   <p class='myclass'>two</p><p>three"
         self.assertEqual(lyrics._scrape_merge_paragraphs(text),
@@ -263,7 +267,7 @@ class LyricsGooglePluginTest(unittest.TestCase):
         except ImportError:
             self.skipTest('Beautiful Soup 4 not available')
         if sys.version_info[:3] < (2, 7, 3):
-            self.skipTest("Python’s built-in HTML parser is not good enough")
+            self.skipTest("Python's built-in HTML parser is not good enough")
         lyrics.LyricsPlugin()
         lyrics.fetch_url = MockFetchUrl()