Fix #1673: Escape regex terms in lyrics

2025-12-26 02:24:33 +01:00 · 2015-12-12 18:09:24 -08:00 · 2015-12-12 18:09:24 -08:00 · 5a285cc11f
commit 5a285cc11f
parent d11fe020ce
3 changed files with 18 additions and 0 deletions
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -463,15 +463,19 @@ class Google(Backend):
        sitename = re.search(u"//([^/]+)/.*",
                             self.slugify(url_link.lower())).group(1)
        url_title = self.slugify(url_title.lower())
+
        # Check if URL title contains song title (exact match)
        if url_title.find(title) != -1:
            return True
+
        # or try extracting song title from URL title and check if
        # they are close enough
        tokens = [by + '_' + artist for by in self.BY_TRANS] + \
                 [artist, sitename, sitename.replace('www.', '')] + \
            self.LYRICS_TRANS
+        tokens = [re.escape(t) for t in tokens]
        song_title = re.sub(u'(%s)' % u'|'.join(tokens), u'', url_title)
+
        song_title = song_title.strip('_|')
        typo_ratio = .9
        ratio = difflib.SequenceMatcher(None, song_title, title).ratio()
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -84,6 +84,9 @@ Fixes:
  older encodings. The encoding is now always updated to UTF-8. :bug:`879`
 * :doc:`/plugins/fetchart`: The Google Images backend has been removed. It
  used an API that has been shut down. :bug:`1760`
+* :doc:`/plugins/lyrics`: Fix a crash in the Google backend when searching for
+  bands with regular-expression characters in their names, like Sunn O))).
+  :bug:`1673`

 .. _Emby Server: http://emby.media

--- a/test/test_lyrics.py
+++ b/test/test_lyrics.py
@@ -376,6 +376,17 @@ class LyricsGooglePluginTest(unittest.TestCase):
        self.assertEqual(google.is_page_candidate(url, urlTitle, s['title'],
                         s['artist']), False, url)

+    def test_is_page_candidate_special_chars(self):
+        """Ensure that `is_page_candidate` doesn't crash when the artist
+        and such contain special regular expression characters.
+        """
+        # https://github.com/sampsyo/beets/issues/1673
+        s = self.source
+        url = s['url'] + s['path']
+        url_title = u'foo'
+
+        google.is_page_candidate(url, url_title, s['title'], 'Sunn O)))')
+

 def suite():
    return unittest.TestLoader().loadTestsFromName(__name__)