diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 584891789..1af34df99 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -463,15 +463,19 @@ class Google(Backend): sitename = re.search(u"//([^/]+)/.*", self.slugify(url_link.lower())).group(1) url_title = self.slugify(url_title.lower()) + # Check if URL title contains song title (exact match) if url_title.find(title) != -1: return True + # or try extracting song title from URL title and check if # they are close enough tokens = [by + '_' + artist for by in self.BY_TRANS] + \ [artist, sitename, sitename.replace('www.', '')] + \ self.LYRICS_TRANS + tokens = [re.escape(t) for t in tokens] song_title = re.sub(u'(%s)' % u'|'.join(tokens), u'', url_title) + song_title = song_title.strip('_|') typo_ratio = .9 ratio = difflib.SequenceMatcher(None, song_title, title).ratio() diff --git a/docs/changelog.rst b/docs/changelog.rst index 305157bf0..88e74fe5a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -84,6 +84,9 @@ Fixes: older encodings. The encoding is now always updated to UTF-8. :bug:`879` * :doc:`/plugins/fetchart`: The Google Images backend has been removed. It used an API that has been shut down. :bug:`1760` +* :doc:`/plugins/lyrics`: Fix a crash in the Google backend when searching for + bands with regular-expression characters in their names, like Sunn O))). + :bug:`1673` .. _Emby Server: http://emby.media diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 33b8c6bb5..515e96587 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -376,6 +376,17 @@ class LyricsGooglePluginTest(unittest.TestCase): self.assertEqual(google.is_page_candidate(url, urlTitle, s['title'], s['artist']), False, url) + def test_is_page_candidate_special_chars(self): + """Ensure that `is_page_candidate` doesn't crash when the artist + and such contain special regular expression characters. + """ + # https://github.com/sampsyo/beets/issues/1673 + s = self.source + url = s['url'] + s['path'] + url_title = u'foo' + + google.is_page_candidate(url, url_title, s['title'], 'Sunn O)))') + def suite(): return unittest.TestLoader().loadTestsFromName(__name__)