Fix #1673: Escape regex terms in lyrics

This commit is contained in:
Adrian Sampson 2015-12-12 18:09:24 -08:00
parent d11fe020ce
commit 5a285cc11f
3 changed files with 18 additions and 0 deletions

View file

@ -463,15 +463,19 @@ class Google(Backend):
sitename = re.search(u"//([^/]+)/.*",
self.slugify(url_link.lower())).group(1)
url_title = self.slugify(url_title.lower())
# Check if URL title contains song title (exact match)
if url_title.find(title) != -1:
return True
# or try extracting song title from URL title and check if
# they are close enough
tokens = [by + '_' + artist for by in self.BY_TRANS] + \
[artist, sitename, sitename.replace('www.', '')] + \
self.LYRICS_TRANS
tokens = [re.escape(t) for t in tokens]
song_title = re.sub(u'(%s)' % u'|'.join(tokens), u'', url_title)
song_title = song_title.strip('_|')
typo_ratio = .9
ratio = difflib.SequenceMatcher(None, song_title, title).ratio()

View file

@ -84,6 +84,9 @@ Fixes:
older encodings. The encoding is now always updated to UTF-8. :bug:`879`
* :doc:`/plugins/fetchart`: The Google Images backend has been removed. It
used an API that has been shut down. :bug:`1760`
* :doc:`/plugins/lyrics`: Fix a crash in the Google backend when searching for
artists whose names contain regular-expression metacharacters, such as
Sunn O))). :bug:`1673`
.. _Emby Server: http://emby.media

View file

@ -376,6 +376,17 @@ class LyricsGooglePluginTest(unittest.TestCase):
self.assertEqual(google.is_page_candidate(url, urlTitle, s['title'],
s['artist']), False, url)
def test_is_page_candidate_special_chars(self):
    """Smoke-test `is_page_candidate` with an artist name made of
    regular expression metacharacters: the call only has to complete
    without raising, so its return value is not inspected.
    """
    # https://github.com/sampsyo/beets/issues/1673
    source = self.source
    page_url = source['url'] + source['path']
    page_title = u'foo'
    # The unbalanced parentheses in the artist name are what used to
    # break the regular expression built from it.
    google.is_page_candidate(page_url, page_title, source['title'],
                             'Sunn O)))')
def suite():
    """Return a test suite loaded by name from this module."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromName(__name__)