From b512a0ce37660941c97f4f8d05a5bb1837005094 Mon Sep 17 00:00:00 2001 From: Thomas Scholtes Date: Sun, 24 Aug 2014 16:15:14 +0200 Subject: [PATCH] lyrics: Use multiple lyrics search strings. In particular we use the original artist and title before stripping *and* and *featuring* suffixes. Fixes #914. --- beetsplug/lyrics.py | 103 +++++++++++++++++++------------------ docs/changelog.rst | 3 ++ test/test_lyrics.py | 120 +++++++++++++++++++++++++++----------------- 3 files changed, 129 insertions(+), 97 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 5d44a35f3..01a188616 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -22,6 +22,7 @@ import urllib import json import unicodedata import difflib +import itertools from beets.plugins import BeetsPlugin from beets import ui @@ -130,33 +131,51 @@ def strip_cruft(lyrics, wscollapse=True): return lyrics -def split_multi_titles(s): - """Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) - and returns titles as a list or None if song is not dual.""" - if '/' not in s: - return None - return [x.strip() for x in s.split('/')] +def search_pairs(item): + """Yield a pairs of artists and titles to search for. + The first item in the pair is the name of the artist, the second + item is a list of song names. -def remove_ft_artist_suffix(s): - """Remove any featuring artists from an artist string. + In addition to the artist and title obtained from the `item` the + method tries to strip extra information like paranthesized suffixes + and featured artists from the strings and add them as caniddates. + The method also tries to split multiple titles separated with `/`. """ - pattern = r"(.*?) (&|\b(and|feat(uring)?\b))" - match = re.search(pattern, s, re.IGNORECASE) + + title, artist = item.title, item.artist + titles = [title] + artists = [artist] + + # Remove any featuring artists from the artists name + pattern = r"(.*?) (&|\b(and|ft|feat(uring)?\b))" + match = re.search(pattern, artist, re.IGNORECASE) if match: - s = match.group(1) - return s + artists.append(match.group(1)) - -def remove_parenthesized_suffix(s): - """Remove a parenthesized suffix from a title string. Common - examples include (live), (remix), and (acoustic). - """ + # Remove a parenthesized suffix from a title string. Common + # examples include (live), (remix), and (acoustic). pattern = r"(.+?)\s+[(].*[)]$" - match = re.search(pattern, s, re.IGNORECASE) + match = re.search(pattern, title, re.IGNORECASE) if match: - s = match.group(1) - return s + titles.append(match.group(1)) + + # Remove any featuring artists from the title + pattern = r"(.*?) \b(ft|feat(uring)?)\b" + for title in titles: + match = re.search(pattern, title, re.IGNORECASE) + if match: + titles.append(match.group(1)) + + # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) + # and each of them. + multi_titles = [] + for title in titles: + multi_titles.append([title]) + if '/' in title: + multi_titles.append([x.strip() for x in title.split('/')]) + + return itertools.product(artists, multi_titles) def _encode(s): @@ -492,45 +511,31 @@ class LyricsPlugin(BeetsPlugin): parameter controls the visibility of the function's status log messages. """ - fallback = self.config['fallback'].get() - # Skip if the item already has lyrics. if not force and item.lyrics: log.log(loglevel, u'lyrics already present: %s - %s' % (item.artist, item.title)) return - artist = remove_ft_artist_suffix(item.artist) - title = remove_parenthesized_suffix( - remove_ft_artist_suffix(item.title) - ) + lyrics = None + for artist, titles in search_pairs(item): + lyrics = [self.get_lyrics(artist, title) for title in titles] + if any(lyrics): + break - # Fetch lyrics. - lyrics = self.get_lyrics(artist, title) + lyrics = u"\n\n---\n\n".join([l for l in lyrics if l]) - if not lyrics: - # Check for combined title. - # (e.g. Pink Floyd - Speak to Me / Breathe) - titles = split_multi_titles(title) - if titles: - for t in titles: - lyrics_title = self.get_lyrics(artist, t) - if lyrics_title: - if lyrics: - lyrics += u"\n\n---\n\n%s" % lyrics_title - else: - lyrics = lyrics_title - - if not lyrics: + if lyrics: + log.log(loglevel, u'fetched lyrics: %s - %s' % + (item.artist, item.title)) + else: log.log(loglevel, u'lyrics not found: %s - %s' % - (artist, title)) + (item.artist, item.title)) + fallback = self.config['fallback'].get() if fallback: lyrics = fallback else: return - else: - log.log(loglevel, u'fetched lyrics : %s - %s' % - (artist, title)) item.lyrics = lyrics @@ -542,12 +547,6 @@ class LyricsPlugin(BeetsPlugin): """Fetch lyrics, trying each source in turn. Return a string or None if no lyrics were found. """ - # Remove featuring artists from search. - pattern = u"(.*) feat(uring|\.)?\s\S+" - match = re.search(pattern, artist, re.IGNORECASE) - if match: - artist = match.group(0) - for backend in self.backends: lyrics = backend(artist, title) if lyrics: diff --git a/docs/changelog.rst b/docs/changelog.rst index 0a5ba81ac..49c7ef5a3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,9 @@ This release adds **sorting** to beets queries. See :ref:`query-sort`. Fixes: * Invalid state files don't crash the importer. +* The :doc:`/plugins/lyrics` only strips featured artists and + parenthesized title suffixes if no lyrics for the original artist and + title were found. 1.3.7 (August 22, 2014) diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 49cd79e23..4750e825f 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -16,6 +16,7 @@ from _common import unittest from beetsplug import lyrics +from beets.library import Item class LyricsPluginTest(unittest.TestCase): @@ -23,53 +24,82 @@ class LyricsPluginTest(unittest.TestCase): """Set up configuration""" lyrics.LyricsPlugin() - def test_split_multi_titles(self): - self.assertEqual(lyrics.split_multi_titles('song1 / song2 / song3'), - ['song1', 'song2', 'song3']) - self.assertEqual(lyrics.split_multi_titles('song1/song2 song3'), - ['song1', 'song2 song3']) - self.assertEqual(lyrics.split_multi_titles('song1 song2'), - None) + def test_search_artist(self): + item = Item(artist='Alice ft. Bob', title='song') + self.assertIn(('Alice ft. Bob', ['song']), + lyrics.search_pairs(item)) + self.assertIn(('Alice', ['song']), + lyrics.search_pairs(item)) - def test_remove_ft_artist_suffix(self): - self.assertEqual( - lyrics.remove_ft_artist_suffix('Bob featuring Marcia'), - 'Bob' - ) - self.assertEqual( - lyrics.remove_ft_artist_suffix('Bob feat Marcia'), - 'Bob' - ) - self.assertEqual( - lyrics.remove_ft_artist_suffix('Bob and Marcia'), - 'Bob' - ) - self.assertEqual( - lyrics.remove_ft_artist_suffix('Bob feat. Marcia'), - 'Bob' - ) - self.assertEqual( - lyrics.remove_ft_artist_suffix('Bob & Marcia'), - 'Bob' - ) - self.assertEqual( - lyrics.remove_ft_artist_suffix('Bob feats Marcia'), - 'Bob feats Marcia' - ) + item = Item(artist='Alice feat Bob', title='song') + self.assertIn(('Alice feat Bob', ['song']), + lyrics.search_pairs(item)) + self.assertIn(('Alice', ['song']), + lyrics.search_pairs(item)) - def test_remove_parenthesized_suffix(self): - self.assertEqual( - lyrics.remove_parenthesized_suffix('Song (live)'), - 'Song' - ) - self.assertEqual( - lyrics.remove_parenthesized_suffix('Song (live) (new)'), - 'Song' - ) - self.assertEqual( - lyrics.remove_parenthesized_suffix('Song (live (new))'), - 'Song' - ) + item = Item(artist='Alice feat. Bob', title='song') + self.assertIn(('Alice feat. Bob', ['song']), + lyrics.search_pairs(item)) + self.assertIn(('Alice', ['song']), + lyrics.search_pairs(item)) + + item = Item(artist='Alice feats Bob', title='song') + self.assertIn(('Alice feats Bob', ['song']), + lyrics.search_pairs(item)) + self.assertNotIn(('Alice', ['song']), + lyrics.search_pairs(item)) + + item = Item(artist='Alice featuring Bob', title='song') + self.assertIn(('Alice featuring Bob', ['song']), + lyrics.search_pairs(item)) + self.assertIn(('Alice', ['song']), + lyrics.search_pairs(item)) + + item = Item(artist='Alice & Bob', title='song') + self.assertIn(('Alice & Bob', ['song']), + lyrics.search_pairs(item)) + self.assertIn(('Alice', ['song']), + lyrics.search_pairs(item)) + + item = Item(artist='Alice and Bob', title='song') + self.assertIn(('Alice and Bob', ['song']), + lyrics.search_pairs(item)) + self.assertIn(('Alice', ['song']), + lyrics.search_pairs(item)) + + def test_search_pairs_multi_titles(self): + item = Item(title='1 / 2', artist='A') + self.assertIn(('A', ['1 / 2']), lyrics.search_pairs(item)) + self.assertIn(('A', ['1', '2']), lyrics.search_pairs(item)) + + item = Item(title='1/2', artist='A') + self.assertIn(('A', ['1/2']), lyrics.search_pairs(item)) + self.assertIn(('A', ['1', '2']), lyrics.search_pairs(item)) + + def test_search_pairs_titles(self): + item = Item(title='Song (live)', artist='A') + self.assertIn(('A', ['Song']), lyrics.search_pairs(item)) + self.assertIn(('A', ['Song (live)']), lyrics.search_pairs(item)) + + item = Item(title='Song (live) (new)', artist='A') + self.assertIn(('A', ['Song']), lyrics.search_pairs(item)) + self.assertIn(('A', ['Song (live) (new)']), lyrics.search_pairs(item)) + + item = Item(title='Song (live (new))', artist='A') + self.assertIn(('A', ['Song']), lyrics.search_pairs(item)) + self.assertIn(('A', ['Song (live (new))']), lyrics.search_pairs(item)) + + item = Item(title='Song ft. B', artist='A') + self.assertIn(('A', ['Song']), lyrics.search_pairs(item)) + self.assertIn(('A', ['Song ft. B']), lyrics.search_pairs(item)) + + item = Item(title='Song featuring B', artist='A') + self.assertIn(('A', ['Song']), lyrics.search_pairs(item)) + self.assertIn(('A', ['Song featuring B']), lyrics.search_pairs(item)) + + item = Item(title='Song and B', artist='A') + self.assertNotIn(('A', ['Song']), lyrics.search_pairs(item)) + self.assertIn(('A', ['Song and B']), lyrics.search_pairs(item)) def test_remove_credits(self): self.assertEqual(