From 4b702b338e0dcc83a1abe85eb311fb2fe8ae0fa3 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Fri, 23 Sep 2016 22:21:00 +0200 Subject: [PATCH 1/6] lyrics: reduce code duplication in search_pairs() --- beetsplug/lyrics.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index f6092f625..b2e5b5f0d 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -144,29 +144,29 @@ def search_pairs(item): The method also tries to split multiple titles separated with `/`. """ + def strip_part(string, pattern): + """Return first matching group if string matches pattern, the full + string otherwise.""" + match = re.search(pattern, string, re.IGNORECASE) + if match: + return match.group(1) + return string + title, artist = item.title, item.artist - titles = [title] - artists = [artist] + titles = set([title]) + artists = set([artist]) # Remove any featuring artists from the artists name - pattern = r"(.*?) {0}".format(plugins.feat_tokens()) - match = re.search(pattern, artist, re.IGNORECASE) - if match: - artists.append(match.group(1)) + artists.add(strip_part(artist, r"(.*?) {0}".format(plugins.feat_tokens()))) # Remove a parenthesized suffix from a title string. Common # examples include (live), (remix), and (acoustic). - pattern = r"(.+?)\s+[(].*[)]$" - match = re.search(pattern, title, re.IGNORECASE) - if match: - titles.append(match.group(1)) + titles.add(strip_part(title, r"(.+?)\s+[(].*[)]$")) # Remove any featuring artists from the title pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)) - for title in titles[:]: - match = re.search(pattern, title, re.IGNORECASE) - if match: - titles.append(match.group(1)) + for title in list(titles): + titles.add(strip_part(title, pattern)) # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) # and each of them. From 8b4f39da422de4b3a90e07fcd00bc76b7dcbd6c2 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Fri, 23 Sep 2016 22:23:32 +0200 Subject: [PATCH 2/6] lyrics: search for song title part preceding colon. fix #2205 --- beetsplug/lyrics.py | 3 +++ test/test_lyrics.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index b2e5b5f0d..dc069e430 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -168,6 +168,9 @@ def search_pairs(item): for title in list(titles): titles.add(strip_part(title, pattern)) + # Remove part of the title string after colon ':' + titles.add(strip_part(title, r"(.+?)\s*:.*")) + # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) # and each of them. multi_titles = [] diff --git a/test/test_lyrics.py b/test/test_lyrics.py index d49c4d980..797a227be 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -84,6 +84,7 @@ class LyricsPluginTest(unittest.TestCase): self.assertIn(('Alice', ['song']), lyrics.search_pairs(item)) + def test_search_pairs_multi_titles(self): item = Item(title='1 / 2', artist='A') self.assertIn(('A', ['1 / 2']), lyrics.search_pairs(item)) @@ -118,6 +119,10 @@ class LyricsPluginTest(unittest.TestCase): self.assertNotIn(('A', ['Song']), lyrics.search_pairs(item)) self.assertIn(('A', ['Song and B']), lyrics.search_pairs(item)) + item = Item(title='Song: B', artist='A') + self.assertIn(('A', ['Song']), lyrics.search_pairs(item)) + self.assertIn(('A', ['Song: B']), lyrics.search_pairs(item)) + def test_remove_credits(self): self.assertEqual( lyrics.remove_credits("""It's close to midnight From 059be3b94ca0fb3df462873f0500855c117088ab Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Fri, 23 Sep 2016 22:41:44 +0200 Subject: [PATCH 3/6] test_lyrics: fix too many blank lines --- test/test_lyrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 797a227be..0e9f13043 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -84,7 +84,6 @@ class LyricsPluginTest(unittest.TestCase): self.assertIn(('Alice', ['song']), lyrics.search_pairs(item)) - def test_search_pairs_multi_titles(self): item = Item(title='1 / 2', artist='A') self.assertIn(('A', ['1 / 2']), lyrics.search_pairs(item)) From 7e15ac069577eed6aa751a5e66bd3c50c7ddd537 Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Fri, 23 Sep 2016 23:11:27 +0200 Subject: [PATCH 4/6] add changelog --- docs/changelog.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 4c94af42b..2f09c0599 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -60,6 +60,8 @@ And there are a few bug fixes too: This is fixed. :bug:`2168` * :doc:`/plugins/embyupdate`: Fixes authentication header problem that caused a problem that it was not possible to get tokens from the Emby API. +* :doc:`/plugins/lyrics`: Search for lyrics using the title part preceding the + colon character. :bug:`2206` The last release, 1.3.19, also erroneously reported its version as "1.3.18" when you typed ``beet version``. This has been corrected. From e2703b9a7cf79b66027840b6869104e049a8418e Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Sun, 25 Sep 2016 15:46:22 +0200 Subject: [PATCH 5/6] always yield item artist and title first Rather than using an unordered set for storing pairs, append to a list and build an OrderedDict from it to filter duplicated strings while keeping order. --- beetsplug/lyrics.py | 22 ++++++++++++++-------- test/test_lyrics.py | 4 ++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index dc069e430..179f0c0b6 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -25,8 +25,9 @@ import re import requests import unicodedata import warnings -from six.moves import urllib import six +from six.moves import urllib +from collections import OrderedDict try: from bs4 import SoupStrainer, BeautifulSoup @@ -153,23 +154,28 @@ def search_pairs(item): return string title, artist = item.title, item.artist - titles = set([title]) - artists = set([artist]) + titles = [title] + artists = [artist] # Remove any featuring artists from the artists name - artists.add(strip_part(artist, r"(.*?) {0}".format(plugins.feat_tokens()))) + artists.append( + strip_part(artist, r"(.*?) {0}".format(plugins.feat_tokens()))) # Remove a parenthesized suffix from a title string. Common # examples include (live), (remix), and (acoustic). - titles.add(strip_part(title, r"(.+?)\s+[(].*[)]$")) + titles.append(strip_part(title, r"(.+?)\s+[(].*[)]$")) # Remove any featuring artists from the title pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)) for title in list(titles): - titles.add(strip_part(title, pattern)) + titles.append(strip_part(title, pattern)) - # Remove part of the title string after colon ':' - titles.add(strip_part(title, r"(.+?)\s*:.*")) + # Remove part of the title string after colon ':' for songs with subtitles + titles.append(strip_part(title, r"(.+?)\s*:.*")) + + # Yield artist and title obtained from item first + titles = list(OrderedDict.fromkeys([title] + titles)) + artists = list(OrderedDict.fromkeys([artist] + artists)) # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) # and each of them. diff --git a/test/test_lyrics.py b/test/test_lyrics.py index 0e9f13043..429dee4e8 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -84,6 +84,10 @@ class LyricsPluginTest(unittest.TestCase): self.assertIn(('Alice', ['song']), lyrics.search_pairs(item)) + item = Item(artist='Alice and Bob', title='song') + self.assertEqual(('Alice and Bob', ['song']), + list(lyrics.search_pairs(item))[0]) + def test_search_pairs_multi_titles(self): item = Item(title='1 / 2', artist='A') self.assertIn(('A', ['1 / 2']), lyrics.search_pairs(item)) From 722662440552bd28d6fb9bf6f2e06314ca5826da Mon Sep 17 00:00:00 2001 From: Fabrice Laporte Date: Sun, 25 Sep 2016 19:37:14 +0200 Subject: [PATCH 6/6] replace strip_part() by generate_alternatives() Delegate the update of titles and artists lists to the helper generate_alternatives() function. --- beetsplug/lyrics.py | 50 ++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 179f0c0b6..92757a40a 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -27,7 +27,6 @@ import unicodedata import warnings import six from six.moves import urllib -from collections import OrderedDict try: from bs4 import SoupStrainer, BeautifulSoup @@ -145,37 +144,32 @@ def search_pairs(item): The method also tries to split multiple titles separated with `/`. """ - def strip_part(string, pattern): - """Return first matching group if string matches pattern, the full - string otherwise.""" - match = re.search(pattern, string, re.IGNORECASE) - if match: - return match.group(1) - return string + def generate_alternatives(string, patterns): + """Generate string alternatives by extracting first matching group for + each given pattern.""" + alternatives = [string] + for pattern in patterns: + match = re.search(pattern, string, re.IGNORECASE) + if match: + alternatives.append(match.group(1)) + return alternatives title, artist = item.title, item.artist - titles = [title] - artists = [artist] - # Remove any featuring artists from the artists name - artists.append( - strip_part(artist, r"(.*?) {0}".format(plugins.feat_tokens()))) + patterns = [ + # Remove any featuring artists from the artists name + r"(.*?) {0}".format(plugins.feat_tokens())] + artists = generate_alternatives(artist, patterns) - # Remove a parenthesized suffix from a title string. Common - # examples include (live), (remix), and (acoustic). - titles.append(strip_part(title, r"(.+?)\s+[(].*[)]$")) - - # Remove any featuring artists from the title - pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)) - for title in list(titles): - titles.append(strip_part(title, pattern)) - - # Remove part of the title string after colon ':' for songs with subtitles - titles.append(strip_part(title, r"(.+?)\s*:.*")) - - # Yield artist and title obtained from item first - titles = list(OrderedDict.fromkeys([title] + titles)) - artists = list(OrderedDict.fromkeys([artist] + artists)) + patterns = [ + # Remove a parenthesized suffix from a title string. Common + # examples include (live), (remix), and (acoustic). + r"(.+?)\s+[(].*[)]$", + # Remove any featuring artists from the title + r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)), + # Remove part of title after colon ':' for songs with subtitles + r"(.+?)\s*:.*"] + titles = generate_alternatives(title, patterns) # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) # and each of them.