Always yield the item's own artist and title first

Rather than storing the candidate titles and artists in unordered sets,
append them to lists and build an OrderedDict from each list to filter
out duplicate strings while keeping insertion order.
This commit is contained in:
Fabrice Laporte 2016-09-25 15:46:22 +02:00
parent 7e15ac0695
commit e2703b9a7c
2 changed files with 18 additions and 8 deletions

View file

@ -25,8 +25,9 @@ import re
import requests
import unicodedata
import warnings
from six.moves import urllib
import six
from six.moves import urllib
from collections import OrderedDict
try:
from bs4 import SoupStrainer, BeautifulSoup
@ -153,23 +154,28 @@ def search_pairs(item):
return string
title, artist = item.title, item.artist
titles = set([title])
artists = set([artist])
titles = [title]
artists = [artist]
# Remove any featuring artists from the artists name
artists.add(strip_part(artist, r"(.*?) {0}".format(plugins.feat_tokens())))
artists.append(
strip_part(artist, r"(.*?) {0}".format(plugins.feat_tokens())))
# Remove a parenthesized suffix from a title string. Common
# examples include (live), (remix), and (acoustic).
titles.add(strip_part(title, r"(.+?)\s+[(].*[)]$"))
titles.append(strip_part(title, r"(.+?)\s+[(].*[)]$"))
# Remove any featuring artists from the title
pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False))
for title in list(titles):
titles.add(strip_part(title, pattern))
titles.append(strip_part(title, pattern))
# Remove part of the title string after colon ':'
titles.add(strip_part(title, r"(.+?)\s*:.*"))
# Remove part of the title string after colon ':' for songs with subtitles
titles.append(strip_part(title, r"(.+?)\s*:.*"))
# Yield artist and title obtained from item first
titles = list(OrderedDict.fromkeys([title] + titles))
artists = list(OrderedDict.fromkeys([artist] + artists))
# Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe)
# and each of them.

View file

@ -84,6 +84,10 @@ class LyricsPluginTest(unittest.TestCase):
self.assertIn(('Alice', ['song']),
lyrics.search_pairs(item))
item = Item(artist='Alice and Bob', title='song')
self.assertEqual(('Alice and Bob', ['song']),
list(lyrics.search_pairs(item))[0])
def test_search_pairs_multi_titles(self):
item = Item(title='1 / 2', artist='A')
self.assertIn(('A', ['1 / 2']), lyrics.search_pairs(item))