lyrics: Use multiple lyrics search strings.

In particular we use the original artist and title before stripping
*and* and *featuring* suffixes.

Fixes #914.
This commit is contained in:
Thomas Scholtes 2014-08-24 16:15:14 +02:00
parent 32626fedc4
commit b512a0ce37
3 changed files with 129 additions and 97 deletions

View file

@ -22,6 +22,7 @@ import urllib
import json
import unicodedata
import difflib
import itertools
from beets.plugins import BeetsPlugin
from beets import ui
@ -130,33 +131,51 @@ def strip_cruft(lyrics, wscollapse=True):
return lyrics
def split_multi_titles(s):
"""Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe)
and returns titles as a list or None if song is not dual."""
if '/' not in s:
return None
return [x.strip() for x in s.split('/')]
def search_pairs(item):
"""Yield a pairs of artists and titles to search for.
The first item in the pair is the name of the artist, the second
item is a list of song names.
def remove_ft_artist_suffix(s):
"""Remove any featuring artists from an artist string.
In addition to the artist and title obtained from the `item` the
method tries to strip extra information like paranthesized suffixes
and featured artists from the strings and add them as caniddates.
The method also tries to split multiple titles separated with `/`.
"""
pattern = r"(.*?) (&|\b(and|feat(uring)?\b))"
match = re.search(pattern, s, re.IGNORECASE)
title, artist = item.title, item.artist
titles = [title]
artists = [artist]
# Remove any featuring artists from the artists name
pattern = r"(.*?) (&|\b(and|ft|feat(uring)?\b))"
match = re.search(pattern, artist, re.IGNORECASE)
if match:
s = match.group(1)
return s
artists.append(match.group(1))
def remove_parenthesized_suffix(s):
"""Remove a parenthesized suffix from a title string. Common
examples include (live), (remix), and (acoustic).
"""
# Remove a parenthesized suffix from a title string. Common
# examples include (live), (remix), and (acoustic).
pattern = r"(.+?)\s+[(].*[)]$"
match = re.search(pattern, s, re.IGNORECASE)
match = re.search(pattern, title, re.IGNORECASE)
if match:
s = match.group(1)
return s
titles.append(match.group(1))
# Remove any featuring artists from the title
pattern = r"(.*?) \b(ft|feat(uring)?)\b"
for title in titles:
match = re.search(pattern, title, re.IGNORECASE)
if match:
titles.append(match.group(1))
# Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe)
# and each of them.
multi_titles = []
for title in titles:
multi_titles.append([title])
if '/' in title:
multi_titles.append([x.strip() for x in title.split('/')])
return itertools.product(artists, multi_titles)
def _encode(s):
@ -492,45 +511,31 @@ class LyricsPlugin(BeetsPlugin):
parameter controls the visibility of the function's status log
messages.
"""
fallback = self.config['fallback'].get()
# Skip if the item already has lyrics.
if not force and item.lyrics:
log.log(loglevel, u'lyrics already present: %s - %s' %
(item.artist, item.title))
return
artist = remove_ft_artist_suffix(item.artist)
title = remove_parenthesized_suffix(
remove_ft_artist_suffix(item.title)
)
lyrics = None
for artist, titles in search_pairs(item):
lyrics = [self.get_lyrics(artist, title) for title in titles]
if any(lyrics):
break
# Fetch lyrics.
lyrics = self.get_lyrics(artist, title)
lyrics = u"\n\n---\n\n".join([l for l in lyrics if l])
if not lyrics:
# Check for combined title.
# (e.g. Pink Floyd - Speak to Me / Breathe)
titles = split_multi_titles(title)
if titles:
for t in titles:
lyrics_title = self.get_lyrics(artist, t)
if lyrics_title:
if lyrics:
lyrics += u"\n\n---\n\n%s" % lyrics_title
else:
lyrics = lyrics_title
if not lyrics:
if lyrics:
log.log(loglevel, u'fetched lyrics: %s - %s' %
(item.artist, item.title))
else:
log.log(loglevel, u'lyrics not found: %s - %s' %
(artist, title))
(item.artist, item.title))
fallback = self.config['fallback'].get()
if fallback:
lyrics = fallback
else:
return
else:
log.log(loglevel, u'fetched lyrics : %s - %s' %
(artist, title))
item.lyrics = lyrics
@ -542,12 +547,6 @@ class LyricsPlugin(BeetsPlugin):
"""Fetch lyrics, trying each source in turn. Return a string or
None if no lyrics were found.
"""
# Remove featuring artists from search.
pattern = u"(.*) feat(uring|\.)?\s\S+"
match = re.search(pattern, artist, re.IGNORECASE)
if match:
artist = match.group(0)
for backend in self.backends:
lyrics = backend(artist, title)
if lyrics:

View file

@ -9,6 +9,9 @@ This release adds **sorting** to beets queries. See :ref:`query-sort`.
Fixes:
* Invalid state files don't crash the importer.
* The :doc:`/plugins/lyrics` only strips featured artists and
parenthesized title suffixes if no lyrics for the original artist and
title were found.
1.3.7 (August 22, 2014)

View file

@ -16,6 +16,7 @@
from _common import unittest
from beetsplug import lyrics
from beets.library import Item
class LyricsPluginTest(unittest.TestCase):
@ -23,53 +24,82 @@ class LyricsPluginTest(unittest.TestCase):
"""Set up configuration"""
lyrics.LyricsPlugin()
def test_split_multi_titles(self):
self.assertEqual(lyrics.split_multi_titles('song1 / song2 / song3'),
['song1', 'song2', 'song3'])
self.assertEqual(lyrics.split_multi_titles('song1/song2 song3'),
['song1', 'song2 song3'])
self.assertEqual(lyrics.split_multi_titles('song1 song2'),
None)
def test_search_artist(self):
item = Item(artist='Alice ft. Bob', title='song')
self.assertIn(('Alice ft. Bob', ['song']),
lyrics.search_pairs(item))
self.assertIn(('Alice', ['song']),
lyrics.search_pairs(item))
def test_remove_ft_artist_suffix(self):
self.assertEqual(
lyrics.remove_ft_artist_suffix('Bob featuring Marcia'),
'Bob'
)
self.assertEqual(
lyrics.remove_ft_artist_suffix('Bob feat Marcia'),
'Bob'
)
self.assertEqual(
lyrics.remove_ft_artist_suffix('Bob and Marcia'),
'Bob'
)
self.assertEqual(
lyrics.remove_ft_artist_suffix('Bob feat. Marcia'),
'Bob'
)
self.assertEqual(
lyrics.remove_ft_artist_suffix('Bob & Marcia'),
'Bob'
)
self.assertEqual(
lyrics.remove_ft_artist_suffix('Bob feats Marcia'),
'Bob feats Marcia'
)
item = Item(artist='Alice feat Bob', title='song')
self.assertIn(('Alice feat Bob', ['song']),
lyrics.search_pairs(item))
self.assertIn(('Alice', ['song']),
lyrics.search_pairs(item))
def test_remove_parenthesized_suffix(self):
self.assertEqual(
lyrics.remove_parenthesized_suffix('Song (live)'),
'Song'
)
self.assertEqual(
lyrics.remove_parenthesized_suffix('Song (live) (new)'),
'Song'
)
self.assertEqual(
lyrics.remove_parenthesized_suffix('Song (live (new))'),
'Song'
)
item = Item(artist='Alice feat. Bob', title='song')
self.assertIn(('Alice feat. Bob', ['song']),
lyrics.search_pairs(item))
self.assertIn(('Alice', ['song']),
lyrics.search_pairs(item))
item = Item(artist='Alice feats Bob', title='song')
self.assertIn(('Alice feats Bob', ['song']),
lyrics.search_pairs(item))
self.assertNotIn(('Alice', ['song']),
lyrics.search_pairs(item))
item = Item(artist='Alice featuring Bob', title='song')
self.assertIn(('Alice featuring Bob', ['song']),
lyrics.search_pairs(item))
self.assertIn(('Alice', ['song']),
lyrics.search_pairs(item))
item = Item(artist='Alice & Bob', title='song')
self.assertIn(('Alice & Bob', ['song']),
lyrics.search_pairs(item))
self.assertIn(('Alice', ['song']),
lyrics.search_pairs(item))
item = Item(artist='Alice and Bob', title='song')
self.assertIn(('Alice and Bob', ['song']),
lyrics.search_pairs(item))
self.assertIn(('Alice', ['song']),
lyrics.search_pairs(item))
def test_search_pairs_multi_titles(self):
item = Item(title='1 / 2', artist='A')
self.assertIn(('A', ['1 / 2']), lyrics.search_pairs(item))
self.assertIn(('A', ['1', '2']), lyrics.search_pairs(item))
item = Item(title='1/2', artist='A')
self.assertIn(('A', ['1/2']), lyrics.search_pairs(item))
self.assertIn(('A', ['1', '2']), lyrics.search_pairs(item))
def test_search_pairs_titles(self):
item = Item(title='Song (live)', artist='A')
self.assertIn(('A', ['Song']), lyrics.search_pairs(item))
self.assertIn(('A', ['Song (live)']), lyrics.search_pairs(item))
item = Item(title='Song (live) (new)', artist='A')
self.assertIn(('A', ['Song']), lyrics.search_pairs(item))
self.assertIn(('A', ['Song (live) (new)']), lyrics.search_pairs(item))
item = Item(title='Song (live (new))', artist='A')
self.assertIn(('A', ['Song']), lyrics.search_pairs(item))
self.assertIn(('A', ['Song (live (new))']), lyrics.search_pairs(item))
item = Item(title='Song ft. B', artist='A')
self.assertIn(('A', ['Song']), lyrics.search_pairs(item))
self.assertIn(('A', ['Song ft. B']), lyrics.search_pairs(item))
item = Item(title='Song featuring B', artist='A')
self.assertIn(('A', ['Song']), lyrics.search_pairs(item))
self.assertIn(('A', ['Song featuring B']), lyrics.search_pairs(item))
item = Item(title='Song and B', artist='A')
self.assertNotIn(('A', ['Song']), lyrics.search_pairs(item))
self.assertIn(('A', ['Song and B']), lyrics.search_pairs(item))
def test_remove_credits(self):
self.assertEqual(