From ae175e156b92f77b593532f371f5a1c8317153d1 Mon Sep 17 00:00:00 2001 From: Jacob Pavlock Date: Fri, 26 Jun 2020 16:10:33 -0700 Subject: [PATCH 1/5] fix genius lyrics artist matching when artist contains a hyphen --- beetsplug/lyrics.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index f53191d8a..49d2be672 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -419,16 +419,23 @@ class Genius(Backend): return None for hit in json["response"]["hits"]: - # Genius uses zero-width characters to denote lowercase - # artist names. - hit_artist = hit["result"]["primary_artist"]["name"]. \ - strip(u'\u200b').lower() + hit_artist = self._clean(hit["result"]["primary_artist"]["name"]) - if hit_artist == artist.lower(): + if hit_artist.lower() == artist.lower(): return self.lyrics_from_song_page(hit["result"]["url"]) self._log.debug(u'genius: no matching artist') + def _clean(self, artist): + """Cleans genius-isms to help artist matching""" + # Genius uses zero-width characters to denote lowercase artist names + artist = artist.strip(u'\u200b') + + # Genius uses minus-hypen compared to beets hyphen + artist = artist.replace(u'\u002D', u'\u2010') + + return artist + class LyricsWiki(SymbolsReplaced): """Fetch lyrics from LyricsWiki.""" From bb5740d63caaaf230ceaab64e0ec9b52a1d19e1d Mon Sep 17 00:00:00 2001 From: Jacob Pavlock Date: Fri, 26 Jun 2020 16:41:20 -0700 Subject: [PATCH 2/5] better debug message for no genius artist match --- beetsplug/lyrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 49d2be672..090b99278 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -424,7 +424,8 @@ class Genius(Backend): if hit_artist.lower() == artist.lower(): return self.lyrics_from_song_page(hit["result"]["url"]) - self._log.debug(u'genius: no matching artist') + self._log.debug(u'Genius failed to find a matching artist for \'{0}\'', + artist) def _clean(self, artist): """Cleans genius-isms to help artist matching""" From 7220c91d2abc3c2bb565765ab512f20c11946c12 Mon Sep 17 00:00:00 2001 From: Jacob Pavlock Date: Fri, 26 Jun 2020 18:56:20 -0700 Subject: [PATCH 3/5] generalize genius artist matching to use slug() --- beetsplug/lyrics.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 090b99278..8e45d25b4 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -419,24 +419,14 @@ class Genius(Backend): return None for hit in json["response"]["hits"]: - hit_artist = self._clean(hit["result"]["primary_artist"]["name"]) + hit_artist = hit["result"]["primary_artist"]["name"] - if hit_artist.lower() == artist.lower(): + if slug(hit_artist) == slug(artist): return self.lyrics_from_song_page(hit["result"]["url"]) self._log.debug(u'Genius failed to find a matching artist for \'{0}\'', artist) - def _clean(self, artist): - """Cleans genius-isms to help artist matching""" - # Genius uses zero-width characters to denote lowercase artist names - artist = artist.strip(u'\u200b') - - # Genius uses minus-hypen compared to beets hyphen - artist = artist.replace(u'\u002D', u'\u2010') - - return artist - class LyricsWiki(SymbolsReplaced): """Fetch lyrics from LyricsWiki.""" From 17d9477106afd319b3a861b09366e0a32953de74 Mon Sep 17 00:00:00 2001 From: Jacob Pavlock Date: Fri, 26 Jun 2020 19:33:08 -0700 Subject: [PATCH 4/5] tests for applicable genius artist slugging --- test/test_lyrics.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_lyrics.py b/test/test_lyrics.py index d31116284..22d5f2f0c 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function +import itertools import os import re import six @@ -485,18 +486,28 @@ class SlugTests(unittest.TestCase): # plain ascii passthrough text = u"test" self.assertEqual(lyrics.slug(text), 'test') + # german unicode and capitals text = u"Mørdag" self.assertEqual(lyrics.slug(text), 'mordag') + # more accents and quotes text = u"l'été c'est fait pour jouer" self.assertEqual(lyrics.slug(text), 'l-ete-c-est-fait-pour-jouer') + # accents, parens and spaces text = u"\xe7afe au lait (boisson)" self.assertEqual(lyrics.slug(text), 'cafe-au-lait-boisson') text = u"Multiple spaces -- and symbols! -- merged" self.assertEqual(lyrics.slug(text), 'multiple-spaces-and-symbols-merged') + text = u"\u200Bno-width-space" + self.assertEqual(lyrics.slug(text), 'no-width-space') + + # variations of dashes should get standardized + dashes = [u'\u200D', u'\u2010'] + for dash1, dash2 in itertools.combinations(dashes, 2): + self.assertEqual(lyrics.slug(dash1), lyrics.slug(dash2)) def suite(): From 685d21a8fb907ff84559d8c98712ab196da579aa Mon Sep 17 00:00:00 2001 From: Jacob Pavlock Date: Sat, 27 Jun 2020 14:10:03 -0700 Subject: [PATCH 5/5] changelog --- docs/changelog.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 453d22305..73cd89fb8 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -129,6 +129,9 @@ New features: * :doc:`/plugins/plexupdate`: Add option to use secure connection to Plex server, and to ignore certificate validation errors if necessary. :bug:`2871` +* :doc:`/plugins/lyrics`: Improved searching Genius backend when artist + contained special characters. + :bug:`3634` Fixes: