From 36f84bfedd137f16ce6263f6b57c9f89e70be7b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 17 Jul 2017 11:44:06 -0400 Subject: [PATCH 1/8] add missing trailing newline after lyrics block this would yield a warning for every song --- beetsplug/lyrics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 57265a469..f2a59c5b5 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -781,9 +781,9 @@ class LyricsPlugin(plugins.BeetsPlugin): u'-' * len(tmpalbum.strip())) title_str = u":index:`%s`" % item.title.strip() block = u'| ' + item.lyrics.replace(u'\n', u'\n| ') - self.rest += u"%s\n%s\n\n%s\n" % (title_str, - u'~' * len(title_str), - block) + self.rest += u"%s\n%s\n\n%s\n\n" % (title_str, + u'~' * len(title_str), + block) def writerest_indexes(self, directory): """Write conf.py and index.rst files necessary for Sphinx From 9894e8752ba375a8f1734099e85dde48536222f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 17 Jul 2017 11:49:35 -0400 Subject: [PATCH 2/8] ignore trailing/leading whitespace when comparing artists --- beetsplug/lyrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index f2a59c5b5..acd1ab99d 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -759,7 +759,7 @@ class LyricsPlugin(plugins.BeetsPlugin): This will keep state (in the `rest` variable) in order to avoid writing continuously to the same files. """ - if item is None or self.artist != item.artist: + if item is None or self.artist != item.artist.strip(): if self.rest is not None: slug = re.sub(r'\W+', '-', unidecode(self.artist.strip()).lower()) @@ -769,10 +769,10 @@ class LyricsPlugin(plugins.BeetsPlugin): self.rest = None if item is None: return - self.artist = item.artist + self.artist = item.artist.strip() self.rest = u"%s\n%s\n\n.. contents::\n :local:\n\n" \ - % (self.artist.strip(), - u'=' * len(self.artist.strip())) + % (self.artist, + u'=' * len(self.artist)) if self.album != item.album: tmpalbum = self.album = item.album if self.album == '': From 9c36a41ea8630bc0e1339c8b41b3877b125c95f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 17 Jul 2017 11:50:15 -0400 Subject: [PATCH 3/8] slight refactoring: strip album only once --- beetsplug/lyrics.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index acd1ab99d..1d6d31814 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -774,11 +774,10 @@ class LyricsPlugin(plugins.BeetsPlugin): % (self.artist, u'=' * len(self.artist)) if self.album != item.album: - tmpalbum = self.album = item.album + tmpalbum = self.album = item.album.strip() if self.album == '': tmpalbum = u'Unknown album' - self.rest += u"%s\n%s\n\n" % (tmpalbum.strip(), - u'-' * len(tmpalbum.strip())) + self.rest += u"%s\n%s\n\n" % (tmpalbum, u'-' * len(tmpalbum)) title_str = u":index:`%s`" % item.title.strip() block = u'| ' + item.lyrics.replace(u'\n', u'\n| ') self.rest += u"%s\n%s\n\n%s\n\n" % (title_str, From 458f3636f4ef4fdb9627359c504aab9b35b93515 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 17 Jul 2017 11:59:12 -0400 Subject: [PATCH 4/8] compare artists based on the slug this is necessary because otherwise artists with different string representations but the same slug would overwrite one another this outlines more clearly the code duplication between the rst code and the slugify function, something which can be fixed later. --- beetsplug/lyrics.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 1d6d31814..27b05f542 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -759,11 +759,22 @@ class LyricsPlugin(plugins.BeetsPlugin): This will keep state (in the `rest` variable) in order to avoid writing continuously to the same files. """ - if item is None or self.artist != item.artist.strip(): + def slug(text): + """Make a URL-safe, human-readable version of the given text + + This will do the following: + + 1. decode unicode characters into ASCII + 2. shift everything to lowercase + 3. strip whitespace + 4. replace other non-word characters with dashes + """ + return re.sub(r'\W+', '-', unidecode(text).lower().strip()) + + if item is None or slug(self.artist) != slug(item.artist): if self.rest is not None: - slug = re.sub(r'\W+', '-', - unidecode(self.artist.strip()).lower()) - path = os.path.join(directory, 'artists', slug + u'.rst') + path = os.path.join(directory, 'artists', + slug(self.artist) + u'.rst') with open(path, 'wb') as output: output.write(self.rest.encode('utf-8')) self.rest = None From a8afabea809d202811653a8e3d4aa607f4e15e8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 18 Jul 2017 16:12:39 -0400 Subject: [PATCH 5/8] move slug utility function to top-level it's a generic utility function that can be reused, there's nothing class-specific about it. --- beetsplug/lyrics.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 27b05f542..95c3d6ad6 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -227,6 +227,19 @@ def search_pairs(item): return itertools.product(artists, multi_titles) +def slug(text): + """Make a URL-safe, human-readable version of the given text + + This will do the following: + + 1. decode unicode characters into ASCII + 2. shift everything to lowercase + 3. strip whitespace + 4. replace other non-word characters with dashes + """ + return re.sub(r'\W+', '-', unidecode(text).lower().strip()) + + class Backend(object): def __init__(self, config, log): self._log = log @@ -759,17 +772,6 @@ class LyricsPlugin(plugins.BeetsPlugin): This will keep state (in the `rest` variable) in order to avoid writing continuously to the same files. """ - def slug(text): - """Make a URL-safe, human-readable version of the given text - - This will do the following: - - 1. decode unicode characters into ASCII - 2. shift everything to lowercase - 3. strip whitespace - 4. replace other non-word characters with dashes - """ - return re.sub(r'\W+', '-', unidecode(text).lower().strip()) if item is None or slug(self.artist) != slug(item.artist): if self.rest is not None: From b4b547309332451c18accec3e69164d261ff3a33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 18 Jul 2017 16:14:10 -0400 Subject: [PATCH 6/8] add pointer to slugify in slug --- beetsplug/lyrics.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 95c3d6ad6..84f1bf6c0 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -236,6 +236,10 @@ def slug(text): 2. shift everything to lowercase 3. strip whitespace 4. replace other non-word characters with dashes + + This somewhat duplicates the :func:`Google.slugify` function but + slugify is not as generic as this one, which can be reused + elsewhere. """ return re.sub(r'\W+', '-', unidecode(text).lower().strip()) From 914dba4152ec689851e3f7c80b84348adebd0cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 18 Jul 2017 16:32:47 -0400 Subject: [PATCH 7/8] add tests for slug function, failing --- test/test_lyrics.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/test_lyrics.py b/test/test_lyrics.py index e811da8d7..398314ba6 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -393,6 +393,26 @@ class LyricsGooglePluginMachineryTest(LyricsGoogleBaseTest): google.is_page_candidate(url, url_title, s['title'], u'Sunn O)))') +class SlugTests(unittest.TestCase): + + def test_slug(self): + # plain ascii passthrough + text = u"test" + self.assertEqual(lyrics.slug(text), 'test') + # german unicode and capitals + text = u"Mørdag" + self.assertEqual(lyrics.slug(text), 'mordag') + # more accents and quotes + text = u"l'été c'est fait pour jouer" + self.assertEqual(lyrics.slug(text), 'l-ete-c-est-fait-pour-jouer') + # accents, parens and spaces + text = u"\xe7afe au lait (boisson)" + self.assertEqual(lyrics.slug(text), 'cafe-au-lait-boisson') + text = u"Multiple spaces -- and symbols! -- merged" + self.assertEqual(lyrics.slug(text), + 'multiple-spaces-and-symbols-merged') + + def suite(): return unittest.TestLoader().loadTestsFromName(__name__) From 5ef68783a84d9045ce4e1f4fe95c68ce184a4d21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 18 Jul 2017 16:33:22 -0400 Subject: [PATCH 8/8] strip trailing and leading extra dashes those are introduced if non-word characters are found, and are ugly --- beetsplug/lyrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 84f1bf6c0..8bd96c7cf 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -236,12 +236,13 @@ def slug(text): 2. shift everything to lowercase 3. strip whitespace 4. replace other non-word characters with dashes + 5. strip extra dashes This somewhat duplicates the :func:`Google.slugify` function but slugify is not as generic as this one, which can be reused elsewhere. """ - return re.sub(r'\W+', '-', unidecode(text).lower().strip()) + return re.sub(r'\W+', '-', unidecode(text).lower().strip()).strip('-') class Backend(object):