From d115f3679b8c884b1a9a7c1bcdeab2b66b2ac908 Mon Sep 17 00:00:00 2001 From: Jack Wilsdon Date: Mon, 5 Oct 2020 22:08:38 +0100 Subject: [PATCH 1/2] Remove LyricWiki source LyricWiki was shut down on 2020/09/21 and no longer serves lyrics. --- beetsplug/lyrics.py | 28 +--------------------------- docs/changelog.rst | 1 + docs/plugins/lyrics.rst | 5 ++--- test/test_lyrics.py | 3 --- 4 files changed, 4 insertions(+), 33 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 16696d425..e216f33c1 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -55,7 +55,6 @@ except ImportError: from beets import plugins from beets import ui -from beets import util import beets DIV_RE = re.compile(r'<(/?)div>?', re.I) @@ -441,30 +440,6 @@ class Genius(Backend): return lyrics_div.get_text() -class LyricsWiki(SymbolsReplaced): - """Fetch lyrics from LyricsWiki.""" - - if util.SNI_SUPPORTED: - URL_PATTERN = 'https://lyrics.wikia.com/%s:%s' - else: - URL_PATTERN = 'http://lyrics.wikia.com/%s:%s' - - def fetch(self, artist, title): - url = self.build_url(artist, title) - html = self.fetch_url(url) - if not html: - return - - # Get the HTML fragment inside the appropriate HTML element and then - # extract the text from it. - html_frag = extract_text_in(html, u"
") - if html_frag: - lyrics = _scrape_strip_cruft(html_frag, True) - - if lyrics and 'Unfortunately, we are not licensed' not in lyrics: - return lyrics - - def remove_credits(text): """Remove first/last line of text if it contains the word 'lyrics' eg 'Lyrics by songsdatabase.com' @@ -656,10 +631,9 @@ class Google(Backend): class LyricsPlugin(plugins.BeetsPlugin): - SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius'] + SOURCES = ['google', 'musixmatch', 'genius'] SOURCE_BACKENDS = { 'google': Google, - 'lyricwiki': LyricsWiki, 'musixmatch': MusiXmatch, 'genius': Genius, } diff --git a/docs/changelog.rst b/docs/changelog.rst index 4122b2f51..e33299fab 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -153,6 +153,7 @@ New features: * ``beet remove`` now also allows interactive selection of items from the query similar to ``beet modify`` * :doc:`/plugins/web`: add DELETE and PATCH methods for modifying items +* :doc:`/plugins/lyrics`: Removed LyricWiki source (shut down on 21/09/2020). Fixes: diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 942497a7c..b71764042 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -2,10 +2,9 @@ Lyrics Plugin ============= The ``lyrics`` plugin fetches and stores song lyrics from databases on the Web. -Namely, the current version of the plugin uses `Lyric Wiki`_, -`Musixmatch`_, `Genius.com`_, and, optionally, the Google custom search API. +Namely, the current version of the plugin uses `Musixmatch`_, `Genius.com`_, +and, optionally, the Google custom search API. -.. _Lyric Wiki: https://lyrics.wikia.com/ .. _Musixmatch: https://www.musixmatch.com/ .. _Genius.com: https://genius.com/ diff --git a/test/test_lyrics.py b/test/test_lyrics.py index e0ec1e548..5fce1c476 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -268,7 +268,6 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest): DEFAULT_SONG = dict(artist=u'The Beatles', title=u'Lady Madonna') DEFAULT_SOURCES = [ - dict(DEFAULT_SONG, backend=lyrics.LyricsWiki), # dict(artist=u'Santana', title=u'Black magic woman', # backend=lyrics.MusiXmatch), dict(DEFAULT_SONG, backend=lyrics.Genius), @@ -295,8 +294,6 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest): dict(DEFAULT_SONG, url='http://www.lyricsmania.com/', path='lady_madonna_lyrics_the_beatles.html'), - dict(DEFAULT_SONG, url=u'http://lyrics.wikia.com/', - path=u'The_Beatles:Lady_Madonna'), dict(DEFAULT_SONG, url=u'http://www.lyricsmode.com', path=u'/lyrics/b/beatles/lady_madonna.html'), From 580495f1d67f7e84a8ed173c1463c98f7465539b Mon Sep 17 00:00:00 2001 From: Jack Wilsdon Date: Mon, 5 Oct 2020 22:51:14 +0100 Subject: [PATCH 2/2] Simplify MusiXmatch backend and remove unused code --- beetsplug/lyrics.py | 49 +++++---------------------------------------- 1 file changed, 5 insertions(+), 44 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index e216f33c1..5591598ae 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -144,39 +144,6 @@ def extract_text_between(html, start_marker, end_marker): return html -def extract_text_in(html, starttag): - """Extract the text from a
tag in the HTML starting with - ``starttag``. Returns None if parsing fails. - """ - # Strip off the leading text before opening tag. - try: - _, html = html.split(starttag, 1) - except ValueError: - return - - # Walk through balanced DIV tags. - level = 0 - parts = [] - pos = 0 - for match in DIV_RE.finditer(html): - if match.group(1): # Closing tag. - level -= 1 - if level == 0: - pos = match.end() - else: # Opening tag. - if level == 0: - parts.append(html[pos:match.start()]) - level += 1 - - if level == -1: - parts.append(html[pos:match.start()]) - break - else: - print(u'no closing tag found!') - return - return u''.join(parts) - - def search_pairs(item): """Yield a pairs of artists and titles to search for. @@ -295,9 +262,9 @@ class Backend(object): raise NotImplementedError() -class SymbolsReplaced(Backend): +class MusiXmatch(Backend): REPLACEMENTS = { - r'\s+': '_', + r'\s+': '-', '<': 'Less_Than', '>': 'Greater_Than', '#': 'Number_', @@ -305,20 +272,14 @@ class SymbolsReplaced(Backend): r'[\]\}]': ')', } + URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' + @classmethod def _encode(cls, s): for old, new in cls.REPLACEMENTS.items(): s = re.sub(old, new, s) - return super(SymbolsReplaced, cls)._encode(s) - - -class MusiXmatch(SymbolsReplaced): - REPLACEMENTS = dict(SymbolsReplaced.REPLACEMENTS, **{ - r'\s+': '-' - }) - - URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' + return super(MusiXmatch, cls)._encode(s) def fetch(self, artist, title): url = self.build_url(artist, title)