Merge pull request #3766 from jackwilsdon/remove-lyricwiki

Remove LyricWiki source
This commit is contained in:
Adrian Sampson 2020-10-05 20:47:13 -04:00 committed by GitHub
commit 92cf556682
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 9 additions and 77 deletions

View file

@ -55,7 +55,6 @@ except ImportError:
from beets import plugins
from beets import ui
from beets import util
import beets
DIV_RE = re.compile(r'<(/?)div>?', re.I)
@ -145,39 +144,6 @@ def extract_text_between(html, start_marker, end_marker):
return html
def extract_text_in(html, starttag):
    """Return the text held by the <DIV> element that ``starttag`` opens.

    Only text at the top nesting level of that element is collected; the
    contents of nested <DIV> elements are left out. Returns None if
    parsing fails, i.e. when ``starttag`` does not occur in ``html`` or
    when the element's closing tag is never found.
    """
    # Drop everything up to and including the opening tag. An empty
    # ``starttag`` raises ValueError, which counts as a parse failure.
    try:
        _, found, remainder = html.partition(starttag)
    except ValueError:
        return None
    if not found:
        return None

    # Scan the DIV tags that follow, tracking how deeply nested we are
    # relative to the element opened by ``starttag``.
    depth = 0
    cursor = 0
    chunks = []
    for tag in DIV_RE.finditer(remainder):
        if tag.group(1):
            # Closing tag.
            depth -= 1
            if depth == 0:
                # A nested element just ended; resume collecting after it.
                cursor = tag.end()
            elif depth == -1:
                # This closes the element opened by ``starttag``.
                chunks.append(remainder[cursor:tag.start()])
                return u''.join(chunks)
        else:
            # Opening tag.
            if depth == 0:
                # Keep the top-level text seen so far before descending.
                chunks.append(remainder[cursor:tag.start()])
            depth += 1

    print(u'no closing tag found!')
    return None
def search_pairs(item):
"""Yield a pairs of artists and titles to search for.
@ -296,9 +262,9 @@ class Backend(object):
raise NotImplementedError()
class SymbolsReplaced(Backend):
class MusiXmatch(Backend):
REPLACEMENTS = {
r'\s+': '_',
r'\s+': '-',
'<': 'Less_Than',
'>': 'Greater_Than',
'#': 'Number_',
@ -306,20 +272,14 @@ class SymbolsReplaced(Backend):
r'[\]\}]': ')',
}
URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s'
@classmethod
def _encode(cls, s):
for old, new in cls.REPLACEMENTS.items():
s = re.sub(old, new, s)
return super(SymbolsReplaced, cls)._encode(s)
class MusiXmatch(SymbolsReplaced):
REPLACEMENTS = dict(SymbolsReplaced.REPLACEMENTS, **{
r'\s+': '-'
})
URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s'
return super(MusiXmatch, cls)._encode(s)
def fetch(self, artist, title):
url = self.build_url(artist, title)
@ -441,30 +401,6 @@ class Genius(Backend):
return lyrics_div.get_text()
class LyricsWiki(SymbolsReplaced):
    """Fetch lyrics from LyricsWiki."""

    # Use the HTTPS endpoint only when ``util.SNI_SUPPORTED`` is truthy;
    # otherwise fall back to plain HTTP.
    URL_PATTERN = (
        'https://lyrics.wikia.com/%s:%s' if util.SNI_SUPPORTED
        else 'http://lyrics.wikia.com/%s:%s'
    )

    def fetch(self, artist, title):
        """Return the lyrics for ``title`` by ``artist``, or None.

        None is returned when the page cannot be fetched, when no
        ``lyricbox`` element is found in it, or when the extracted text
        is empty or contains the "not licensed" notice.
        """
        html = self.fetch_url(self.build_url(artist, title))
        if not html:
            return None

        # Pull the HTML fragment out of the lyrics container element.
        fragment = extract_text_in(html, u"<div class='lyricbox'>")
        if not fragment:
            return None

        # Turn the fragment into text (presumably stripping leftover
        # markup -- see _scrape_strip_cruft).
        lyrics = _scrape_strip_cruft(fragment, True)
        if not lyrics or 'Unfortunately, we are not licensed' in lyrics:
            return None
        return lyrics
def remove_credits(text):
"""Remove first/last line of text if it contains the word 'lyrics'
eg 'Lyrics by songsdatabase.com'
@ -656,10 +592,9 @@ class Google(Backend):
class LyricsPlugin(plugins.BeetsPlugin):
SOURCES = ['google', 'lyricwiki', 'musixmatch', 'genius']
SOURCES = ['google', 'musixmatch', 'genius']
SOURCE_BACKENDS = {
'google': Google,
'lyricwiki': LyricsWiki,
'musixmatch': MusiXmatch,
'genius': Genius,
}

View file

@ -153,6 +153,7 @@ New features:
* ``beet remove`` now also allows interactive selection of items from the query
similar to ``beet modify``
* :doc:`/plugins/web`: add DELETE and PATCH methods for modifying items
* :doc:`/plugins/lyrics`: Removed LyricWiki source (shut down on 21 September 2020).
Fixes:

View file

@ -2,10 +2,9 @@ Lyrics Plugin
=============
The ``lyrics`` plugin fetches and stores song lyrics from databases on the Web.
Namely, the current version of the plugin uses `Lyric Wiki`_,
`Musixmatch`_, `Genius.com`_, and, optionally, the Google custom search API.
Namely, the current version of the plugin uses `Musixmatch`_, `Genius.com`_,
and, optionally, the Google custom search API.
.. _Lyric Wiki: https://lyrics.wikia.com/
.. _Musixmatch: https://www.musixmatch.com/
.. _Genius.com: https://genius.com/

View file

@ -268,7 +268,6 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest):
DEFAULT_SONG = dict(artist=u'The Beatles', title=u'Lady Madonna')
DEFAULT_SOURCES = [
dict(DEFAULT_SONG, backend=lyrics.LyricsWiki),
# dict(artist=u'Santana', title=u'Black magic woman',
# backend=lyrics.MusiXmatch),
dict(DEFAULT_SONG, backend=lyrics.Genius),
@ -295,8 +294,6 @@ class LyricsPluginSourcesTest(LyricsGoogleBaseTest):
dict(DEFAULT_SONG,
url='http://www.lyricsmania.com/',
path='lady_madonna_lyrics_the_beatles.html'),
dict(DEFAULT_SONG, url=u'http://lyrics.wikia.com/',
path=u'The_Beatles:Lady_Madonna'),
dict(DEFAULT_SONG,
url=u'http://www.lyricsmode.com',
path=u'/lyrics/b/beatles/lady_madonna.html'),