lyrics: add musixmatch source

This commit is contained in:
Fabrice Laporte 2014-12-17 00:41:21 +01:00
parent 3bf383990c
commit 0f2f43ca9b
6 changed files with 50 additions and 25 deletions

View file

@ -692,3 +692,18 @@ def feat_tokens(for_artist=True):
return '(?<=\s)(?:{0})(?=\s)'.format(
'|'.join(re.escape(x) for x in feat_words)
)
def sanitize_choices(choices, choices_all):
    """Clean up a stringlist configuration attribute.

    Keep only the entries of ``choices`` that appear in ``choices_all``,
    drop duplicates and preserve the original order. The wildcard entry
    ``'*'`` is replaced by every element of ``choices_all`` that is not
    listed in ``choices``, in ``choices_all`` order.

    Returns a new list; the arguments are not modified.
    """
    # Materialize both inputs once so that one-shot iterables are not
    # exhausted by the membership tests below.
    choices = list(choices)
    valid = list(choices_all)

    # Values the wildcard stands for.
    others = [x for x in valid if x not in choices]

    seen = set()
    res = []
    for s in choices:
        # Skip repeated entries and anything that is neither a known
        # choice nor the wildcard.
        if s in seen or (s != '*' and s not in valid):
            continue
        seen.add(s)
        if s == '*':
            res.extend(others)
        else:
            res.append(s)
    return res

View file

@ -319,22 +319,6 @@ def batch_fetch_art(lib, albums, force, maxwidth=None):
message))
def sanitize_sources(sources):
    """Clean up the user's configured source list.

    Remove unknown or duplicate sources while keeping the original order.
    A ``'*'`` entry is expanded to every source in ``SOURCES_ALL`` that is
    not listed in ``sources``. ``'itunes'`` is dropped from the result when
    ``HAVE_ITUNES`` is false.
    """
    seen = set()
    # Build the wildcard expansion as an ordered list: a set difference
    # would make the order of the expanded sources arbitrary.
    others = [s for s in SOURCES_ALL if s not in sources]
    res = []
    for s in sources:
        # Ignore repeated entries and names that are neither a known
        # source nor the wildcard.
        if s in seen or s not in SOURCES_ALL + ['*']:
            continue
        seen.add(s)
        if s == '*':
            res.extend(others)
        else:
            res.append(s)
    if not HAVE_ITUNES and 'itunes' in res:
        res.remove('itunes')
    return res
class FetchArtPlugin(BeetsPlugin):
def __init__(self):
super(FetchArtPlugin, self).__init__()
@ -359,8 +343,10 @@ class FetchArtPlugin(BeetsPlugin):
self.import_stages = [self.fetch_art]
self.register_listener('import_task_files', self.assign_art)
self.config['sources'] = sanitize_sources(
self.config['sources'].as_str_seq())
if not HAVE_ITUNES and u'itunes' in SOURCES_ALL:
SOURCES_ALL.remove(u'itunes')
self.config['sources'] = util.sanitize_choices(
self.config['sources'].as_str_seq(), SOURCES_ALL)
# Asynchronous; after music is added to the library.
def fetch_art(self, session, task):

View file

@ -86,10 +86,17 @@ def unescape(text):
return out
def extract_text(html, starttag):
def extract_text_between(html, start_marker, end_marker):
    """Extract the text located between ``start_marker`` and
    ``end_marker`` in ``html`` and pass it through
    ``_scrape_strip_cruft``.

    Returns an empty string when either marker cannot be found, instead
    of raising ``ValueError``.
    """
    try:
        # Each split yields a single-element list when its marker is
        # absent, which makes the tuple unpacking raise ValueError.
        _, html = html.split(start_marker, 1)
        html, _ = html.split(end_marker, 1)
    except ValueError:
        return ''
    return _scrape_strip_cruft(html, True)
def extract_text_in(html, starttag):
"""Extract the text from a <DIV> tag in the HTML starting with
``starttag``. Returns None if parsing fails.
"""
# Strip off the leading text before opening tag.
try:
_, html = html.split(starttag, 1)
@ -178,6 +185,19 @@ def _encode(s):
s = s.encode('utf8', 'ignore')
return urllib.quote(s)
# Musixmatch
MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s'
def fetch_musixmatch(artist, title):
    """Fetch lyrics for ``artist`` / ``title`` from the URL built with
    ``MUSIXMATCH_URL_PATTERN``.

    Returns the lyrics as a string, or None when the page could not be
    fetched or does not contain the expected lyrics markers.
    """
    url = MUSIXMATCH_URL_PATTERN % (_lw_encode(artist.title()),
                                    _lw_encode(title.title()))
    html = fetch_url(url)
    if not html:
        return

    try:
        lyrics = extract_text_between(html, '"lyrics_body":',
                                      '"lyrics_language":')
    except ValueError:
        # One of the markers is missing from the page: treat it as
        # "no lyrics found" rather than letting the error propagate.
        return
    if not lyrics:
        return

    # Trim the surrounding quotes/comma left over from the enclosing
    # markup and turn escaped newlines into real ones.
    return lyrics.strip(',"').replace('\\n', '\n')
# LyricsWiki.
@ -201,7 +221,7 @@ def fetch_lyricswiki(artist, title):
if not html:
return
lyrics = extract_text(html, "<div class='lyricbox'>")
lyrics = extract_text_in(html, "<div class='lyricbox'>")
if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
return lyrics
@ -228,7 +248,7 @@ def fetch_lyricscom(artist, title):
if not html:
return
lyrics = extract_text(html, '<div id="lyric_space">')
lyrics = extract_text_in(html, '<div id="lyric_space">')
if not lyrics:
return
for not_found_str in LYRICSCOM_NOT_FOUND:

View file

@ -10,6 +10,7 @@ library by typing ``pip install requests`` or the equivalent for your OS.
New:
* :doc:`/plugins/lyrics`: Add Musixmatch source.
* :doc:`/plugins/lyrics`: Add Brazilian and Hispanic sources to the Google
custom search engine.
* A new :doc:`/plugins/permissions` makes it easy to fix permissions on music

View file

@ -47,7 +47,7 @@ def main(argv=None):
url = s['url'] + s['path']
fn = test_lyrics.url_to_filename(url)
if not os.path.isfile(fn):
html = requests.get(url).text
html = requests.get(url, verify=False).text
with safe_open_w(fn) as f:
f.write(html.encode('utf8'))

View file

@ -17,6 +17,7 @@
import os
import _common
import sys
import re
from _common import unittest
from beetsplug import lyrics
from beets.library import Item
@ -163,7 +164,7 @@ class LyricsPluginTest(unittest.TestCase):
def url_to_filename(url):
url = url.replace('http://', '').replace('www.', '')
url = re.sub(r'https?://|www.', '', url)
fn = "".join(x for x in url if (x.isalnum() or x == '/'))
fn = fn.split('/')
fn = os.path.join(LYRICS_ROOT_DIR, fn[0], fn[-1]) + '.txt'
@ -207,6 +208,7 @@ DEFAULT_SOURCES = [
path=u'The_Beatles:Lady_Madonna'),
dict(DEFAULT_SONG, url='http://www.lyrics.com/',
path=u'lady-madonna-lyrics-the-beatles.html')
]
# Every source entered in default beets google custom search engine
@ -307,8 +309,9 @@ class LyricsGooglePluginTest(unittest.TestCase):
"""Test default engines with the default query"""
if not check_lyrics_fetched():
self.skipTest("Run lyrics_download_samples.py script first.")
for (fun, s) in zip((lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom),
DEFAULT_SOURCES):
for (fun, s) in zip([lyrics.fetch_lyricswiki,
lyrics.fetch_lyricscom,
lyrics.fetch_musixmatch], DEFAULT_SOURCES):
if os.path.isfile(url_to_filename(
s['url'] + s['path'])):
res = fun(s['artist'], s['title'])