diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index 54cf423e1..14e9b0306 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -692,3 +692,18 @@ def feat_tokens(for_artist=True):
return '(?<=\s)(?:{0})(?=\s)'.format(
'|'.join(re.escape(x) for x in feat_words)
)
+
+
+def sanitize_choices(choices, choices_all):
+    """Clean up a stringlist configuration attribute by removing unknown or
+    duplicate strings while keeping the original order.
+
+    ``choices`` is the sequence to clean and ``choices_all`` the sequence of
+    accepted values. A ``'*'`` entry in ``choices`` is replaced by every
+    value of ``choices_all`` that is not itself listed in ``choices``, in
+    ``choices_all`` order. Returns a new list.
+    """
+    seen = set()
+    # Accepted values that were not named explicitly; they stand in for '*'.
+    others = [x for x in choices_all if x not in choices]
+    # Loop-invariant, so build it once rather than on every iteration.
+    allowed = list(choices_all) + ['*']
+    res = []
+    for s in choices:
+        if s in allowed and s not in seen:
+            seen.add(s)
+            res.extend(others if s == '*' else [s])
+    return res
diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py
index 98e9fda3e..9beb33663 100644
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -319,22 +319,6 @@ def batch_fetch_art(lib, albums, force, maxwidth=None):
message))
-def sanitize_sources(sources):
- """Clean up the user's configured source list. Remove unknown or
- duplicate sources while keeping original order.
- """
- seen = set()
- others = set(SOURCES_ALL) - set(sources)
- res = []
- for s in sources:
- if s in SOURCES_ALL + ['*']:
- if not (s in seen or seen.add(s)):
- res.extend(list(others) if s == '*' else [s])
- if not HAVE_ITUNES and 'itunes' in res:
- res.remove('itunes')
- return res
-
-
class FetchArtPlugin(BeetsPlugin):
def __init__(self):
super(FetchArtPlugin, self).__init__()
@@ -359,8 +343,10 @@ class FetchArtPlugin(BeetsPlugin):
self.import_stages = [self.fetch_art]
self.register_listener('import_task_files', self.assign_art)
- self.config['sources'] = sanitize_sources(
- self.config['sources'].as_str_seq())
+ if not HAVE_ITUNES and u'itunes' in SOURCES_ALL:
+ SOURCES_ALL.remove(u'itunes')
+ self.config['sources'] = util.sanitize_choices(
+ self.config['sources'].as_str_seq(), SOURCES_ALL)
# Asynchronous; after music is added to the library.
def fetch_art(self, session, task):
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 462bf55c0..cc491bcb2 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -86,10 +86,17 @@ def unescape(text):
return out
-def extract_text(html, starttag):
+def extract_text_between(html, start_marker, end_marker):
+    """Take the part of ``html`` that follows the first ``start_marker``
+    and precedes the next ``end_marker``, and return it passed through
+    ``_scrape_strip_cruft``. Raises ValueError if either marker is absent.
+    """
+    _, tail = html.split(start_marker, 1)
+    body, _ = tail.split(end_marker, 1)
+    return _scrape_strip_cruft(body, True)
+
+
+def extract_text_in(html, starttag):
"""Extract the text from a
tag in the HTML starting with
``starttag``. Returns None if parsing fails.
"""
+
# Strip off the leading text before opening tag.
try:
_, html = html.split(starttag, 1)
@@ -178,6 +185,19 @@ def _encode(s):
s = s.encode('utf8', 'ignore')
return urllib.quote(s)
+# Musixmatch
+
+MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s'
+
+
+def fetch_musixmatch(artist, title):
+    """Fetch lyrics from Musixmatch. Returns None if the page cannot be
+    fetched or does not contain the expected lyrics markers.
+    """
+    url = MUSIXMATCH_URL_PATTERN % (_lw_encode(artist.title()),
+                                    _lw_encode(title.title()))
+    html = fetch_url(url)
+    if not html:
+        return
+    try:
+        lyrics = extract_text_between(html, '"lyrics_body":',
+                                      '"lyrics_language":')
+    except ValueError:
+        # extract_text_between raises when a marker is missing from the
+        # page; report "nothing found" instead of crashing the fetcher.
+        return
+    return lyrics.strip(',"').replace('\\n', '\n')
# LyricsWiki.
@@ -201,7 +221,7 @@ def fetch_lyricswiki(artist, title):
if not html:
return
- lyrics = extract_text(html, "
")
+ lyrics = extract_text_in(html, "
")
if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
return lyrics
@@ -228,7 +248,7 @@ def fetch_lyricscom(artist, title):
if not html:
return
- lyrics = extract_text(html, '
')
+ lyrics = extract_text_in(html, '
')
if not lyrics:
return
for not_found_str in LYRICSCOM_NOT_FOUND:
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 27051ecf7..e0a9ed6b2 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -10,6 +10,7 @@ library by typing ``pip install requests`` or the equivalent for your OS.
New:
+* :doc:`/plugins/lyrics`: Add Musixmatch source.
* :doc:`/plugins/lyrics`: Add brazilian and hispanic sources to Google custom
search engine.
* A new :doc:`/plugins/permissions` makes it easy to fix permissions on music
diff --git a/test/lyrics_download_samples.py b/test/lyrics_download_samples.py
index 754d2a2b5..c4aab5bd1 100644
--- a/test/lyrics_download_samples.py
+++ b/test/lyrics_download_samples.py
@@ -47,7 +47,7 @@ def main(argv=None):
url = s['url'] + s['path']
fn = test_lyrics.url_to_filename(url)
if not os.path.isfile(fn):
- html = requests.get(url).text
+ html = requests.get(url, verify=False).text
with safe_open_w(fn) as f:
f.write(html.encode('utf8'))
diff --git a/test/test_lyrics.py b/test/test_lyrics.py
index ea1d1943e..288257c6f 100644
--- a/test/test_lyrics.py
+++ b/test/test_lyrics.py
@@ -17,6 +17,7 @@
import os
import _common
import sys
+import re
from _common import unittest
from beetsplug import lyrics
from beets.library import Item
@@ -163,7 +164,7 @@ class LyricsPluginTest(unittest.TestCase):
def url_to_filename(url):
- url = url.replace('http://', '').replace('www.', '')
+ url = re.sub(r'https?://|www.', '', url)
fn = "".join(x for x in url if (x.isalnum() or x == '/'))
fn = fn.split('/')
fn = os.path.join(LYRICS_ROOT_DIR, fn[0], fn[-1]) + '.txt'
@@ -207,6 +208,7 @@ DEFAULT_SOURCES = [
path=u'The_Beatles:Lady_Madonna'),
dict(DEFAULT_SONG, url='http://www.lyrics.com/',
path=u'lady-madonna-lyrics-the-beatles.html')
+
]
# Every source entered in default beets google custom search engine
@@ -307,8 +309,9 @@ class LyricsGooglePluginTest(unittest.TestCase):
"""Test default engines with the default query"""
if not check_lyrics_fetched():
self.skipTest("Run lyrics_download_samples.py script first.")
- for (fun, s) in zip((lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom),
- DEFAULT_SOURCES):
+ for (fun, s) in zip([lyrics.fetch_lyricswiki,
+ lyrics.fetch_lyricscom,
+ lyrics.fetch_musixmatch], DEFAULT_SOURCES):
if os.path.isfile(url_to_filename(
s['url'] + s['path'])):
res = fun(s['artist'], s['title'])