diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 54cf423e1..14e9b0306 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -692,3 +692,18 @@ def feat_tokens(for_artist=True): return '(?<=\s)(?:{0})(?=\s)'.format( '|'.join(re.escape(x) for x in feat_words) ) + + +def sanitize_choices(choices, choices_all): + """Clean up a stringlist configuration attribute by removing unknown or + duplicate string while keeping original order. + """ + seen = set() + others = [x for x in choices_all if x not in choices] + print others + res = [] + for s in choices: + if s in list(choices_all) + ['*']: + if not (s in seen or seen.add(s)): + res.extend(list(others) if s == '*' else [s]) + return res diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py index 98e9fda3e..9beb33663 100644 --- a/beetsplug/fetchart.py +++ b/beetsplug/fetchart.py @@ -319,22 +319,6 @@ def batch_fetch_art(lib, albums, force, maxwidth=None): message)) -def sanitize_sources(sources): - """Clean up the user's configured source list. Remove unknown or - duplicate sources while keeping original order. - """ - seen = set() - others = set(SOURCES_ALL) - set(sources) - res = [] - for s in sources: - if s in SOURCES_ALL + ['*']: - if not (s in seen or seen.add(s)): - res.extend(list(others) if s == '*' else [s]) - if not HAVE_ITUNES and 'itunes' in res: - res.remove('itunes') - return res - - class FetchArtPlugin(BeetsPlugin): def __init__(self): super(FetchArtPlugin, self).__init__() @@ -359,8 +343,10 @@ class FetchArtPlugin(BeetsPlugin): self.import_stages = [self.fetch_art] self.register_listener('import_task_files', self.assign_art) - self.config['sources'] = sanitize_sources( - self.config['sources'].as_str_seq()) + if not HAVE_ITUNES and u'itunes' in SOURCES_ALL: + SOURCES_ALL.remove(u'itunes') + self.config['sources'] = util.sanitize_choices( + self.config['sources'].as_str_seq(), SOURCES_ALL) # Asynchronous; after music is added to the library. def fetch_art(self, session, task): diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 462bf55c0..cc491bcb2 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -86,10 +86,17 @@ def unescape(text): return out -def extract_text(html, starttag): +def extract_text_between(html, start_marker, end_marker): + _, html = html.split(start_marker, 1) + html, _ = html.split(end_marker, 1) + return _scrape_strip_cruft(html, True) + + +def extract_text_in(html, starttag): """Extract the text from a
tag in the HTML starting with ``starttag``. Returns None if parsing fails. """ + # Strip off the leading text before opening tag. try: _, html = html.split(starttag, 1) @@ -178,6 +185,19 @@ def _encode(s): s = s.encode('utf8', 'ignore') return urllib.quote(s) +# Musixmatch + +MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' + + +def fetch_musixmatch(artist, title): + url = MUSIXMATCH_URL_PATTERN % (_lw_encode(artist.title()), + _lw_encode(title.title())) + html = fetch_url(url) + if not html: + return + lyrics = extract_text_between(html, '"lyrics_body":', '"lyrics_language":') + return lyrics.strip(',"').replace('\\n', '\n') # LyricsWiki. @@ -201,7 +221,7 @@ def fetch_lyricswiki(artist, title): if not html: return - lyrics = extract_text(html, "
") + lyrics = extract_text_in(html, "
") if lyrics and 'Unfortunately, we are not licensed' not in lyrics: return lyrics @@ -228,7 +248,7 @@ def fetch_lyricscom(artist, title): if not html: return - lyrics = extract_text(html, '
') + lyrics = extract_text_in(html, '
') if not lyrics: return for not_found_str in LYRICSCOM_NOT_FOUND: diff --git a/docs/changelog.rst b/docs/changelog.rst index 27051ecf7..e0a9ed6b2 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,7 @@ library by typing ``pip install requests`` or the equivalent for your OS. New: +* :doc:`/plugins/lyrics`: Add musixmatch source. * :doc:`/plugins/lyrics`: Add brazilian and hispanic sources to Google custom search engine. * A new :doc:`/plugins/permissions` makes it easy to fix permissions on music diff --git a/test/lyrics_download_samples.py b/test/lyrics_download_samples.py index 754d2a2b5..c4aab5bd1 100644 --- a/test/lyrics_download_samples.py +++ b/test/lyrics_download_samples.py @@ -47,7 +47,7 @@ def main(argv=None): url = s['url'] + s['path'] fn = test_lyrics.url_to_filename(url) if not os.path.isfile(fn): - html = requests.get(url).text + html = requests.get(url, verify=False).text with safe_open_w(fn) as f: f.write(html.encode('utf8')) diff --git a/test/test_lyrics.py b/test/test_lyrics.py index ea1d1943e..288257c6f 100644 --- a/test/test_lyrics.py +++ b/test/test_lyrics.py @@ -17,6 +17,7 @@ import os import _common import sys +import re from _common import unittest from beetsplug import lyrics from beets.library import Item @@ -163,7 +164,7 @@ class LyricsPluginTest(unittest.TestCase): def url_to_filename(url): - url = url.replace('http://', '').replace('www.', '') + url = re.sub(r'https?://|www.', '', url) fn = "".join(x for x in url if (x.isalnum() or x == '/')) fn = fn.split('/') fn = os.path.join(LYRICS_ROOT_DIR, fn[0], fn[-1]) + '.txt' @@ -207,6 +208,7 @@ DEFAULT_SOURCES = [ path=u'The_Beatles:Lady_Madonna'), dict(DEFAULT_SONG, url='http://www.lyrics.com/', path=u'lady-madonna-lyrics-the-beatles.html') + ] # Every source entered in default beets google custom search engine @@ -307,8 +309,9 @@ class LyricsGooglePluginTest(unittest.TestCase): """Test default engines with the default query""" if not check_lyrics_fetched(): self.skipTest("Run lyrics_download_samples.py script first.") - for (fun, s) in zip((lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom), - DEFAULT_SOURCES): + for (fun, s) in zip([lyrics.fetch_lyricswiki, + lyrics.fetch_lyricscom, + lyrics.fetch_musixmatch], DEFAULT_SOURCES): if os.path.isfile(url_to_filename( s['url'] + s['path'])): res = fun(s['artist'], s['title'])