lyrics: add musixmatch source

2025-12-15 13:07:09 +01:00 · 2014-12-17 00:41:21 +01:00 · 2014-12-17 00:41:21 +01:00 · 0f2f43ca9b
commit 0f2f43ca9b
parent 3bf383990c
6 changed files with 50 additions and 25 deletions
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -692,3 +692,18 @@ def feat_tokens(for_artist=True):
    return '(?<=\s)(?:{0})(?=\s)'.format(
        '|'.join(re.escape(x) for x in feat_words)
    )
+
+
+def sanitize_choices(choices, choices_all):
+    """Clean up a stringlist configuration attribute by removing unknown or
+    duplicate strings while keeping original order. A '*' entry expands to
+    every item of `choices_all` that is not listed in `choices`.
+    """
+    seen = set()
+    others = [x for x in choices_all if x not in choices]
+    allowed = list(choices_all) + ['*']
+    res = []
+    for s in choices:
+        if s in allowed and not (s in seen or seen.add(s)):
+            res.extend(others if s == '*' else [s])
+    return res
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -319,22 +319,6 @@ def batch_fetch_art(lib, albums, force, maxwidth=None):
                                          message))


-def sanitize_sources(sources):
-    """Clean up the user's configured source list. Remove unknown or
-    duplicate sources while keeping original order.
-    """
-    seen = set()
-    others = set(SOURCES_ALL) - set(sources)
-    res = []
-    for s in sources:
-        if s in SOURCES_ALL + ['*']:
-            if not (s in seen or seen.add(s)):
-                res.extend(list(others) if s == '*' else [s])
-    if not HAVE_ITUNES and 'itunes' in res:
-        res.remove('itunes')
-    return res
-
-
 class FetchArtPlugin(BeetsPlugin):
    def __init__(self):
        super(FetchArtPlugin, self).__init__()
@@ -359,8 +343,10 @@ class FetchArtPlugin(BeetsPlugin):
            self.import_stages = [self.fetch_art]
            self.register_listener('import_task_files', self.assign_art)

-        self.config['sources'] = sanitize_sources(
-            self.config['sources'].as_str_seq())
+        if not HAVE_ITUNES and u'itunes' in SOURCES_ALL:
+            SOURCES_ALL.remove(u'itunes')
+        self.config['sources'] = util.sanitize_choices(
+            self.config['sources'].as_str_seq(), SOURCES_ALL)

    # Asynchronous; after music is added to the library.
    def fetch_art(self, session, task):
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -86,10 +86,17 @@ def unescape(text):
    return out


-def extract_text(html, starttag):
+def extract_text_between(html, start_marker, end_marker):
+    """Scrape the text between two markers; u'' if either one is missing."""
+    body, found, _ = html.partition(start_marker)[2].partition(end_marker)
+    return _scrape_strip_cruft(body, True) if found else u''
+
+
+def extract_text_in(html, starttag):
    """Extract the text from a <DIV> tag in the HTML starting with
    ``starttag``. Returns None if parsing fails.
    """
+
    # Strip off the leading text before opening tag.
    try:
        _, html = html.split(starttag, 1)
@@ -178,6 +185,19 @@ def _encode(s):
        s = s.encode('utf8', 'ignore')
    return urllib.quote(s)

+# Musixmatch
+
+MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s'
+
+
+def fetch_musixmatch(artist, title):
+    """Fetch lyrics from musixmatch.com; None if the page lacks them."""
+    html = fetch_url(MUSIXMATCH_URL_PATTERN % (_lw_encode(artist.title()),
+                                               _lw_encode(title.title())))
+    start, end = '"lyrics_body":', '"lyrics_language":'
+    if html and end in html.partition(start)[2]:  # both markers present
+        lyrics = extract_text_between(html, start, end)
+        return lyrics.strip(',"').replace('\\n', '\n')

 # LyricsWiki.

@@ -201,7 +221,7 @@ def fetch_lyricswiki(artist, title):
    if not html:
        return

-    lyrics = extract_text(html, "<div class='lyricbox'>")
+    lyrics = extract_text_in(html, "<div class='lyricbox'>")
    if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
        return lyrics

@@ -228,7 +248,7 @@ def fetch_lyricscom(artist, title):
    if not html:
        return

-    lyrics = extract_text(html, '<div id="lyric_space">')
+    lyrics = extract_text_in(html, '<div id="lyric_space">')
    if not lyrics:
        return
    for not_found_str in LYRICSCOM_NOT_FOUND:
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -10,6 +10,7 @@ library by typing ``pip install requests`` or the equivalent for your OS.

 New:

+* :doc:`/plugins/lyrics`: Add Musixmatch source.
 * :doc:`/plugins/lyrics`: Add brazilian and hispanic sources to Google custom
  search engine.
 * A new :doc:`/plugins/permissions` makes it easy to fix permissions on music
--- a/test/lyrics_download_samples.py
+++ b/test/lyrics_download_samples.py
@@ -47,7 +47,7 @@ def main(argv=None):
        url = s['url'] + s['path']
        fn = test_lyrics.url_to_filename(url)
        if not os.path.isfile(fn):
-            html = requests.get(url).text
+            html = requests.get(url, verify=False).text
            with safe_open_w(fn) as f:
                f.write(html.encode('utf8'))

--- a/test/test_lyrics.py
+++ b/test/test_lyrics.py
@@ -17,6 +17,7 @@
 import os
 import _common
 import sys
+import re
 from _common import unittest
 from beetsplug import lyrics
 from beets.library import Item
@@ -163,7 +164,7 @@ class LyricsPluginTest(unittest.TestCase):


 def url_to_filename(url):
-    url = url.replace('http://', '').replace('www.', '')
+    url = re.sub(r'https?://|www.', '', url)
    fn = "".join(x for x in url if (x.isalnum() or x == '/'))
    fn = fn.split('/')
    fn = os.path.join(LYRICS_ROOT_DIR, fn[0], fn[-1]) + '.txt'
@@ -207,6 +208,7 @@ DEFAULT_SOURCES = [
         path=u'The_Beatles:Lady_Madonna'),
    dict(DEFAULT_SONG, url='http://www.lyrics.com/',
         path=u'lady-madonna-lyrics-the-beatles.html')
+
 ]

 # Every source entered in default beets google custom search engine
@@ -307,8 +309,9 @@ class LyricsGooglePluginTest(unittest.TestCase):
        """Test default engines with the default query"""
        if not check_lyrics_fetched():
            self.skipTest("Run lyrics_download_samples.py script first.")
-        for (fun, s) in zip((lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom),
-                            DEFAULT_SOURCES):
+        for (fun, s) in zip([lyrics.fetch_lyricswiki,
+                             lyrics.fetch_lyricscom,
+                             lyrics.fetch_musixmatch], DEFAULT_SOURCES):
            if os.path.isfile(url_to_filename(
                              s['url'] + s['path'])):
                res = fun(s['artist'], s['title'])