mirror of
https://github.com/beetbox/beets.git
synced 2025-12-15 13:07:09 +01:00
lyrics: add musixmatch source
This commit is contained in:
parent
3bf383990c
commit
0f2f43ca9b
6 changed files with 50 additions and 25 deletions
|
|
@ -692,3 +692,18 @@ def feat_tokens(for_artist=True):
|
|||
return '(?<=\s)(?:{0})(?=\s)'.format(
|
||||
'|'.join(re.escape(x) for x in feat_words)
|
||||
)
|
||||
|
||||
|
||||
def sanitize_choices(choices, choices_all):
    """Clean up a stringlist configuration attribute by removing unknown
    or duplicate strings while keeping the original order.

    ``choices`` is the sequence to clean up and ``choices_all`` is the
    sequence of every valid choice. The wildcard ``'*'`` is replaced, in
    ``choices_all`` order, by every valid choice that does not appear in
    ``choices``. Returns a new list; neither argument is modified.
    """
    valid = list(choices_all)
    # Valid choices that are not named explicitly: this is what the '*'
    # wildcard stands for.
    others = [x for x in valid if x not in choices]

    seen = set()
    res = []
    for s in choices:
        # Silently drop anything that is neither a known choice nor the
        # wildcard.
        if s != '*' and s not in valid:
            continue
        # Only the first occurrence of a choice (or of '*') counts.
        if s in seen:
            continue
        seen.add(s)
        if s == '*':
            res.extend(others)
        else:
            res.append(s)
    return res
|
||||
|
|
|
|||
|
|
@ -319,22 +319,6 @@ def batch_fetch_art(lib, albums, force, maxwidth=None):
|
|||
message))
|
||||
|
||||
|
||||
def sanitize_sources(sources):
    """Clean up the user's configured source list. Remove unknown or
    duplicate sources while keeping original order.

    The wildcard ``'*'`` is replaced by every entry of ``SOURCES_ALL``
    that is not named in ``sources``. ``'itunes'`` is dropped from the
    result when ``HAVE_ITUNES`` is false. Returns a new list.
    """
    # Build the wildcard expansion as an ordered list. Going through a
    # set difference here made the order of the sources substituted for
    # '*' arbitrary, contradicting the "keeping original order" contract.
    others = [s for s in SOURCES_ALL if s not in sources]

    seen = set()
    res = []
    for s in sources:
        # Ignore anything that is neither a known source nor the wildcard.
        if s != '*' and s not in SOURCES_ALL:
            continue
        # Keep only the first occurrence of each entry.
        if s in seen:
            continue
        seen.add(s)
        if s == '*':
            res.extend(others)
        else:
            res.append(s)

    if not HAVE_ITUNES and 'itunes' in res:
        res.remove('itunes')
    return res
|
||||
|
||||
|
||||
class FetchArtPlugin(BeetsPlugin):
|
||||
def __init__(self):
|
||||
super(FetchArtPlugin, self).__init__()
|
||||
|
|
@ -359,8 +343,10 @@ class FetchArtPlugin(BeetsPlugin):
|
|||
self.import_stages = [self.fetch_art]
|
||||
self.register_listener('import_task_files', self.assign_art)
|
||||
|
||||
self.config['sources'] = sanitize_sources(
|
||||
self.config['sources'].as_str_seq())
|
||||
if not HAVE_ITUNES and u'itunes' in SOURCES_ALL:
|
||||
SOURCES_ALL.remove(u'itunes')
|
||||
self.config['sources'] = util.sanitize_choices(
|
||||
self.config['sources'].as_str_seq(), SOURCES_ALL)
|
||||
|
||||
# Asynchronous; after music is added to the library.
|
||||
def fetch_art(self, session, task):
|
||||
|
|
|
|||
|
|
@ -86,10 +86,17 @@ def unescape(text):
|
|||
return out
|
||||
|
||||
|
||||
def extract_text(html, starttag):
|
||||
def extract_text_between(html, start_marker, end_marker):
    """Extract the text of ``html`` located between the first occurrence
    of ``start_marker`` and the first occurrence of ``end_marker`` after
    it, passed through ``_scrape_strip_cruft``.

    Returns an empty string when either marker cannot be found, so that
    callers can keep applying string methods to the result.
    """
    try:
        _, html = html.split(start_marker, 1)
        html, _ = html.split(end_marker, 1)
    except ValueError:
        # A missing marker makes split() return a single piece, so the
        # two-name unpacking fails: there is nothing to extract.
        return u''
    return _scrape_strip_cruft(html, True)
|
||||
|
||||
|
||||
def extract_text_in(html, starttag):
|
||||
"""Extract the text from a <DIV> tag in the HTML starting with
|
||||
``starttag``. Returns None if parsing fails.
|
||||
"""
|
||||
|
||||
# Strip off the leading text before opening tag.
|
||||
try:
|
||||
_, html = html.split(starttag, 1)
|
||||
|
|
@ -178,6 +185,19 @@ def _encode(s):
|
|||
s = s.encode('utf8', 'ignore')
|
||||
return urllib.quote(s)
|
||||
|
||||
# Musixmatch
|
||||
|
||||
MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s'
|
||||
|
||||
|
||||
def fetch_musixmatch(artist, title):
    """Fetch lyrics from the musixmatch page built from ``artist`` and
    ``title``.

    Both names are title-cased and encoded with ``_lw_encode`` before
    being substituted into ``MUSIXMATCH_URL_PATTERN``. Returns None when
    ``fetch_url`` yields nothing; otherwise returns the text found
    between the ``"lyrics_body":`` and ``"lyrics_language":`` markers.
    """
    encoded_artist = _lw_encode(artist.title())
    encoded_title = _lw_encode(title.title())
    page = fetch_url(MUSIXMATCH_URL_PATTERN % (encoded_artist, encoded_title))
    if page:
        raw = extract_text_between(page, '"lyrics_body":',
                                   '"lyrics_language":')
        # Trim the commas and double quotes left around the extracted
        # value, then turn the literal backslash-n sequences into real
        # line breaks.
        trimmed = raw.strip(',"')
        return trimmed.replace('\\n', '\n')
|
||||
|
||||
# LyricsWiki.
|
||||
|
||||
|
|
@ -201,7 +221,7 @@ def fetch_lyricswiki(artist, title):
|
|||
if not html:
|
||||
return
|
||||
|
||||
lyrics = extract_text(html, "<div class='lyricbox'>")
|
||||
lyrics = extract_text_in(html, "<div class='lyricbox'>")
|
||||
if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
|
||||
return lyrics
|
||||
|
||||
|
|
@ -228,7 +248,7 @@ def fetch_lyricscom(artist, title):
|
|||
if not html:
|
||||
return
|
||||
|
||||
lyrics = extract_text(html, '<div id="lyric_space">')
|
||||
lyrics = extract_text_in(html, '<div id="lyric_space">')
|
||||
if not lyrics:
|
||||
return
|
||||
for not_found_str in LYRICSCOM_NOT_FOUND:
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ library by typing ``pip install requests`` or the equivalent for your OS.
|
|||
|
||||
New:
|
||||
|
||||
* :doc:`/plugins/lyrics`: Add Musixmatch source.
|
||||
* :doc:`/plugins/lyrics`: Add Brazilian and Hispanic sources to Google custom
|
||||
search engine.
|
||||
* A new :doc:`/plugins/permissions` makes it easy to fix permissions on music
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ def main(argv=None):
|
|||
url = s['url'] + s['path']
|
||||
fn = test_lyrics.url_to_filename(url)
|
||||
if not os.path.isfile(fn):
|
||||
html = requests.get(url).text
|
||||
html = requests.get(url, verify=False).text
|
||||
with safe_open_w(fn) as f:
|
||||
f.write(html.encode('utf8'))
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
import os
|
||||
import _common
|
||||
import sys
|
||||
import re
|
||||
from _common import unittest
|
||||
from beetsplug import lyrics
|
||||
from beets.library import Item
|
||||
|
|
@ -163,7 +164,7 @@ class LyricsPluginTest(unittest.TestCase):
|
|||
|
||||
|
||||
def url_to_filename(url):
|
||||
url = url.replace('http://', '').replace('www.', '')
|
||||
url = re.sub(r'https?://|www.', '', url)
|
||||
fn = "".join(x for x in url if (x.isalnum() or x == '/'))
|
||||
fn = fn.split('/')
|
||||
fn = os.path.join(LYRICS_ROOT_DIR, fn[0], fn[-1]) + '.txt'
|
||||
|
|
@ -207,6 +208,7 @@ DEFAULT_SOURCES = [
|
|||
path=u'The_Beatles:Lady_Madonna'),
|
||||
dict(DEFAULT_SONG, url='http://www.lyrics.com/',
|
||||
path=u'lady-madonna-lyrics-the-beatles.html')
|
||||
|
||||
]
|
||||
|
||||
# Every source entered in default beets google custom search engine
|
||||
|
|
@ -307,8 +309,9 @@ class LyricsGooglePluginTest(unittest.TestCase):
|
|||
"""Test default engines with the default query"""
|
||||
if not check_lyrics_fetched():
|
||||
self.skipTest("Run lyrics_download_samples.py script first.")
|
||||
for (fun, s) in zip((lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom),
|
||||
DEFAULT_SOURCES):
|
||||
for (fun, s) in zip([lyrics.fetch_lyricswiki,
|
||||
lyrics.fetch_lyricscom,
|
||||
lyrics.fetch_musixmatch], DEFAULT_SOURCES):
|
||||
if os.path.isfile(url_to_filename(
|
||||
s['url'] + s['path'])):
|
||||
res = fun(s['artist'], s['title'])
|
||||
|
|
|
|||
Loading…
Reference in a new issue