mirror of
https://github.com/beetbox/beets.git
synced 2026-01-05 15:33:15 +01:00
Merge pull request #1148 from Kraymer/lyrics_musixmatch
lyrics: add 'musixmatch' source Conflicts: beetsplug/lyrics.py
This commit is contained in:
commit
ac3f0824b0
11 changed files with 118 additions and 76 deletions
|
|
@ -16,8 +16,10 @@
|
|||
|
||||
import logging
|
||||
import traceback
|
||||
from collections import defaultdict
|
||||
import inspect
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
import beets
|
||||
from beets import mediafile
|
||||
|
|
@ -402,3 +404,32 @@ def send(event, **arguments):
|
|||
argspec = inspect.getargspec(handler).args
|
||||
args = dict((k, v) for k, v in arguments.items() if k in argspec)
|
||||
handler(**args)
|
||||
|
||||
|
||||
def feat_tokens(for_artist=True):
    """Return a regular expression that matches phrases like "featuring"
    that separate a main artist or a song title from secondary artists.

    The `for_artist` option determines whether the regex should be
    suitable for matching artist fields (the default) or title fields.
    For artist fields, joining words such as "with", "vs", "and", "con"
    and "&" are matched in addition to the "feat"/"ft" variants.

    The returned value is a pattern string (not a compiled regex). A token
    only matches when it has whitespace on both sides; the whitespace
    itself is not part of the match.
    """
    feat_words = ['ft', 'featuring', 'feat', 'feat.', 'ft.']
    if for_artist:
        feat_words += ['with', 'vs', 'and', 'con', '&']
    # Raw string: `\s` must reach the regex engine untouched and must not be
    # parsed as a (invalid) string escape sequence.
    return r'(?<=\s)(?:{0})(?=\s)'.format(
        '|'.join(re.escape(x) for x in feat_words)
    )
|
||||
|
||||
|
||||
def sanitize_choices(choices, choices_all):
    """Clean up a stringlist configuration attribute: keep only choices
    elements present in choices_all, remove duplicate elements, expand '*'
    wildcard while keeping original stringlist order.

    `choices` is the user-supplied sequence and `choices_all` is the
    iterable of every valid value. The '*' wildcard is replaced by all
    valid values that are not explicitly listed in `choices`, in
    `choices_all` order. Returns a new list; the arguments are not
    modified.
    """
    # Materialize once: this is loop-invariant and `choices_all` may be any
    # iterable.
    valid = list(choices_all)
    # What '*' stands for: every valid value the user did not name.
    others = [x for x in valid if x not in choices]
    seen = set()
    res = []
    for s in choices:
        if s != '*' and s not in valid:
            # Unknown entry: drop it.
            continue
        if s in seen:
            # Duplicate entry (including a repeated '*'): keep the first.
            continue
        seen.add(s)
        if s == '*':
            res.extend(others)
        else:
            res.append(s)
    return res
|
||||
|
|
|
|||
|
|
@ -678,17 +678,3 @@ def max_filename_length(path, limit=MAX_FILENAME_LENGTH):
|
|||
return min(res[9], limit)
|
||||
else:
|
||||
return limit
|
||||
|
||||
|
||||
def feat_tokens(for_artist=True):
    """Return a regular expression that matches phrases like "featuring"
    that separate a main artist or a song title from secondary artists.

    The `for_artist` option determines whether the regex should be
    suitable for matching artist fields (the default) or title fields.
    Artist fields additionally match the joining words "with", "vs",
    "and", "con" and "&".

    The result is a pattern string whose match is the token alone; the
    whitespace required on each side is asserted but not consumed.
    """
    feat_words = ['ft', 'featuring', 'feat', 'feat.', 'ft.']
    if for_artist:
        feat_words += ['with', 'vs', 'and', 'con', '&']
    alternatives = '|'.join(re.escape(x) for x in feat_words)
    # Raw string so that `\s` is a regex class, not an invalid string escape.
    return r'(?<=\s)(?:{0})(?=\s)'.format(alternatives)
|
||||
|
|
|
|||
|
|
@ -22,12 +22,12 @@ from tempfile import NamedTemporaryFile
|
|||
|
||||
import requests
|
||||
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets.util.artresizer import ArtResizer
|
||||
from beets import plugins
|
||||
from beets import importer
|
||||
from beets import ui
|
||||
from beets import util
|
||||
from beets import config
|
||||
from beets.util.artresizer import ArtResizer
|
||||
|
||||
try:
|
||||
import itunes
|
||||
|
|
@ -319,23 +319,7 @@ def batch_fetch_art(lib, albums, force, maxwidth=None):
|
|||
message))
|
||||
|
||||
|
||||
def sanitize_sources(sources):
    """Clean up the user's configured source list. Remove unknown or
    duplicate sources while keeping original order.

    A '*' entry is expanded to every source from SOURCES_ALL that is not
    explicitly listed in `sources`. The 'itunes' source is dropped from the
    result when HAVE_ITUNES is false. Returns a new list.
    """
    # Build the wildcard expansion in SOURCES_ALL order; a set difference
    # would make the order of the expanded sources arbitrary.
    others = [s for s in SOURCES_ALL if s not in sources]
    seen = set()
    res = []
    for s in sources:
        if s != '*' and s not in SOURCES_ALL:
            # Unknown source name: ignore it.
            continue
        if s in seen:
            # Already handled: keep only the first occurrence.
            continue
        seen.add(s)
        if s == '*':
            res.extend(others)
        else:
            res.append(s)
    if not HAVE_ITUNES and 'itunes' in res:
        res.remove('itunes')
    return res
|
||||
|
||||
|
||||
class FetchArtPlugin(BeetsPlugin):
|
||||
class FetchArtPlugin(plugins.BeetsPlugin):
|
||||
def __init__(self):
|
||||
super(FetchArtPlugin, self).__init__()
|
||||
|
||||
|
|
@ -359,8 +343,10 @@ class FetchArtPlugin(BeetsPlugin):
|
|||
self.import_stages = [self.fetch_art]
|
||||
self.register_listener('import_task_files', self.assign_art)
|
||||
|
||||
self.config['sources'] = sanitize_sources(
|
||||
self.config['sources'].as_str_seq())
|
||||
if not HAVE_ITUNES and u'itunes' in SOURCES_ALL:
|
||||
SOURCES_ALL.remove(u'itunes')
|
||||
self.config['sources'] = plugins.sanitize_choices(
|
||||
self.config['sources'].as_str_seq(), SOURCES_ALL)
|
||||
|
||||
# Asynchronous; after music is added to the library.
|
||||
def fetch_art(self, session, task):
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@
|
|||
|
||||
"""Moves "featured" artists to the title from the artist field.
|
||||
"""
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets import plugins
|
||||
from beets import ui
|
||||
from beets.util import displayable_path, feat_tokens
|
||||
from beets.util import displayable_path
|
||||
from beets import config
|
||||
import logging
|
||||
import re
|
||||
|
|
@ -31,7 +31,7 @@ def split_on_feat(artist):
|
|||
may be a string or None if none is present.
|
||||
"""
|
||||
# split on the first "feat".
|
||||
regex = re.compile(feat_tokens(), re.IGNORECASE)
|
||||
regex = re.compile(plugins.feat_tokens(), re.IGNORECASE)
|
||||
parts = [s.strip() for s in regex.split(artist, 1)]
|
||||
if len(parts) == 1:
|
||||
return parts[0], None
|
||||
|
|
@ -42,7 +42,7 @@ def split_on_feat(artist):
|
|||
def contains_feat(title):
|
||||
"""Determine whether the title contains a "featured" marker.
|
||||
"""
|
||||
return bool(re.search(feat_tokens(), title, flags=re.IGNORECASE))
|
||||
return bool(re.search(plugins.feat_tokens(), title, flags=re.IGNORECASE))
|
||||
|
||||
|
||||
def update_metadata(item, feat_part, drop_feat):
|
||||
|
|
@ -110,7 +110,7 @@ def ft_in_title(item, drop_feat):
|
|||
ui.print_()
|
||||
|
||||
|
||||
class FtInTitlePlugin(BeetsPlugin):
|
||||
class FtInTitlePlugin(plugins.BeetsPlugin):
|
||||
def __init__(self):
|
||||
super(FtInTitlePlugin, self).__init__()
|
||||
|
||||
|
|
|
|||
|
|
@ -26,10 +26,8 @@ import difflib
|
|||
import itertools
|
||||
from HTMLParser import HTMLParseError
|
||||
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets import ui
|
||||
from beets import config
|
||||
from beets.util import feat_tokens
|
||||
from beets import plugins
|
||||
from beets import config, ui
|
||||
|
||||
|
||||
# Global logger.
|
||||
|
|
@ -86,10 +84,17 @@ def unescape(text):
|
|||
return out
|
||||
|
||||
|
||||
def extract_text(html, starttag):
|
||||
def extract_text_between(html, start_marker, end_marker):
    """Return the part of `html` located between the first occurrence of
    `start_marker` and the following occurrence of `end_marker`, passed
    through `_scrape_strip_cruft`.

    Raises ValueError (from the tuple unpacking) when either marker is
    not found.
    """
    _before, remainder = html.split(start_marker, 1)
    enclosed, _after = remainder.split(end_marker, 1)
    return _scrape_strip_cruft(enclosed, True)
|
||||
|
||||
|
||||
def extract_text_in(html, starttag):
|
||||
"""Extract the text from a <DIV> tag in the HTML starting with
|
||||
``starttag``. Returns None if parsing fails.
|
||||
"""
|
||||
|
||||
# Strip off the leading text before opening tag.
|
||||
try:
|
||||
_, html = html.split(starttag, 1)
|
||||
|
|
@ -138,7 +143,7 @@ def search_pairs(item):
|
|||
artists = [artist]
|
||||
|
||||
# Remove any featuring artists from the artists name
|
||||
pattern = r"(.*?) {0}".format(feat_tokens())
|
||||
pattern = r"(.*?) {0}".format(plugins.feat_tokens())
|
||||
match = re.search(pattern, artist, re.IGNORECASE)
|
||||
if match:
|
||||
artists.append(match.group(1))
|
||||
|
|
@ -151,7 +156,7 @@ def search_pairs(item):
|
|||
titles.append(match.group(1))
|
||||
|
||||
# Remove any featuring artists from the title
|
||||
pattern = r"(.*?) {0}".format(feat_tokens(for_artist=False))
|
||||
pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False))
|
||||
for title in titles[:]:
|
||||
match = re.search(pattern, title, re.IGNORECASE)
|
||||
if match:
|
||||
|
|
@ -178,6 +183,19 @@ def _encode(s):
|
|||
s = s.encode('utf8', 'ignore')
|
||||
return urllib.quote(s)
|
||||
|
||||
# Musixmatch
|
||||
|
||||
MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s'
|
||||
|
||||
|
||||
def fetch_musixmatch(artist, title):
    """Fetch lyrics for `artist` / `title` from musixmatch.com.

    The page URL is built from the title-cased artist and title. The
    lyrics are taken from the text between the ``"lyrics_body":`` and
    ``"lyrics_language":`` markers of the page.

    Returns the lyrics string, or None when the page could not be fetched
    or does not contain those markers.
    """
    url = MUSIXMATCH_URL_PATTERN % (_lw_encode(artist.title()),
                                    _lw_encode(title.title()))
    html = fetch_url(url)
    if not html:
        return
    try:
        lyrics = extract_text_between(html, '"lyrics_body":',
                                      '"lyrics_language":')
    except ValueError:
        # A marker is missing (e.g. no lyrics on the page): report "not
        # found" instead of letting the error escape the fetcher.
        return
    # Drop the surrounding quote/comma characters and turn the escaped
    # newlines of the embedded payload into real ones.
    return lyrics.strip(',"').replace('\\n', '\n')
|
||||
|
||||
# LyricsWiki.
|
||||
|
||||
|
|
@ -201,7 +219,7 @@ def fetch_lyricswiki(artist, title):
|
|||
if not html:
|
||||
return
|
||||
|
||||
lyrics = extract_text(html, "<div class='lyricbox'>")
|
||||
lyrics = extract_text_in(html, "<div class='lyricbox'>")
|
||||
if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
|
||||
return lyrics
|
||||
|
||||
|
|
@ -228,7 +246,7 @@ def fetch_lyricscom(artist, title):
|
|||
if not html:
|
||||
return
|
||||
|
||||
lyrics = extract_text(html, '<div id="lyric_space">')
|
||||
lyrics = extract_text_in(html, '<div id="lyric_space">')
|
||||
if not lyrics:
|
||||
return
|
||||
for not_found_str in LYRICSCOM_NOT_FOUND:
|
||||
|
|
@ -411,8 +429,14 @@ def fetch_google(artist, title):
|
|||
|
||||
# Plugin logic.
|
||||
|
||||
SOURCES_KEYS = ['google', 'lyricwiki', 'lyrics.com', 'musixmatch']
|
||||
SOURCES_ALL = {'google': fetch_google,
|
||||
'lyricwiki': fetch_lyricswiki,
|
||||
'lyrics.com': fetch_lyricscom,
|
||||
'musixmatch': fetch_musixmatch}
|
||||
|
||||
class LyricsPlugin(BeetsPlugin):
|
||||
|
||||
class LyricsPlugin(plugins.BeetsPlugin):
|
||||
def __init__(self):
|
||||
super(LyricsPlugin, self).__init__()
|
||||
self.import_stages = [self.imported]
|
||||
|
|
@ -422,12 +446,17 @@ class LyricsPlugin(BeetsPlugin):
|
|||
'google_engine_ID': u'009217259823014548361:lndtuqkycfu',
|
||||
'fallback': None,
|
||||
'force': False,
|
||||
'sources': SOURCES_KEYS,
|
||||
})
|
||||
|
||||
self.backends = [fetch_lyricswiki, fetch_lyricscom]
|
||||
|
||||
if self.config['google_API_key'].get():
|
||||
self.backends.insert(0, fetch_google)
|
||||
if not self.config['google_API_key'].get() and \
|
||||
'google' in SOURCES_KEYS:
|
||||
SOURCES_KEYS.remove('google')
|
||||
self.config['sources'] = plugins.sanitize_choices(
|
||||
self.config['sources'].as_str_seq(), SOURCES_KEYS)
|
||||
self.backends = []
|
||||
for key in self.config['sources'].as_str_seq():
|
||||
self.backends.append(SOURCES_ALL[key])
|
||||
|
||||
def commands(self):
|
||||
cmd = ui.Subcommand('lyrics', help='fetch song lyrics')
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ library by typing ``pip install requests`` or the equivalent for your OS.
|
|||
|
||||
New:
|
||||
|
||||
* :doc:`/plugins/lyrics`: Add `musixmatch`_ source and introduce a new ``sources``
|
||||
config option that lets you choose exactly where to look for lyrics and in
|
||||
which order.
|
||||
* :doc:`/plugins/lyrics`: Add Brazilian and Hispanic sources to Google custom
|
||||
search engine.
|
||||
* A new :doc:`/plugins/permissions` makes it easy to fix permissions on music
|
||||
|
|
@ -48,7 +51,7 @@ Fixed:
|
|||
|
||||
.. _API changes: http://developer.echonest.com/forums/thread/3650
|
||||
.. _Plex: https://plex.tv/
|
||||
|
||||
.. _musixmatch: https://www.musixmatch.com/
|
||||
|
||||
1.3.9 (November 17, 2014)
|
||||
-------------------------
|
||||
|
|
|
|||
|
|
@ -46,8 +46,13 @@ configuration file. The available options are:
|
|||
backend).
|
||||
Default: None.
|
||||
- **google_engine_ID**: The custom search engine to use.
|
||||
Default: The beets custom search engine, which gathers a list of sources
|
||||
Default: The `beets custom search engine`_, which gathers a list of sources
|
||||
known to be scrapeable.
|
||||
- **sources**: List of sources to search for lyrics. An asterisk `*` expands
|
||||
to all available sources.
|
||||
Default: ``google lyricwiki lyrics.com musixmatch``, i.e., all sources.
|
||||
The *google* source will be automatically deactivated if no `google_API_key` is
|
||||
set up.
|
||||
|
||||
Here's an example of ``config.yaml``::
|
||||
|
||||
|
|
@ -56,6 +61,7 @@ Here's an example of ``config.yaml``::
|
|||
google_API_key: AZERTYUIOPQSDFGHJKLMWXCVBN1234567890_ab
|
||||
google_engine_ID: 009217259823014548361:lndtuqkycfu
|
||||
|
||||
.. _beets custom search engine: https://www.google.com:443/cse/publicurl?cx=009217259823014548361:lndtuqkycfu
|
||||
|
||||
Fetching Lyrics Manually
|
||||
------------------------
|
||||
|
|
@ -96,7 +102,7 @@ default, beets use a list of sources known to be scrapeable.
|
|||
.. _define a custom search engine: http://www.google.com/cse/all
|
||||
|
||||
Note that the Google custom search API is limited to 100 queries per day.
|
||||
After that, the lyrics plugin will fall back on its other data sources.
|
||||
After that, the lyrics plugin will fall back on other declared data sources.
|
||||
|
||||
.. _pip: http://www.pip-installer.org/
|
||||
.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ def main(argv=None):
|
|||
url = s['url'] + s['path']
|
||||
fn = test_lyrics.url_to_filename(url)
|
||||
if not os.path.isfile(fn):
|
||||
html = requests.get(url).text
|
||||
html = requests.get(url, verify=False).text
|
||||
with safe_open_w(fn) as f:
|
||||
f.write(html.encode('utf8'))
|
||||
|
||||
|
|
|
|||
|
|
@ -12,8 +12,6 @@
|
|||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
|
||||
from beetsplug import fetchart
|
||||
|
||||
import os.path
|
||||
from _common import unittest
|
||||
from helper import TestHelper
|
||||
|
|
@ -43,17 +41,6 @@ class FetchartCliTest(unittest.TestCase, TestHelper):
|
|||
with open(cover_path, 'r') as f:
|
||||
self.assertEqual(f.read(), 'IMAGE')
|
||||
|
||||
def test_sanitize_sources(self):
    """Unknown and duplicate sources are dropped, and '*' expands in
    place between the explicitly listed sources.
    """
    self.assertEqual(fetchart.sanitize_sources(['google', 'unknown']),
                     ['google'])
    self.assertEqual(fetchart.sanitize_sources(['google', 'google']),
                     ['google'])
    res = fetchart.sanitize_sources(['google', '*', 'amazon'])
    # Don't check strict equality on lengths, as the itunes source may be
    # removed by the plugin.
    self.assertGreaterEqual(len(res), len(fetchart.SOURCES_ALL) - 1)
    # Separate assertions so a failure shows which expectation broke.
    self.assertEqual(res[0], 'google')
    self.assertEqual(res[-1], 'amazon')
|
||||
|
||||
|
||||
def suite():
|
||||
return unittest.TestLoader().loadTestsFromName(__name__)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
import os
|
||||
import _common
|
||||
import sys
|
||||
import re
|
||||
from _common import unittest
|
||||
from beetsplug import lyrics
|
||||
from beets.library import Item
|
||||
|
|
@ -163,7 +164,7 @@ class LyricsPluginTest(unittest.TestCase):
|
|||
|
||||
|
||||
def url_to_filename(url):
|
||||
url = url.replace('http://', '').replace('www.', '')
|
||||
url = re.sub(r'https?://|www.', '', url)
|
||||
fn = "".join(x for x in url if (x.isalnum() or x == '/'))
|
||||
fn = fn.split('/')
|
||||
fn = os.path.join(LYRICS_ROOT_DIR, fn[0], fn[-1]) + '.txt'
|
||||
|
|
@ -206,7 +207,9 @@ DEFAULT_SOURCES = [
|
|||
dict(DEFAULT_SONG, url=u'http://lyrics.wikia.com/',
|
||||
path=u'The_Beatles:Lady_Madonna'),
|
||||
dict(DEFAULT_SONG, url='http://www.lyrics.com/',
|
||||
path=u'lady-madonna-lyrics-the-beatles.html')
|
||||
path=u'lady-madonna-lyrics-the-beatles.html'),
|
||||
dict(DEFAULT_SONG, url='https://www.musixmatch.com/',
|
||||
path=u'lyrics/The-Beatles/Lady-Madonna'),
|
||||
]
|
||||
|
||||
# Every source entered in default beets google custom search engine
|
||||
|
|
@ -307,8 +310,9 @@ class LyricsGooglePluginTest(unittest.TestCase):
|
|||
"""Test default engines with the default query"""
|
||||
if not check_lyrics_fetched():
|
||||
self.skipTest("Run lyrics_download_samples.py script first.")
|
||||
for (fun, s) in zip((lyrics.fetch_lyricswiki, lyrics.fetch_lyricscom),
|
||||
DEFAULT_SOURCES):
|
||||
for (fun, s) in zip([lyrics.fetch_lyricswiki,
|
||||
lyrics.fetch_lyricscom,
|
||||
lyrics.fetch_musixmatch], DEFAULT_SOURCES):
|
||||
if os.path.isfile(url_to_filename(
|
||||
s['url'] + s['path'])):
|
||||
res = fun(s['artist'], s['title'])
|
||||
|
|
|
|||
|
|
@ -151,6 +151,16 @@ class ItemTypeConflictTest(unittest.TestCase, TestHelper):
|
|||
self.assertNotEqual(None, plugins.types(Item))
|
||||
|
||||
|
||||
class HelpersTest(unittest.TestCase):
    """Tests for the helper functions of ``beets.plugins``."""

    def test_sanitize_choices(self):
        """Check filtering of unknown values, de-duplication and '*'
        expansion by ``plugins.sanitize_choices``.
        """
        self.assertEqual(plugins.sanitize_choices(['A', 'Z'], ('A', 'B')),
                         ['A'])
        # ('A',) is a one-element tuple; a bare ('A') is just the string 'A'.
        self.assertEqual(plugins.sanitize_choices(['A', 'A'], ('A',)),
                         ['A'])
        self.assertEqual(
            plugins.sanitize_choices(['D', '*', 'A'], ('A', 'B', 'C', 'D')),
            ['D', 'B', 'C', 'A'])
|
||||
|
||||
|
||||
def suite():
|
||||
return unittest.TestLoader().loadTestsFromName(__name__)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue