beets/beetsplug/lyrics.py
Adrian Sampson 7a410f636b happy new year
For future reference, this command did the trick:
ack -l 'Copyright 201' | xargs perl -pi -E 's/Copyright 201./Copyright 2013/'
2013-01-11 10:43:41 -08:00

225 lines
6.9 KiB
Python

# This file is part of beets.
# Copyright 2013, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Fetches, embeds, and displays lyrics.
"""
from __future__ import print_function
import urllib
import re
import logging
from beets.plugins import BeetsPlugin
from beets import ui
from beets import config
# Global logger.
log = logging.getLogger('beets')
# Lyrics scrapers.
COMMENT_RE = re.compile(r'<!--.*-->', re.S)
DIV_RE = re.compile(r'<(/?)div>?')
TAG_RE = re.compile(r'<[^>]*>')
BREAK_RE = re.compile(r'<br\s*/?>')
def fetch_url(url):
"""Retrieve the content at a given URL, or return None if the source
is unreachable.
"""
try:
return urllib.urlopen(url).read()
except IOError as exc:
log.debug(u'failed to fetch: {0} ({1})'.format(url, unicode(exc)))
return None
def unescape(text):
"""Resolves &#xxx; HTML entities (and some others)."""
if isinstance(text, str):
text = text.decode('utf8', 'ignore')
out = text.replace(u'&nbsp;', u' ')
def replchar(m):
num = m.group(1)
return unichr(int(num))
out = re.sub(u"&#(\d+);", replchar, out)
return out
def extract_text(html, starttag):
"""Extract the text from a <DIV> tag in the HTML starting with
``starttag``. Returns None if parsing fails.
"""
# Strip off the leading text before opening tag.
try:
_, html = html.split(starttag, 1)
except ValueError:
return
print(html)
# Walk through balanced DIV tags.
level = 0
parts = []
pos = 0
for match in DIV_RE.finditer(html):
if match.group(1): # Closing tag.
level -= 1
if level == 0:
pos = match.end()
else: # Opening tag.
if level == 0:
parts.append(html[pos:match.start()])
level += 1
if level == -1:
parts.append(html[pos:match.start()])
break
else:
print('no closing tag found!')
return
lyrics = ''.join(parts)
# Strip cruft.
lyrics = COMMENT_RE.sub('', lyrics)
lyrics = unescape(lyrics)
lyrics = re.sub(r'\s+', ' ', lyrics) # Whitespace collapse.
lyrics = BREAK_RE.sub('\n', lyrics) # <BR> newlines.
lyrics = re.sub(r'\n +', '\n', lyrics)
lyrics = re.sub(r' +\n', '\n', lyrics)
lyrics = TAG_RE.sub('', lyrics) # Strip remaining HTML tags.
lyrics = lyrics.strip()
return lyrics
LYRICSWIKI_URL_PATTERN = 'http://lyrics.wikia.com/%s:%s'
def _lw_encode(s):
s = re.sub(r'\s+', '_', s)
s = s.replace("<", "Less_Than")
s = s.replace(">", "Greater_Than")
s = s.replace("#", "Number_")
s = re.sub(r'[\[\{]', '(', s)
s = re.sub(r'[\]\}]', ')', s)
if isinstance(s, unicode):
s = s.encode('utf8', 'ignore')
return urllib.quote(s)
def fetch_lyricswiki(artist, title):
"""Fetch lyrics from LyricsWiki."""
url = LYRICSWIKI_URL_PATTERN % (_lw_encode(artist), _lw_encode(title))
html = fetch_url(url)
if not html:
return
lyrics = extract_text(html, "<div class='lyricbox'>")
if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
return lyrics
LYRICSCOM_URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html'
LYRICSCOM_NOT_FOUND = (
'Sorry, we do not have the lyric',
'Submit Lyrics',
)
def _lc_encode(s):
s = re.sub(r'\s+', '-', s)
if isinstance(s, unicode):
s = s.encode('utf8', 'ignore')
return urllib.quote(s)
def fetch_lyricscom(artist, title):
"""Fetch lyrics from Lyrics.com."""
url = LYRICSCOM_URL_PATTERN % (_lc_encode(title), _lc_encode(artist))
html = fetch_url(url)
if not html:
return
lyrics = extract_text(html, '<div id="lyric_space">')
if not lyrics:
return
for not_found_str in LYRICSCOM_NOT_FOUND:
if not_found_str in lyrics:
return
parts = lyrics.split('\n---\nLyrics powered by', 1)
if parts:
return parts[0]
BACKENDS = [fetch_lyricswiki, fetch_lyricscom]
def get_lyrics(artist, title):
"""Fetch lyrics, trying each source in turn."""
for backend in BACKENDS:
lyrics = backend(artist, title)
if lyrics:
if isinstance(lyrics, str):
lyrics = lyrics.decode('utf8', 'ignore')
log.debug(u'got lyrics from backend: {0}'.format(backend.__name__))
return lyrics
# Plugin logic.
def fetch_item_lyrics(lib, loglevel, item, write):
"""Fetch and store lyrics for a single item. If ``write``, then the
lyrics will also be written to the file itself. The ``loglevel``
parameter controls the visibility of the function's status log
messages.
"""
# Skip if the item already has lyrics.
if item.lyrics:
log.log(loglevel, u'lyrics already present: %s - %s' %
(item.artist, item.title))
return
# Fetch lyrics.
lyrics = get_lyrics(item.artist, item.title)
if not lyrics:
log.log(loglevel, u'lyrics not found: %s - %s' %
(item.artist, item.title))
return
log.log(loglevel, u'fetched lyrics: %s - %s' %
(item.artist, item.title))
item.lyrics = lyrics
if write:
item.write()
lib.store(item)
AUTOFETCH = True
class LyricsPlugin(BeetsPlugin):
def __init__(self):
super(LyricsPlugin, self).__init__()
self.import_stages = [self.imported]
self.config.add({
'auto': True,
})
def commands(self):
cmd = ui.Subcommand('lyrics', help='fetch song lyrics')
cmd.parser.add_option('-p', '--print', dest='printlyr',
action='store_true', default=False,
help='print lyrics to console')
def func(lib, opts, args):
# The "write to files" option corresponds to the
# import_write config value.
write = config['import']['write'].get(bool)
for item in lib.items(ui.decargs(args)):
fetch_item_lyrics(lib, logging.INFO, item, write)
if opts.printlyr and item.lyrics:
ui.print_(item.lyrics)
cmd.func = func
return [cmd]
# Auto-fetch lyrics on import.
def imported(self, session, task):
if self.config['auto']:
for item in task.imported_items():
fetch_item_lyrics(session.lib, logging.DEBUG, item, False)