mirror of
https://github.com/beetbox/beets.git
synced 2025-12-06 08:39:17 +01:00
For future reference, this command did the trick: ack -l 'Copyright 201' | xargs perl -pi -E 's/Copyright 201./Copyright 2013/'
225 lines
6.9 KiB
Python
225 lines
6.9 KiB
Python
# This file is part of beets.
|
|
# Copyright 2013, Adrian Sampson.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
|
|
"""Fetches, embeds, and displays lyrics.
|
|
"""
|
|
from __future__ import print_function
|
|
|
|
import urllib
|
|
import re
|
|
import logging
|
|
|
|
from beets.plugins import BeetsPlugin
|
|
from beets import ui
|
|
from beets import config
|
|
|
|
|
|
# Global logger.
|
|
|
|
# Messages from this module are emitted on the logger named 'beets'
# rather than on a module-specific logger.
log = logging.getLogger('beets')
|
|
|
|
|
|
# Lyrics scrapers.
|
|
|
|
# HTML comments, possibly spanning several lines. The match is
# non-greedy so that text sitting between two separate comments is kept
# instead of being swallowed together with both of them.
COMMENT_RE = re.compile(r'<!--.*?-->', re.S)
# '<div' or '</div', optionally followed directly by '>'. Attributes of
# an opening tag are not consumed. Group 1 is '/' for a closing tag and
# empty for an opening one.
DIV_RE = re.compile(r'<(/?)div>?')
# Any HTML tag.
TAG_RE = re.compile(r'<[^>]*>')
# <br>, <br/> and <br /> line breaks, in either letter case.
BREAK_RE = re.compile(r'<br\s*/?>', re.I)
|
|
|
|
def fetch_url(url):
    """Retrieve the content at a given URL, or return None if the source
    is unreachable.

    Any ``IOError`` raised while opening, reading or closing the
    connection is logged at debug level and turned into a ``None``
    result, so callers only need to check for a false value.
    """
    try:
        response = urllib.urlopen(url)
        try:
            return response.read()
        finally:
            # Close the handle explicitly instead of relying on garbage
            # collection to release the underlying socket.
            response.close()
    except IOError as exc:
        log.debug(u'failed to fetch: {0} ({1})'.format(url, unicode(exc)))
        return None
|
|
|
|
def unescape(text):
    """Resolves &#xxx; HTML entities (and some others).

    Byte strings are first decoded as UTF-8 (undecodable bytes are
    dropped), so the result is always a unicode string. Non-breaking
    spaces, written either as the ``&nbsp;`` entity or as the literal
    U+00A0 character, become ordinary spaces. Decimal ``&#NNN;``
    references are replaced by the character they name; a reference
    whose number is not a valid code point is left untouched.
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+.
    if isinstance(text, bytes):
        text = text.decode('utf8', 'ignore')
    out = text.replace(u'&nbsp;', u' ').replace(u'\xa0', u' ')

    def replchar(m):
        try:
            return unichr(int(m.group(1)))
        except (ValueError, OverflowError):
            # Number outside the range of representable code points:
            # keep the entity text rather than aborting the whole scrape.
            return m.group(0)

    return re.sub(u'&#(\\d+);', replchar, out)
|
|
|
|
def extract_text(html, starttag):
    """Extract the text from a <DIV> tag in the HTML starting with
    ``starttag``. Returns None if parsing fails.

    ``starttag`` is the literal opening tag to look for. Parsing fails
    when ``starttag`` does not occur in ``html`` or when the matching
    closing ``</div>`` is never found. Text inside nested ``<div>``
    elements is left out. In the result, HTML comments and tags are
    removed, entities handled by ``unescape`` are resolved, runs of
    whitespace are collapsed and ``<br>`` tags become newlines.
    """
    # Strip off the leading text before opening tag.
    try:
        _, html = html.split(starttag, 1)
    except ValueError:
        return None

    # Walk through balanced DIV tags. ``html`` now starts just inside the
    # wanted element, which counts as nesting level 0; the closing tag
    # that drops the level to -1 is therefore the wanted element's own.
    level = 0
    parts = []
    pos = 0
    for match in DIV_RE.finditer(html):
        if match.group(1):  # Closing tag.
            level -= 1
            if level == 0:
                # Back at the top level: resume collecting text here.
                pos = match.end()
        else:  # Opening tag.
            if level == 0:
                # Keep the text seen so far; the nested element's own
                # content is skipped.
                parts.append(html[pos:match.start()])
            level += 1

        if level == -1:
            parts.append(html[pos:match.start()])
            break
    else:
        log.debug(u'no closing tag found!')
        return None
    lyrics = ''.join(parts)

    # Strip cruft. The order matters: whitespace is collapsed before the
    # <br> tags are converted, so only <br> tags yield line breaks.
    lyrics = COMMENT_RE.sub('', lyrics)
    lyrics = unescape(lyrics)
    lyrics = re.sub(r'\s+', ' ', lyrics)  # Whitespace collapse.
    lyrics = BREAK_RE.sub('\n', lyrics)  # <BR> newlines.
    lyrics = re.sub(r'\n +', '\n', lyrics)
    lyrics = re.sub(r' +\n', '\n', lyrics)
    lyrics = TAG_RE.sub('', lyrics)  # Strip remaining HTML tags.
    lyrics = lyrics.strip()
    return lyrics
|
|
|
|
# URL template for a LyricsWiki song page. fetch_lyricswiki fills it with
# the encoded artist followed by the encoded title.
LYRICSWIKI_URL_PATTERN = 'http://lyrics.wikia.com/%s:%s'
|
|
def _lw_encode(s):
|
|
s = re.sub(r'\s+', '_', s)
|
|
s = s.replace("<", "Less_Than")
|
|
s = s.replace(">", "Greater_Than")
|
|
s = s.replace("#", "Number_")
|
|
s = re.sub(r'[\[\{]', '(', s)
|
|
s = re.sub(r'[\]\}]', ')', s)
|
|
if isinstance(s, unicode):
|
|
s = s.encode('utf8', 'ignore')
|
|
return urllib.quote(s)
|
|
def fetch_lyricswiki(artist, title):
    """Look up lyrics on LyricsWiki.

    Returns the lyrics text, or None when the page cannot be fetched,
    contains no lyric box, or only carries the "not licensed" notice.
    """
    encoded = (_lw_encode(artist), _lw_encode(title))
    page = fetch_url(LYRICSWIKI_URL_PATTERN % encoded)
    if not page:
        return None

    text = extract_text(page, "<div class='lyricbox'>")
    if not text or 'Unfortunately, we are not licensed' in text:
        return None
    return text
|
|
|
|
# URL template for a Lyrics.com song page. fetch_lyricscom fills it with
# the encoded title followed by the encoded artist.
LYRICSCOM_URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html'
# If any of these strings shows up in the extracted text, fetch_lyricscom
# treats the page as having no lyrics.
LYRICSCOM_NOT_FOUND = ('Sorry, we do not have the lyric', 'Submit Lyrics')
|
|
def _lc_encode(s):
|
|
s = re.sub(r'\s+', '-', s)
|
|
if isinstance(s, unicode):
|
|
s = s.encode('utf8', 'ignore')
|
|
return urllib.quote(s)
|
|
def fetch_lyricscom(artist, title):
    """Look up lyrics on Lyrics.com.

    Returns the lyrics text with the trailing "Lyrics powered by"
    attribution cut off, or None when the page cannot be fetched, has no
    lyric container, or contains one of the LYRICSCOM_NOT_FOUND markers.
    """
    # Note the order: this site's URLs put the title before the artist.
    encoded = (_lc_encode(title), _lc_encode(artist))
    page = fetch_url(LYRICSCOM_URL_PATTERN % encoded)
    if not page:
        return None

    text = extract_text(page, '<div id="lyric_space">')
    if not text:
        return None
    if any(marker in text for marker in LYRICSCOM_NOT_FOUND):
        return None

    # Keep only what precedes the attribution footer, if there is one.
    body, _, _ = text.partition('\n---\nLyrics powered by')
    return body
|
|
|
|
# Lyrics sources, in the order in which get_lyrics tries them.
BACKENDS = [fetch_lyricswiki, fetch_lyricscom]
|
|
def get_lyrics(artist, title):
    """Ask each function in BACKENDS, in order, for the lyrics of a song.

    Returns the first non-empty result as a unicode string (byte strings
    are decoded as UTF-8, dropping undecodable bytes), or None when no
    backend finds anything.
    """
    for fetch in BACKENDS:
        found = fetch(artist, title)
        if not found:
            continue
        # ``bytes`` is an alias of ``str`` on Python 2.6+.
        if isinstance(found, bytes):
            found = found.decode('utf8', 'ignore')
        log.debug(u'got lyrics from backend: {0}'.format(fetch.__name__))
        return found
    return None
|
|
|
|
|
|
# Plugin logic.
|
|
|
|
def fetch_item_lyrics(lib, loglevel, item, write):
    """Look up the lyrics for one item and save them.

    An item that already has lyrics is left alone. Otherwise the lyrics
    are fetched with ``get_lyrics`` and assigned to ``item.lyrics``; when
    ``write`` is true ``item.write()`` is called as well, and the item is
    then passed to ``lib.store``. Every outcome is reported through the
    module logger at level ``loglevel``.
    """
    def report(template):
        # All status messages share the "artist - title" suffix.
        log.log(loglevel, template % (item.artist, item.title))

    # Nothing to do when lyrics are already there.
    if item.lyrics:
        report(u'lyrics already present: %s - %s')
        return

    found = get_lyrics(item.artist, item.title)
    if not found:
        report(u'lyrics not found: %s - %s')
        return

    report(u'fetched lyrics: %s - %s')
    item.lyrics = found
    if write:
        item.write()
    lib.store(item)
|
|
|
|
# NOTE(review): nothing in the visible source reads this flag; the plugin
# consults its 'auto' configuration option instead. Confirm that no other
# module uses it before relying on or removing it.
AUTOFETCH = True
|
|
class LyricsPlugin(BeetsPlugin):
    """Plugin providing a ``lyrics`` command and an import stage that
    fetch lyrics for library items.
    """

    def __init__(self):
        super(LyricsPlugin, self).__init__()
        self.import_stages = [self.imported]
        # Fetching during import is on unless the user sets 'auto' to a
        # false value.
        self.config.add({'auto': True})

    def commands(self):
        """Return the ``lyrics`` subcommand, which fetches lyrics for the
        items matching the query arguments and, with ``-p``/``--print``,
        also prints them.
        """
        def run(lib, opts, args):
            # The "write to files" option corresponds to the
            # import_write config value.
            write = config['import']['write'].get(bool)
            for item in lib.items(ui.decargs(args)):
                fetch_item_lyrics(lib, logging.INFO, item, write)
                if opts.printlyr and item.lyrics:
                    ui.print_(item.lyrics)

        lyrics_cmd = ui.Subcommand('lyrics', help='fetch song lyrics')
        lyrics_cmd.parser.add_option(
            '-p', '--print', dest='printlyr',
            action='store_true', default=False,
            help='print lyrics to console',
        )
        lyrics_cmd.func = run
        return [lyrics_cmd]

    def imported(self, session, task):
        """Import stage: fetch lyrics for the task's imported items when
        the 'auto' option is enabled. Files are not rewritten here, and
        status messages are logged at debug level.
        """
        if not self.config['auto']:
            return
        for item in task.imported_items():
            fetch_item_lyrics(session.lib, logging.DEBUG, item, False)
|