# This file is part of beets.
# Copyright 2013, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

"""Fetches, embeds, and displays lyrics.
"""
from __future__ import print_function

import urllib
import re
import logging

from beets.plugins import BeetsPlugin
from beets import ui
from beets import config


# Global logger.

log = logging.getLogger('beets')


# Lyrics scrapers.

# NOTE(review): the COMMENT_RE and BREAK_RE patterns were empty in the
# reviewed copy (an empty BREAK_RE would insert a newline between every
# character). They are restored here to match HTML comments and <br>
# tags, as their names and their use in extract_text imply -- confirm
# against upstream.
COMMENT_RE = re.compile(r'<!--.*-->', re.S)
DIV_RE = re.compile(r'<(/?)div>?')
TAG_RE = re.compile(r'<[^>]*>')
BREAK_RE = re.compile(r'<br\s*/?>')


def fetch_url(url):
    """Retrieve the content at a given URL, or return None if the source
    is unreachable.
    """
    try:
        response = urllib.urlopen(url)
        try:
            return response.read()
        finally:
            # Release the connection even if reading fails part-way.
            response.close()
    except IOError as exc:
        log.debug(u'failed to fetch: {0} ({1})'.format(url, unicode(exc)))
        return None


def unescape(text):
    """Resolves &#xxx; HTML entities (and some others).

    Byte strings are decoded as UTF-8 (undecodable bytes are dropped)
    and a unicode string is always returned.
    """
    if isinstance(text, str):
        text = text.decode('utf8', 'ignore')
    # NOTE(review): the entity name was garbled in the reviewed copy;
    # restored as the non-breaking-space entity -- confirm.
    out = text.replace(u'&nbsp;', u' ')

    def replchar(m):
        num = m.group(1)
        try:
            return unichr(int(num))
        except (ValueError, OverflowError):
            # Code point outside the range unichr accepts: keep the
            # entity text as-is instead of aborting the whole fetch.
            return m.group(0)

    out = re.sub(u'&#(\\d+);', replchar, out)
    return out


def extract_text(html, starttag):
    """Extract the text from a <div> tag in the HTML starting with
    ``starttag``. Returns None if parsing fails.

    The text nested inside inner <div> elements is skipped; only the
    content directly inside the outer element is kept. Comments and
    remaining tags are stripped, whitespace is collapsed, and <br>
    tags become newlines.
    """
    # Strip off the leading text before opening tag.
    try:
        _, html = html.split(starttag, 1)
    except ValueError:
        return

    # Walk through balanced DIV tags.
    level = 0
    parts = []
    pos = 0
    for match in DIV_RE.finditer(html):
        if match.group(1):  # Closing tag.
            level -= 1
            if level == 0:
                pos = match.end()
        else:  # Opening tag.
            if level == 0:
                parts.append(html[pos:match.start()])
            level += 1

        if level == -1:
            # This closing tag balances ``starttag`` itself: done.
            parts.append(html[pos:match.start()])
            break
    else:
        # Report through the logger rather than printing to stdout.
        log.debug(u'no closing tag found!')
        return
    lyrics = ''.join(parts)

    # Strip cruft.
    lyrics = COMMENT_RE.sub('', lyrics)
    lyrics = unescape(lyrics)
    lyrics = re.sub(r'\s+', ' ', lyrics)  # Whitespace collapse.
    lyrics = BREAK_RE.sub('\n', lyrics)  # <br> newlines.
    lyrics = re.sub(r'\n +', '\n', lyrics)
    lyrics = re.sub(r' +\n', '\n', lyrics)
    lyrics = TAG_RE.sub('', lyrics)  # Strip remaining HTML tags.
    lyrics = lyrics.strip()
    return lyrics


def _encode(s):
    """Encode the string for inclusion in a URL (common to both
    LyricsWiki and Lyrics.com).
    """
    if isinstance(s, unicode):
        # Replace "fancy" apostrophes with straight ones.
        s = s.replace(u'\u2019', u"'")
        s = s.encode('utf8', 'ignore')
    return urllib.quote(s)


LYRICSWIKI_URL_PATTERN = 'http://lyrics.wikia.com/%s:%s'


def _lw_encode(s):
    """Encode an artist or title for a LyricsWiki URL: whitespace runs
    become underscores, a few special characters are spelled out or
    normalized to parentheses, and the result is URL-quoted.
    """
    s = re.sub(r'\s+', '_', s)
    s = s.replace("<", "Less_Than")
    s = s.replace(">", "Greater_Than")
    s = s.replace("#", "Number_")
    s = re.sub(r'[\[\{]', '(', s)
    s = re.sub(r'[\]\}]', ')', s)
    return _encode(s)


def fetch_lyricswiki(artist, title):
    """Fetch lyrics from LyricsWiki.

    Returns None when the page cannot be fetched or parsed, or when it
    carries the "not licensed" notice instead of lyrics.
    """
    url = LYRICSWIKI_URL_PATTERN % (_lw_encode(artist), _lw_encode(title))
    html = fetch_url(url)
    if not html:
        return

    # NOTE(review): the start tag literal was garbled in the reviewed
    # copy; restored as the lyrics container div -- confirm.
    lyrics = extract_text(html, "<div class='lyricbox'>")
    if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
        return lyrics


LYRICSCOM_URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html'
LYRICSCOM_NOT_FOUND = (
    'Sorry, we do not have the lyric',
    'Submit Lyrics',
)


def _lc_encode(s):
    """Encode an artist or title for a Lyrics.com URL: whitespace runs
    become hyphens and the result is URL-quoted.
    """
    s = re.sub(r'\s+', '-', s)
    return _encode(s)


def fetch_lyricscom(artist, title):
    """Fetch lyrics from Lyrics.com.

    Returns None when the page cannot be fetched or parsed, or when it
    contains one of the ``LYRICSCOM_NOT_FOUND`` markers.
    """
    # The URL pattern takes the title first, then the artist.
    url = LYRICSCOM_URL_PATTERN % (_lc_encode(title), _lc_encode(artist))
    html = fetch_url(url)
    if not html:
        return

    # NOTE(review): the start tag literal was garbled in the reviewed
    # copy; restored as the lyrics container div -- confirm.
    lyrics = extract_text(html, '<div id="lyric_space">')
    if not lyrics:
        return
    for not_found_str in LYRICSCOM_NOT_FOUND:
        if not_found_str in lyrics:
            return

    # Drop the attribution footer, if present. split() always yields at
    # least one element, so the first part can be returned directly.
    return lyrics.split('\n---\nLyrics powered by', 1)[0]


BACKENDS = [fetch_lyricswiki, fetch_lyricscom]


def get_lyrics(artist, title):
    """Fetch lyrics, trying each source in turn.

    Returns the first non-empty result as unicode, or None if no
    backend produced lyrics.
    """
    for backend in BACKENDS:
        lyrics = backend(artist, title)
        if lyrics:
            if isinstance(lyrics, str):
                lyrics = lyrics.decode('utf8', 'ignore')
            log.debug(u'got lyrics from backend: {0}'.format(
                backend.__name__
            ))
            return lyrics


# Plugin logic.

def fetch_item_lyrics(lib, loglevel, item, write):
    """Fetch and store lyrics for a single item. If ``write``, then the
    lyrics will also be written to the file itself. The ``loglevel``
    parameter controls the visibility of the function's status log
    messages.
    """
    # Skip if the item already has lyrics.
    if item.lyrics:
        log.log(loglevel, u'lyrics already present: %s - %s' %
                          (item.artist, item.title))
        return

    # Fetch lyrics.
    lyrics = get_lyrics(item.artist, item.title)
    if not lyrics:
        log.log(loglevel, u'lyrics not found: %s - %s' %
                          (item.artist, item.title))
        return

    log.log(loglevel, u'fetched lyrics: %s - %s' %
                      (item.artist, item.title))
    item.lyrics = lyrics
    if write:
        item.write()
    lib.store(item)


AUTOFETCH = True


class LyricsPlugin(BeetsPlugin):
    """Plugin providing a ``lyrics`` subcommand and an import stage that
    fetches lyrics for newly imported items.
    """

    def __init__(self):
        super(LyricsPlugin, self).__init__()
        self.import_stages = [self.imported]
        self.config.add({
            'auto': True,
        })

    def commands(self):
        """Return the list holding the ``lyrics`` subcommand."""
        cmd = ui.Subcommand('lyrics', help='fetch song lyrics')
        cmd.parser.add_option('-p', '--print', dest='printlyr',
                              action='store_true', default=False,
                              help='print lyrics to console')

        def func(lib, opts, args):
            # The "write to files" option corresponds to the
            # import_write config value.
            write = config['import']['write'].get(bool)
            for item in lib.items(ui.decargs(args)):
                fetch_item_lyrics(lib, logging.INFO, item, write)
                if opts.printlyr and item.lyrics:
                    ui.print_(item.lyrics)

        cmd.func = func
        return [cmd]

    # Auto-fetch lyrics on import.
    def imported(self, session, task):
        """Import stage: when the ``auto`` option is enabled, fetch
        lyrics for every item imported by ``task``. Status messages are
        logged at DEBUG level and files are not written here.
        """
        if self.config['auto']:
            for item in task.imported_items():
                fetch_item_lyrics(session.lib, logging.DEBUG, item, False)