From 01a54e2e0e253645cdfa30966e3d3e2353d3989d Mon Sep 17 00:00:00 2001
From: Adrian Sampson
Date: Thu, 19 Jan 2012 12:25:11 -0800
Subject: [PATCH] first stab at revamped lyrics plugin (#137)

---
 beetsplug/lyrics.py     | 206 ++++++++++++++++++++++++++++++++++++++++
 docs/changelog.rst      |   7 ++
 docs/plugins/index.rst  |   5 +-
 docs/plugins/lyrics.rst |  41 ++++++++
 4 files changed, 255 insertions(+), 4 deletions(-)
 create mode 100644 beetsplug/lyrics.py
 create mode 100644 docs/plugins/lyrics.rst

diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
new file mode 100644
index 000000000..365415237
--- /dev/null
+++ b/beetsplug/lyrics.py
@@ -0,0 +1,206 @@
+# This file is part of beets.
+# Copyright 2012, Adrian Sampson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Fetches, embeds, and displays lyrics.
+"""
+import urllib
+import re
+import logging
+
+from beets.plugins import BeetsPlugin
+from beets import ui
+from beets.ui import commands
+
+log = logging.getLogger('beets')
+
+
+# Lyrics scrapers.
+
+COMMENT_RE = re.compile(r'<!--.*-->', re.S)
+DIV_RE = re.compile(r'<(/?)div>?')
+TAG_RE = re.compile(r'<[^>]*>')
+BREAK_RE = re.compile(r'<br\s*/?>')
+
+def unescape(text):
+    """Resolves decimal &#xxx; HTML entities in ``text``."""
+    def replchar(m):
+        num = m.group(1)
+        return unichr(int(num))
+    return re.sub(r"&#(\d+);", replchar, text)
+
+def _quote(s):
+    """URL-quote ``s``. Unicode strings are encoded as UTF-8 first
+    because ``urllib.quote`` fails on non-ASCII unicode input.
+    """
+    if isinstance(s, unicode):
+        s = s.encode('utf8')
+    return urllib.quote(s)
+
+def _fetch_url(url):
+    """Return the body of the page at ``url``, or None if the request
+    fails with an IOError (e.g., the host cannot be reached).
+    """
+    try:
+        page = urllib.urlopen(url)
+        try:
+            return page.read()
+        finally:
+            page.close()
+    except IOError as exc:
+        log.debug('failed to fetch %s: %s', url, exc)
+        return None
+
+def extract_text(html, starttag):
+    """Extract the text from a <div> tag in the HTML starting with
+    ``starttag``. Returns None if parsing fails.
+
+    Text inside nested <div> elements is dropped, <br> tags become
+    newlines, and all other markup is stripped.
+    """
+    # Strip off the leading text before opening tag.
+    try:
+        _, html = html.split(starttag, 1)
+    except ValueError:
+        return
+
+    # Walk through balanced DIV tags.
+    level = 0
+    parts = []
+    pos = 0
+    for match in DIV_RE.finditer(html):
+        if match.group(1): # Closing tag.
+            level -= 1
+            if level == 0:
+                pos = match.end()
+        else: # Opening tag.
+            if level == 0:
+                parts.append(html[pos:match.start()])
+
+            level += 1
+
+        if level == -1:
+            # This closes the tag we started in: we're done.
+            parts.append(html[pos:match.start()])
+            break
+    else:
+        log.debug('no closing tag found!')
+        return
+    lyrics = ''.join(parts)
+
+    # Strip cruft.
+    lyrics = COMMENT_RE.sub('', lyrics)
+    lyrics = unescape(lyrics)
+    lyrics = re.sub(r'\s+', ' ', lyrics) # Whitespace collapse.
+    lyrics = BREAK_RE.sub('\n', lyrics) # <br> newlines.
+    lyrics = re.sub(r'\n +', '\n', lyrics)
+    lyrics = re.sub(r' +\n', '\n', lyrics)
+    lyrics = TAG_RE.sub('', lyrics) # Strip remaining HTML tags.
+    lyrics = lyrics.strip()
+    return lyrics
+
+LYRICSWIKI_URL_PATTERN = 'http://lyrics.wikia.com/%s:%s'
+def _lw_encode(s):
+    """Encode ``s`` as a component of a LyricsWiki page URL."""
+    s = re.sub(r'\s+', '_', s)
+    s = s.replace("<", "Less_Than")
+    s = s.replace(">", "Greater_Than")
+    s = s.replace("#", "Number_")
+    s = re.sub(r'[\[\{]', '(', s)
+    s = re.sub(r'[\]\}]', ')', s)
+    return _quote(s)
+def fetch_lyricswiki(artist, title):
+    """Fetch lyrics from LyricsWiki. Returns None if the page can't
+    be fetched or parsed or if it carries the "not licensed" notice.
+    """
+    url = LYRICSWIKI_URL_PATTERN % (_lw_encode(artist), _lw_encode(title))
+    html = _fetch_url(url)
+    if not html:
+        return
+
+    lyrics = extract_text(html, "<div class='lyricbox'>")
+    if lyrics and 'Unfortunately, we are not licensed' not in lyrics:
+        return lyrics
+
+LYRICSCOM_URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html'
+def _lc_encode(s):
+    """Encode ``s`` as a component of a Lyrics.com page URL."""
+    s = re.sub(r'\s+', '-', s)
+    return _quote(s)
+def fetch_lyricscom(artist, title):
+    """Fetch lyrics from Lyrics.com. Returns None if the page can't
+    be fetched or parsed.
+    """
+    url = LYRICSCOM_URL_PATTERN % (_lc_encode(title), _lc_encode(artist))
+    html = _fetch_url(url)
+    if not html:
+        return
+
+    lyrics = extract_text(html, '<div id="lyric_space">')
+    if lyrics:
+        # Drop the trailing credit line when there is one. (Unpacking
+        # the split result would raise ValueError when there isn't.)
+        return lyrics.split('\n---\nLyrics powered by', 1)[0]
+
+BACKENDS = [fetch_lyricswiki, fetch_lyricscom]
+def get_lyrics(artist, title):
+    """Fetch lyrics, trying each source in turn. Returns None if no
+    source has them.
+    """
+    for backend in BACKENDS:
+        lyrics = backend(artist, title)
+        if lyrics:
+            return lyrics
+
+
+# Plugin logic.
+
+def fetch_lyrics(lib, query, write):
+    """Fetch and store lyrics for each matched item. If ``write``, then
+    the lyrics will also be written to the file itself.
+    """
+    for item in lib.items(query):
+        # Skip if the item already has lyrics.
+        if item.lyrics:
+            log.info(u'lyrics already present: %s - %s',
+                     item.artist, item.title)
+            continue
+
+        # Fetch lyrics.
+        lyrics = get_lyrics(item.artist, item.title)
+        if not lyrics:
+            log.info(u'lyrics not found: %s - %s',
+                     item.artist, item.title)
+            continue
+
+        log.info(u'fetched lyrics: %s - %s',
+                 item.artist, item.title)
+        item.lyrics = lyrics
+        if write:
+            item.write()
+        lib.store(item)
+    lib.save()
+
+class LyricsPlugin(BeetsPlugin):
+    """Provides the ``lyrics`` command."""
+    def commands(self):
+        """Return the plugin's subcommands: just ``lyrics``."""
+        cmd = ui.Subcommand('lyrics', help='fetch song lyrics')
+        def func(lib, config, opts, args):
+            # The "write to files" option corresponds to the
+            # import_write config value.
+            write = ui.config_val(config, 'beets', 'import_write',
+                                  commands.DEFAULT_IMPORT_WRITE, bool)
+            fetch_lyrics(lib, ui.decargs(args), write)
+        cmd.func = func
+        return [cmd]
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 7620cc701..b86e727bb 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,13 @@
 Changelog
 =========
 
+1.0b13 (in development)
+-----------------------
+
+* The :doc:`/plugins/lyrics`, originally by `Peter Brunner`_, is revamped and
+  included with beets, making it easy to fetch **song lyrics**.
+ + 1.0b12 (January 16, 2012) ------------------------- diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst index 5b852ed73..5d2b52f14 100644 --- a/docs/plugins/index.rst +++ b/docs/plugins/index.rst @@ -36,6 +36,7 @@ disabled by default, but you can turn them on as described above: :maxdepth: 1 chroma + lyrics bpd mpdupdate embedart @@ -53,8 +54,6 @@ Other Plugins Here are a few of the plugins written by the beets community: -* `beets-lyrics`_ searches Web repositories for song lyrics and adds them to your files. - * `beetFs`_ is a FUSE filesystem for browsing the music in your beets library. (Might be out of date.) @@ -63,8 +62,6 @@ Here are a few of the plugins written by the beets community: * `A cmus plugin`_ integrates with the `cmus`_ console music player. -.. _beets-replaygain: https://github.com/Lugoues/beets-replaygain/ -.. _beets-lyrics: https://github.com/Lugoues/beets-lyrics/ .. _beetFs: http://code.google.com/p/beetfs/ .. _Beet-MusicBrainz-Collection: https://github.com/jeffayle/Beet-MusicBrainz-Collection/ diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst new file mode 100644 index 000000000..70a521c98 --- /dev/null +++ b/docs/plugins/lyrics.rst @@ -0,0 +1,41 @@ +Lyrics Plugin +============= + +The ``lyrics`` plugin fetches and stores song lyrics from databases on the Web. +Namely, the current version of the plugin uses `Lyric Wiki`_ and `Lyrics.com`_. + +.. _Lyric Wiki: http://lyrics.wikia.com/ +.. _Lyrics.com: http://www.lyrics.com/ + +Fetch Lyrics During Import +-------------------------- + +To automatically fetch lyrics for songs you import, just enable the plugin by +putting ``lyrics`` on your config file's ``plugins`` line (see +:doc:`/plugins/index`). When importing new files, beets will now fetch lyrics +for files that don't already have them. The lyrics will be stored in the beets +database. If the ``import_write`` config option is on, then the lyrics will also +be written to the files' tags. 
+ +This behavior can be disabled with the ``autofetch`` config option (see below). + +Fetching Lyrics Manually +------------------------ + +The ``lyrics`` command provided by this plugin fetches lyrics for items that +match a query (see :doc:`/reference/query`). For example, ``beet lyrics magnetic +fields absolutely cuckoo`` will get the lyrics for the appropriate Magnetic +Fields song, ``beet lyrics magnetic fields`` will get lyrics for all my tracks +by that band, and ``beet lyrics`` will get lyrics for my entire library. The +lyrics will be added to the beets database and, if ``import_write`` is on, +embedded into files' metadata. + +Configuring +----------- + +The plugin has one configuration option, ``autofetch``, which lets you disable +automatic lyrics fetching during import. To do so, add this to your +``~/.beetsconfig``:: + + [lyrics] + autofetch: no