Merge pull request #424 from pscn/echoplus

New plugin to fetch extended information from the EchoNest (call for feedback)
This commit is contained in:
Peter Schnebel 2013-10-27 03:10:29 -07:00
commit 479f92f250
5 changed files with 513 additions and 0 deletions

416
beetsplug/echonest.py Normal file
View file

@ -0,0 +1,416 @@
# This file is part of beets.
import time
import logging
import socket
import math
from beets import util, config, plugins, ui, library
import pyechonest
import pyechonest.song
import pyechonest.track
log = logging.getLogger('beets')
# If a request at the EchoNest fails, we want to retry the request RETRIES
# times and wait between retries for RETRY_INTERVAL seconds.
RETRIES = 10
RETRY_INTERVAL = 10
# for converting files
import os
import tempfile
from string import Template
from subprocess import Popen
DEVNULL = open(os.devnull, 'wb')
# The attributes we can import and where to store them.
# Keys are the attribute names as they appear in the EchoNest audio summary;
# values are the names of the beets item fields they are copied to (only
# ``tempo`` is renamed, to ``bpm``). The code that reads this table skips
# entries whose value is None.
# Note: We use echonest_id (song_id) and echonest_fingerprint to speed up
# lookups. They are not listed as attributes here.
ATTRIBUTES = {
'energy' : 'energy',
'liveness' : 'liveness',
'speechiness' : 'speechiness',
'acousticness' : 'acousticness',
'danceability' : 'danceability',
'valence' : 'valence',
'tempo' : 'bpm',
}
def _splitstrip(string):
"""Split string at comma and return the stripped values as array."""
return [ s.strip() for s in string.split(u',') ]
class EchonestMetadataPlugin(plugins.BeetsPlugin):
# Cache of fetched EchoNest results keyed by item path. It is filled by
# fetch_song_task() and the ``echonest`` command and read by apply_metadata().
# Being a mutable class attribute, the dict is shared by all instances.
_songs = {}
# NOTE(review): the next two attributes are not referenced anywhere in the
# visible code -- confirm whether they are still needed.
_attributes = []
_no_mapping = []
def __init__(self):
    """Register config defaults, configure pyechonest and hook into the
    importer.

    The import listeners are only registered while the ``auto`` option is
    enabled, so that ``auto: no`` actually disables fetching during
    import. The ``echonest`` command is available either way.
    """
    super(EchonestMetadataPlugin, self).__init__()
    self.config.add({
        'auto': True,
        'apikey': u'NY2KTZHQ0QDSHBAP6',
        'codegen': None,
        'upload': True,
        'convert': True,
    })
    # Publish every attribute mapping as a config default as well.
    for k, v in ATTRIBUTES.iteritems():
        self.config.add({k: v})

    pyechonest.config.ECHO_NEST_API_KEY = \
        config['echonest']['apikey'].get(unicode)

    # Only override the codegen binary when the user configured one.
    if config['echonest']['codegen'].get() is not None:
        pyechonest.config.CODEGEN_BINARY_OVERRIDE = \
            config['echonest']['codegen'].get(unicode)

    # Honour the ``auto`` option: without it, nothing happens on import.
    if config['echonest']['auto'].get(bool):
        self.register_listener('import_task_start', self.fetch_song_task)
        self.register_listener('import_task_apply',
                               self.apply_metadata_task)
def _echofun(self, func, **kwargs):
    """Wrapper for requests to the EchoNest API.

    Calls ``func(**kwargs)`` up to RETRIES times, sleeping RETRY_INTERVAL
    seconds after a rate-limit error (API error code 3) or an IO/socket
    error before trying again.

    Returns the result of ``func``, or None when the API reports an error
    that retrying cannot fix (code 5: unknown identifier, or any other
    API error code).

    Raises ``Exception`` when all retries are used up.
    """
    for _ in range(RETRIES):
        try:
            result = func(**kwargs)
        except pyechonest.util.EchoNestAPIError as e:
            if e.code == 3:
                # reached access limit per minute
                time.sleep(RETRY_INTERVAL)
            elif e.code == 5:
                # specified identifier does not exist
                # no use in trying again.
                log.debug(u'echonest: {}'.format(e))
                return None
            else:
                log.error(u'echonest: {0}'.format(e.args[0][0]))
                return None
        except (pyechonest.util.EchoNestIOError, socket.error) as e:
            log.warn(u'echonest: IO error: {0}'.format(e))
            time.sleep(RETRY_INTERVAL)
        else:
            return result
    # Every attempt ended in a retryable error: we used up all our
    # allotted retries.
    raise Exception(u'exceeded retries')
def fingerprint(self, item):
    """Return the EchoNest fingerprint code for ``item``.

    A fingerprint already stored on the item (``echonest_fingerprint``) is
    returned as is. Otherwise the codegen is run on the item's file, the
    resulting code is stored on the item, the item is written, and the
    code is returned. Returns None when fingerprinting fails.
    """
    # Fast path: nothing to compute.
    if item.get('echonest_fingerprint', None) is not None:
        return item.get('echonest_fingerprint')

    try:
        result = self._echofun(pyechonest.util.codegen,
                               filename=item.path.decode('utf-8'))
        item['echonest_fingerprint'] = result[0]['code']
        item.write()
    except Exception as exc:
        log.error(u'echonest: fingerprinting failed: {0}'
                  .format(str(exc)))
        return None
    return item.get('echonest_fingerprint')
def convert(self, item):
    """Convert an item in an unsupported media format to ogg. Config
    pending.
    This is stolen from Jakob Schnitzers convert plugin.

    The audio is transcoded with ``ffmpeg`` into a fresh temporary file.
    Returns the path of that file, or None when the encoder could not be
    started or exited with a non-zero status. On failure the temporary
    file is removed. On success the caller owns the returned file.
    """
    fd, dest = tempfile.mkstemp(u'.ogg')
    os.close(fd)
    source = item.path
    log.info(u'echonest: encoding {0} to {1}'
             .format(util.displayable_path(source),
                     util.displayable_path(dest)))
    # FIXME: use avconv?
    # The argument vector is built directly, so the paths are passed
    # through verbatim, whatever characters they contain.
    opts = ['ffmpeg', '-i', source, '-y', '-acodec', 'libvorbis',
            '-vn', '-aq', '2', dest]
    try:
        encode = Popen(opts, close_fds=True, stderr=DEVNULL)
        encode.wait()
    except Exception as exc:
        log.error(u'echonest: encode failed: {0}'.format(str(exc)))
        # Only the temporary file is ours to delete; its directory is the
        # system temp directory and must not be pruned.
        util.remove(dest)
        return None
    if encode.returncode != 0:
        log.info(u'echonest: encoding {0} failed ({1}). Cleaning up...'
                 .format(util.displayable_path(source), encode.returncode))
        util.remove(dest)
        return None
    log.info(u'Finished encoding {0}'.format(util.displayable_path(source)))
    return dest
def analyze(self, item):
    """Upload the item to the EchoNest for analysis. May require to
    convert the item to a supported media format.

    Returns either a song object picked by ``_pick_song`` or, when no
    song is attached to the uploaded track, a plain dict holding the
    audio attributes read from the track. Returns None when the upload
    or the analysis fails.

    A temporary file created for the conversion is removed again once the
    upload has finished, whether it succeeded or not.
    """
    converted = None
    try:
        source = item.path
        if item.format.lower() not in ['wav', 'mp3', 'au', 'ogg', 'mp4',
                                       'm4a']:
            if not config['echonest']['convert'].get(bool):
                raise Exception(u'format {} not supported for upload'
                                .format(item.format))
            converted = self.convert(item)
            if converted is None:
                raise Exception(u'failed to convert file')
            source = converted
        log.info(u'echonest: uploading file, be patient')
        track = self._echofun(pyechonest.track.track_from_filename,
                              filename=source)
        if track is None:
            raise Exception(u'failed to upload file')
        # Sometimes we have a track but no song. I guess this happens for
        # new / unverified songs. We need to 'extract' the audio_summary
        # from the track object 'manually'. I don't know why the
        # pyechonest API handles tracks (merge audio_summary to __dict__)
        # and songs (keep audio_summary in an extra attribute)
        # differently.
        # Maybe a patch for pyechonest could help?
        from_track = {}
        for name in ('energy', 'liveness', 'speechiness', 'acousticness',
                     'danceability', 'valence', 'tempo', 'duration'):
            from_track[name] = getattr(track, name)
        try:
            ids = [track.song_id]
        except Exception:
            # No song attached to the track: use the track's own values.
            return from_track
        songs = self._echofun(pyechonest.song.profile,
                              ids=ids, track_ids=[track.id],
                              buckets=['audio_summary'])
        if songs is None:
            raise Exception(u'failed to retrieve info from upload')
        pick = self._pick_song(songs, item)
        if pick is None:
            return from_track
        return pick
    except Exception as exc:
        log.error(u'echonest: analysis failed: {0}'.format(str(exc)))
        return None
    finally:
        # Do not leave the converted copy behind in the temp directory.
        if converted is not None:
            util.remove(converted)
def identify(self, item):
    """Identify the item at the EchoNest through its fingerprint.

    Returns the candidate with the highest score, or None when no
    fingerprint is available, nothing was found or the request failed.
    """
    def score_of(candidate):
        return candidate.score

    try:
        fp = self.fingerprint(item)
        if fp is None:
            raise Exception(u'can not identify without a fingerprint')
        candidates = self._echofun(pyechonest.song.identify, code=fp)
        if not candidates:
            raise Exception(u'no songs found')
        return max(candidates, key=score_of)
    except Exception as exc:
        log.error(u'echonest: identification failed: {0}'.format(str(exc)))
        return None
def _pick_song(self, songs, item):
"""Helper method to pick the best matching song from a list of songs
returned by the EchoNest. Compares artist, title and duration. If
the artist and title match and the duration difference is <= 1.0
seconds, it's considered a match.
"""
pick = None
if songs:
min_dist = item.length
for song in songs:
if song.artist_name.lower() == item.artist.lower() \
and song.title.lower() == item.title.lower():
dist = abs(item.length - song.audio_summary['duration'])
if dist < min_dist:
min_dist = dist
pick = song
if min_dist > 2.5:
return None
return pick
def search(self, item):
    """Look the item up at the EchoNest by its artist and title.

    Returns the song chosen by ``_pick_song`` from the search results, or
    None when nothing matches or the request fails.
    """
    try:
        query = dict(title=item.title, results=100, artist=item.artist,
                     buckets=['id:musicbrainz', 'tracks'])
        candidates = self._echofun(pyechonest.song.search, **query)
        match = self._pick_song(candidates, item)
        if match is not None:
            return match
        raise Exception(u'no (matching) songs found')
    except Exception as exc:
        log.error(u'echonest: search failed: {0}'.format(str(exc)))
        return None
def profile(self, item):
    """Look the item up at the EchoNest by ID.

    Uses the ``echonest_id`` stored on the item when there is one;
    otherwise the song ID is resolved through the item's MusicBrainz
    track ID. Returns the song chosen by ``_pick_song``, or None when any
    step fails.
    """
    try:
        if item.get('echonest_id', None) is not None:
            # We already know the song ID from an earlier run.
            ids = item.get('echonest_id')
        else:
            if not item.mb_trackid:
                raise Exception(u'musicbrainz ID not available')
            mbid = 'musicbrainz:track:{0}'.format(item.mb_trackid)
            track = self._echofun(pyechonest.track.track_from_id,
                                  identifier=mbid)
            if not track:
                raise Exception(u'could not get track from ID')
            ids = track.song_id
        found = self._echofun(pyechonest.song.profile, ids=ids,
                              buckets=['id:musicbrainz', 'audio_summary'])
        if not found:
            raise Exception(u'could not get songs from track ID')
        return self._pick_song(found, item)
    except Exception as exc:
        log.debug(u'echonest: profile failed: {0}'.format(str(exc)))
        return None
def fetch_song(self, item):
    """Try all methods to get a matching song object from the EchoNest.

    The lookups are tried in this order: ``profile``, ``search``,
    ``identify`` (only when a ``codegen`` binary is configured) and
    ``analyze`` (only when ``upload`` is enabled). The first result that
    is not None is returned; it is either a pyechonest song or, from
    ``analyze``, a dict of attributes taken from a track. Returns None
    when every method fails.
    """
    methods = [self.profile, self.search]
    if config['echonest']['codegen'].get() is not None:
        methods.append(self.identify)
    if config['echonest']['upload'].get(bool):
        methods.append(self.analyze)
    for method in methods:
        name = method.__name__
        try:
            song = method(item)
            if song is None:
                continue
            if isinstance(song, pyechonest.song.Song):
                artist, title = song.artist_name, song.title
                duration = song.audio_summary['duration']
            else:  # it's our dict filled from a track object
                artist, title = item.artist, item.title
                duration = song['duration']
            log.debug(u'echonest: got song through {0}: {1} - {2} [{3}]'
                      .format(name, artist, title, duration))
            return song
        except Exception as exc:
            # Name the method that actually failed and move on to the
            # next one.
            log.debug(u'echonest: {0} failed: {1}'.format(name, str(exc)))
    return None
def apply_metadata(self, item):
"""Copy the metadata from the EchoNest to the item.

Reads the result cached in ``self._songs`` under ``item.path``. For a
pyechonest song the song ID is stored as ``echonest_id`` and the values
come from its ``audio_summary``; otherwise the cached object itself is
used as the mapping of values. Every value whose key has a non-None
entry in ATTRIBUTES is assigned to the mapped item field.
"""
# NOTE(review): indentation is lost in this view, so the nesting of the
# write/store statements and the pairing of the final ``else`` cannot be
# read off the text -- confirm against the real file before restructuring.
if item.path in self._songs:
# song can be a dict
if isinstance(self._songs[item.path], pyechonest.song.Song):
log.debug(u'echonest: metadata: echonest_id = {0}'
.format(self._songs[item.path].id))
item.echonest_id = self._songs[item.path].id
values = self._songs[item.path].audio_summary
else:
values = self._songs[item.path]
# Copy only the attributes that are mapped to an item field.
for k, v in values.iteritems():
if ATTRIBUTES.has_key(k) and ATTRIBUTES[k] is not None:
log.debug(u'echonest: metadata: {0} = {1}'
.format(ATTRIBUTES[k], v))
item[ATTRIBUTES[k]] = v
# Write to the file only when the importer's ``write`` option is on.
if config['import']['write'].get(bool):
log.info(u'echonest: writing metadata: {0}'
.format(util.displayable_path(item.path)))
item.write()
# Store only for items that have a library attached (``item._lib``).
if item._lib:
item.store()
# presumably the branch for "nothing cached for this path" -- verify
else:
log.warn(u'echonest: no metadata available')
def requires_update(self, item):
    """Tell whether the item lacks any of the fields we fetch from the
    EchoNest.

    Returns True as soon as one mapped field (non-None value in
    ATTRIBUTES) is None on the item; otherwise logs that no update is
    required and returns False.
    """
    for field in ATTRIBUTES.itervalues():
        if field is not None and item.get(field, None) is None:
            return True
    log.info(u'echonest: no update required')
    return False
def fetch_song_task(self, task, session):
    """Import listener: fetch a song for every item of the task and cache
    each hit in ``self._songs`` under the item's path.
    """
    if task.is_album:
        items = task.items
    else:
        items = [task.item]
    for item in items:
        song = self.fetch_song(item)
        if song is None:
            continue
        self._songs[item.path] = song
def apply_metadata_task(self, task, session):
    """Import listener: apply the fetched metadata to every item the task
    imported.
    """
    imported = task.imported_items()
    for imported_item in imported:
        self.apply_metadata(imported_item)
def commands(self):
# Builds the ``echonest`` subcommand and returns it in a one-element list.
cmd = ui.Subcommand('echonest',
help='Fetch metadata from the EchoNest')
cmd.parser.add_option('-f', '--force', dest='force',
action='store_true', default=False,
help='(re-)download information from the EchoNest')
# Command body: walks the items matching the query given on the command line.
def func(lib, opts, args):
# Make the command-line options (``force``) visible through self.config.
self.config.set_args(opts)
for item in lib.items(ui.decargs(args)):
log.info(u'echonest: {0} - {1} [{2}]'.format(item.artist,
item.title, item.length))
# Fetch only when forced or when the item is missing data.
if self.config['force'] or self.requires_update(item):
song = self.fetch_song(item)
if not song is None:
self._songs[item.path] = song
# NOTE(review): indentation is lost in this view; whether the next call runs
# only after a successful fetch or for every item cannot be told from here.
self.apply_metadata(item)
cmd.func = func
return [cmd]
def diff(item1, item2, attributes):
    """Return the distance between two items over ``attributes``.

    The distance is the sum of the absolute differences of the attribute
    values, each converted to float. An attribute that is missing (None)
    or not numeric on either item counts as a difference of 1.0 instead
    of aborting the comparison.
    """
    result = 0.0
    for attr in attributes:
        try:
            result += abs(
                float(item1.get(attr, None)) -
                float(item2.get(attr, None))
            )
        except (TypeError, ValueError):
            # TypeError: value is None; ValueError: value is a string that
            # does not parse as a number.
            result += 1.0
    return result
def similar(lib, src_item, threshold=0.15):
    """Print every library item whose distance to ``src_item`` (see
    ``diff``) is below ``threshold``, together with that distance.

    Only the acoustic attributes that are mapped to an item field in
    ATTRIBUTES take part in the comparison; ``src_item`` itself is
    skipped.
    """
    names = ['energy', 'danceability', 'valence', 'speechiness',
             'acousticness', 'liveness']
    attributes = [ATTRIBUTES[name] for name in names
                  if ATTRIBUTES[name] is not None]
    for other in lib.items():
        if other.path == src_item.path:
            continue
        distance = diff(other, src_item, attributes)
        if distance < threshold:
            print(u'{1:2.2f}: {0}'.format(other.path, distance))
class EchonestSimilarPlugin(plugins.BeetsPlugin):
    """Plugin providing the ``echosim`` command, which lists library items
    similar to the items matching a query.
    """

    def commands(self):
        """Return the ``echosim`` subcommand in a one-element list."""
        def show_similar(lib, opts, args):
            self.config.set_args(opts)
            query = ui.decargs(args)
            for item in lib.items(query):
                similar(lib, item)

        echosim = ui.Subcommand('echosim', help='show related files')
        echosim.func = show_similar
        return [echosim]
# eof

View file

@ -38,6 +38,14 @@ Little fixes:
* :doc:`/plugins/convert`: Fix an "Item has no library" error when using the
``auto`` config option.
New stuff:
* :doc:`/plugins/echonest`: A drop-in replacement for
:doc:`/plugins/echonest_tempo` that also fetches `Acoustic Attributes`_ from
`The Echo Nest`_.
.. _Acoustic Attributes: http://developer.echonest.com/acoustic-attributes.html
1.3.1 (October 12, 2013)
------------------------

82
docs/plugins/echonest.rst Normal file
View file

@ -0,0 +1,82 @@
Echonest Plugin
===============
The ``echonest`` plugin will automatically fetch and store the following audio
descriptors from the `EchoNest API`_. All except for ``tempo`` will be stored in
flexattrs and *not* in the audio file itself. ``tempo`` will be stored in
``bpm``:
- acousticness
- danceability
- energy
- liveness
- speechiness
- tempo
- valence
See `Acoustic Attributes`_ for a detailed description.
.. _EchoNest API: http://developer.echonest.com/
.. _Acoustic Attributes: http://developer.echonest.com/acoustic-attributes.html
Installing Dependencies
-----------------------
This plugin requires the pyechonest library in order to talk to the EchoNest
API. At least version 8.0.1 is required.
There are packages for most major Linux distributions. Alternatively, you can
download the library from the Echo Nest or install it with `pip`_,
like so::
$ pip install pyechonest
To transcode music, this plugin requires the `ffmpeg`_ command-line tool.
To get fingerprinting working, you'll need to install the `ENMFP`_ codegen
command-line tool. The ``ENMFP`` codegen binary distribution has executables
for all major OSs and architectures. Please note that fingerprinting is not
required if ``upload`` and ``convert`` are enabled, which is the default.
.. _pip: http://pip.openplans.org/
.. _FFmpeg: http://ffmpeg.org
.. _ENMFP: http://static.echonest.com/ENMFP_codegen.zip
Configuring
-----------
Beets includes its own Echo Nest API key, but you can `apply for your own`_ for
free from the Echo Nest. To specify your own API key, add the key to your
:doc:`configuration file </reference/config>` as the value for ``apikey`` under
the key ``echonest`` like so::
echonest:
apikey: YOUR_API_KEY
In addition, the ``auto`` config option lets you disable automatic metadata
fetching during import. To do so, add this to your ``config.yaml``::
echonest:
auto: no
The ``echonest`` plugin tries to upload files to the Echo Nest server if they
cannot be identified by other means. If you don't want that, disable the
``upload`` config option like so::
echonest:
upload: no
The Echo Nest server only supports a limited range of file formats. The
plugin automatically converts unsupported files to ``ogg``. If you don't
want that, disable the ``convert`` config option like so::
echonest:
convert: no
If the ``ENMFP`` binary is not in your path, you'll need to add a key called
``codegen`` under the ``echonest`` section like so::
echonest:
codegen: PATH/TO/YOUR/CODEGEN/BINARY
.. _apply for your own: http://developer.echonest.com/account/register

View file

@ -1,10 +1,14 @@
EchoNest Tempo Plugin
=====================
*Note*: A new plugin, :doc:`echonest`, is available that, in addition to
``tempo``, also fetches `Acoustic Attributes`_ from the EchoNest.
The ``echonest_tempo`` plugin fetches and stores a track's tempo (the "bpm"
field) from the `EchoNest API`_.
.. _EchoNest API: http://developer.echonest.com/
.. _Acoustic Attributes: http://developer.echonest.com/acoustic-attributes.html
Installing Dependencies
-----------------------

View file

@ -22,6 +22,7 @@ by typing ``beet version``.
chroma
lyrics
echonest_tempo
echonest
bpd
mpdupdate
fetchart
@ -68,6 +69,8 @@ Metadata
* :doc:`lyrics`: Automatically fetch song lyrics.
* :doc:`echonest_tempo`: Automatically fetch song tempos (bpm).
* :doc:`echonest`: Automatically fetch metadata from EchoNest (energy,
danceability, ...).
* :doc:`lastgenre`: Fetch genres based on Last.fm tags.
* :doc:`mbsync`: Fetch updated metadata from MusicBrainz
* :doc:`fetchart`: Fetch album cover art from various sources.