chroma plugin (for acoustid fingerprinting) (#152)

--HG-- rename : docs/plugins/lastid.rst => docs/plugins/chroma.rst
2026-01-29 19:43:20 +01:00 · 2011-11-12 15:33:00 -08:00 · 2011-11-12 15:33:00 -08:00 · f2bb220f5a
commit f2bb220f5a
parent 1085d14e9e
5 changed files with 219 additions and 12 deletions
--- a/beetsplug/chroma.py
+++ b/beetsplug/chroma.py
@ -0,0 +1,125 @@
+# This file is part of beets.
+# Copyright 2011, Adrian Sampson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+# 
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Adds Chromaprint/Acoustid acoustic fingerprinting support to the
+autotagger. Requires the pyacoustid library.
+"""
+from __future__ import with_statement
+from beets import plugins
+from beets.autotag import hooks
+import acoustid
+import logging
+from collections import defaultdict
+
+# API key passed to acoustid.match() with every lookup.
+API_KEY = '1vOwZtEn'
+# Minimum score the first Acoustid result must reach to be used.
+SCORE_THRESH = 0.5
+# Distance reported by track_distance when the recording IDs differ; also
+# reported as the second element of its return value.
+TRACK_ID_WEIGHT = 10.0
+COMMON_REL_THRESH = 0.6 # How many tracks must have an album in common?
+
+# Shared 'beets' logger; all messages in this module are emitted at debug level.
+log = logging.getLogger('beets')
+
+class _cached(object):
+    """Memoizing decorator. Wraps a function and remembers the result
+    of each call, keyed on the positional and keyword arguments, so a
+    repeated call with the same arguments returns the stored result
+    without invoking the function again. All arguments must be
+    hashable.
+    """
+    def __init__(self, func):
+        self.cache = {}
+        self.func = func
+
+    def __call__(self, *args, **kwargs):
+        # Keyword arguments are sorted so that the order in which they
+        # were passed does not affect the key.
+        key = (args, tuple(sorted(kwargs.iteritems())))
+        if key not in self.cache:
+            self.cache[key] = self.func(*args, **kwargs)
+        return self.cache[key]
+
+@_cached
+def acoustid_match(path, metadata=None):
+    """Gets metadata for a file from Acoustid. Returns a recording ID
+    and a list of release IDs if a match is found; otherwise, returns
+    None.
+
+    `path` is the audio file to fingerprint and look up. `metadata` is
+    accepted but not used by the lookup. Only the first result and its
+    first recording are considered. The release list is empty when
+    that recording carries no release information. Because the
+    function is memoized, a None result is remembered as well and the
+    lookup is not retried for the same arguments.
+    """
+    try:
+        res = acoustid.match(API_KEY, path, meta='recordings releases',
+                             parse=False)
+    except acoustid.AcoustidError, exc:
+        log.debug('fingerprint matching %s failed: %s' %
+                  (repr(path), str(exc)))
+        return None
+    log.debug('fingerprinted: %s' % repr(path))
+
+    # Ensure the response is usable and parse it.
+    if res['status'] != 'ok' or not res.get('results'):
+        return None
+    result = res['results'][0]
+    if result['score'] < SCORE_THRESH or not result.get('recordings'):
+        return None
+    recording = result['recordings'][0]
+    recording_id = recording['id']
+    # A recording is not guaranteed to include a 'releases' list; treat
+    # a missing one as "no releases" instead of failing with KeyError.
+    release_ids = [rel['id'] for rel in recording.get('releases', [])]
+
+    return recording_id, release_ids
+
+def _all_releases(items):
+    """Given an iterable of Items, determines (according to Acoustid)
+    which releases the items have in common. Generates release IDs.
+
+    A release ID is generated when the fraction of items whose match
+    lists that release is strictly greater than COMMON_REL_THRESH.
+    Items with no Acoustid match still count toward the total.
+    """
+    # Materialize the input: it is both iterated and counted, so a
+    # generator or other one-shot iterable would otherwise break len().
+    items = list(items)
+
+    # Count the number of "hits" for each release.
+    relcounts = defaultdict(int)
+    for item in items:
+        aidata = acoustid_match(item.path)
+        if not aidata:
+            continue
+        _, release_ids = aidata
+        for release_id in release_ids:
+            relcounts[release_id] += 1
+
+    # relcounts is empty whenever items is, so total is never zero in
+    # the division below.
+    total = len(items)
+    for release_id, count in relcounts.iteritems():
+        if float(count) / total > COMMON_REL_THRESH:
+            yield release_id
+
+class AcoustidPlugin(plugins.BeetsPlugin):
+    """Plugin that feeds Acoustid fingerprint matches into the
+    autotagger: it contributes a track distance term and proposes
+    album and item candidates looked up by the matched IDs.
+    """
+    def track_distance(self, item, info):
+        """Compares the Acoustid recording ID for `item` with
+        `info.track_id`. Returns a pair of floats: (0.0, 0.0) when the
+        file has no Acoustid match, (0.0, TRACK_ID_WEIGHT) when the IDs
+        agree, and (TRACK_ID_WEIGHT, TRACK_ID_WEIGHT) when they differ.
+        """
+        # NOTE(review): the pair is presumably (distance, maximum
+        # distance) as consumed by the autotagger -- confirm against
+        # the plugin API.
+        aidata = acoustid_match(item.path)
+        if not aidata:
+            # Match failed.
+            return 0.0, 0.0
+
+        recording_id, _ = aidata
+        if info.track_id == recording_id:
+            dist = 0.0
+        else:
+            dist = TRACK_ID_WEIGHT
+        return dist, TRACK_ID_WEIGHT
+
+    def candidates(self, items):
+        """Returns a list of album candidates for `items`: one for each
+        release the items have in common (see `_all_releases`) that
+        `hooks._album_for_id` can resolve. May be empty.
+        """
+        albums = []
+        for relid in _all_releases(items):
+            album = hooks._album_for_id(relid)
+            if album:
+                albums.append(album)
+
+        log.debug('acoustid album candidates: %i' % len(albums))
+        return albums
+
+    def item_candidates(self, item):
+        """Returns a list holding the track that `hooks._track_for_id`
+        resolves for the item's Acoustid recording ID, or an empty list
+        when there is no match or the track cannot be resolved.
+        """
+        aidata = acoustid_match(item.path)
+        if not aidata:
+            return []
+        recording_id, _ = aidata
+        track = hooks._track_for_id(recording_id)
+        if track:
+            log.debug('found acoustid item candidate')
+            return [track]
+        else:
+            log.debug('no acoustid item candidate found')
+            # Always hand back a list so callers can iterate or extend
+            # with the result without checking for None.
+            return []
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -8,6 +8,10 @@ Changelog
  (NGS) service via `python-musicbrainz-ngs`_. The bindings are included with
  this version of beets, but a future version will make them an external
  dependency.
+* The new :doc:`/plugins/chroma` uses the `Acoustid`_ open-source acoustic
+  fingerprinting service. This replaces the old ``lastid`` plugin, which used
+  Last.fm fingerprinting and is now deprecated. Fingerprinting with this library
+  should be faster and more reliable.
 * The new :doc:`/plugins/lastgenre` automatically assigns genres to imported
  albums and items based on Last.fm tags and an internal whitelist. (Thanks to
  `KraYmer`_.)
@ -19,6 +23,7 @@ Changelog
 .. _KraYmer: https://github.com/KraYmer
 .. _Next Generation Schema: http://musicbrainz.org/doc/XML_Web_Service/Version_2
 .. _python-musicbrainz-ngs: https://github.com/alastair/python-musicbrainz-ngs
+.. _acoustid: http://acoustid.org/

 1.0b10 (September 22, 2011)
 ---------------------------
--- a/docs/guides/tagger.rst
+++ b/docs/guides/tagger.rst
@ -50,10 +50,10 @@ all of these limitations.
  actually not a hard-and-fast rule: using the *E* option described below, it's
  entirely possible to search for a release to tag a given album.) This is
  because beets by default infers tags based on existing metadata. The
-  :doc:`LastID plugin </plugins/lastid>` extends the autotagger to use acoustic
-  fingerprinting to find information for arbitrary audio. Install that plugin if
-  you're willing to spend a little more CPU power to get tags for unidentified
-  albums.
+  :doc:`Acoustid plugin </plugins/chroma>` extends the autotagger to use
+  acoustic fingerprinting to find information for arbitrary audio. Install that
+  plugin if you're willing to spend a little more CPU power to get tags for
+  unidentified albums.

 * There isn't currently a good solution for multi-disc albums. Currently, every
  disc is treated as a separate release, so you'll see "69 Love Songs (disc 1)",
@ -199,14 +199,12 @@ You may have noticed by now that beets' autotagger works pretty well for most
 files, but can get confused when files don't have any metadata (or have wildly
 incorrect metadata). In this case, you need *acoustic fingerprinting*, a
 technology that identifies songs from the audio itself. With fingerprinting,
-beets can autotag files that have very bad or missing tags. The :doc:`"lastid"
-plugin </plugins/lastid>`, distributed with beets, uses `Last.fm's open-source
-fingerprinting implementation`_, but it's disabled by default. That's because
-it's sort of tricky to install. See the :doc:`/plugins/lastid` page for a guide
+beets can autotag files that have very bad or missing tags. The :doc:`"chroma"
+plugin </plugins/chroma>`, distributed with beets, uses the `Chromaprint`_ open-source fingerprinting technology, but it's disabled by default. That's because
+it's sort of tricky to install. See the :doc:`/plugins/chroma` page for a guide
 to getting it set up.

-.. _Last.fm's open-source fingerprinting implementation:
-   http://github.com/lastfm/Fingerprinter 
+.. _Chromaprint: http://acoustid.org/chromaprint

 Missing Albums?
 ---------------
--- a/docs/plugins/chroma.rst
+++ b/docs/plugins/chroma.rst
@ -0,0 +1,74 @@
+Chromaprint/Acoustid Plugin
+===========================
+
+Acoustic fingerprinting is a technique for identifying songs from the way they
+"sound" rather than from their existing metadata. That means that beets'
+autotagger
+can theoretically use fingerprinting to tag files that don't have any ID3
+information at all (or have completely incorrect data).  This plugin uses an
+open-source fingerprinting technology called `Chromaprint`_ and its associated
+Web service, called `Acoustid`_.
+
+.. _Chromaprint: http://acoustid.org/chromaprint
+.. _acoustid: http://acoustid.org/
+
+Turning on fingerprinting can increase the accuracy of the
+autotagger---especially on files with very poor metadata---but it comes at a
+cost. First, it can be trickier to set up than beets itself (you need to set up
+the native fingerprinting library, whereas all of the beets core is written in
+pure Python).  Also, fingerprinting takes significantly more CPU and memory than
+ordinary tagging---which means that imports will go substantially slower.
+
+If you're willing to pay the performance cost for fingerprinting, read on!
+
+Installing Dependencies
+-----------------------
+
+To get fingerprinting working, you'll need to install three things: the
+`Chromaprint`_ library, an audio decoder, and the `pyacoustid`_ Python library.
+
+First, you will need to install the `Chromaprint`_ dynamic library. The
+Chromaprint site has links to packages for major Linux distributions. On Mac OS
+X and Windows, you will need to build the library yourself; the site also has
+good directions for that.
+
+Next, you will need a mechanism for decoding audio files supported by the
+`audioread`_ library. Mac OS X has a number of decoders already built into
+`Core Audio`_; on Linux, you can install `GStreamer for Python`_, `FFmpeg`_, or
+`MAD`_ and `pymad`_. (Let me know if you have a good source for installing a
+decoder on Windows.) How you install these will depend on your distribution.
+For example:
+
+.. _audioread: https://github.com/sampsyo/audioread
+.. _pyacoustid: http://github.com/sampsyo/pyacoustid
+.. _GStreamer for Python:
+    http://gstreamer.freedesktop.org/modules/gst-python.html
+.. _FFmpeg: http://ffmpeg.org/
+.. _MAD: http://www.underbit.com/products/mad/
+.. _pymad: http://spacepants.org/src/pymad/
+.. _Core Audio: http://developer.apple.com/technologies/mac/audio-and-video.html
+
+* On Ubuntu, run ``apt-get install python-gst0.10-dev``.
+
+* On Arch Linux, you want ``pacman -S gstreamer0.10-python``. 
+
+To decode audio formats (MP3, FLAC, etc.) with GStreamer, you'll need the
+standard set of GStreamer plugins. For example, on Ubuntu, install the packages
+``gstreamer0.10-plugins-good``, ``gstreamer0.10-plugins-bad``, and
+``gstreamer0.10-plugins-ugly``.
+
+Then, install pyacoustid itself. You can do this using `pip`_, like so::
+
+    $ pip install pyacoustid
+
+.. _pip: http://pip.openplans.org/
+
+Using
+-----
+
+Once you have all the dependencies sorted out, you can enable fingerprinting by
+editing your :doc:`/reference/config`. Put ``chroma`` on your ``plugins:``
+line. Your config file should contain something like this::
+
+    [beets]
+    plugins: chroma
+
+With that, beets will use fingerprinting the next time you run ``beet import``.
--- a/docs/plugins/index.rst
+++ b/docs/plugins/index.rst
@ -36,13 +36,18 @@ disabled by default, but you can turn them on as described above:
 .. toctree::
   :maxdepth: 1

-   lastid
+   chroma
   bpd
   mpdupdate
   embedart
   web
   lastgenre

+.. toctree::
+   :hidden:
+
+   lastid
+
 .. _other-plugins:

 Other Plugins
@ -208,7 +213,7 @@ Extend the Autotagger
 ^^^^^^^^^^^^^^^^^^^^^

 Plugins in 1.0b5 can also enhance the functionality of the autotagger. For a
-comprehensive example, try looking at the ``lastid`` plugin, which is included
+comprehensive example, try looking at the ``chroma`` plugin, which is included
 with beets.

 A plugin can extend three parts of the autotagger's process: the track distance