lastid plugin can now generate its own candidates

That is, in addition to contributing to the distance function, the plugin can also influence the initial search into MusicBrainz. This will allow albums to be tagged even when they're missing metadata entirely. This change required a significant bit of refactoring to the "mb" module -- it's much nicer now.
2026-03-26 23:33:43 +01:00 · 2010-09-16 12:27:04 -07:00 · 2010-09-16 12:27:04 -07:00 · 8b02dd176c
commit 8b02dd176c
parent 22b4dd44f9
4 changed files with 101 additions and 42 deletions
--- a/beets/autotag/init.py
+++ b/beets/autotag/init.py
@ -421,28 +421,35 @@ def tag_album(items, search_artist=None, search_album=None):
    if not search_artist or not search_album:
        raise InsufficientMetadataError()
    candidates = mb.match_album(search_artist, search_album, len(items))
-    candidates = candidates[:MAX_CANDIDATES]
+    candidates = list(candidates)[:MAX_CANDIDATES]

    # Get candidates from plugins.
-    # candidates += plugins.candidates(items)
+    candidates.extend(plugins.candidates(items))
    
    # Get the distance to each candidate.
+    log.debug('Evaluating %i candidates:' % len(candidates))
    for info in candidates:
+        log.debug('Candidate: %s - %s' % (info['artist'], info['album']))
+
        # Don't duplicate.
        if info['album_id'] in out_tuples:
+            log.debug('Duplicate.')
            continue

        # Make sure the album has the correct number of tracks.
        if len(items) != len(info['tracks']):
+            log.debug('Track count mismatch.')
            continue
    
        # Put items in order.
        ordered = order_items(items, info['tracks'])
        if not ordered:
+            log.debug('Not orderable.')
            continue
    
        # Get the change distance.
        dist = distance(ordered, info)
+        log.debug('Success. Distance: %f' % dist)

        out_tuples[info['album_id']] = dist, ordered, info
    
--- a/beets/autotag/mb.py
+++ b/beets/autotag/mb.py
@ -27,6 +27,8 @@ import datetime
 import musicbrainz2.webservice as mbws
 from threading import Lock

+SEARCH_LIMIT = 10
+
 class ServerBusyError(Exception): pass

 # MusicBrainz requires that a client does not query the server more
@ -66,6 +68,30 @@ def _query_wrap(fun, *args, **kwargs):
        raise ServerBusyError()
    # FIXME exponential backoff?

+def get_releases(**params):
+    """Runs a MusicBrainz release search and yields release dicts
+    (complete with tracks).
+
+    `params` are passed straight through as keyword arguments to
+    `mbws.ReleaseFilter`. For each search result, the tracks are
+    fetched with a follow-up `release_info` lookup and the release is
+    converted with `release_dict`. Because this is a generator, no
+    query is issued until the caller starts iterating.
+    """
+    filt = mbws.ReleaseFilter(**params)
+    results = _query_wrap(mbws.Query().getReleases, filter=filt)
+
+    for result in results:
+        release = result.release
+        info = release_info(release.id)
+        if info is None:
+            # release_info returns None when the release is not found;
+            # unpacking that would raise a TypeError, so skip this
+            # result instead of aborting the whole search.
+            continue
+        tracks, _ = info
+        yield release_dict(release, tracks)
+
+def release_info(release_id):
+    """Looks up the release with the given MusicBrainz ID, asking for
+    its tracks and release group. Returns a pair consisting of the
+    release's track list and its release group ID, or None when the
+    lookup yields no release.
+    """
+    includes = mbws.ReleaseIncludes(tracks=True, releaseGroup=True)
+    found = _query_wrap(mbws.Query().getReleaseById, release_id, includes)
+    if not found:
+        return None
+    return found.getTracks(), found.getReleaseGroup().getId()
+
 def _lucene_escape(text):
    """Escapes a string so it may be used verbatim in a Lucene query
    string.
@ -73,9 +99,8 @@ def _lucene_escape(text):
    # Regex stolen from MusicBrainz Picard.
    return re.sub(r'([+\-&|!(){}\[\]\^"~*?:\\])', r'\\\1', text)

-# Workings of this function more or less stolen from Picard.
-def find_releases(criteria, limit=25):
-    """Get a list of `ReleaseResult` objects from the MusicBrainz
+def find_releases(criteria, limit=SEARCH_LIMIT):
+    """Get a list of release dictionaries from the MusicBrainz
    database that match `criteria`. The latter is a dictionary whose
    keys are MusicBrainz field names and whose values are search terms
    for those fields.
@ -93,8 +118,7 @@ def find_releases(criteria, limit=25):
    query = u' '.join(query_parts)
    
    # Build the filter and send the query.
-    filt = mbws.ReleaseFilter(limit=limit, query=query)
-    return _query_wrap(mbws.Query().getReleases, filter=filt)
+    return get_releases(limit=limit, query=query)

 def release_dict(release, tracks=None):
    """Takes a MusicBrainz `Release` object and returns a dictionary
@ -138,18 +162,6 @@ def release_dict(release, tracks=None):

    return out

-def release_info(release_id):
-    """Given a MusicBrainz release ID, fetch a list of tracks on the
-    release and the release group ID. If the release is not found,
-    returns None.
-    """
-    inc = mbws.ReleaseIncludes(tracks=True, releaseGroup=True)
-    release = _query_wrap(mbws.Query().getReleaseById, release_id, inc)
-    if release:
-        return release.getTracks(), release.getReleaseGroup().getId()
-    else:
-        return None
-
 def match_album(artist, album, tracks=None):
    """Searches for a single album ("release" in MusicBrainz parlance)
    and returns an iterator over dictionaries of information (as
@ -164,12 +176,7 @@ def match_album(artist, album, tracks=None):
        criteria['tracks'] = str(tracks)

    # Search for the release.
-    results = find_releases(criteria, 10)
-
-    for result in results:
-        release = result.release
-        tracks, _ = release_info(release.id)
-        yield release_dict(release, tracks)
+    return find_releases(criteria)

 def album_for_id(albumid):
    """Fetches an album by its MusicBrainz ID and returns an
--- a/beets/plugins.py
+++ b/beets/plugins.py
@ -51,6 +51,12 @@ class BeetsPlugin(object):
        """
        return 0.0, 0.0

+    def candidates(self, items):
+        """Hook for proposing MusicBrainz info dictionaries that may
+        match the album made up of `items`. Subclasses override this;
+        the default implementation proposes nothing.
+        """
+        return tuple()
+
    listeners = None
    @classmethod
    def listen(cls, event):
@ -143,6 +149,14 @@ def album_distance(items, info):
        dist_max += dm
    return dist, dist_max

+def candidates(items):
+    """Collects the MusicBrainz candidates that every plugin proposes
+    for the album made up of `items` and returns them as one flat
+    list, in plugin order.
+    """
+    return [cand
+            for plugin in find_plugins()
+            for cand in plugin.candidates(items)]
+

 # Event dispatch.

--- a/beetsplug/lastid.py
+++ b/beetsplug/lastid.py
@ -19,6 +19,7 @@ Requires the pylastfp library.
 from __future__ import with_statement
 from beets.plugins import BeetsPlugin
 from beets import autotag
+from beets.autotag import mb
 import lastfp
 import logging

@ -53,6 +54,27 @@ def match(path, metadata=None):
    _match_cache[path] = match
    return match

+def get_cur_artist(items):
+    """Determines the artist of an album from fingerprint data.
+
+    Each item is fingerprinted via `match`; the artist names and the
+    non-empty artist MBIDs that come back are then put to a plurality
+    vote. Returns an `(artist, artist_id)` pair holding the winners.
+    """
+    names, mbids = [], []
+    for item in items:
+        fingerprint = match(item.path)
+        if not fingerprint:
+            # No fingerprint data for this track; it gets no vote.
+            continue
+        names.append(fingerprint['artist'])
+        mbid = fingerprint['artist_mbid']
+        if mbid:
+            mbids.append(mbid)
+
+    # The most common name and the most common ID win independently.
+    return autotag._plurality(names), autotag._plurality(mbids)
+
 class LastIdPlugin(BeetsPlugin):
    def track_distance(self, item, info):
        last_data = match(item.path)
@ -70,31 +92,19 @@ class LastIdPlugin(BeetsPlugin):
        
        # MusicBrainz track ID.
        if last_data['track_mbid']:
-            log.debug('Last track ID match: %s/%s' %
-                      (last_data['track_mbid'], track_data['id']))
+            # log.debug('Last track ID match: %s/%s' %
+            #           (last_data['track_mbid'], track_data['id']))
            if last_data['track_mbid'] != track_data['id']:
                dist += autotag.TRACK_ID_WEIGHT
            dist_max += autotag.TRACK_ID_WEIGHT

-        log.debug('Last data: %s; distance: %f' %
-                  (str(last_data), dist/dist_max))
+        # log.debug('Last data: %s; distance: %f' %
+        #           (str(last_data), dist/dist_max))

        return dist * DISTANCE_SCALE, dist_max * DISTANCE_SCALE

    def album_distance(self, items, info):
-        # Get "fingerprinted" artists for each track.
-        artists = []
-        artist_ids = []
-        for item in items:
-            last_data = match(item.path)
-            if last_data:
-                artists.append(last_data['artist'])
-                if last_data['artist_mbid']:
-                    artist_ids.append(last_data['artist_mbid'])
-
-        # Vote on the most popular artist.
-        last_artist = autotag._plurality(artists)
-        last_artist_id = autotag._plurality(artist_ids)
+        last_artist, last_artist_id = get_cur_artist(items)

        # Compare artist to MusicBrainz metadata.
        dist, dist_max = 0.0, 0.0
@ -107,3 +117,24 @@ class LastIdPlugin(BeetsPlugin):

        #fixme: artist MBID currently ignored (as in vanilla tagger)
        return dist, dist_max
+
+    def candidates(self, items):
+        """Proposes MusicBrainz candidates for the album made up of
+        `items`, searching by the artist that fingerprinting reports
+        most often for its tracks and by the number of tracks.
+
+        Returns a list of at most `autotag.MAX_CANDIDATES` release
+        dictionaries from `mb.get_releases`. The list is empty when
+        fingerprinting yields no artist at all.
+        """
+        last_artist, last_artist_id = get_cur_artist(items)
+
+        # Without any artist information the search would be
+        # constrained by track count alone, which is not a useful
+        # query, so propose nothing.
+        if not last_artist_id and not last_artist:
+            return []
+
+        # Build the search criteria. Use the artist ID if we have one;
+        # otherwise use the artist name. Unfortunately, Last.fm doesn't
+        # give us album information.
+        criteria = {'trackCount': len(items)}
+        if last_artist_id:
+            criteria['artistId'] = last_artist_id
+        else:
+            # The name returned by get_cur_artist is bound to
+            # `last_artist`; no `last_artist_name` exists in this scope.
+            criteria['artistName'] = last_artist
+
+        # Perform the search.
+        cands = mb.get_releases(**criteria)
+        cands = list(cands)[:autotag.MAX_CANDIDATES]
+
+        log.debug('Matched last candidates: %s' %
+                  ', '.join([cand['album'] for cand in cands]))
+
+        return cands