From 8b02dd176cba733338be4e47c79e2595a09fa818 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Thu, 16 Sep 2010 12:27:04 -0700 Subject: [PATCH] lastid plugin can now generate its own candidates That is, in addition to contributing to the distance function, the plugin can also influence the initial search into MusicBrainz. This will allow albums to be tagged even when they're missing metadata entirely. This change required a significant bit of refactoring to the "mb" module -- it's much nicer now. --- beets/autotag/__init__.py | 11 +++++-- beets/autotag/mb.py | 53 +++++++++++++++++-------------- beets/plugins.py | 14 +++++++++ beetsplug/lastid.py | 65 +++++++++++++++++++++++++++++---------- 4 files changed, 101 insertions(+), 42 deletions(-) diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py index 1922b1fa5..0b25b97f5 100644 --- a/beets/autotag/__init__.py +++ b/beets/autotag/__init__.py @@ -421,28 +421,35 @@ def tag_album(items, search_artist=None, search_album=None): if not search_artist or not search_album: raise InsufficientMetadataError() candidates = mb.match_album(search_artist, search_album, len(items)) - candidates = candidates[:MAX_CANDIDATES] + candidates = list(candidates)[:MAX_CANDIDATES] # Get candidates from plugins. - # candidates += plugins.candidates(items) + candidates.extend(plugins.candidates(items)) # Get the distance to each candidate. + log.debug('Evaluating %i candidates:' % len(candidates)) for info in candidates: + log.debug('Candidate: %s - %s' % (info['artist'], info['album'])) + # Don't duplicate. if info['album_id'] in out_tuples: + log.debug('Duplicate.') continue # Make sure the album has the correct number of tracks. if len(items) != len(info['tracks']): + log.debug('Track count mismatch.') continue # Put items in order. ordered = order_items(items, info['tracks']) if not ordered: + log.debug('Not orderable.') continue # Get the change distance. dist = distance(ordered, info) + log.debug('Success. 
Distance: %f' % dist) out_tuples[info['album_id']] = dist, ordered, info diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 8666d65ff..c8fd61992 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -27,6 +27,8 @@ import datetime import musicbrainz2.webservice as mbws from threading import Lock +SEARCH_LIMIT = 10 + class ServerBusyError(Exception): pass # MusicBrainz requires that a client does not query the server more @@ -66,6 +68,30 @@ def _query_wrap(fun, *args, **kwargs): raise ServerBusyError() # FIXME exponential backoff? +def get_releases(**params): + """Given a list of parameters to ReleaseFilter, executes the + query and yields release dicts (complete with tracks). + """ + filt = mbws.ReleaseFilter(**params) + results = _query_wrap(mbws.Query().getReleases, filter=filt) + + for result in results: + release = result.release + tracks, _ = release_info(release.id) + yield release_dict(release, tracks) + +def release_info(release_id): + """Given a MusicBrainz release ID, fetch a list of tracks on the + release and the release group ID. If the release is not found, + returns None. + """ + inc = mbws.ReleaseIncludes(tracks=True, releaseGroup=True) + release = _query_wrap(mbws.Query().getReleaseById, release_id, inc) + if release: + return release.getTracks(), release.getReleaseGroup().getId() + else: + return None + def _lucene_escape(text): """Escapes a string so it may be used verbatim in a Lucene query string. @@ -73,9 +99,8 @@ def _lucene_escape(text): # Regex stolen from MusicBrainz Picard. return re.sub(r'([+\-&|!(){}\[\]\^"~*?:\\])', r'\\\1', text) -# Workings of this function more or less stolen from Picard. -def find_releases(criteria, limit=25): - """Get a list of `ReleaseResult` objects from the MusicBrainz +def find_releases(criteria, limit=SEARCH_LIMIT): + """Get a list of release dictionaries from the MusicBrainz database that match `criteria`. 
The latter is a dictionary whose keys are MusicBrainz field names and whose values are search terms for those fields. @@ -93,8 +118,7 @@ def find_releases(criteria, limit=25): query = u' '.join(query_parts) # Build the filter and send the query. - filt = mbws.ReleaseFilter(limit=limit, query=query) - return _query_wrap(mbws.Query().getReleases, filter=filt) + return get_releases(limit=limit, query=query) def release_dict(release, tracks=None): """Takes a MusicBrainz `Release` object and returns a dictionary @@ -138,18 +162,6 @@ def release_dict(release, tracks=None): return out -def release_info(release_id): - """Given a MusicBrainz release ID, fetch a list of tracks on the - release and the release group ID. If the release is not found, - returns None. - """ - inc = mbws.ReleaseIncludes(tracks=True, releaseGroup=True) - release = _query_wrap(mbws.Query().getReleaseById, release_id, inc) - if release: - return release.getTracks(), release.getReleaseGroup().getId() - else: - return None - def match_album(artist, album, tracks=None): """Searches for a single album ("release" in MusicBrainz parlance) and returns an iterator over dictionaries of information (as @@ -164,12 +176,7 @@ def match_album(artist, album, tracks=None): criteria['tracks'] = str(tracks) # Search for the release. - results = find_releases(criteria, 10) - - for result in results: - release = result.release - tracks, _ = release_info(release.id) - yield release_dict(release, tracks) + return find_releases(criteria) def album_for_id(albumid): """Fetches an album by its MusicBrainz ID and returns an diff --git a/beets/plugins.py b/beets/plugins.py index b6c0c6994..4663a49ea 100644 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -51,6 +51,12 @@ class BeetsPlugin(object): """ return 0.0, 0.0 + def candidates(self, items): + """Should return a sequence of MusicBrainz info dictionaries + that match the album whose items are provided. 
+ """ + return () + listeners = None @classmethod def listen(cls, event): @@ -143,6 +149,14 @@ def album_distance(items, info): dist_max += dm return dist, dist_max +def candidates(items): + """Gets MusicBrainz candidates for an album from each plugin. + """ + out = [] + for plugin in find_plugins(): + out.extend(plugin.candidates(items)) + return out + # Event dispatch. diff --git a/beetsplug/lastid.py b/beetsplug/lastid.py index 64625e733..c696a394b 100644 --- a/beetsplug/lastid.py +++ b/beetsplug/lastid.py @@ -19,6 +19,7 @@ Requires the pylastfp library. from __future__ import with_statement from beets.plugins import BeetsPlugin from beets import autotag +from beets.autotag import mb import lastfp import logging @@ -53,6 +54,27 @@ def match(path, metadata=None): _match_cache[path] = match return match +def get_cur_artist(items): + """Given a sequence of items, returns the current artist and + artist ID that is most popular among the fingerprinted metadata + for the tracks. + """ + # Get "fingerprinted" artists for each track. + artists = [] + artist_ids = [] + for item in items: + last_data = match(item.path) + if last_data: + artists.append(last_data['artist']) + if last_data['artist_mbid']: + artist_ids.append(last_data['artist_mbid']) + + # Vote on the most popular artist. + artist = autotag._plurality(artists) + artist_id = autotag._plurality(artist_ids) + + return artist, artist_id + class LastIdPlugin(BeetsPlugin): def track_distance(self, item, info): last_data = match(item.path) @@ -70,31 +92,19 @@ class LastIdPlugin(BeetsPlugin): # MusicBrainz track ID. 
if last_data['track_mbid']: - log.debug('Last track ID match: %s/%s' % - (last_data['track_mbid'], track_data['id'])) + # log.debug('Last track ID match: %s/%s' % + # (last_data['track_mbid'], track_data['id'])) if last_data['track_mbid'] != track_data['id']: dist += autotag.TRACK_ID_WEIGHT dist_max += autotag.TRACK_ID_WEIGHT - log.debug('Last data: %s; distance: %f' % - (str(last_data), dist/dist_max)) + # log.debug('Last data: %s; distance: %f' % + # (str(last_data), dist/dist_max)) return dist * DISTANCE_SCALE, dist_max * DISTANCE_SCALE def album_distance(self, items, info): - # Get "fingerprinted" artists for each track. - artists = [] - artist_ids = [] - for item in items: - last_data = match(item.path) - if last_data: - artists.append(last_data['artist']) - if last_data['artist_mbid']: - artist_ids.append(last_data['artist_mbid']) - - # Vote on the most popular artist. - last_artist = autotag._plurality(artists) - last_artist_id = autotag._plurality(artist_ids) + last_artist, last_artist_id = get_cur_artist(items) # Compare artist to MusicBrainz metadata. dist, dist_max = 0.0, 0.0 @@ -107,3 +117,24 @@ class LastIdPlugin(BeetsPlugin): #fixme: artist MBID currently ignored (as in vanilla tagger) return dist, dist_max + + def candidates(self, items): + last_artist, last_artist_id = get_cur_artist(items) + + # Build the search criteria. Use the artist ID if we have one; + # otherwise use the artist name. Unfortunately, Last.fm doesn't + # give us album information. + criteria = {'trackCount': len(items)} + if last_artist_id: + criteria['artistId'] = last_artist_id + else: + criteria['artistName'] = last_artist + + # Perform the search. + cands = mb.get_releases(**criteria) + cands = list(cands)[:autotag.MAX_CANDIDATES] + + log.debug('Matched last candidates: %s' % + ', '.join([cand['album'] for cand in cands])) + + return cands