lastid plugin can now generate its own candidates

That is, in addition to contributing to the distance function, the plugin can
also influence the initial search into MusicBrainz. This will allow albums to be
tagged even when they're missing metadata entirely. This change required a
significant bit of refactoring to the "mb" module -- it's much nicer now.
This commit is contained in:
Adrian Sampson 2010-09-16 12:27:04 -07:00
parent 22b4dd44f9
commit 8b02dd176c
4 changed files with 101 additions and 42 deletions

View file

@ -421,28 +421,35 @@ def tag_album(items, search_artist=None, search_album=None):
if not search_artist or not search_album:
raise InsufficientMetadataError()
candidates = mb.match_album(search_artist, search_album, len(items))
candidates = candidates[:MAX_CANDIDATES]
candidates = list(candidates)[:MAX_CANDIDATES]
# Get candidates from plugins.
# candidates += plugins.candidates(items)
candidates.extend(plugins.candidates(items))
# Get the distance to each candidate.
log.debug('Evaluating %i candidates:' % len(candidates))
for info in candidates:
log.debug('Candidate: %s - %s' % (info['artist'], info['album']))
# Don't duplicate.
if info['album_id'] in out_tuples:
log.debug('Duplicate.')
continue
# Make sure the album has the correct number of tracks.
if len(items) != len(info['tracks']):
log.debug('Track count mismatch.')
continue
# Put items in order.
ordered = order_items(items, info['tracks'])
if not ordered:
log.debug('Not orderable.')
continue
# Get the change distance.
dist = distance(ordered, info)
log.debug('Success. Distance: %f' % dist)
out_tuples[info['album_id']] = dist, ordered, info

View file

@ -27,6 +27,8 @@ import datetime
import musicbrainz2.webservice as mbws
from threading import Lock
# Default cap on the number of releases requested per MusicBrainz search;
# used as the default `limit` argument of `find_releases`.
SEARCH_LIMIT = 10
class ServerBusyError(Exception):
    """Raised by `_query_wrap` when a query is abandoned (presumably
    because the MusicBrainz server is too busy to respond).
    """
# MusicBrainz requires that a client does not query the server more
@ -66,6 +68,30 @@ def _query_wrap(fun, *args, **kwargs):
raise ServerBusyError()
# FIXME exponential backoff?
def get_releases(**params):
    """Run a MusicBrainz release search and lazily yield release dicts
    (complete with tracks).

    `params` are passed through as keyword arguments to
    `mbws.ReleaseFilter`. For every search result, the release's track
    list is fetched with `release_info` and merged with the release via
    `release_dict`. Results whose follow-up lookup finds nothing are
    skipped.
    """
    filt = mbws.ReleaseFilter(**params)
    results = _query_wrap(mbws.Query().getReleases, filter=filt)
    for result in results:
        release = result.release
        info = release_info(release.id)
        if info is None:
            # release_info() returns None when the release cannot be
            # found; unpacking that would raise TypeError and abort the
            # whole generator, so just move on to the next result.
            continue
        tracks, _ = info
        yield release_dict(release, tracks)
def release_info(release_id):
    """Look up a release by its MusicBrainz ID.

    Returns a pair made of the release's track list and the ID of its
    release group. When the lookup yields no release, returns None
    instead.
    """
    includes = mbws.ReleaseIncludes(tracks=True, releaseGroup=True)
    found = _query_wrap(mbws.Query().getReleaseById, release_id, includes)
    if not found:
        return None
    return found.getTracks(), found.getReleaseGroup().getId()
def _lucene_escape(text):
"""Escapes a string so it may be used verbatim in a Lucene query
string.
@ -73,9 +99,8 @@ def _lucene_escape(text):
# Regex stolen from MusicBrainz Picard.
return re.sub(r'([+\-&|!(){}\[\]\^"~*?:\\])', r'\\\1', text)
# Workings of this function more or less stolen from Picard.
def find_releases(criteria, limit=25):
"""Get a list of `ReleaseResult` objects from the MusicBrainz
def find_releases(criteria, limit=SEARCH_LIMIT):
"""Get a list of release dictionaries from the MusicBrainz
database that match `criteria`. The latter is a dictionary whose
keys are MusicBrainz field names and whose values are search terms
for those fields.
@ -93,8 +118,7 @@ def find_releases(criteria, limit=25):
query = u' '.join(query_parts)
# Build the filter and send the query.
filt = mbws.ReleaseFilter(limit=limit, query=query)
return _query_wrap(mbws.Query().getReleases, filter=filt)
return get_releases(limit=limit, query=query)
def release_dict(release, tracks=None):
"""Takes a MusicBrainz `Release` object and returns a dictionary
@ -138,18 +162,6 @@ def release_dict(release, tracks=None):
return out
def release_info(release_id):
    """Fetch the tracks and release group ID for a MusicBrainz release.

    The release identified by `release_id` is requested with its tracks
    and release group included. The result is a (tracks, release group
    ID) tuple, or None if no release came back.
    """
    wanted = mbws.ReleaseIncludes(tracks=True, releaseGroup=True)
    lookup = mbws.Query().getReleaseById
    release = _query_wrap(lookup, release_id, wanted)
    if release:
        tracks = release.getTracks()
        group_id = release.getReleaseGroup().getId()
        return tracks, group_id
    return None
def match_album(artist, album, tracks=None):
"""Searches for a single album ("release" in MusicBrainz parlance)
and returns an iterator over dictionaries of information (as
@ -164,12 +176,7 @@ def match_album(artist, album, tracks=None):
criteria['tracks'] = str(tracks)
# Search for the release.
results = find_releases(criteria, 10)
for result in results:
release = result.release
tracks, _ = release_info(release.id)
yield release_dict(release, tracks)
return find_releases(criteria)
def album_for_id(albumid):
"""Fetches an album by its MusicBrainz ID and returns an

View file

@ -51,6 +51,12 @@ class BeetsPlugin(object):
"""
return 0.0, 0.0
def candidates(self, items):
    """Hook for plugins that can propose album matches.

    Subclasses override this to return a sequence of MusicBrainz info
    dictionaries matching the album made up of `items`. This base
    implementation proposes nothing and returns an empty tuple.
    """
    no_matches = ()
    return no_matches
listeners = None
@classmethod
def listen(cls, event):
@ -143,6 +149,14 @@ def album_distance(items, info):
dist_max += dm
return dist, dist_max
def candidates(items):
    """Collects the MusicBrainz candidates that every plugin proposes
    for the album made up of `items`, concatenated into one list in
    plugin order.
    """
    return [
        info
        for plugin in find_plugins()
        for info in plugin.candidates(items)
    ]
# Event dispatch.

View file

@ -19,6 +19,7 @@ Requires the pylastfp library.
from __future__ import with_statement
from beets.plugins import BeetsPlugin
from beets import autotag
from beets.autotag import mb
import lastfp
import logging
@ -53,6 +54,27 @@ def match(path, metadata=None):
_match_cache[path] = match
return match
def get_cur_artist(items):
    """Determine the artist of a group of items from their
    fingerprint matches.

    Each item's path is looked up with `match`; items without a match
    are ignored. Returns a pair: the plurality artist name and the
    plurality artist MBID among the matched tracks.
    """
    names = []
    mbids = []
    for item in items:
        fingerprint = match(item.path)
        if not fingerprint:
            # No fingerprint data for this track; it casts no vote.
            continue
        names.append(fingerprint['artist'])
        mbid = fingerprint['artist_mbid']
        if mbid:
            mbids.append(mbid)

    # Let the tracks vote: names first, then IDs.
    return autotag._plurality(names), autotag._plurality(mbids)
class LastIdPlugin(BeetsPlugin):
def track_distance(self, item, info):
last_data = match(item.path)
@ -70,31 +92,19 @@ class LastIdPlugin(BeetsPlugin):
# MusicBrainz track ID.
if last_data['track_mbid']:
log.debug('Last track ID match: %s/%s' %
(last_data['track_mbid'], track_data['id']))
# log.debug('Last track ID match: %s/%s' %
# (last_data['track_mbid'], track_data['id']))
if last_data['track_mbid'] != track_data['id']:
dist += autotag.TRACK_ID_WEIGHT
dist_max += autotag.TRACK_ID_WEIGHT
log.debug('Last data: %s; distance: %f' %
(str(last_data), dist/dist_max))
# log.debug('Last data: %s; distance: %f' %
# (str(last_data), dist/dist_max))
return dist * DISTANCE_SCALE, dist_max * DISTANCE_SCALE
def album_distance(self, items, info):
# Get "fingerprinted" artists for each track.
artists = []
artist_ids = []
for item in items:
last_data = match(item.path)
if last_data:
artists.append(last_data['artist'])
if last_data['artist_mbid']:
artist_ids.append(last_data['artist_mbid'])
# Vote on the most popular artist.
last_artist = autotag._plurality(artists)
last_artist_id = autotag._plurality(artist_ids)
last_artist, last_artist_id = get_cur_artist(items)
# Compare artist to MusicBrainz metadata.
dist, dist_max = 0.0, 0.0
@ -107,3 +117,24 @@ class LastIdPlugin(BeetsPlugin):
#fixme: artist MBID currently ignored (as in vanilla tagger)
return dist, dist_max
def candidates(self, items):
    """Propose MusicBrainz releases for an album based on the artist
    obtained from `get_cur_artist`.

    Searches for releases by that artist having exactly `len(items)`
    tracks and returns a list of at most `autotag.MAX_CANDIDATES`
    release dictionaries. Returns an empty list when neither an artist
    ID nor an artist name is available.
    """
    from itertools import islice

    last_artist, last_artist_id = get_cur_artist(items)

    # Build the search criteria. Use the artist ID if we have one;
    # otherwise use the artist name. Unfortunately, Last.fm doesn't
    # give us album information.
    criteria = {'trackCount': len(items)}
    if last_artist_id:
        criteria['artistId'] = last_artist_id
    elif last_artist:
        # This used to read the undefined name `last_artist_name`,
        # raising NameError whenever no artist ID was available.
        criteria['artistName'] = last_artist
    else:
        # Nothing identifies the artist; a search by track count alone
        # would only return unrelated releases.
        return []

    # Perform the search. get_releases() is a generator that runs one
    # extra lookup per result, so stop pulling from it as soon as we
    # have enough candidates instead of exhausting it and slicing.
    cands = list(islice(mb.get_releases(**criteria),
                        autotag.MAX_CANDIDATES))

    log.debug('Matched last candidates: %s' %
              ', '.join([cand['album'] for cand in cands]))
    return cands