album-level distance function in lastid plugin

This involves yet another new plugin method: album_distance. The last major
puzzle piece remaining for lastid is the ability to augment the initial
MusicBrainz (MB) search (i.e., to start a search using fingerprinted metadata).
This commit is contained in:
Adrian Sampson 2010-09-14 14:42:51 -07:00
parent 99cd36e34f
commit 4d978f3541
5 changed files with 87 additions and 25 deletions

View file

@ -1,3 +1,4 @@
^dist/
^beets\.egg-info/
^build/
^MANIFEST$

View file

@ -165,37 +165,34 @@ def _ie_dist(str1, str2):
return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
def _plurality(objs):
"""Given a sequence of comparable objects, returns the object that
is most common in the set.
"""
# Calculate frequencies.
freqs = defaultdict(int)
for obj in objs:
freqs[obj] += 1
# Find object with maximum frequency.
max_freq = 0
res = None
for obj, freq in freqs.items():
if freq > max_freq:
max_freq = freq
res = obj
return res
def current_metadata(items):
    """Returns the most likely artist and album for a set of Items.

    Each is the plurality of the corresponding attribute across the
    Items, as computed by _plurality(). Every Item's value takes part
    in the vote (values are not filtered). `items` is iterated once per
    tag, so it must be re-iterable (e.g. a list).

    Returns an (artist, album) pair.
    """
    # The tags we'll try to determine.
    keys = 'artist', 'album'

    # Vote on each tag independently.
    likelies = {}
    for key in keys:
        values = [getattr(item, key) for item in items]
        likelies[key] = _plurality(values)

    return likelies['artist'], likelies['album']
def order_items(items, trackinfo):
@ -292,6 +289,11 @@ def distance(items, info):
dist += track_distance(item, track_data, i+1) * TRACK_WEIGHT
dist_max += TRACK_WEIGHT
# Plugin distances.
plugin_d, plugin_dm = plugins.album_distance(items, info)
dist += plugin_d
dist_max += plugin_dm
# Normalize distance, avoiding divide-by-zero.
if dist_max == 0.0:
return 0.0

View file

@ -45,6 +45,12 @@ class BeetsPlugin(object):
"""
return 0.0, 0.0
def album_distance(self, items, info):
    """Hook for album-level distance contributions.

    Should return a (distance, distance_max) pair to be added to the
    distance value for every album-level comparison. This default
    implementation contributes nothing.
    """
    no_contribution = (0.0, 0.0)
    return no_contribution
listeners = None
@classmethod
def listen(cls, event):
@ -127,6 +133,17 @@ def track_distance(item, info):
dist_max += dm
return dist, dist_max
def album_distance(items, info):
    """Returns the album distance calculated by plugins.

    Calls album_distance(items, info) on every plugin yielded by
    find_plugins() and accumulates the results into a single
    (distance, distance_max) pair.
    """
    total, total_max = 0.0, 0.0
    for plugin in find_plugins():
        contribution, contribution_max = plugin.album_distance(items, info)
        total += contribution
        total_max += contribution_max
    return total, total_max
# Event dispatch.
# All the handlers for the event system.

View file

@ -64,7 +64,7 @@ class LastIdPlugin(BeetsPlugin):
# Track title distance.
dist += autotag._ie_dist(last_data['title'],
info['title']) \
info['title']) \
* autotag.TRACK_TITLE_WEIGHT
dist_max += autotag.TRACK_TITLE_WEIGHT
@ -80,3 +80,30 @@ class LastIdPlugin(BeetsPlugin):
(str(last_data), dist/dist_max))
return dist * DISTANCE_SCALE, dist_max * DISTANCE_SCALE
def album_distance(self, items, info):
    """Returns a (distance, distance_max) pair comparing the artist
    "fingerprinted" for the album's tracks against info['artist'].

    Each item's path is looked up with match(); the most common
    artist among the results is compared to the candidate artist and
    weighted by autotag.ARTIST_WEIGHT. When no usable artist was
    obtained for any item, (0.0, 0.0) is returned so this plugin
    contributes nothing to the album distance.
    """
    # Get "fingerprinted" artists for each track.
    artists = []
    artist_ids = []
    for item in items:
        last_data = match(item.path)
        if last_data:
            artists.append(last_data['artist'])
            if last_data['artist_mbid']:
                artist_ids.append(last_data['artist_mbid'])

    # Vote on the most popular artist.
    last_artist = autotag._plurality(artists)
    # Voted on but not yet used; see the fixme below.
    last_artist_id = autotag._plurality(artist_ids)

    # _plurality() returns None when nothing matched; comparing None
    # (or an empty name) against the candidate artist is meaningless
    # and would fail in the string distance, so stay neutral instead.
    if not last_artist:
        return 0.0, 0.0

    # Compare artist to MusicBrainz metadata.
    dist, dist_max = 0.0, 0.0
    dist += autotag._ie_dist(last_artist, info['artist']) \
            * autotag.ARTIST_WEIGHT
    dist_max += autotag.ARTIST_WEIGHT

    # Avoid dividing by zero in the log line if the weight is zero.
    normalized = dist / dist_max if dist_max else 0.0
    log.debug('Last artist (%s/%s) distance: %f' %
              (last_artist, info['artist'], normalized))

    #fixme: artist MBID currently ignored (as in vanilla tagger)
    return dist, dist_max

View file

@ -25,6 +25,21 @@ from beets import autotag
from beets.library import Item
class AutotagTest(unittest.TestCase):
def test_plurality_consensus(self):
    # Every element agrees, so that element must win the vote.
    winner = autotag._plurality([1, 1, 1, 1])
    self.assertEqual(winner, 1)
def test_plurality_near_consensus(self):
    # A single dissenting element must not change the winner.
    winner = autotag._plurality([1, 1, 2, 1])
    self.assertEqual(winner, 1)
def test_plurality_conflict(self):
    # 1 and 2 tie for most common, so either is an acceptable result.
    objs = [1, 1, 2, 2, 3]
    obj = autotag._plurality(objs)
    # assertTrue replaces the deprecated assert_ alias, which newer
    # Python versions no longer provide.
    self.assertTrue(obj in (1, 2))
def test_current_metadata_finds_pluralities(self):
items = [Item({'artist': 'The Beetles', 'album': 'The White Album'}),
Item({'artist': 'The Beatles', 'album': 'The White Album'}),