From 4d978f3541ee304ca34dc11b4efc85e1e4ebe6d0 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Tue, 14 Sep 2010 14:42:51 -0700 Subject: [PATCH] album-level distance function in lastid plugin This involves yet another new plugin method: album_distance. This leaves as the last major puzzle piece for lastid the ability to augment the initial search into MB (i.e., can start a search using fingerprinted metadata). --- .hgignore | 1 + beets/autotag/__init__.py | 50 ++++++++++++++++++++------------------- beets/plugins.py | 17 +++++++++++++ beetsplug/lastid.py | 29 ++++++++++++++++++++++- test/test_autotag.py | 15 ++++++++++++ 5 files changed, 87 insertions(+), 25 deletions(-) diff --git a/.hgignore b/.hgignore index 0c31cc76e..1453aed71 100644 --- a/.hgignore +++ b/.hgignore @@ -1,3 +1,4 @@ ^dist/ ^beets\.egg-info/ ^build/ +^MANIFEST$ diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py index eb728666f..2faf2cbb5 100644 --- a/beets/autotag/__init__.py +++ b/beets/autotag/__init__.py @@ -165,37 +165,34 @@ def _ie_dist(str1, str2): return levenshtein(str1, str2) / float(max(len(str1), len(str2))) +def _plurality(objs): + """Given a sequence of comparable objects, returns the object that + is most common in the set. + """ + # Calculate frequencies. + freqs = defaultdict(int) + for obj in objs: + freqs[obj] += 1 + + # Find object with maximum frequency. + max_freq = 0 + res = None + for obj, freq in freqs.items(): + if freq > max_freq: + max_freq = freq + res = obj + + return res + def current_metadata(items): """Returns the most likely artist and album for a set of Items. Each is determined by tag reflected by the plurality of the Items. """ - # The tags we'll try to determine. keys = 'artist', 'album' - - # Make dictionaries in which to count the freqencies of different - # artist and album tags. We'll use this to find the most likely - # artist and album. Defaultdicts let the frequency default to zero. - freqs = {} - for key in keys: - freqs[key] = defaultdict(int) - - # Count the frequencies. - for item in items: - for key in keys: - value = getattr(item, key) - if value: # Don't count empty tags. - freqs[key][value] += 1 - - # Find max-frequency tags. likelies = {} for key in keys: - max_freq = 0 - likelies[key] = None - for tag, freq in freqs[key].items(): - if freq > max_freq: - max_freq = freq - likelies[key] = tag - + values = [getattr(item, key) for item in items] + likelies[key] = _plurality(values) return likelies['artist'], likelies['album'] def order_items(items, trackinfo): @@ -292,6 +289,11 @@ def distance(items, info): dist += track_distance(item, track_data, i+1) * TRACK_WEIGHT dist_max += TRACK_WEIGHT + # Plugin distances. + plugin_d, plugin_dm = plugins.album_distance(items, info) + dist += plugin_d + dist_max += plugin_dm + # Normalize distance, avoiding divide-by-zero. if dist_max == 0.0: return 0.0 diff --git a/beets/plugins.py b/beets/plugins.py index 6ae86e146..b6c0c6994 100644 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -45,6 +45,12 @@ class BeetsPlugin(object): """ return 0.0, 0.0 + def album_distance(self, items, info): + """Should return a (distance, distance_max) pair to be added + to the distance value for every album-level comparison. + """ + return 0.0, 0.0 + listeners = None @classmethod def listen(cls, event): @@ -127,6 +133,17 @@ def track_distance(item, info): dist_max += dm return dist, dist_max +def album_distance(items, info): + """Returns the album distance calculated by plugins.""" + dist = 0.0 + dist_max = 0.0 + for plugin in find_plugins(): + d, dm = plugin.album_distance(items, info) + dist += d + dist_max += dm + return dist, dist_max + + # Event dispatch. # All the handlers for the event system. diff --git a/beetsplug/lastid.py b/beetsplug/lastid.py index 62b35dcb9..64625e733 100644 --- a/beetsplug/lastid.py +++ b/beetsplug/lastid.py @@ -64,7 +64,7 @@ class LastIdPlugin(BeetsPlugin): # Track title distance. dist += autotag._ie_dist(last_data['title'], - info['title']) \ + info['title']) \ * autotag.TRACK_TITLE_WEIGHT dist_max += autotag.TRACK_TITLE_WEIGHT @@ -80,3 +80,30 @@ class LastIdPlugin(BeetsPlugin): (str(last_data), dist/dist_max)) return dist * DISTANCE_SCALE, dist_max * DISTANCE_SCALE + + def album_distance(self, items, info): + # Get "fingerprinted" artists for each track. + artists = [] + artist_ids = [] + for item in items: + last_data = match(item.path) + if last_data: + artists.append(last_data['artist']) + if last_data['artist_mbid']: + artist_ids.append(last_data['artist_mbid']) + + # Vote on the most popular artist. + last_artist = autotag._plurality(artists) + last_artist_id = autotag._plurality(artist_ids) + + # Compare artist to MusicBrainz metadata. + dist, dist_max = 0.0, 0.0 + dist += autotag._ie_dist(last_artist, info['artist']) \ + * autotag.ARTIST_WEIGHT + dist_max += autotag.ARTIST_WEIGHT + + log.debug('Last artist (%s/%s) distance: %f' % + (last_artist, info['artist'], dist/dist_max)) + + #fixme: artist MBID currently ignored (as in vanilla tagger) + return dist, dist_max diff --git a/test/test_autotag.py b/test/test_autotag.py index 3ba7cd98e..6a04c5fb1 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -25,6 +25,21 @@ from beets import autotag from beets.library import Item class AutotagTest(unittest.TestCase): + def test_plurality_consensus(self): + objs = [1, 1, 1, 1] + obj = autotag._plurality(objs) + self.assertEqual(obj, 1) + + def test_plurality_near_consensus(self): + objs = [1, 1, 2, 1] + obj = autotag._plurality(objs) + self.assertEqual(obj, 1) + + def test_plurality_conflict(self): + objs = [1, 1, 2, 2, 3] + obj = autotag._plurality(objs) + self.assert_(obj in (1, 2)) + def test_current_metadata_finds_pluralities(self): items = [Item({'artist': 'The Beetles', 'album': 'The White Album'}), Item({'artist': 'The Beatles', 'album': 'The White Album'}),