mirror of
https://github.com/beetbox/beets.git
synced 2026-01-30 12:02:41 +01:00
album-level distance function in lastid plugin
This involves yet another new plugin method: album_distance. The last major puzzle piece remaining for lastid is the ability to augment the initial search into MB (i.e., to start a search using fingerprinted metadata).
This commit is contained in:
parent
99cd36e34f
commit
4d978f3541
5 changed files with 87 additions and 25 deletions
|
|
@ -1,3 +1,4 @@
|
|||
^dist/
|
||||
^beets\.egg-info/
|
||||
^build/
|
||||
^MANIFEST$
|
||||
|
|
|
|||
|
|
@ -165,37 +165,34 @@ def _ie_dist(str1, str2):
|
|||
|
||||
return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
|
||||
|
||||
def _plurality(objs):
|
||||
"""Given a sequence of comparable objects, returns the object that
|
||||
is most common in the set.
|
||||
"""
|
||||
# Calculate frequencies.
|
||||
freqs = defaultdict(int)
|
||||
for obj in objs:
|
||||
freqs[obj] += 1
|
||||
|
||||
# Find object with maximum frequency.
|
||||
max_freq = 0
|
||||
res = None
|
||||
for obj, freq in freqs.items():
|
||||
if freq > max_freq:
|
||||
max_freq = freq
|
||||
res = obj
|
||||
|
||||
return res
|
||||
|
||||
def current_metadata(items):
    """Returns the most likely artist and album for a set of Items.
    Each is determined by tag reflected by the plurality of the Items.

    ``items`` is iterated once per tag. The result is an
    ``(artist, album)`` pair holding whatever ``_plurality`` picks for
    the corresponding attribute values. Empty tag values take part in
    the vote like any other value.
    """
    # The tags we'll try to determine.
    keys = 'artist', 'album'

    # The vote is delegated entirely to _plurality; the former
    # hand-rolled frequency counting was redundant because its result
    # was always overwritten by the _plurality result.
    likelies = {}
    for key in keys:
        values = [getattr(item, key) for item in items]
        likelies[key] = _plurality(values)

    return likelies['artist'], likelies['album']
|
||||
|
||||
def order_items(items, trackinfo):
|
||||
|
|
@ -292,6 +289,11 @@ def distance(items, info):
|
|||
dist += track_distance(item, track_data, i+1) * TRACK_WEIGHT
|
||||
dist_max += TRACK_WEIGHT
|
||||
|
||||
# Plugin distances.
|
||||
plugin_d, plugin_dm = plugins.album_distance(items, info)
|
||||
dist += plugin_d
|
||||
dist_max += plugin_dm
|
||||
|
||||
# Normalize distance, avoiding divide-by-zero.
|
||||
if dist_max == 0.0:
|
||||
return 0.0
|
||||
|
|
|
|||
|
|
@ -45,6 +45,12 @@ class BeetsPlugin(object):
|
|||
"""
|
||||
return 0.0, 0.0
|
||||
|
||||
def album_distance(self, items, info):
    """Hook for album-level distance contributions.

    Should return a (distance, distance_max) pair to be added to the
    distance value for every album-level comparison. This default
    implementation contributes nothing.
    """
    nothing = 0.0
    return nothing, nothing
|
||||
|
||||
listeners = None
|
||||
@classmethod
|
||||
def listen(cls, event):
|
||||
|
|
@ -127,6 +133,17 @@ def track_distance(item, info):
|
|||
dist_max += dm
|
||||
return dist, dist_max
|
||||
|
||||
def album_distance(items, info):
    """Returns the album distance calculated by plugins.

    Every plugin yielded by ``find_plugins()`` is asked for its
    (distance, distance_max) pair; the pairs are summed component-wise
    and the totals are returned as a (distance, distance_max) pair.
    """
    total = total_max = 0.0
    for plugin in find_plugins():
        plugin_dist, plugin_max = plugin.album_distance(items, info)
        total, total_max = total + plugin_dist, total_max + plugin_max
    return total, total_max
|
||||
|
||||
|
||||
# Event dispatch.
|
||||
|
||||
# All the handlers for the event system.
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ class LastIdPlugin(BeetsPlugin):
|
|||
|
||||
# Track title distance.
|
||||
dist += autotag._ie_dist(last_data['title'],
|
||||
info['title']) \
|
||||
info['title']) \
|
||||
* autotag.TRACK_TITLE_WEIGHT
|
||||
dist_max += autotag.TRACK_TITLE_WEIGHT
|
||||
|
||||
|
|
@ -80,3 +80,30 @@ class LastIdPlugin(BeetsPlugin):
|
|||
(str(last_data), dist/dist_max))
|
||||
|
||||
return dist * DISTANCE_SCALE, dist_max * DISTANCE_SCALE
|
||||
|
||||
def album_distance(self, items, info):
    """Album-level distance based on the "fingerprinted" artist.

    Each item's path is looked up with ``match``; the artist reported
    most often across the items is compared with ``info['artist']``
    using ``autotag._ie_dist``, weighted by ``autotag.ARTIST_WEIGHT``.

    Returns a (distance, distance_max) pair. When no item yields any
    match data the pair is (0.0, 0.0), so this plugin has no influence
    on the comparison.
    """
    # Get "fingerprinted" artists for each track.
    artists = []
    for item in items:
        last_data = match(item.path)
        if last_data:
            artists.append(last_data['artist'])

    # Without any match data there is nothing to vote on. Bail out
    # with a neutral contribution instead of handing None to the
    # string-distance function.
    if not artists:
        return 0.0, 0.0

    # Vote on the most popular artist.
    last_artist = autotag._plurality(artists)

    # Compare artist to MusicBrainz metadata.
    dist, dist_max = 0.0, 0.0
    dist += autotag._ie_dist(last_artist, info['artist']) \
            * autotag.ARTIST_WEIGHT
    dist_max += autotag.ARTIST_WEIGHT

    log.debug('Last artist (%s/%s) distance: %f' %
              (last_artist, info['artist'], dist/dist_max))

    # TODO: the match data also carries 'artist_mbid'; the artist MBID
    # is currently ignored (as in vanilla tagger).
    return dist, dist_max
|
||||
|
|
|
|||
|
|
@ -25,6 +25,21 @@ from beets import autotag
|
|||
from beets.library import Item
|
||||
|
||||
class AutotagTest(unittest.TestCase):
|
||||
def test_plurality_consensus(self):
    # Every value agrees, so that value must be the result.
    self.assertEqual(autotag._plurality([1, 1, 1, 1]), 1)
|
||||
|
||||
def test_plurality_near_consensus(self):
    # A single dissenting value must not change the winner.
    self.assertEqual(autotag._plurality([1, 1, 2, 1]), 1)
|
||||
|
||||
def test_plurality_conflict(self):
    # Two values tie for the highest count; either one is an
    # acceptable answer, but the minority value (3) is not.
    objs = [1, 1, 2, 2, 3]
    obj = autotag._plurality(objs)
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(obj in (1, 2))
|
||||
|
||||
def test_current_metadata_finds_pluralities(self):
|
||||
items = [Item({'artist': 'The Beetles', 'album': 'The White Album'}),
|
||||
Item({'artist': 'The Beatles', 'album': 'The White Album'}),
|
||||
|
|
|
|||
Loading…
Reference in a new issue