From 4dc4025b5f5e692f8a265ca7817bd5acb1eb877a Mon Sep 17 00:00:00 2001 From: Simon Chopin Date: Tue, 22 Nov 2011 12:25:10 +0100 Subject: [PATCH 1/4] autotag: Can now compute the distance for incomplete albums If the user has some songs from a specific album, but not all of them, the real solution is immediately discarded. This commit is the first of a series that will implement support for these incomplete albums. The point of this patch is to make sure missing commits are taken into account when calculating the distance between an album and its canonical data. Note that in order not to break API compatibility, the album_distance call for the plugins receives a purged version of both the items and the album info, resulting in some potential accuracy if the plugin bases itself on the index of a track in album_info.tracks. --- beets/autotag/match.py | 26 +++++++++++++++++++++----- test/test_autotag.py | 15 +++++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 435e6fb03..bc6a506f6 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -17,6 +17,7 @@ releases and tracks. """ import logging import re +import copy from munkres import Munkres from unidecode import unidecode @@ -31,6 +32,8 @@ ARTIST_WEIGHT = 3.0 ALBUM_WEIGHT = 3.0 # The weight of the entire distance calculated for a given track. TRACK_WEIGHT = 1.0 +# The weight of a missing track. +MISSING_WEIGHT = 0.3 # These distances are components of the track distance (that is, they # compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT; # the overall TRACK_WEIGHT does that). @@ -161,7 +164,7 @@ def current_metadata(items): likelies = {} consensus = {} for key in keys: - values = [getattr(item, key) for item in items] + values = [getattr(item, key) for item in items if item] likelies[key], freq = plurality(values) consensus[key] = (freq == len(values)) return likelies['artist'], likelies['album'], consensus['artist'] @@ -269,12 +272,25 @@ def distance(items, album_info): # Track distances. for i, (item, track_info) in enumerate(zip(items, album_info.tracks)): - dist += track_distance(item, track_info, i+1, album_info.va) * \ - TRACK_WEIGHT - dist_max += TRACK_WEIGHT + if item: + dist += track_distance(item, track_info, i+1, album_info.va) * \ + TRACK_WEIGHT + dist_max += TRACK_WEIGHT + else: + dist += MISSING_WEIGHT + dist_max += MISSING_WEIGHT # Plugin distances. - plugin_d, plugin_dm = plugins.album_distance(items, album_info) + # In order not to break compatibility, send purged lists + purged_items, purged_tracks = [], [] + for i, t in zip(items, album_info.tracks): + if i: + purged_items.append(i) + purged_tracks.append(t) + purged_album_info = copy.copy(album_info) + purged_album_info.tracks = purged_tracks + + plugin_d, plugin_dm = plugins.album_distance(purged_items, purged_album_info) dist += plugin_d dist_max += plugin_dm diff --git a/test/test_autotag.py b/test/test_autotag.py index e2f71d94a..0b5ccc535 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -94,6 +94,21 @@ class AlbumDistanceTest(unittest.TestCase): ) self.assertEqual(match.distance(items, info), 0) + def test_incomplete_album(self): + items = [] + items.append(self.item('one', 1)) + items.append(self.item('three', 3)) + info = AlbumInfo( + artist = 'some artist', + album = 'some album', + tracks = self.trackinfo(), + va = False, + album_id = None, artist_id = None, + ) + self.assertNotEqual(match.distance(items, info), 0) + # Make sure the distance is not too great + self.assertTrue(match.distance(items, info) < 0.2) + def test_global_artists_differ(self): items = [] items.append(self.item('one', 1)) From bb964a7c479e0e070e010d8374385c45a7099ead Mon Sep 17 00:00:00 2001 From: Simon Chopin Date: Tue, 22 Nov 2011 12:51:37 +0100 Subject: [PATCH 2/4] autotag: Fill the blanks when ordering incomplete album In the function order_items, instead of automatically reject the canonical candidate if it has more tracks, the function still tries to find matches for the tracks amongst the items, and otherwise uses None to fill the void in order to keep the information about the track numbers --- beets/autotag/match.py | 7 ++++--- test/test_autotag.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index bc6a506f6..d6566c2dd 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -174,8 +174,9 @@ def order_items(items, trackinfo): information. This always produces a result if the numbers of tracks match. """ - # Make sure lengths match. - if len(items) != len(trackinfo): + # Make sure lengths match: If there is less items, it might just be that + # there is some tracks missing. + if len(items) > len(trackinfo): return None # Construct the cost matrix. @@ -190,7 +191,7 @@ def order_items(items, trackinfo): matching = Munkres().compute(costs) # Order items based on the matching. - ordered_items = [None]*len(items) + ordered_items = [None]*len(trackinfo) for cur_idx, canon_idx in matching: ordered_items[canon_idx] = items[cur_idx] return ordered_items diff --git a/test/test_autotag.py b/test/test_autotag.py index 0b5ccc535..c8fd3dca2 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -251,11 +251,24 @@ class OrderingTest(unittest.TestCase): items = [] items.append(self.item('one', 1)) items.append(self.item('two', 2)) + items.append(self.item('three', 3)) + items.append(self.item('four',4)) trackinfo = [] trackinfo.append(TrackInfo('one', None)) ordered = match.order_items(items, trackinfo) self.assertEqual(ordered, None) + def test_order_works_with_missing_tracks(self) + items = [] + items.append(self.item('one', 1)) + items.append(self.item('two', 2)) + trackinfo = [] + trackinfo.append(TrackInfo('one', None)) + ordered = match.order_items(items, trackinfo) + self.assertEqual(ordered[0].title, 'one') + self.assertEqual(ordered[1].title, 'two') + self.assertEqual(ordered[2], None) + def test_order_corrects_when_track_names_are_entirely_wrong(self): # A real-world test case contributed by a user. def item(i, length): From 9a7a551d92e4a260fa0e2e6f2a55158a36f0ca64 Mon Sep 17 00:00:00 2001 From: Simon Chopin Date: Tue, 22 Nov 2011 13:00:58 +0100 Subject: [PATCH 3/4] Enable import of incomplete albums This commit disables the autoreject for incomplete albums. There is several one-liner fixes in autotag/__init__.py and importer.py, as well as some UI additions to report to the user when a track seems missing. --- beets/autotag/__init__.py | 2 ++ beets/autotag/match.py | 2 +- beets/importer.py | 6 +++--- beets/ui/commands.py | 7 +++++++ 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py index e6588ee30..a9d5ab765 100644 --- a/beets/autotag/__init__.py +++ b/beets/autotag/__init__.py @@ -74,6 +74,8 @@ def apply_metadata(items, album_info): """ for index, (item, track_info) in enumerate(zip(items, album_info.tracks)): # Album, artist, track count. + if not item: + continue if track_info.artist: item.artist = track_info.artist else: diff --git a/beets/autotag/match.py b/beets/autotag/match.py index d6566c2dd..ce94e009a 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -365,7 +365,7 @@ def validate_candidate(items, tuple_dict, info): return # Make sure the album has the correct number of tracks. - if len(items) != len(info.tracks): + if len(items) > len(info.tracks): log.debug('Track count mismatch.') return diff --git a/beets/importer.py b/beets/importer.py index 2b24047bf..515e6a7be 100644 --- a/beets/importer.py +++ b/beets/importer.py @@ -107,7 +107,7 @@ def _duplicate_check(lib, task, recent=None): recent.add((artist, album)) # Look in the library. - cur_paths = set(i.path for i in task.items) + cur_paths = set(i.path for i in task.items if i) for album_cand in lib.albums(artist=artist): if album_cand.album == album: # Check whether the album is identical in contents, in which @@ -591,7 +591,7 @@ def apply_choices(config): if task.should_skip(): continue - items = task.items if task.is_album else [task.item] + items = [i for i in task.items if i] if task.is_album else [task.item] # Clear IDs in case the items are being re-tagged. for item in items: item.id = None @@ -644,7 +644,7 @@ def apply_choices(config): # Add new ones. if task.is_album: # Add an album. - album = lib.add_album(task.items) + album = lib.add_album([i for i in task.items if i]) task.album_id = album.id else: # Add tracks. diff --git a/beets/ui/commands.py b/beets/ui/commands.py index fdbd5a377..21ecd1017 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -154,7 +154,11 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True): print_('(Similarity: %s)' % dist_string(dist, color)) # Tracks. + missing_tracks = [] for i, (item, track_info) in enumerate(zip(items, info.tracks)): + if not item: + missing_tracks.append((i, track_info)) + continue cur_track = unicode(item.track) new_track = unicode(i+1) cur_title = item.title @@ -179,6 +183,9 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True): print_(u" * %s -> %s" % (cur_title, new_title)) elif cur_track != new_track: print_(u" * %s (%s -> %s)" % (item.title, cur_track, new_track)) + for i, track_info in missing_tracks: + print_(ui.colorize('red', u' * Missing track: %s (%d)' % \ + (track_info.title, i+1))) def show_item_change(item, info, dist, color): """Print out the change that would occur by tagging `item` with the From 7aaab602c002ba11d17f3fe55cd889d1729ab44e Mon Sep 17 00:00:00 2001 From: Simon Chopin Date: Thu, 24 Nov 2011 16:03:56 +0100 Subject: [PATCH 4/4] ui.commands: Warn the user when the candidates are partial matches --- beets/ui/commands.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 21ecd1017..752c0813c 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -101,6 +101,8 @@ DEFAULT_IGNORE = ['.AppleDouble', '._*', '*~', '.DS_Store'] VARIOUS_ARTISTS = u'Various Artists' +PARTIAL_MATCH_STRING = ui.colorize('green', u'(Partial match !)') + # Importer utilities and support. def dist_string(dist, color): @@ -122,13 +124,22 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True): tags are changed from (cur_artist, cur_album, items) to info with distance dist. """ - def show_album(artist, album): + def show_album(artist, album, partial=False): if artist: - print_(' %s - %s' % (artist, album)) + album_description = ' %s - %s' % (artist, album) elif album: - print_(' %s' % album) + album_description = ' %s' % album else: - print_(' (unknown album)') + album_description = ' (unknown album)' + + # Add a suffix indicating a partial match + if partial: + print_('%s %s' % (album_description, PARTIAL_MATCH_STRING)) + else: + print_(album_description) + + # Record if the match is partial or not. + partial_match = None in items # Identify the album in question. if cur_artist != info.artist or \ @@ -147,6 +158,9 @@ def show_change(cur_artist, cur_album, items, info, dist, color=True): show_album(artist_l, album_l) print_("To:") show_album(artist_r, album_r) + elif partial_match: + print_("Tagging: %s - %s %s" % (info.artist, info.album, + PARTIAL_MATCH_STRING)) else: print_("Tagging: %s - %s" % (info.artist, info.album)) @@ -312,6 +326,11 @@ def choose_candidate(candidates, singleton, rec, color, timid, line += u' [%s]' % year line += ' (%s)' % dist_string(dist, color) + + # Pointing out the partial matches. + if None in items: + line += ' %s' % PARTIAL_MATCH_STRING + print_(line) # Ask the user for a choice.