From e6ac8e16461270d869e5472c5bdb3d1ccec489be Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Sun, 2 Jun 2013 16:33:07 +1000 Subject: [PATCH 01/21] Use a Distance object instead of floats for distance calculations. The new Distance object knows how to perform various types of distance calculations (expression, equality, number, priority, string). It will keep track of each individual penalty that has been applied so that we can utilise that information in the UI and when making decisions about the recommendation level. We now display the top 3 penalties (sorted by weight) on the release list (and "..." if there are more than 3), and we display all penalties on the album info line and track change line. The implementation of the `max_rec` setting has been simplified by removing duplicate validation and instead looking at the penalties that have been applied to a distance. As a result, we can now configure a maximum recommendation for any penalty that might be applied. We have a few new checks when calculating album distance: `match: preferred: countries` and `match: preferred: media` can each be set to a list of countries and media in order of your preference. These are empty by default. A value that matches the first item will have no penalty, and a value that doesn't match any item will have an unweighted penalty of 1.0. If `match: preferred: original_year` is set to "yes", beets will apply an unweighted penalty of 1.0 for each year of difference between the release year and the original year. We now configure individual weights for `mediums` (disctotal), `label`, `catalognum`, `country` and `albumdisambig` instead of a single generic `minor` weight. This gives more control, but more importantly separates and names the applied penalties so that the UI can convey exactly which fields have contributed to the overall distance penalty. 
Likewise, `missing tracks` and `unmatched tracks` are penalised and displayed in the UI separately, instead of a combined `partial` penalty. Display non-MusicBrainz source in the disambiguation string, and "source" in the list of penalties if a release is penalised for being a non-MusicBrainz source. --- beets/autotag/match.py | 408 ++++++++++++++++++++++++-------------- beets/config_default.yaml | 46 ++++- beets/plugins.py | 32 ++- beets/ui/commands.py | 76 ++++--- beetsplug/chroma.py | 11 +- beetsplug/discogs.py | 12 +- docs/changelog.rst | 34 ++-- docs/reference/config.rst | 75 ++++--- test/test_autotag.py | 122 ++++++++++++ test/test_ui.py | 17 +- 10 files changed, 553 insertions(+), 280 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 8935165f3..7f2f01c56 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -30,7 +30,7 @@ from beets.util.enumeration import enum from beets.autotag import hooks # A configuration view for the distance weights. -weights = config['match']['weight'] +weights = config['match']['distance_weights'] # Parameters for string distance function. # Words that can be moved to the end of a string using a comma. @@ -187,62 +187,202 @@ def track_index_changed(item, track_info): """ return item.track not in (track_info.medium_index, track_info.index) +class Distance(object): + """Keeps track of multiple distance penalties. Provides a single weighted + distance for all penalties as well as a weighted distance for each + individual penalty. + """ + def __cmp__(self, other): + return cmp(self.distance, other) + + def __float__(self): + return self.distance + + def __getitem__(self, key): + """Returns the weighted distance for a named penalty. 
+ """ + dist = sum(self.penalties[key]) * weights[key].as_number() + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + def __init__(self): + self.penalties = {} + + def __sub__(self, other): + return self.distance - other + + def __rsub__(self, other): + return other - self.distance + + def _eq(self, value1, value2): + """Returns True if `value1` is equal to `value2`. `value1` may be a + compiled regular expression, in which case it will be matched against + `value2`. + """ + if isinstance(value1, re._pattern_type): + return bool(value1.match(value2)) + return value1 == value2 + + def add(self, key, dist): + """Adds a distance penalty. `key` must correspond with a configured + weight setting. `dist` must be a float between 0.0 and 1.0, and will be + added to any existing distance penalties for the same key. + """ + if not 0.0 <= dist <= 1.0: + raise ValueError( + '`dist` must be between 0.0 and 1.0. It is: %r' % dist) + self.penalties.setdefault(key, []).append(dist) + + def add_equality(self, key, value, options): + """Adds a distance penalty of 1.0 if `value` doesn't match any of the + values in `options`. If an option is a compiled regular expression, it + will be considered equal if it matches against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + for opt in options: + if self._eq(opt, value): + dist = 0.0 + break + else: + dist = 1.0 + self.add(key, dist) + + def add_expr(self, key, expr): + """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0. + """ + if expr: + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_number(self, key, number1, number2): + """Adds a distance penalty of 1.0 for each number of difference between + `number1` and `number2`, or 0.0 when there is no difference. Use this + when there is no upper limit on the difference between the two numbers. 
+ """ + diff = abs(number1 - number2) + if diff: + for i in range(diff): + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_priority(self, key, value, options): + """Adds a distance penalty that corresponds to the position at which + `value` appears in `options`. A distance penalty of 0.0 for the first + option, or 1.0 if there is no matching option. If an option is a + compiled regular expression, it will be considered equal if it matches + against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + unit = 1.0 / (len(options) + 1) + for i, opt in enumerate(options): + if self._eq(opt, value): + dist = i * unit + break + else: + dist = 1.0 + self.add(key, dist) + + def add_ratio(self, key, number1, number2): + """Adds a distance penalty for `number1` as a ratio of `number2`. + `number1` is bound at 0 and `number2`. + """ + number = float(max(min(number1, number2), 0)) + if number2: + dist = number / number2 + else: + dist = 0.0 + self.add(key, dist) + + def add_string(self, key, str1, str2): + """Adds a distance penalty based on the edit distance between `str1` + and `str2`. + """ + dist = string_dist(str1, str2) + self.add(key, dist) + + @property + def distance(self): + """Returns an overall weighted distance across all penalties. + """ + dist = 0.0 + for key, penalty in self.penalties.iteritems(): + dist += sum(penalty) * weights[key].as_number() + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + @property + def max_distance(self): + """Returns the maximum distance penalty. + """ + dist_max = 0.0 + for key, penalty in self.penalties.iteritems(): + dist_max += len(penalty) * weights[key].as_number() + return dist_max + + @property + def sorted(self): + """Returns a list of (dist, key) pairs, with `dist` being the weighted + distance, sorted from highest to lowest. 
+ """ + list_ = [(self[key], key) for key in self.penalties] + return sorted(list_, key=lambda (dist, key): (0-dist, key)) + + def update(self, dist): + """Adds all the distance penalties from `dist`. + """ + if not isinstance(dist, Distance): + raise ValueError( + '`dist` must be a Distance object. It is: %r' % dist) + for key, penalties in dist.penalties.iteritems(): + self.penalties.setdefault(key, []).extend(penalties) + def track_distance(item, track_info, incl_artist=False): """Determines the significance of a track metadata change. Returns a - float in [0.0,1.0]. `incl_artist` indicates that a distance - component should be included for the track artist (i.e., for - various-artist releases). + Distance object. `incl_artist` indicates that a distance component should + be included for the track artist (i.e., for various-artist releases). """ - # Distance and normalization accumulators. - dist, dist_max = 0.0, 0.0 + dist = Distance() - # Check track length. - # If there's no length to check, apply no penalty. + # Length. if track_info.length: diff = abs(item.length - track_info.length) diff = max(diff - weights['track_length_grace'].as_number(), 0.0) diff = min(diff, weights['track_length_max'].as_number()) - dist += (diff / weights['track_length_max'].as_number()) * \ - weights['track_length'].as_number() - dist_max += weights['track_length'].as_number() + dist.add_ratio('track_length', diff, + weights['track_length_max'].as_number()) - # Track title. - dist += string_dist(item.title, track_info.title) * \ - weights['track_title'].as_number() - dist_max += weights['track_title'].as_number() + # Title. + dist.add_string('track_title', item.title, track_info.title) - # Track artist, if included. - # Attention: MB DB does not have artist info for all compilations, - # so only check artist distance if there is actually an artist in - # the MB track data. + # Artist. Only check if there is actually an artist in the track data. 
if incl_artist and track_info.artist and \ item.artist.lower() not in VA_ARTISTS: - dist += string_dist(item.artist, track_info.artist) * \ - weights['track_artist'].as_number() - dist_max += weights['track_artist'].as_number() + dist.add_string('track_artist', item.artist, track_info.artist) # Track index. if track_info.index and item.track: - if track_index_changed(item, track_info): - dist += weights['track_index'].as_number() - dist_max += weights['track_index'].as_number() + dist.add_expr('track_index', track_index_changed(item, track_info)) - # MusicBrainz track ID. + # Track ID. if item.mb_trackid: - if item.mb_trackid != track_info.track_id: - dist += weights['track_id'].as_number() - dist_max += weights['track_id'].as_number() + dist.add_expr('track_id', item.mb_trackid != track_info.track_id) - # Plugin distances. - plugin_d, plugin_dm = plugins.track_distance(item, track_info) - dist += plugin_d - dist_max += plugin_dm + # Plugins. + dist.update(plugins.track_distance(item, track_info)) - return dist / dist_max + return dist def distance(items, album_info, mapping): """Determines how "significant" an album metadata change would be. - Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object + Returns a Distance object. `album_info` is an AlbumInfo object reflecting the album to be compared. `items` is a sequence of all Item objects that will be matched (order is not important). `mapping` is a dictionary mapping Items to TrackInfo objects; the @@ -251,100 +391,89 @@ def distance(items, album_info, mapping): """ likelies, _ = current_metadata(items) - # These accumulate the possible distance components. The final - # distance will be dist/dist_max. - dist = 0.0 - dist_max = 0.0 + dist = Distance() - # Artist/album metadata. + # Artist, if not various. 
if not album_info.va: - dist += string_dist(likelies['artist'], album_info.artist) * \ - weights['artist'].as_number() - dist_max += weights['artist'].as_number() - dist += string_dist(likelies['album'], album_info.album) * \ - weights['album'].as_number() - dist_max += weights['album'].as_number() + dist.add_string('artist', likelies['artist'], album_info.artist) - # Year. No penalty for matching release or original year. - if likelies['year'] and album_info.year: - if likelies['year'] not in (album_info.year, album_info.original_year): - diff = abs(album_info.year - likelies['year']) - if diff: - dist += (1.0 - 1.0 / diff) * weights['year'].as_number() - dist_max += weights['year'].as_number() + # Album. + dist.add_string('album', likelies['album'], album_info.album) - # Actual or preferred media. - preferred_media = config['match']['preferred_media'].get() + # Media. if likelies['media'] and album_info.media: - dist += string_dist(likelies['media'], album_info.media) * \ - weights['media'].as_number() - dist_max += weights['media'].as_number() - elif album_info.media and preferred_media: - dist += string_dist(album_info.media, preferred_media) * \ - weights['media'].as_number() - dist_max += weights['media'].as_number() + dist.add_string('media', likelies['media'], album_info.media) - # MusicBrainz album ID. - if likelies['mb_albumid']: - if likelies['mb_albumid'] != album_info.album_id: - dist += weights['album_id'].as_number() - dist_max += weights['album_id'].as_number() + # Preferred media. + preferred_media = [re.compile(r'(\d+x)?%s' % pattern, re.I) for pattern + in config['match']['preferred']['media'].get()] + if album_info.media and preferred_media: + dist.add_priority('media', album_info.media, preferred_media) - # Apply a small penalty for differences across many minor metadata. This - # helps prioritise releases that are nearly identical. + # Number of discs. 
+ if likelies['disctotal'] and album_info.mediums: + dist.add_number('mediums', likelies['disctotal'], album_info.mediums) - if likelies['disctotal']: - if likelies['disctotal'] != album_info.mediums: - dist += weights['minor'].as_number() - dist_max += weights['minor'].as_number() + # Year. + if likelies['year'] and album_info.year: + # No penalty for matching release or original year. + if likelies['year'] in (album_info.year, album_info.original_year): + dist.add('year', 0.0) + else: + dist.add_number('year', likelies['year'], album_info.year) - if likelies['label'] and album_info.label: - dist += string_dist(likelies['label'], album_info.label) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() - - if likelies['catalognum'] and album_info.catalognum: - dist += string_dist(likelies['catalognum'], - album_info.catalognum) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + # Prefer earlier releases. + if album_info.year and album_info.original_year and \ + config['match']['preferred']['original_year'].get(): + dist.add_number('year', album_info.year, album_info.original_year) + # Country. if likelies['country'] and album_info.country: - dist += string_dist(likelies['country'], - album_info.country) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('country', likelies['country'], album_info.country) + # Preferred countries. + preferred_countries = [re.compile(pattern, re.I) for pattern + in config['match']['preferred']['countries'].get()] + if album_info.country and preferred_countries: + dist.add_priority('country', album_info.country, preferred_countries) + + # Label. + if likelies['label'] and album_info.label: + dist.add_string('label', likelies['label'], album_info.label) + + # Catalog number. + if likelies['catalognum'] and album_info.catalognum: + dist.add_string('catalognum', likelies['catalognum'], + album_info.catalognum) + + # Disambiguation. 
if likelies['albumdisambig'] and album_info.albumdisambig: - dist += string_dist(likelies['albumdisambig'], - album_info.albumdisambig) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('albumdisambig', likelies['albumdisambig'], + album_info.albumdisambig) - # Matched track distances. + # Album ID. + if likelies['mb_albumid']: + dist.add_equality('album_id', likelies['mb_albumid'], + album_info.album_id) + + # Tracks. + dist.tracks = {} for item, track in mapping.iteritems(): - dist += track_distance(item, track, album_info.va) * \ - weights['track'].as_number() - dist_max += weights['track'].as_number() + dist.tracks[track] = track_distance(item, track, album_info.va) + dist.add('tracks', dist.tracks[track].distance) - # Extra and unmatched tracks. - for track in set(album_info.tracks) - set(mapping.values()): - dist += weights['missing'].as_number() - dist_max += weights['missing'].as_number() - for item in set(items) - set(mapping.keys()): - dist += weights['unmatched'].as_number() - dist_max += weights['unmatched'].as_number() + # Missing tracks. + for i in range(len(album_info.tracks) - len(mapping)): + dist.add('missing_tracks', 1.0) - # Plugin distances. - plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping) - dist += plugin_d - dist_max += plugin_dm + # Unmatched tracks. + for i in range(len(items) - len(mapping)): + dist.add('unmatched_tracks', 1.0) - # Normalize distance, avoiding divide-by-zero. - if dist_max == 0.0: - return 0.0 - else: - return dist / dist_max + # Plugins. + dist.update(plugins.album_distance(items, album_info, mapping)) + + return dist def match_by_id(items): """If the items are tagged with a MusicBrainz album ID, returns an @@ -370,8 +499,8 @@ def _recommendation(results): recommendation based on the results' distances. 
If the recommendation is higher than the configured maximum for - certain situations, the recommendation will be downgraded to the - configured maximum. + an applied penalty, the recommendation will be downgraded to the + configured maximum for that penalty. """ if not results: # No candidates: no recommendation. @@ -393,45 +522,20 @@ def _recommendation(results): # Gap between first two candidates is large. rec = recommendation.low else: - # No conclusion. - rec = recommendation.none + # No conclusion. Return immediately. Can't be downgraded any further. + return recommendation.none - # "Downgrades" in certain configured situations. - if isinstance(results[0], hooks.AlbumMatch): - # Load the configured recommendation maxima. - max_rec = {} - for trigger in 'non_mb_source', 'partial', 'tracklength', 'tracknumber': - max_rec[trigger] = \ - config['match']['max_rec'][trigger].as_choice({ - 'strong': recommendation.strong, - 'medium': recommendation.medium, - 'low': recommendation.low, - 'none': recommendation.none, - }) - - # Non-MusicBrainz source. - if rec > max_rec['non_mb_source'] and \ - results[0].info.data_source != 'MusicBrainz': - rec = max_rec['non_mb_source'] - - # Partial match. - if rec > max_rec['partial'] and \ - (results[0].extra_items or results[0].extra_tracks): - rec = max_rec['partial'] - - # Check track number and duration for each item. - for item, track_info in results[0].mapping.items(): - # Track length differs. - if rec > max_rec['tracklength'] and \ - item.length and track_info.length and \ - abs(item.length - track_info.length) > \ - weights['track_length_grace'].as_number(): - rec = max_rec['tracklength'] - - # Track number differs. - if rec > max_rec['tracknumber'] and \ - track_index_changed(item, track_info): - rec = max_rec['tracknumber'] + # Downgrade to the max rec if it is lower than the current rec for an + # applied penalty. 
+ for dist, key in results[0].distance.sorted: + if dist: + max_rec = config['match']['max_rec'][key].as_choice({ + 'strong': recommendation.strong, + 'medium': recommendation.medium, + 'low': recommendation.low, + 'none': recommendation.none, + }) + rec = min(rec, max_rec) return rec @@ -465,7 +569,7 @@ def tag_album(items, search_artist=None, search_album=None, - The current artist. - The current album. - A list of AlbumMatch objects. The candidates are sorted by - distance (i.e., best match first). + distance (i.e., best match first). - A recommendation. If search_artist and search_album or search_id are provided, then they are used as search terms in place of the current metadata. diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 7bbb16a6b..7b9867813 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -68,22 +68,42 @@ match: medium_rec_thresh: 0.25 rec_gap_thresh: 0.25 max_rec: - non_mb_source: strong - partial: medium - tracklength: strong - tracknumber: strong - preferred_media: CD - weight: + source: strong + artist: strong + album: strong + media: strong + mediums: strong + year: strong + country: strong + label: strong + catalognum: strong + albumdisambig: strong + album_id: strong + tracks: strong + missing_tracks: medium + unmatched_tracks: medium + track_title: strong + track_artist: strong + track_index: strong + track_length_grace: strong + track_length_max: strong + track_length: strong + track_id: strong + distance_weights: source: 2.0 artist: 3.0 album: 3.0 - year: 1.0 media: 1.0 + mediums: 1.0 + year: 1.0 + country: 0.5 + label: 0.5 + catalognum: 0.5 + albumdisambig: 0.5 album_id: 5.0 - minor: 0.5 - track: 1.0 - missing: 0.9 - unmatched: 0.6 + tracks: 2.0 + missing_tracks: 0.9 + unmatched_tracks: 0.6 track_title: 3.0 track_artist: 2.0 track_index: 1.0 @@ -91,3 +111,7 @@ match: track_length_max: 30 track_length: 2.0 track_id: 5.0 + preferred: + countries: [] + media: [] + original_year: no diff --git 
a/beets/plugins.py b/beets/plugins.py index 7d49ad3aa..d0c0a9654 100755 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -64,16 +64,16 @@ class BeetsPlugin(object): return {} def track_distance(self, item, info): - """Should return a (distance, distance_max) pair to be added - to the distance value for every track comparison. + """Should return a Distance object to be added to the + distance for every track comparison. """ - return 0.0, 0.0 + return beets.autotag.match.Distance() def album_distance(self, items, album_info, mapping): - """Should return a (distance, distance_max) pair to be added - to the distance value for every album-level comparison. + """Should return a Distance object to be added to the + distance for every album-level comparison. """ - return 0.0, 0.0 + return beets.autotag.match.Distance() def candidates(self, items, artist, album, va_likely): """Should return a sequence of AlbumInfo objects that match the @@ -242,25 +242,19 @@ def queries(): def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. - Returns a (distance, distance_max) pair. + Returns a Distance object. """ - dist = 0.0 - dist_max = 0.0 + dist = beets.autotag.match.Distance() for plugin in find_plugins(): - d, dm = plugin.track_distance(item, info) - dist += d - dist_max += dm - return dist, dist_max + dist.update(plugin.track_distance(item, info)) + return dist def album_distance(items, album_info, mapping): """Returns the album distance calculated by plugins.""" - dist = 0.0 - dist_max = 0.0 + dist = beets.autotag.match.Distance() for plugin in find_plugins(): - d, dm = plugin.album_distance(items, album_info, mapping) - dist += d - dist_max += dm - return dist, dist_max + dist.update(plugin.album_distance(items, album_info, mapping)) + return dist def candidates(items, artist, album, va_likely): """Gets MusicBrainz candidates for an album from each plugin. 
diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 9e42751ab..e306256d4 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -125,14 +125,14 @@ default_commands.append(fields_cmd) VARIOUS_ARTISTS = u'Various Artists' -PARTIAL_MATCH_MESSAGE = u'(partial match!)' - # Importer utilities and support. def disambig_string(info): - """Returns label, year and media disambiguation, if available. + """Returns source, media, year, country, and album disambiguation. """ disambig = [] + if info.data_source != 'MusicBrainz': + disambig.append(info.data_source) if info.media: if info.mediums > 1: disambig.append(u'{0}x{1}'.format( @@ -163,26 +163,35 @@ def dist_string(dist): out = ui.colorize('red', out) return out +def penalty_string(distance, limit=None): + """Returns a colorized string that indicates all the penalties applied to + a distance object. + """ + penalties = [] + for dist, key in distance.sorted: + if dist: + key = key.replace('album_', '') + key = key.replace('track_', '') + key = key.replace('_', ' ') + penalties.append(key) + if penalties: + if limit and len(penalties) > limit: + penalties = penalties[:limit] + ['...'] + return ui.colorize('yellow', '(%s)' % ', '.join(penalties)) + def show_change(cur_artist, cur_album, match): """Print out a representation of the changes that will be made if an album's tags are changed according to `match`, which must be an AlbumMatch object. """ - def show_album(artist, album, partial=False): + def show_album(artist, album): if artist: album_description = u' %s - %s' % (artist, album) elif album: album_description = u' %s' % album else: album_description = u' (unknown album)' - - out = album_description - - # Add a suffix if this is a partial match. 
- if partial: - out += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) - - print_(out) + print_(album_description) def format_index(track_info): """Return a string representing the track index of the given @@ -223,11 +232,7 @@ def show_change(cur_artist, cur_album, match): print_("To:") show_album(artist_r, album_r) else: - message = u"Tagging:\n %s - %s" % (match.info.artist, - match.info.album) - if match.extra_items or match.extra_tracks: - message += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) - print_(message) + print_(u"Tagging:\n %s - %s" % (match.info.artist, match.info.album)) # Data URL. if match.info.data_url: @@ -235,9 +240,13 @@ def show_change(cur_artist, cur_album, match): # Info line. info = [] + # Similarity. info.append('(Similarity: %s)' % dist_string(match.distance)) - if match.info.data_source != 'MusicBrainz': - info.append(ui.colorize('turquoise', '(%s)' % match.info.data_source)) + # Penalties. + penalties = penalty_string(match.distance) + if penalties: + info.append(penalties) + # Disambiguation. disambig = disambig_string(match.info) if disambig: info.append(ui.colorize('lightgray', '(%s)' % disambig)) @@ -315,18 +324,10 @@ def show_change(cur_artist, cur_album, match): rhs += templ.format(rhs_length) lhs_width += len(cur_length) + 3 - # Hidden penalties. No LHS/RHS diff is displayed, but we still want to - # indicate that a penalty has been applied to explain the similarity - # score. - penalties = [] - if match.info.va and track_info.artist and \ - item.artist.lower() not in VA_ARTISTS: - penalties.append('artist') - if item.mb_trackid and item.mb_trackid != track_info.track_id: - penalties.append('ID') + # Penalties. 
+ penalties = penalty_string(match.distance.tracks[track_info]) if penalties: - rhs += ' %s' % ui.colorize('red', - '(%s)' % ', '.join(penalties)) + rhs += ' %s' % penalties if lhs != rhs: lines.append((' * %s' % lhs, rhs, lhs_width)) @@ -489,20 +490,17 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, (cur_artist, cur_album)) print_('Candidates:') for i, match in enumerate(candidates): + # Artist, album and distance. line = ['%i. %s - %s (%s)' % (i + 1, match.info.artist, match.info.album, dist_string(match.distance))] - # Point out the partial matches. - if match.extra_items or match.extra_tracks: - line.append(ui.colorize('yellow', - PARTIAL_MATCH_MESSAGE)) - - # Sources other than MusicBrainz. - source = match.info.data_source - if source != 'MusicBrainz': - line.append(ui.colorize('turquoise', '(%s)' % source)) + # Penalties. + penalties = penalty_string(match.distance, 3) + if penalties: + line.append(penalties) + # Disambiguation disambig = disambig_string(match.info) if disambig: line.append(ui.colorize('lightgray', '(%s)' % disambig)) diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 08a78e3af..006f85db0 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -21,6 +21,7 @@ from beets import util from beets import config from beets.util import confit from beets.autotag import hooks +from beets.autotag.match import Distance import acoustid import logging from collections import defaultdict @@ -113,16 +114,14 @@ def _all_releases(items): class AcoustidPlugin(plugins.BeetsPlugin): def track_distance(self, item, info): + dist = Distance() if item.path not in _matches or not info.track_id: # Match failed or no track ID. 
- return 0.0, 0.0 + return dist recording_ids, _ = _matches[item.path] - if info.track_id in recording_ids: - dist = 0.0 - else: - dist = TRACK_ID_WEIGHT - return dist, TRACK_ID_WEIGHT + dist.add_expr('track_id', info.track_id not in recording_ids) + return dist def candidates(self, items, artist, album, va_likely): albums = [] diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index bb8d37146..822ed59e3 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -17,7 +17,7 @@ discogs-client library. """ from beets import config from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.autotag.match import current_metadata, VA_ARTISTS +from beets.autotag.match import current_metadata, Distance, VA_ARTISTS from beets.plugins import BeetsPlugin from discogs_client import Artist, DiscogsAPIError, Release, Search import beets @@ -44,14 +44,12 @@ class DiscogsPlugin(BeetsPlugin): }) def album_distance(self, items, album_info, mapping): - """Returns the discogs source weight and the maximum source weight. + """Returns the album distance. """ + dist = Distance() if album_info.data_source == 'Discogs': - return self.config['source_weight'].as_number() * \ - config['match']['weight']['source'].as_number(), \ - config['match']['weight']['source'].as_number() - else: - return 0.0, 0.0 + dist.add('source', self.config['source_weight'].as_number()) + return dist def candidates(self, items, artist, album, va_likely): """Returns a list of AlbumInfo objects for discogs search results diff --git a/docs/changelog.rst b/docs/changelog.rst index 0f8b08b51..527982190 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -53,30 +53,36 @@ Changelog None. * Various UI enhancements to the importer due to Tai Lee: - * More consistent format and colorization of album and track metadata. - * Display data source URL for :doc:`/plugins/discogs` matches. This should - make it easier for people who would rather import and correct data from - Discogs into MusicBrainz. 
+ * Display data source URL and source name in album disambiguation for + non-MusicBrainz matches. This should make it easier for people who want to + import and correct data from other sources into MusicBrainz. + * The top 3 distance penalties are now displayed on the release listing, + and all album and track penalties are now displayed on the track changes + list. This should make it clear exactly which metadata is contributing to a + low similarity score. * Display album disambiguation and disc titles in the track listing, when available. - * Track changes highlighted in yellow indicate a change in format to or from - :ref:`per_disc_numbering`. No penalty is applied because the track number - is still "correct", just in a different format. + * More consistent format and colorization of album and track metadata. + * Track changes highlighted in turquoise indicate a change in format to or + from :ref:`per_disc_numbering`. No penalty is applied because the track + number is still "correct", just in a different format. * Sort missing and unmatched tracks by index and title and group them together for better readability. - * Indicate MusicBrainz ID mismatches. -* Improve calculation of similarity score: +* Improve calculation of similarity score and recommendation: + * It is now possible to configure a :ref:`max_rec` for any field that is used + to calculate the similarity score. The recommendation will be downgraded if + a penalty is being applied to the specified field. * Strongly prefer releases with a matching MusicBrainz album ID. This helps beets re-identify the same release when re-importing existing files. * Prefer releases that are closest to the tagged ``year``. Tolerate files tagged with release or original year. - * Prefer CD releases by default, when there is no ``media`` tagged in the - files being imported. This can be changed with the :ref:`preferred_media` - setting. 
- * Apply minor penalties across a range of fields to differentiate between - nearly identical releases: ``disctotal``, ``label``, ``catalognum``, + * Add a :ref:`preferred` collection of settings, which allow the user to + specify a sorted list of preferred countries and media types, or prefer + releases closest to the original year for an album. + * Apply minor distance penalties across a range of fields to differentiate + between nearly identical releases: ``mediums``, ``label``, ``catalognum``, ``country`` and ``albumdisambig``. .. _Discogs: http://discogs.com/ diff --git a/docs/reference/config.rst b/docs/reference/config.rst index d23db6b02..ec194afde 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -394,40 +394,65 @@ max_rec As mentioned above, autotagger matches have *recommendations* that control how the UI behaves for a certain quality of match. The recommendation for a certain -match is usually based on the distance calculation. But you can also control -the recommendation for certain specific situations by defining *maximum* -recommendations when: +match is based on the overall distance calculation. But you can also control +the recommendation when a distance penalty is being applied for a specific +field by defining *maximum* recommendations for each field: -* a match came from a source other than MusicBrainz (e.g., the - :doc:`Discogs ` plugin); -* a match has missing or extra tracks; -* the length (duration) of at least one track differs; or -* at least one track number differs. - -To define maxima, use keys under ``max_rec:`` in the ``match`` section:: +To define maxima, use keys under ``max_rec:`` in the ``match`` section. 
Here +are the defaults:: match: max_rec: - non_mb_source: strong - partial: medium - tracklength: strong - tracknumber: strong + source: strong + artist: strong + album: strong + media: strong + mediums: strong + year: strong + country: strong + label: strong + catalognum: strong + albumdisambig: strong + album_id: strong + tracks: strong + missing_tracks: medium + unmatched_tracks: medium + track_title: strong + track_artist: strong + track_index: strong + track_length_grace: strong + track_length_max: strong + track_length: strong + track_id: strong -If a recommendation is higher than the configured maximum and the condition is -met, the recommendation will be downgraded. The maximum for each condition can -be one of ``none``, ``low``, ``medium`` or ``strong``. When the maximum -recommendation is ``strong``, no "downgrading" occurs for that situation. +If a recommendation is higher than the configured maximum and a penalty is +being applied, the recommendation will be downgraded. The maximum for each +field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the +maximum recommendation is ``strong``, no "downgrading" occurs. -The above example shows the default ``max_rec`` settings. +.. _preferred: -.. _preferred_media: +preferred +~~~~~~~~~ -preferred_media -~~~~~~~~~~~~~~~ +In addition to comparing the tagged metadata with the match metadata for +similarity, you can also specify an ordered list of preferred countries and +media types. A distance penalty will be applied if the country or media type +from the match metadata doesn't match. The order is important, the first item +will be most preferred. -When comparing files that have no ``media`` tagged, prefer releases that more -closely resemble this media (using a string distance). When files are already -tagged with media, this setting is ignored. Default: ``CD``. +You can also tell the autotagger to prefer matches that have a release year +closest to the original year for an album. 
+ +Here's an example:: + + match: + preferred: + countries: ['US', 'GB', 'UK'] + media: ['CD', 'Digital Media'] + original_year: yes + +By default, none of these options are enabled. .. _path-format-config: diff --git a/test/test_autotag.py b/test/test_autotag.py index 1a6188e7c..92088a7b8 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -23,6 +23,7 @@ import _common from _common import unittest from beets import autotag from beets.autotag import match +from beets.autotag.match import Distance from beets.library import Item from beets.util import plurality from beets.autotag import AlbumInfo, TrackInfo @@ -105,6 +106,127 @@ def _make_trackinfo(): TrackInfo(u'three', None, u'some artist', length=1, index=3), ] +class DistanceTest(unittest.TestCase): + def setUp(self): + self.dist = Distance() + + def test_add(self): + self.dist.add('add', 1.0) + self.assertEqual(self.dist.penalties, {'add': [1.0]}) + + def test_add_equality(self): + self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi']) + self.assertEqual(self.dist.penalties['equality'], [0.0]) + + self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi']) + self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0]) + + self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I)) + self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0, 0.0]) + + def test_add_expr(self): + self.dist.add_expr('expr', True) + self.assertEqual(self.dist.penalties['expr'], [1.0]) + + self.dist.add_expr('expr', False) + self.assertEqual(self.dist.penalties['expr'], [1.0, 0.0]) + + def test_add_number(self): + # Add a full penalty for each number of difference between two numbers. 
+ + self.dist.add_number('number', 1, 1) + self.assertEqual(self.dist.penalties['number'], [0.0]) + + self.dist.add_number('number', 1, 2) + self.assertEqual(self.dist.penalties['number'], [0.0, 1.0]) + + self.dist.add_number('number', 2, 1) + self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0]) + + self.dist.add_number('number', -1, 2) + self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0, 1.0, + 1.0, 1.0]) + + def test_add_priority(self): + self.dist.add_priority('priority', 'abc', 'abc') + self.assertEqual(self.dist.penalties['priority'], [0.0]) + + self.dist.add_priority('priority', 'def', ['abc', 'def', 'ghi']) + self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25]) + + self.dist.add_priority('priority', 'ghi', ['abc', 'def', + re.compile('GHI', re.I)]) + self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5]) + + self.dist.add_priority('priority', 'xyz', ['abc', 'def']) + self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5, 1.0]) + + def test_add_ratio(self): + self.dist.add_ratio('ratio', 25, 100) + self.assertEqual(self.dist.penalties['ratio'], [0.25]) + + self.dist.add_ratio('ratio', 10, 5) + self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0]) + + self.dist.add_ratio('ratio', -5, 5) + self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0]) + + self.dist.add_ratio('ratio', 5, 0) + self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) + + def test_add_string(self): + dist = match.string_dist(u'abc', u'bcd') + self.dist.add_string('string', u'abc', u'bcd') + self.assertEqual(self.dist.penalties['string'], [dist]) + + def test_distance(self): + config['match']['distance_weights']['album'] = 2.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('album', 0.5) + self.dist.add('media', 0.25) + self.dist.add('media', 0.75) + self.assertEqual(self.dist.distance, 0.5) + + # __getitem__() + self.assertEqual(self.dist['album'], 0.25) + 
self.assertEqual(self.dist['media'], 0.25) + + def test_max_distance(self): + config['match']['distance_weights']['album'] = 3.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('album', 0.5) + self.dist.add('medium', 0.0) + self.dist.add('medium', 0.0) + self.assertEqual(self.dist.max_distance, 5.0) + + def test_sorted(self): + config['match']['distance_weights']['album'] = 4.0 + config['match']['distance_weights']['medium'] = 2.0 + + self.dist.add('album', 0.1875) + self.dist.add('medium', 0.75) + self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')]) + + # Sort by key if distance is equal. + dist = Distance() + dist.add('album', 0.375) + dist.add('medium', 0.75) + self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')]) + + def test_update(self): + self.dist.add('album', 0.5) + self.dist.add('media', 1.0) + + dist = Distance() + dist.add('album', 0.75) + dist.add('album', 0.25) + self.dist.add('media', 0.05) + + self.dist.update(dist) + + self.assertEqual(self.dist.penalties, {'album': [0.5, 0.75, 0.25], + 'media': [1.0, 0.05]}) + class TrackDistanceTest(unittest.TestCase): def test_identical_tracks(self): item = _make_item(u'one', 1) diff --git a/test/test_ui.py b/test/test_ui.py index b679021f7..bfdd53ddd 100644 --- a/test/test_ui.py +++ b/test/test_ui.py @@ -27,6 +27,7 @@ from beets import library from beets import ui from beets.ui import commands from beets import autotag +from beets.autotag.match import distance from beets import importer from beets.mediafile import MediaFile from beets import config @@ -594,21 +595,23 @@ class ShowChangeTest(_common.TestCase): self.items[0].track = 1 self.items[0].path = '/path/to/file.mp3' self.info = autotag.AlbumInfo( - 'the album', 'album id', 'the artist', 'artist id', [ - autotag.TrackInfo('the title', 'track id', index=1) + u'the album', u'album id', u'the artist', u'artist id', [ + autotag.TrackInfo(u'the title', u'track id', index=1) ]) def _show_change(self, 
items=None, info=None, - cur_artist='the artist', cur_album='the album', + cur_artist=u'the artist', cur_album=u'the album', dist=0.1): items = items or self.items info = info or self.info mapping = dict(zip(items, info.tracks)) config['color'] = False + album_dist = distance(items, info, mapping) + album_dist.penalties = {'album': [dist]} commands.show_change( cur_artist, cur_album, - autotag.AlbumMatch(0.1, info, mapping, set(), set()), + autotag.AlbumMatch(album_dist, info, mapping, set(), set()), ) return self.io.getoutput().lower() @@ -623,7 +626,7 @@ class ShowChangeTest(_common.TestCase): self.assertTrue('correcting tags from:' in msg) def test_item_data_change(self): - self.items[0].title = 'different' + self.items[0].title = u'different' msg = self._show_change() self.assertTrue('different -> the title' in msg) @@ -638,12 +641,12 @@ class ShowChangeTest(_common.TestCase): self.assertTrue('correcting tags from:' in msg) def test_item_data_change_title_missing(self): - self.items[0].title = '' + self.items[0].title = u'' msg = re.sub(r' +', ' ', self._show_change()) self.assertTrue('file.mp3 -> the title' in msg) def test_item_data_change_title_missing_with_unicode_filename(self): - self.items[0].title = '' + self.items[0].title = u'' self.items[0].path = u'/path/to/caf\xe9.mp3'.encode('utf8') msg = re.sub(r' +', ' ', self._show_change().decode('utf8')) self.assertTrue(u'caf\xe9.mp3 -> the title' in msg From 4de5d36b71bc8355476091d08611a148d6c44fbb Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Sun, 2 Jun 2013 22:29:48 +1000 Subject: [PATCH 02/21] Use `add_ratio()` for year penalties, with the difference between now and the original year as the max. 
--- beets/autotag/match.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 7f2f01c56..bbfae4134 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -420,12 +420,17 @@ def distance(items, album_info, mapping): if likelies['year'] in (album_info.year, album_info.original_year): dist.add('year', 0.0) else: - dist.add_number('year', likelies['year'], album_info.year) + diff = abs(likelies['year'] - album_info.year) + diff_max = abs(datetime.date.today().year - + album_info.original_year) + dist.add_ratio('year', diff, diff_max) # Prefer earlier releases. if album_info.year and album_info.original_year and \ config['match']['preferred']['original_year'].get(): - dist.add_number('year', album_info.year, album_info.original_year) + diff = abs(album_info.year - album_info.original_year) + diff_max = abs(datetime.date.today().year - album_info.original_year) + dist.add_ratio('year', diff, diff_max) # Country. if likelies['country'] and album_info.country: From 083575314d45e18978ea40c8626aa6c8ef68b91e Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Sun, 2 Jun 2013 22:31:28 +1000 Subject: [PATCH 03/21] Remove redundant max/min calculations for track length distance. `add_ratio()` already does this. --- beets/autotag/match.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index bbfae4134..4f59680e8 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -353,9 +353,8 @@ def track_distance(item, track_info, incl_artist=False): # Length. 
if track_info.length: - diff = abs(item.length - track_info.length) - diff = max(diff - weights['track_length_grace'].as_number(), 0.0) - diff = min(diff, weights['track_length_max'].as_number()) + diff = abs(item.length - track_info.length) - \ + weights['track_length_grace'].as_number() dist.add_ratio('track_length', diff, weights['track_length_max'].as_number()) From 3254f2f3b0b63e594cabb6409507e214955e15b9 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Sun, 2 Jun 2013 22:53:53 +1000 Subject: [PATCH 04/21] Don't assume all releases know the original year. Use `add_ratio()` if they do, otherwise apply full penalty with `add()`. --- beets/autotag/match.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 4f59680e8..8762d3bf9 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -415,14 +415,18 @@ def distance(items, album_info, mapping): # Year. if likelies['year'] and album_info.year: - # No penalty for matching release or original year. if likelies['year'] in (album_info.year, album_info.original_year): + # No penalty for matching release or original year. dist.add('year', 0.0) - else: + elif album_info.original_year: + # Prefer matchest closest to the release year. diff = abs(likelies['year'] - album_info.year) diff_max = abs(datetime.date.today().year - album_info.original_year) dist.add_ratio('year', diff, diff_max) + else: + # Full penalty when there is no original year. + dist.add('year', 1.0) # Prefer earlier releases. if album_info.year and album_info.original_year and \ From 1b5d3c057f0100049327b581897869f078857eab Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Sun, 2 Jun 2013 22:54:48 +1000 Subject: [PATCH 05/21] Code style. 
--- beets/autotag/match.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 8762d3bf9..0d270d238 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -409,7 +409,7 @@ def distance(items, album_info, mapping): if album_info.media and preferred_media: dist.add_priority('media', album_info.media, preferred_media) - # Number of discs. + # Mediums. if likelies['disctotal'] and album_info.mediums: dist.add_number('mediums', likelies['disctotal'], album_info.mediums) @@ -430,7 +430,7 @@ def distance(items, album_info, mapping): # Prefer earlier releases. if album_info.year and album_info.original_year and \ - config['match']['preferred']['original_year'].get(): + config['match']['preferred']['original_year']: diff = abs(album_info.year - album_info.original_year) diff_max = abs(datetime.date.today().year - album_info.original_year) dist.add_ratio('year', diff, diff_max) From f6492e68eee8fbdaa67a02345291a5edfab33500 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Sun, 2 Jun 2013 23:16:28 +1000 Subject: [PATCH 06/21] Doc string update. --- beets/ui/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index e306256d4..18b539083 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -128,7 +128,7 @@ VARIOUS_ARTISTS = u'Various Artists' # Importer utilities and support. def disambig_string(info): - """Returns source, media, year, country, and album disambiguation. + """Returns source, media, year, country, label and album disambiguation. """ disambig = [] if info.data_source != 'MusicBrainz': From 51f40d26dc2812d4834799ae44e500195e3bbd3b Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 00:04:45 +1000 Subject: [PATCH 07/21] Rename `Distance.penalties` to `Distance._penalties`. It should be private. 
--- beets/autotag/match.py | 16 +++++++------- test/test_autotag.py | 47 +++++++++++++++++++++--------------------- test/test_ui.py | 2 +- 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 0d270d238..d6eab98d5 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -201,14 +201,14 @@ class Distance(object): def __getitem__(self, key): """Returns the weighted distance for a named penalty. """ - dist = sum(self.penalties[key]) * weights[key].as_number() + dist = sum(self._penalties[key]) * weights[key].as_number() dist_max = self.max_distance if dist_max: return dist / dist_max return 0.0 def __init__(self): - self.penalties = {} + self._penalties = {} def __sub__(self, other): return self.distance - other @@ -233,7 +233,7 @@ class Distance(object): if not 0.0 <= dist <= 1.0: raise ValueError( '`dist` must be between 0.0 and 1.0. It is: %r' % dist) - self.penalties.setdefault(key, []).append(dist) + self._penalties.setdefault(key, []).append(dist) def add_equality(self, key, value, options): """Adds a distance penalty of 1.0 if `value` doesn't match any of the @@ -311,7 +311,7 @@ class Distance(object): """Returns an overall weighted distance across all penalties. """ dist = 0.0 - for key, penalty in self.penalties.iteritems(): + for key, penalty in self._penalties.iteritems(): dist += sum(penalty) * weights[key].as_number() dist_max = self.max_distance if dist_max: @@ -323,7 +323,7 @@ class Distance(object): """Returns the maximum distance penalty. """ dist_max = 0.0 - for key, penalty in self.penalties.iteritems(): + for key, penalty in self._penalties.iteritems(): dist_max += len(penalty) * weights[key].as_number() return dist_max @@ -332,7 +332,7 @@ class Distance(object): """Returns a list of (dist, key) pairs, with `dist` being the weighted distance, sorted from highest to lowest. 
""" - list_ = [(self[key], key) for key in self.penalties] + list_ = [(self[key], key) for key in self._penalties] return sorted(list_, key=lambda (dist, key): (0-dist, key)) def update(self, dist): @@ -341,8 +341,8 @@ class Distance(object): if not isinstance(dist, Distance): raise ValueError( '`dist` must be a Distance object. It is: %r' % dist) - for key, penalties in dist.penalties.iteritems(): - self.penalties.setdefault(key, []).extend(penalties) + for key, penalties in dist._penalties.iteritems(): + self._penalties.setdefault(key, []).extend(penalties) def track_distance(item, track_info, incl_artist=False): """Determines the significance of a track metadata change. Returns a diff --git a/test/test_autotag.py b/test/test_autotag.py index 92088a7b8..b257f62c9 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -112,72 +112,73 @@ class DistanceTest(unittest.TestCase): def test_add(self): self.dist.add('add', 1.0) - self.assertEqual(self.dist.penalties, {'add': [1.0]}) + self.assertEqual(self.dist._penalties, {'add': [1.0]}) def test_add_equality(self): self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi']) - self.assertEqual(self.dist.penalties['equality'], [0.0]) + self.assertEqual(self.dist._penalties['equality'], [0.0]) self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi']) - self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0]) + self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0]) self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I)) - self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0, 0.0]) + self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0]) def test_add_expr(self): self.dist.add_expr('expr', True) - self.assertEqual(self.dist.penalties['expr'], [1.0]) + self.assertEqual(self.dist._penalties['expr'], [1.0]) self.dist.add_expr('expr', False) - self.assertEqual(self.dist.penalties['expr'], [1.0, 0.0]) + self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0]) def 
test_add_number(self): # Add a full penalty for each number of difference between two numbers. self.dist.add_number('number', 1, 1) - self.assertEqual(self.dist.penalties['number'], [0.0]) + self.assertEqual(self.dist._penalties['number'], [0.0]) self.dist.add_number('number', 1, 2) - self.assertEqual(self.dist.penalties['number'], [0.0, 1.0]) + self.assertEqual(self.dist._penalties['number'], [0.0, 1.0]) self.dist.add_number('number', 2, 1) - self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0]) + self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0]) self.dist.add_number('number', -1, 2) - self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0, 1.0, - 1.0, 1.0]) + self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0, + 1.0, 1.0]) def test_add_priority(self): self.dist.add_priority('priority', 'abc', 'abc') - self.assertEqual(self.dist.penalties['priority'], [0.0]) + self.assertEqual(self.dist._penalties['priority'], [0.0]) self.dist.add_priority('priority', 'def', ['abc', 'def', 'ghi']) - self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25]) + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.25]) self.dist.add_priority('priority', 'ghi', ['abc', 'def', - re.compile('GHI', re.I)]) - self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5]) + re.compile('GHI', re.I)]) + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.25, 0.5]) self.dist.add_priority('priority', 'xyz', ['abc', 'def']) - self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5, 1.0]) + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.25, 0.5, + 1.0]) def test_add_ratio(self): self.dist.add_ratio('ratio', 25, 100) - self.assertEqual(self.dist.penalties['ratio'], [0.25]) + self.assertEqual(self.dist._penalties['ratio'], [0.25]) self.dist.add_ratio('ratio', 10, 5) - self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0]) + self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0]) 
self.dist.add_ratio('ratio', -5, 5) - self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0]) + self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0]) self.dist.add_ratio('ratio', 5, 0) - self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) + self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) def test_add_string(self): dist = match.string_dist(u'abc', u'bcd') self.dist.add_string('string', u'abc', u'bcd') - self.assertEqual(self.dist.penalties['string'], [dist]) + self.assertEqual(self.dist._penalties['string'], [dist]) def test_distance(self): config['match']['distance_weights']['album'] = 2.0 @@ -224,8 +225,8 @@ class DistanceTest(unittest.TestCase): self.dist.update(dist) - self.assertEqual(self.dist.penalties, {'album': [0.5, 0.75, 0.25], - 'media': [1.0, 0.05]}) + self.assertEqual(self.dist._penalties, {'album': [0.5, 0.75, 0.25], + 'media': [1.0, 0.05]}) class TrackDistanceTest(unittest.TestCase): def test_identical_tracks(self): diff --git a/test/test_ui.py b/test/test_ui.py index bfdd53ddd..6cb09dcf1 100644 --- a/test/test_ui.py +++ b/test/test_ui.py @@ -607,7 +607,7 @@ class ShowChangeTest(_common.TestCase): mapping = dict(zip(items, info.tracks)) config['color'] = False album_dist = distance(items, info, mapping) - album_dist.penalties = {'album': [dist]} + album_dist._penalties = {'album': [dist]} commands.show_change( cur_artist, cur_album, From ac4e86981fe20e1c9ab3f20128d35180de1353f7 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 00:07:20 +1000 Subject: [PATCH 08/21] Add `Distance.raw_distance`, to complement `max_distance`.
--- beets/autotag/match.py | 16 +++++++++++----- test/test_autotag.py | 8 ++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index d6eab98d5..a5d5fce14 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -308,14 +308,11 @@ class Distance(object): @property def distance(self): - """Returns an overall weighted distance across all penalties. + """Returns a weighted and normalised distance across all penalties. """ - dist = 0.0 - for key, penalty in self._penalties.iteritems(): - dist += sum(penalty) * weights[key].as_number() dist_max = self.max_distance if dist_max: - return dist / dist_max + return self.raw_distance / self.max_distance return 0.0 @property @@ -327,6 +324,15 @@ class Distance(object): dist_max += len(penalty) * weights[key].as_number() return dist_max + @property + def raw_distance(self): + """Returns the raw (denormalised) distance. + """ + dist_raw = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_raw += sum(penalty) * weights[key].as_number() + return dist_raw + @property def sorted(self): """Returns a list of (dist, key) pairs, with `dist` being the weighted diff --git a/test/test_autotag.py b/test/test_autotag.py index b257f62c9..f2dcbbc28 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -200,6 +200,14 @@ class DistanceTest(unittest.TestCase): self.dist.add('medium', 0.0) self.assertEqual(self.dist.max_distance, 5.0) + def test_raw_distance(self): + config['match']['distance_weights']['album'] = 3.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('album', 0.5) + self.dist.add('medium', 0.25) + self.dist.add('medium', 0.5) + self.assertEqual(self.dist.raw_distance, 2.25) + def test_sorted(self): config['match']['distance_weights']['album'] = 4.0 config['match']['distance_weights']['medium'] = 2.0 From 809ea8c7f9f9d523078ad04da145e5076b28345f Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 00:20:19 
+1000 Subject: [PATCH 09/21] Exclude zero value penalties from `Distance.sorted`. --- beets/autotag/match.py | 24 ++++++++++++++---------- beets/ui/commands.py | 9 ++++----- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index a5d5fce14..2f226cef4 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -336,9 +336,14 @@ class Distance(object): @property def sorted(self): """Returns a list of (dist, key) pairs, with `dist` being the weighted - distance, sorted from highest to lowest. + distance, sorted from highest to lowest. Does not include penalties + with a zero value. """ - list_ = [(self[key], key) for key in self._penalties] + list_ = [] + for key in self._penalties: + dist = self[key] + if dist: + list_.append((dist, key)) return sorted(list_, key=lambda (dist, key): (0-dist, key)) def update(self, dist): @@ -542,14 +547,13 @@ def _recommendation(results): # Downgrade to the max rec if it is lower than the current rec for an # applied penalty. 
for dist, key in results[0].distance.sorted: - if dist: - max_rec = config['match']['max_rec'][key].as_choice({ - 'strong': recommendation.strong, - 'medium': recommendation.medium, - 'low': recommendation.low, - 'none': recommendation.none, - }) - rec = min(rec, max_rec) + max_rec = config['match']['max_rec'][key].as_choice({ + 'strong': recommendation.strong, + 'medium': recommendation.medium, + 'low': recommendation.low, + 'none': recommendation.none, + }) + rec = min(rec, max_rec) return rec diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 18b539083..a3d4d8cdd 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -169,11 +169,10 @@ def penalty_string(distance, limit=None): """ penalties = [] for dist, key in distance.sorted: - if dist: - key = key.replace('album_', '') - key = key.replace('track_', '') - key = key.replace('_', ' ') - penalties.append(key) + key = key.replace('album_', '') + key = key.replace('track_', '') + key = key.replace('_', ' ') + penalties.append(key) if penalties: if limit and len(penalties) > limit: penalties = penalties[:limit] + ['...'] From 45dc99f1a9dd9ff8cf65bcd04135832ccd97a7ac Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 00:25:31 +1000 Subject: [PATCH 10/21] Group preferred media patterns, in case they contain "|" to keep them separate from the number of media. --- beets/autotag/match.py | 2 +- docs/reference/config.rst | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 2f226cef4..a44784c07 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -415,7 +415,7 @@ def distance(items, album_info, mapping): dist.add_string('media', likelies['media'], album_info.media) # Preferred media. 
- preferred_media = [re.compile(r'(\d+x)?%s' % pattern, re.I) for pattern + preferred_media = [re.compile(r'(\d+x)?(%s)' % pattern, re.I) for pattern in config['match']['preferred']['media'].get()] if album_info.media and preferred_media: dist.add_priority('media', album_info.media, preferred_media) diff --git a/docs/reference/config.rst b/docs/reference/config.rst index ec194afde..c9f36c5e1 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -437,9 +437,13 @@ preferred In addition to comparing the tagged metadata with the match metadata for similarity, you can also specify an ordered list of preferred countries and -media types. A distance penalty will be applied if the country or media type -from the match metadata doesn't match. The order is important, the first item -will be most preferred. +media types. + +A distance penalty will be applied if the country or media type from the match +metadata doesn't match. The order is important, the first item will be most +preferred. Each item may be a regular expression, and will be matched case +insensitively. The number of media will be stripped when matching preferred +media (e.g. "2x" in "2xCD"). You can also tell the autotagger to prefer matches that have a release year closest to the original year for an album. @@ -448,8 +452,8 @@ Here's an example:: match: preferred: - countries: ['US', 'GB', 'UK'] - media: ['CD', 'Digital Media'] + countries: ['US', 'GB|UK'] + media: ['CD', 'Digital Media|File'] original_year: yes By default, none of these options are enabled. From f3545860da0118c91c0c802b8b905f5554589a75 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 00:35:32 +1000 Subject: [PATCH 11/21] Add `ignored` setting. Don't show matches with specified penalties applied, e.g. missing tracks or unmatched tracks. If you know you never want these, they can clutter up the interface especially now that we have multiple data sources. 
--- beets/autotag/match.py | 9 ++++++++- beets/config_default.yaml | 1 + docs/changelog.rst | 2 ++ docs/reference/config.rst | 11 +++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index a44784c07..c2a2372ca 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -575,8 +575,15 @@ def _add_candidate(items, results, info): # Get the change distance. dist = distance(items, info, mapping) - log.debug('Success. Distance: %f' % dist) + # Skip matches with ignored penalties. + penalties = [key for _, key in dist.sorted] + for penalty in config['match']['ignored'].as_str_seq(): + if penalty in penalties: + log.debug('Ignored. Penalty: %s' % penalty) + return + + log.debug('Success. Distance: %f' % dist) results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, extra_items, extra_tracks) diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 7b9867813..44cb51051 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -115,3 +115,4 @@ match: countries: [] media: [] original_year: no + ignored: [] diff --git a/docs/changelog.rst b/docs/changelog.rst index 527982190..4076248d0 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -68,6 +68,8 @@ Changelog number is still "correct", just in a different format. * Sort missing and unmatched tracks by index and title and group them together for better readability. + * Don't show potential matches that have specific penalties applied, as + configured by the :ref:`ignored` setting. * Improve calculation of similarity score and recommendation: diff --git a/docs/reference/config.rst b/docs/reference/config.rst index c9f36c5e1..d320cd655 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -458,6 +458,17 @@ Here's an example:: By default, none of these options are enabled. +.. 
_ignored: + +ignored +~~~~~~~ + +You can completely avoid matches that have certain penalties applied by adding +the penalty name to the ``ignored`` setting:: + + match: + ignored: missing_tracks unmatched_tracks + .. _path-format-config: Path Format Configuration From ad52ede73674489c421053076597da603b186581 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 00:36:01 +1000 Subject: [PATCH 12/21] Code style. Use "_" when expanding variables we don't need. --- beets/autotag/match.py | 2 +- beets/ui/commands.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index c2a2372ca..5c6e4ffed 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -546,7 +546,7 @@ def _recommendation(results): # Downgrade to the max rec if it is lower than the current rec for an # applied penalty. - for dist, key in results[0].distance.sorted: + for _, key in results[0].distance.sorted: max_rec = config['match']['max_rec'][key].as_choice({ 'strong': recommendation.strong, 'medium': recommendation.medium, diff --git a/beets/ui/commands.py b/beets/ui/commands.py index a3d4d8cdd..6bf1db53c 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -168,7 +168,7 @@ def penalty_string(distance, limit=None): a distance object. """ penalties = [] - for dist, key in distance.sorted: + for _, key in distance.sorted: key = key.replace('album_', '') key = key.replace('track_', '') key = key.replace('_', ' ') From 461c3c047c64d60981eea26f274f05f948c5be7d Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 00:46:40 +1000 Subject: [PATCH 13/21] Colour benign track index changes in light gray, consistent with non-penalty supplementary information. 
--- beets/ui/commands.py | 2 +- docs/changelog.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 6bf1db53c..63d1df00d 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -293,7 +293,7 @@ def show_change(cur_artist, cur_album, match): cur_track, new_track = format_index(item), format_index(track_info) if cur_track != new_track: if item.track in (track_info.index, track_info.medium_index): - color = 'yellow' + color = 'lightgray' else: color = 'red' if (cur_track + new_track).count('-') == 1: diff --git a/docs/changelog.rst b/docs/changelog.rst index 4076248d0..9d9cb8403 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -63,7 +63,7 @@ Changelog * Display album disambiguation and disc titles in the track listing, when available. * More consistent format and colorization of album and track metadata. - * Track changes highlighted in turquoise indicate a change in format to or + * Track changes highlighted in light gray indicate a change in format to or from :ref:`per_disc_numbering`. No penalty is applied because the track number is still "correct", just in a different format. * Sort missing and unmatched tracks by index and title and group them From 2c175faa4677da1f4fea87aa024c582a8b3635cb Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 01:08:35 +1000 Subject: [PATCH 14/21] Colorise no-penalty text differences in a secondary colour, light grey. 
--- beets/ui/__init__.py | 14 ++++++++++---- docs/changelog.rst | 4 +++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/beets/ui/__init__.py b/beets/ui/__init__.py index 6789045f1..21d56ad0d 100644 --- a/beets/ui/__init__.py +++ b/beets/ui/__init__.py @@ -37,6 +37,7 @@ from beets.util.functemplate import Template from beets import config from beets.util import confit from beets.autotag import mb +from beets.autotag.match import string_dist # On Windows platforms, use colorama to support "ANSI" terminal colors. @@ -366,7 +367,7 @@ def colorize(color, text): else: return text -def _colordiff(a, b, highlight='red'): +def _colordiff(a, b, highlight='red', second_highlight='lightgray'): """Given two values, return the same pair of strings except with their differences highlighted in the specified color. Strings are highlighted intelligently to show differences; other values are @@ -402,9 +403,14 @@ def _colordiff(a, b, highlight='red'): # Left only. a_out.append(colorize(highlight, a[a_start:a_end])) elif op == 'replace': - # Right and left differ. - a_out.append(colorize(highlight, a[a_start:a_end])) - b_out.append(colorize(highlight, b[b_start:b_end])) + # Right and left differ. Colorise with second highlight if + # there's no distance penalty. + if string_dist(a[a_start:a_end], b[b_start:b_end]): + color = highlight + else: + color = second_highlight + a_out.append(colorize(color, a[a_start:a_end])) + b_out.append(colorize(color, b[b_start:b_end])) else: assert(False) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9d9cb8403..1daa09ef3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -62,7 +62,9 @@ Changelog low similarity score. * Display album disambiguation and disc titles in the track listing, when available. - * More consistent format and colorization of album and track metadata. + * More consistent format and colorization of album and track metadata. 
Red + for actual differences, yellow to indicate that a penalty is being applied, + and light gray for no-penalty supplementary data. * Track changes highlighted in light gray indicate a change in format to or from :ref:`per_disc_numbering`. No penalty is applied because the track number is still "correct", just in a different format. From b02974f68f8e71d526aec16da5c1f74eaa48c7e9 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 01:20:32 +1000 Subject: [PATCH 15/21] Don't bypass candidate selection in timid mode. Always show all candidates. Saves paranoid and interested users from having to either force all max recommendations to none or constantly go back to candidate selection from a recommendation to see if there is another slightly less similar but more preferred (by the user) candidate. --- beets/ui/commands.py | 2 +- docs/changelog.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 63d1df00d..e55068f80 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -466,7 +466,7 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, # Is the change good enough? bypass_candidates = False - if rec != recommendation.none: + if rec != recommendation.none and not config['import']['timid']: match = candidates[0] bypass_candidates = True diff --git a/docs/changelog.rst b/docs/changelog.rst index 1daa09ef3..362bbd5b3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -72,6 +72,7 @@ Changelog together for better readability. * Don't show potential matches that have specific penalties applied, as configured by the :ref:`ignored` setting. + * Don't bypass candidate selection in timid mode. Always show all candidates. 
* Improve calculation of similarity score and recommendation: From c12abb74abb7ffb7c7fcfd79a5d641f8d59cd324 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 12:49:55 +1000 Subject: [PATCH 16/21] Look at track penalties as well when downgrading recommendations for albums. --- beets/autotag/match.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 5c6e4ffed..b1332ddf4 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -546,7 +546,11 @@ def _recommendation(results): # Downgrade to the max rec if it is lower than the current rec for an # applied penalty. - for _, key in results[0].distance.sorted: + keys = set(key for _, key in min_dist.sorted) + if isinstance(results[0], hooks.AlbumMatch): + for track_dist in min_dist.tracks.values(): + keys.update(key for _, key in track_dist.sorted) + for key in keys: max_rec = config['match']['max_rec'][key].as_choice({ 'strong': recommendation.strong, 'medium': recommendation.medium, From 0c27d275f33880ebfcc4387c09810b2f2d67fb20 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 14:31:53 +1000 Subject: [PATCH 17/21] Improve preferred media/country and original year distance calculation. Check only preferred media/country, if specified. Don't apply penalty for preferred AND tagged mismatch. Assume original year is 1889 (first gramophone discs) when we don't know the original year. Allow single values to be specified in configuration, instead of requiring a list (e.g. use `as_str_seq()`). --- beets/autotag/match.py | 47 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index b1332ddf4..813105910 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -410,22 +410,29 @@ def distance(items, album_info, mapping): # Album. dist.add_string('album', likelies['album'], album_info.album) - # Media. 
- if likelies['media'] and album_info.media: - dist.add_string('media', likelies['media'], album_info.media) - # Preferred media. - preferred_media = [re.compile(r'(\d+x)?(%s)' % pattern, re.I) for pattern - in config['match']['preferred']['media'].get()] - if album_info.media and preferred_media: - dist.add_priority('media', album_info.media, preferred_media) + patterns = config['match']['preferred']['media'].as_str_seq() + options = [re.compile(r'(\d+x)?(%s)' % pat, re.I) for pat in patterns] + if album_info.media and options: + dist.add_priority('media', album_info.media, options) + # Media. + elif likelies['media'] and album_info.media: + dist.add_string('media', likelies['media'], album_info.media) # Mediums. if likelies['disctotal'] and album_info.mediums: dist.add_number('mediums', likelies['disctotal'], album_info.mediums) + # Prefer earliest release. + if album_info.year and config['match']['preferred']['original_year']: + # Assume 1889 (earliest first gramophone discs) if we don't know the + # original year. + original = album_info.original_year or 1889 + diff = abs(album_info.year - original) + diff_max = abs(datetime.date.today().year - original) + dist.add_ratio('year', diff, diff_max) # Year. - if likelies['year'] and album_info.year: + elif likelies['year'] and album_info.year: if likelies['year'] in (album_info.year, album_info.original_year): # No penalty for matching release or original year. dist.add('year', 0.0) @@ -439,22 +446,14 @@ def distance(items, album_info, mapping): # Full penalty when there is no original year. dist.add('year', 1.0) - # Prefer earlier releases. - if album_info.year and album_info.original_year and \ - config['match']['preferred']['original_year']: - diff = abs(album_info.year - album_info.original_year) - diff_max = abs(datetime.date.today().year - album_info.original_year) - dist.add_ratio('year', diff, diff_max) - - # Country. 
- if likelies['country'] and album_info.country: - dist.add_string('country', likelies['country'], album_info.country) - # Preferred countries. - preferred_countries = [re.compile(pattern, re.I) for pattern - in config['match']['preferred']['countries'].get()] - if album_info.country and preferred_countries: - dist.add_priority('country', album_info.country, preferred_countries) + patterns = config['match']['preferred']['countries'].as_str_seq() + options = [re.compile(pat, re.I) for pat in patterns] + if album_info.country and options: + dist.add_priority('country', album_info.country, options) + # Country. + elif likelies['country'] and album_info.country: + dist.add_string('country', likelies['country'], album_info.country) # Label. if likelies['label'] and album_info.label: From e92b8bb8fbcca499ca4566c87e524d9e83fcc3a8 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Mon, 3 Jun 2013 14:49:39 +1000 Subject: [PATCH 18/21] Fix `add_priority()` calculation. We were incorrectly adding 1 to the length of options to avoid a divide by zero, when we should instead default the length to 1. Otherwise we skew the penalty towards zero. 
--- beets/autotag/match.py | 2 +- test/test_autotag.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 813105910..ebf781421 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -279,7 +279,7 @@ class Distance(object): """ if not isinstance(options, (list, tuple)): options = [options] - unit = 1.0 / (len(options) + 1) + unit = 1.0 / (len(options) or 1) for i, opt in enumerate(options): if self._eq(opt, value): dist = i * unit diff --git a/test/test_autotag.py b/test/test_autotag.py index f2dcbbc28..c513dc530 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -151,15 +151,15 @@ class DistanceTest(unittest.TestCase): self.dist.add_priority('priority', 'abc', 'abc') self.assertEqual(self.dist._penalties['priority'], [0.0]) - self.dist.add_priority('priority', 'def', ['abc', 'def', 'ghi']) - self.assertEqual(self.dist._penalties['priority'], [0.0, 0.25]) + self.dist.add_priority('priority', 'def', ['abc', 'def']) + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5]) - self.dist.add_priority('priority', 'ghi', ['abc', 'def', - re.compile('GHI', re.I)]) - self.assertEqual(self.dist._penalties['priority'], [0.0, 0.25, 0.5]) + self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef', + re.compile('GH', re.I)]) + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75]) self.dist.add_priority('priority', 'xyz', ['abc', 'def']) - self.assertEqual(self.dist._penalties['priority'], [0.0, 0.25, 0.5, + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75, 1.0]) def test_add_ratio(self): From ea1becfea16289f961faa5e99ab6fe7409d2fdf0 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Thu, 6 Jun 2013 09:51:17 +1000 Subject: [PATCH 19/21] Add `Distance.__iter__()` and `Distance.__len__()`, for convenience. 
--- beets/autotag/match.py | 15 ++++++++++++--- beets/ui/commands.py | 2 +- test/test_autotag.py | 17 +++++++++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index ebf781421..59f0d00f4 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -210,6 +210,12 @@ class Distance(object): def __init__(self): self._penalties = {} + def __iter__(self): + return iter(self.sorted) + + def __len__(self): + return len(self.sorted) + def __sub__(self, other): return self.distance - other @@ -344,6 +350,9 @@ class Distance(object): dist = self[key] if dist: list_.append((dist, key)) + # Convert distance into a negative float so that we can sort items in + # ascending order (for keys, when the penalty is equal) and still get + # the items with the biggest distance first. return sorted(list_, key=lambda (dist, key): (0-dist, key)) def update(self, dist): @@ -545,10 +554,10 @@ def _recommendation(results): # Downgrade to the max rec if it is lower than the current rec for an # applied penalty. - keys = set(key for _, key in min_dist.sorted) + keys = set(key for _, key in min_dist) if isinstance(results[0], hooks.AlbumMatch): for track_dist in min_dist.tracks.values(): - keys.update(key for _, key in track_dist.sorted) + keys.update(key for _, key in track_dist) for key in keys: max_rec = config['match']['max_rec'][key].as_choice({ 'strong': recommendation.strong, @@ -580,7 +589,7 @@ def _add_candidate(items, results, info): dist = distance(items, info, mapping) # Skip matches with ignored penalties. - penalties = [key for _, key in dist.sorted] + penalties = [key for _, key in dist] for penalty in config['match']['ignored'].as_str_seq(): if penalty in penalties: log.debug('Ignored. 
Penalty: %s' % penalty) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index e55068f80..96e67cde2 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -168,7 +168,7 @@ def penalty_string(distance, limit=None): a distance object. """ penalties = [] - for _, key in distance.sorted: + for _, key in distance: key = key.replace('album_', '') key = key.replace('track_', '') key = key.replace('_', ' ') diff --git a/test/test_autotag.py b/test/test_autotag.py index c513dc530..dc75ee0ab 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -200,6 +200,23 @@ class DistanceTest(unittest.TestCase): self.dist.add('medium', 0.0) self.assertEqual(self.dist.max_distance, 5.0) + def test_operators(self): + config['match']['distance_weights']['source'] = 1.0 + config['match']['distance_weights']['album'] = 2.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('source', 0.0) + self.dist.add('album', 0.5) + self.dist.add('medium', 0.25) + self.dist.add('medium', 0.75) + self.assertEqual(len(self.dist), 2) + self.assertEqual(list(self.dist), [(0.2, 'album'), (0.2, 'medium')]) + self.assertTrue(self.dist == 0.4) + self.assertTrue(self.dist < 1.0) + self.assertTrue(self.dist > 0.0) + self.assertEqual(self.dist - 0.4, 0.0) + self.assertEqual(0.4 - self.dist, 0.0) + self.assertEqual(float(self.dist), 0.4) + def test_raw_distance(self): config['match']['distance_weights']['album'] = 3.0 config['match']['distance_weights']['medium'] = 1.0 From 5ce996df0db8e2dc2fc4b4a85abbab0cd0d5257d Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Thu, 6 Jun 2013 10:18:01 +1000 Subject: [PATCH 20/21] Revert "Don't bypass candidate selection in timid mode. Always show all candidates." This reverts commit b02974f68f8e71d526aec16da5c1f74eaa48c7e9. 
--- beets/ui/commands.py | 2 +- docs/changelog.rst | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 96e67cde2..dfe3585c1 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -466,7 +466,7 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, # Is the change good enough? bypass_candidates = False - if rec != recommendation.none and not config['import']['timid']: + if rec != recommendation.none: match = candidates[0] bypass_candidates = True diff --git a/docs/changelog.rst b/docs/changelog.rst index 362bbd5b3..1daa09ef3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -72,7 +72,6 @@ Changelog together for better readability. * Don't show potential matches that have specific penalties applied, as configured by the :ref:`ignored` setting. - * Don't bypass candidate selection in timid mode. Always show all candidates. * Improve calculation of similarity score and recommendation: From c1ebae83bc44fb35e599813a30868e88c76a2d16 Mon Sep 17 00:00:00 2001 From: Tai Lee Date: Thu, 6 Jun 2013 10:44:24 +1000 Subject: [PATCH 21/21] Decouple `_colordiff()` UI function from `string_dist()` matcher function. These are separate issues. We're still colorising case changes in light gray because these characters are effectively equivalent, but symbol and transliteration edits will continue to be colorised in red. --- beets/ui/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/beets/ui/__init__.py b/beets/ui/__init__.py index 21d56ad0d..460320a34 100644 --- a/beets/ui/__init__.py +++ b/beets/ui/__init__.py @@ -37,7 +37,6 @@ from beets.util.functemplate import Template from beets import config from beets.util import confit from beets.autotag import mb -from beets.autotag.match import string_dist # On Windows platforms, use colorama to support "ANSI" terminal colors. 
@@ -404,8 +403,8 @@ def _colordiff(a, b, highlight='red', second_highlight='lightgray'): a_out.append(colorize(highlight, a[a_start:a_end])) elif op == 'replace': # Right and left differ. Colorise with second highlight if - # there's no distance penalty. - if string_dist(a[a_start:a_end], b[b_start:b_end]): + # it's just a case change. + if a[a_start:a_end].lower() != b[b_start:b_end].lower(): color = highlight else: color = second_highlight