diff --git a/beets/autotag/match.py b/beets/autotag/match.py index bb00ee862..59f0d00f4 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -30,7 +30,7 @@ from beets.util.enumeration import enum from beets.autotag import hooks # A configuration view for the distance weights. -weights = config['match']['weight'] +weights = config['match']['distance_weights'] # Parameters for string distance function. # Words that can be moved to the end of a string using a comma. @@ -187,62 +187,221 @@ def track_index_changed(item, track_info): """ return item.track not in (track_info.medium_index, track_info.index) +class Distance(object): + """Keeps track of multiple distance penalties. Provides a single weighted + distance for all penalties as well as a weighted distance for each + individual penalty. + """ + def __cmp__(self, other): + return cmp(self.distance, other) + + def __float__(self): + return self.distance + + def __getitem__(self, key): + """Returns the weighted distance for a named penalty. + """ + dist = sum(self._penalties[key]) * weights[key].as_number() + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + def __init__(self): + self._penalties = {} + + def __iter__(self): + return iter(self.sorted) + + def __len__(self): + return len(self.sorted) + + def __sub__(self, other): + return self.distance - other + + def __rsub__(self, other): + return other - self.distance + + def _eq(self, value1, value2): + """Returns True if `value1` is equal to `value2`. `value1` may be a + compiled regular expression, in which case it will be matched against + `value2`. + """ + if isinstance(value1, re._pattern_type): + return bool(value1.match(value2)) + return value1 == value2 + + def add(self, key, dist): + """Adds a distance penalty. `key` must correspond with a configured + weight setting. `dist` must be a float between 0.0 and 1.0, and will be + added to any existing distance penalties for the same key. 
+ """ + if not 0.0 <= dist <= 1.0: + raise ValueError( + '`dist` must be between 0.0 and 1.0. It is: %r' % dist) + self._penalties.setdefault(key, []).append(dist) + + def add_equality(self, key, value, options): + """Adds a distance penalty of 1.0 if `value` doesn't match any of the + values in `options`. If an option is a compiled regular expression, it + will be considered equal if it matches against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + for opt in options: + if self._eq(opt, value): + dist = 0.0 + break + else: + dist = 1.0 + self.add(key, dist) + + def add_expr(self, key, expr): + """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0. + """ + if expr: + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_number(self, key, number1, number2): + """Adds a distance penalty of 1.0 for each number of difference between + `number1` and `number2`, or 0.0 when there is no difference. Use this + when there is no upper limit on the difference between the two numbers. + """ + diff = abs(number1 - number2) + if diff: + for i in range(diff): + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_priority(self, key, value, options): + """Adds a distance penalty that corresponds to the position at which + `value` appears in `options`. A distance penalty of 0.0 for the first + option, or 1.0 if there is no matching option. If an option is a + compiled regular expression, it will be considered equal if it matches + against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + unit = 1.0 / (len(options) or 1) + for i, opt in enumerate(options): + if self._eq(opt, value): + dist = i * unit + break + else: + dist = 1.0 + self.add(key, dist) + + def add_ratio(self, key, number1, number2): + """Adds a distance penalty for `number1` as a ratio of `number2`. + `number1` is bound at 0 and `number2`. 
+ """ + number = float(max(min(number1, number2), 0)) + if number2: + dist = number / number2 + else: + dist = 0.0 + self.add(key, dist) + + def add_string(self, key, str1, str2): + """Adds a distance penalty based on the edit distance between `str1` + and `str2`. + """ + dist = string_dist(str1, str2) + self.add(key, dist) + + @property + def distance(self): + """Returns a weighted and normalised distance across all penalties. + """ + dist_max = self.max_distance + if dist_max: + return self.raw_distance / self.max_distance + return 0.0 + + @property + def max_distance(self): + """Returns the maximum distance penalty. + """ + dist_max = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_max += len(penalty) * weights[key].as_number() + return dist_max + + @property + def raw_distance(self): + """Returns the raw (denormalised) distance. + """ + dist_raw = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_raw += sum(penalty) * weights[key].as_number() + return dist_raw + + @property + def sorted(self): + """Returns a list of (dist, key) pairs, with `dist` being the weighted + distance, sorted from highest to lowest. Does not include penalties + with a zero value. + """ + list_ = [] + for key in self._penalties: + dist = self[key] + if dist: + list_.append((dist, key)) + # Convert distance into a negative float we can sort items in ascending + # order (for keys, when the penalty is equal) and still get the items + # with the biggest distance first. + return sorted(list_, key=lambda (dist, key): (0-dist, key)) + + def update(self, dist): + """Adds all the distance penalties from `dist`. + """ + if not isinstance(dist, Distance): + raise ValueError( + '`dist` must be a Distance object. It is: %r' % dist) + for key, penalties in dist._penalties.iteritems(): + self._penalties.setdefault(key, []).extend(penalties) + def track_distance(item, track_info, incl_artist=False): """Determines the significance of a track metadata change. 
Returns a - float in [0.0,1.0]. `incl_artist` indicates that a distance - component should be included for the track artist (i.e., for - various-artist releases). + Distance object. `incl_artist` indicates that a distance component should + be included for the track artist (i.e., for various-artist releases). """ - # Distance and normalization accumulators. - dist, dist_max = 0.0, 0.0 + dist = Distance() - # Check track length. - # If there's no length to check, apply no penalty. + # Length. if track_info.length: - diff = abs(item.length - track_info.length) - diff = max(diff - weights['track_length_grace'].as_number(), 0.0) - diff = min(diff, weights['track_length_max'].as_number()) - dist += (diff / weights['track_length_max'].as_number()) * \ - weights['track_length'].as_number() - dist_max += weights['track_length'].as_number() + diff = abs(item.length - track_info.length) - \ + weights['track_length_grace'].as_number() + dist.add_ratio('track_length', diff, + weights['track_length_max'].as_number()) - # Track title. - dist += string_dist(item.title, track_info.title) * \ - weights['track_title'].as_number() - dist_max += weights['track_title'].as_number() + # Title. + dist.add_string('track_title', item.title, track_info.title) - # Track artist, if included. - # Attention: MB DB does not have artist info for all compilations, - # so only check artist distance if there is actually an artist in - # the MB track data. + # Artist. Only check if there is actually an artist in the track data. if incl_artist and track_info.artist and \ item.artist.lower() not in VA_ARTISTS: - dist += string_dist(item.artist, track_info.artist) * \ - weights['track_artist'].as_number() - dist_max += weights['track_artist'].as_number() + dist.add_string('track_artist', item.artist, track_info.artist) # Track index. 
if track_info.index and item.track: - if track_index_changed(item, track_info): - dist += weights['track_index'].as_number() - dist_max += weights['track_index'].as_number() + dist.add_expr('track_index', track_index_changed(item, track_info)) - # MusicBrainz track ID. + # Track ID. if item.mb_trackid: - if item.mb_trackid != track_info.track_id: - dist += weights['track_id'].as_number() - dist_max += weights['track_id'].as_number() + dist.add_expr('track_id', item.mb_trackid != track_info.track_id) - # Plugin distances. - plugin_d, plugin_dm = plugins.track_distance(item, track_info) - dist += plugin_d - dist_max += plugin_dm + # Plugins. + dist.update(plugins.track_distance(item, track_info)) - return dist / dist_max + return dist def distance(items, album_info, mapping): """Determines how "significant" an album metadata change would be. - Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object + Returns a Distance object. `album_info` is an AlbumInfo object reflecting the album to be compared. `items` is a sequence of all Item objects that will be matched (order is not important). `mapping` is a dictionary mapping Items to TrackInfo objects; the @@ -251,97 +410,97 @@ def distance(items, album_info, mapping): """ likelies, _ = current_metadata(items) - # These accumulate the possible distance components. The final - # distance will be dist/dist_max. - dist = 0.0 - dist_max = 0.0 + dist = Distance() - # Artist/album metadata. + # Artist, if not various. if not album_info.va: - dist += string_dist(likelies['artist'], album_info.artist) * \ - weights['artist'].as_number() - dist_max += weights['artist'].as_number() - dist += string_dist(likelies['album'], album_info.album) * \ - weights['album'].as_number() - dist_max += weights['album'].as_number() + dist.add_string('artist', likelies['artist'], album_info.artist) - # Year. No penalty for matching release or original year. 
- if likelies['year'] and album_info.year: - if likelies['year'] not in (album_info.year, album_info.original_year): - diff = abs(album_info.year - likelies['year']) - if diff: - dist += (1.0 - 1.0 / diff) * weights['year'].as_number() - dist_max += weights['year'].as_number() + # Album. + dist.add_string('album', likelies['album'], album_info.album) - # Actual or preferred media. - if album_info.media: - compare_media = likelies['media'] or \ - config['match']['preferred_media'].get() - if compare_media and compare_media.lower() != album_info.media.lower(): - dist += weights['media'].as_number() - dist_max += weights['media'].as_number() + # Preferred media. + patterns = config['match']['preferred']['media'].as_str_seq() + options = [re.compile(r'(\d+x)?(%s)' % pat, re.I) for pat in patterns] + if album_info.media and options: + dist.add_priority('media', album_info.media, options) + # Media. + elif likelies['media'] and album_info.media: + dist.add_string('media', likelies['media'], album_info.media) - # MusicBrainz album ID. - if likelies['mb_albumid']: - if likelies['mb_albumid'] != album_info.album_id: - dist += weights['album_id'].as_number() - dist_max += weights['album_id'].as_number() + # Mediums. + if likelies['disctotal'] and album_info.mediums: + dist.add_number('mediums', likelies['disctotal'], album_info.mediums) - # Apply a small penalty for differences across many minor metadata. This - # helps prioritise releases that are nearly identical. + # Prefer earliest release. + if album_info.year and config['match']['preferred']['original_year']: + # Assume 1889 (earliest first gramophone discs) if we don't know the + # original year. + original = album_info.original_year or 1889 + diff = abs(album_info.year - original) + diff_max = abs(datetime.date.today().year - original) + dist.add_ratio('year', diff, diff_max) + # Year. 
+ elif likelies['year'] and album_info.year: + if likelies['year'] in (album_info.year, album_info.original_year): + # No penalty for matching release or original year. + dist.add('year', 0.0) + elif album_info.original_year: + # Prefer matches closest to the release year. + diff = abs(likelies['year'] - album_info.year) + diff_max = abs(datetime.date.today().year - + album_info.original_year) + dist.add_ratio('year', diff, diff_max) + else: + # Full penalty when there is no original year. + dist.add('year', 1.0) - if likelies['disctotal']: - if likelies['disctotal'] != album_info.mediums: - dist += weights['minor'].as_number() - dist_max += weights['minor'].as_number() + # Preferred countries. + patterns = config['match']['preferred']['countries'].as_str_seq() + options = [re.compile(pat, re.I) for pat in patterns] + if album_info.country and options: + dist.add_priority('country', album_info.country, options) + # Country. + elif likelies['country'] and album_info.country: + dist.add_string('country', likelies['country'], album_info.country) + # Label. if likelies['label'] and album_info.label: - dist += string_dist(likelies['label'], album_info.label) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('label', likelies['label'], album_info.label) + # Catalog number. if likelies['catalognum'] and album_info.catalognum: - dist += string_dist(likelies['catalognum'], - album_info.catalognum) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() - - if likelies['country'] and album_info.country: - dist += string_dist(likelies['country'], - album_info.country) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('catalognum', likelies['catalognum'], + album_info.catalognum) + # Disambiguation. 
if likelies['albumdisambig'] and album_info.albumdisambig: - dist += string_dist(likelies['albumdisambig'], - album_info.albumdisambig) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('albumdisambig', likelies['albumdisambig'], + album_info.albumdisambig) - # Matched track distances. + # Album ID. + if likelies['mb_albumid']: + dist.add_equality('album_id', likelies['mb_albumid'], + album_info.album_id) + + # Tracks. + dist.tracks = {} for item, track in mapping.iteritems(): - dist += track_distance(item, track, album_info.va) * \ - weights['track'].as_number() - dist_max += weights['track'].as_number() + dist.tracks[track] = track_distance(item, track, album_info.va) + dist.add('tracks', dist.tracks[track].distance) - # Extra and unmatched tracks. - for track in set(album_info.tracks) - set(mapping.values()): - dist += weights['missing'].as_number() - dist_max += weights['missing'].as_number() - for item in set(items) - set(mapping.keys()): - dist += weights['unmatched'].as_number() - dist_max += weights['unmatched'].as_number() + # Missing tracks. + for i in range(len(album_info.tracks) - len(mapping)): + dist.add('missing_tracks', 1.0) - # Plugin distances. - plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping) - dist += plugin_d - dist_max += plugin_dm + # Unmatched tracks. + for i in range(len(items) - len(mapping)): + dist.add('unmatched_tracks', 1.0) - # Normalize distance, avoiding divide-by-zero. - if dist_max == 0.0: - return 0.0 - else: - return dist / dist_max + # Plugins. + dist.update(plugins.album_distance(items, album_info, mapping)) + + return dist def match_by_id(items): """If the items are tagged with a MusicBrainz album ID, returns an @@ -367,8 +526,8 @@ def _recommendation(results): recommendation based on the results' distances. 
If the recommendation is higher than the configured maximum for - certain situations, the recommendation will be downgraded to the - configured maximum. + an applied penalty, the recommendation will be downgraded to the + configured maximum for that penalty. """ if not results: # No candidates: no recommendation. @@ -390,45 +549,23 @@ def _recommendation(results): # Gap between first two candidates is large. rec = recommendation.low else: - # No conclusion. - rec = recommendation.none + # No conclusion. Return immediately. Can't be downgraded any further. + return recommendation.none - # "Downgrades" in certain configured situations. + # Downgrade to the max rec if it is lower than the current rec for an + # applied penalty. + keys = set(key for _, key in min_dist) if isinstance(results[0], hooks.AlbumMatch): - # Load the configured recommendation maxima. - max_rec = {} - for trigger in 'non_mb_source', 'partial', 'tracklength', 'tracknumber': - max_rec[trigger] = \ - config['match']['max_rec'][trigger].as_choice({ - 'strong': recommendation.strong, - 'medium': recommendation.medium, - 'low': recommendation.low, - 'none': recommendation.none, - }) - - # Non-MusicBrainz source. - if rec > max_rec['non_mb_source'] and \ - results[0].info.data_source != 'MusicBrainz': - rec = max_rec['non_mb_source'] - - # Partial match. - if rec > max_rec['partial'] and \ - (results[0].extra_items or results[0].extra_tracks): - rec = max_rec['partial'] - - # Check track number and duration for each item. - for item, track_info in results[0].mapping.items(): - # Track length differs. - if rec > max_rec['tracklength'] and \ - item.length and track_info.length and \ - abs(item.length - track_info.length) > \ - weights['track_length_grace'].as_number(): - rec = max_rec['tracklength'] - - # Track number differs. 
- if rec > max_rec['tracknumber'] and \ - track_index_changed(item, track_info): - rec = max_rec['tracknumber'] + for track_dist in min_dist.tracks.values(): + keys.update(key for _, key in track_dist) + for key in keys: + max_rec = config['match']['max_rec'][key].as_choice({ + 'strong': recommendation.strong, + 'medium': recommendation.medium, + 'low': recommendation.low, + 'none': recommendation.none, + }) + rec = min(rec, max_rec) return rec @@ -450,8 +587,15 @@ def _add_candidate(items, results, info): # Get the change distance. dist = distance(items, info, mapping) - log.debug('Success. Distance: %f' % dist) + # Skip matches with ignored penalties. + penalties = [key for _, key in dist] + for penalty in config['match']['ignored'].as_str_seq(): + if penalty in penalties: + log.debug('Ignored. Penalty: %s' % penalty) + return + + log.debug('Success. Distance: %f' % dist) results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, extra_items, extra_tracks) @@ -462,7 +606,7 @@ def tag_album(items, search_artist=None, search_album=None, - The current artist. - The current album. - A list of AlbumMatch objects. The candidates are sorted by - distance (i.e., best match first). + distance (i.e., best match first). - A recommendation. If search_artist and search_album or search_id are provided, then they are used as search terms in place of the current metadata. 
diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 30b7bdac5..44cb51051 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -68,22 +68,42 @@ match: medium_rec_thresh: 0.25 rec_gap_thresh: 0.25 max_rec: - non_mb_source: strong - partial: medium - tracklength: strong - tracknumber: strong - preferred_media: null - weight: + source: strong + artist: strong + album: strong + media: strong + mediums: strong + year: strong + country: strong + label: strong + catalognum: strong + albumdisambig: strong + album_id: strong + tracks: strong + missing_tracks: medium + unmatched_tracks: medium + track_title: strong + track_artist: strong + track_index: strong + track_length_grace: strong + track_length_max: strong + track_length: strong + track_id: strong + distance_weights: source: 2.0 artist: 3.0 album: 3.0 - year: 1.0 media: 1.0 + mediums: 1.0 + year: 1.0 + country: 0.5 + label: 0.5 + catalognum: 0.5 + albumdisambig: 0.5 album_id: 5.0 - minor: 0.5 - track: 1.0 - missing: 0.9 - unmatched: 0.6 + tracks: 2.0 + missing_tracks: 0.9 + unmatched_tracks: 0.6 track_title: 3.0 track_artist: 2.0 track_index: 1.0 @@ -91,3 +111,8 @@ match: track_length_max: 30 track_length: 2.0 track_id: 5.0 + preferred: + countries: [] + media: [] + original_year: no + ignored: [] diff --git a/beets/plugins.py b/beets/plugins.py index 7d49ad3aa..d0c0a9654 100755 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -64,16 +64,16 @@ class BeetsPlugin(object): return {} def track_distance(self, item, info): - """Should return a (distance, distance_max) pair to be added - to the distance value for every track comparison. + """Should return a Distance object to be added to the + distance for every track comparison. """ - return 0.0, 0.0 + return beets.autotag.match.Distance() def album_distance(self, items, album_info, mapping): - """Should return a (distance, distance_max) pair to be added - to the distance value for every album-level comparison. 
+ """Should return a Distance object to be added to the + distance for every album-level comparison. """ - return 0.0, 0.0 + return beets.autotag.match.Distance() def candidates(self, items, artist, album, va_likely): """Should return a sequence of AlbumInfo objects that match the @@ -242,25 +242,19 @@ def queries(): def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. - Returns a (distance, distance_max) pair. + Returns a Distance object. """ - dist = 0.0 - dist_max = 0.0 + dist = beets.autotag.match.Distance() for plugin in find_plugins(): - d, dm = plugin.track_distance(item, info) - dist += d - dist_max += dm - return dist, dist_max + dist.update(plugin.track_distance(item, info)) + return dist def album_distance(items, album_info, mapping): """Returns the album distance calculated by plugins.""" - dist = 0.0 - dist_max = 0.0 + dist = beets.autotag.match.Distance() for plugin in find_plugins(): - d, dm = plugin.album_distance(items, album_info, mapping) - dist += d - dist_max += dm - return dist, dist_max + dist.update(plugin.album_distance(items, album_info, mapping)) + return dist def candidates(items, artist, album, va_likely): """Gets MusicBrainz candidates for an album from each plugin. diff --git a/beets/ui/__init__.py b/beets/ui/__init__.py index 6789045f1..460320a34 100644 --- a/beets/ui/__init__.py +++ b/beets/ui/__init__.py @@ -366,7 +366,7 @@ def colorize(color, text): else: return text -def _colordiff(a, b, highlight='red'): +def _colordiff(a, b, highlight='red', second_highlight='lightgray'): """Given two values, return the same pair of strings except with their differences highlighted in the specified color. Strings are highlighted intelligently to show differences; other values are @@ -402,9 +402,14 @@ def _colordiff(a, b, highlight='red'): # Left only. a_out.append(colorize(highlight, a[a_start:a_end])) elif op == 'replace': - # Right and left differ. 
- a_out.append(colorize(highlight, a[a_start:a_end])) - b_out.append(colorize(highlight, b[b_start:b_end])) + # Right and left differ. Colorise with second highlight if + # it's just a case change. + if a[a_start:a_end].lower() != b[b_start:b_end].lower(): + color = highlight + else: + color = second_highlight + a_out.append(colorize(color, a[a_start:a_end])) + b_out.append(colorize(color, b[b_start:b_end])) else: assert(False) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 9e42751ab..dfe3585c1 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -125,14 +125,14 @@ default_commands.append(fields_cmd) VARIOUS_ARTISTS = u'Various Artists' -PARTIAL_MATCH_MESSAGE = u'(partial match!)' - # Importer utilities and support. def disambig_string(info): - """Returns label, year and media disambiguation, if available. + """Returns source, media, year, country, label and album disambiguation. """ disambig = [] + if info.data_source != 'MusicBrainz': + disambig.append(info.data_source) if info.media: if info.mediums > 1: disambig.append(u'{0}x{1}'.format( @@ -163,26 +163,34 @@ def dist_string(dist): out = ui.colorize('red', out) return out +def penalty_string(distance, limit=None): + """Returns a colorized string that indicates all the penalties applied to + a distance object. + """ + penalties = [] + for _, key in distance: + key = key.replace('album_', '') + key = key.replace('track_', '') + key = key.replace('_', ' ') + penalties.append(key) + if penalties: + if limit and len(penalties) > limit: + penalties = penalties[:limit] + ['...'] + return ui.colorize('yellow', '(%s)' % ', '.join(penalties)) + def show_change(cur_artist, cur_album, match): """Print out a representation of the changes that will be made if an album's tags are changed according to `match`, which must be an AlbumMatch object. 
""" - def show_album(artist, album, partial=False): + def show_album(artist, album): if artist: album_description = u' %s - %s' % (artist, album) elif album: album_description = u' %s' % album else: album_description = u' (unknown album)' - - out = album_description - - # Add a suffix if this is a partial match. - if partial: - out += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) - - print_(out) + print_(album_description) def format_index(track_info): """Return a string representing the track index of the given @@ -223,11 +231,7 @@ def show_change(cur_artist, cur_album, match): print_("To:") show_album(artist_r, album_r) else: - message = u"Tagging:\n %s - %s" % (match.info.artist, - match.info.album) - if match.extra_items or match.extra_tracks: - message += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) - print_(message) + print_(u"Tagging:\n %s - %s" % (match.info.artist, match.info.album)) # Data URL. if match.info.data_url: @@ -235,9 +239,13 @@ def show_change(cur_artist, cur_album, match): # Info line. info = [] + # Similarity. info.append('(Similarity: %s)' % dist_string(match.distance)) - if match.info.data_source != 'MusicBrainz': - info.append(ui.colorize('turquoise', '(%s)' % match.info.data_source)) + # Penalties. + penalties = penalty_string(match.distance) + if penalties: + info.append(penalties) + # Disambiguation. disambig = disambig_string(match.info) if disambig: info.append(ui.colorize('lightgray', '(%s)' % disambig)) @@ -285,7 +293,7 @@ def show_change(cur_artist, cur_album, match): cur_track, new_track = format_index(item), format_index(track_info) if cur_track != new_track: if item.track in (track_info.index, track_info.medium_index): - color = 'yellow' + color = 'lightgray' else: color = 'red' if (cur_track + new_track).count('-') == 1: @@ -315,18 +323,10 @@ def show_change(cur_artist, cur_album, match): rhs += templ.format(rhs_length) lhs_width += len(cur_length) + 3 - # Hidden penalties. 
No LHS/RHS diff is displayed, but we still want to - # indicate that a penalty has been applied to explain the similarity - # score. - penalties = [] - if match.info.va and track_info.artist and \ - item.artist.lower() not in VA_ARTISTS: - penalties.append('artist') - if item.mb_trackid and item.mb_trackid != track_info.track_id: - penalties.append('ID') + # Penalties. + penalties = penalty_string(match.distance.tracks[track_info]) if penalties: - rhs += ' %s' % ui.colorize('red', - '(%s)' % ', '.join(penalties)) + rhs += ' %s' % penalties if lhs != rhs: lines.append((' * %s' % lhs, rhs, lhs_width)) @@ -489,20 +489,17 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, (cur_artist, cur_album)) print_('Candidates:') for i, match in enumerate(candidates): + # Artist, album and distance. line = ['%i. %s - %s (%s)' % (i + 1, match.info.artist, match.info.album, dist_string(match.distance))] - # Point out the partial matches. - if match.extra_items or match.extra_tracks: - line.append(ui.colorize('yellow', - PARTIAL_MATCH_MESSAGE)) - - # Sources other than MusicBrainz. - source = match.info.data_source - if source != 'MusicBrainz': - line.append(ui.colorize('turquoise', '(%s)' % source)) + # Penalties. 
+ penalties = penalty_string(match.distance, 3) + if penalties: + line.append(penalties) + # Disambiguation disambig = disambig_string(match.info) if disambig: line.append(ui.colorize('lightgray', '(%s)' % disambig)) diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 08a78e3af..006f85db0 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -21,6 +21,7 @@ from beets import util from beets import config from beets.util import confit from beets.autotag import hooks +from beets.autotag.match import Distance import acoustid import logging from collections import defaultdict @@ -113,16 +114,14 @@ def _all_releases(items): class AcoustidPlugin(plugins.BeetsPlugin): def track_distance(self, item, info): + dist = Distance() if item.path not in _matches or not info.track_id: # Match failed or no track ID. - return 0.0, 0.0 + return dist recording_ids, _ = _matches[item.path] - if info.track_id in recording_ids: - dist = 0.0 - else: - dist = TRACK_ID_WEIGHT - return dist, TRACK_ID_WEIGHT + dist.add_expr('track_id', info.track_id not in recording_ids) + return dist def candidates(self, items, artist, album, va_likely): albums = [] diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index bb8d37146..822ed59e3 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -17,7 +17,7 @@ discogs-client library. """ from beets import config from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.autotag.match import current_metadata, VA_ARTISTS +from beets.autotag.match import current_metadata, Distance, VA_ARTISTS from beets.plugins import BeetsPlugin from discogs_client import Artist, DiscogsAPIError, Release, Search import beets @@ -44,14 +44,12 @@ class DiscogsPlugin(BeetsPlugin): }) def album_distance(self, items, album_info, mapping): - """Returns the discogs source weight and the maximum source weight. + """Returns the album distance. 
""" + dist = Distance() if album_info.data_source == 'Discogs': - return self.config['source_weight'].as_number() * \ - config['match']['weight']['source'].as_number(), \ - config['match']['weight']['source'].as_number() - else: - return 0.0, 0.0 + dist.add('source', self.config['source_weight'].as_number()) + return dist def candidates(self, items, artist, album, va_likely): """Returns a list of AlbumInfo objects for discogs search results diff --git a/docs/changelog.rst b/docs/changelog.rst index 71d2d0a08..9c8a733aa 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -49,22 +49,29 @@ There are also three more big features added to beets core: In addition, the importer saw various UI enhancements, thanks to Tai Lee: -* More consistent format and colorization of album and track metadata. * Display data source URL for matches from the new data source plugins. This should make it easier to migrate data from Discogs or Beatport into MusicBrainz. +* The top 3 distance penalties are now displayed on the release listing, + and all album and track penalties are now displayed on the track changes + list. This should make it clear exactly which metadata is contributing to a + low similarity score. * Display album disambiguation and disc titles in the track listing, when available. +* More consistent format and colorization of album and track metadata. Red + for an actual difference, yellow to indicate that a distance penalty is being + applied, and light gray for no-penalty or disambiguation data. * Track changes are highlighted in yellow when they indicate a change in format to or from the style of :ref:`per_disc_numbering`. (As before, no penalty is applied because the track number is still "correct", just in a different format.) * Sort missing and unmatched tracks by index and title and group them together for better readability. -* Indicate MusicBrainz ID mismatches. 
+* Don't show potential matches that have specific penalties applied, as + configured by the :ref:`ignored` setting. The calculation of the similarity score for autotagger matches was also -approved, again thanks to Tai Lee. These changes, in general, help deal with +improved, again thanks to Tai Lee. These changes, in general, help deal with the new metadata sources and help disambiguate between similar releases in the same MusicBrainz release group: @@ -72,8 +79,12 @@ same MusicBrainz release group: beets re-identify the same release when re-importing existing files. * Prefer releases that are closest to the tagged ``year``. Tolerate files tagged with release or original year. -* The new :ref:`preferred_media` config option lets you prefer a certain media - type when the ``media`` field is unset on an album. +* Add a :ref:`preferred` collection of settings, which allow the user to + specify a sorted list of preferred countries and media types, or prefer + releases closest to the original year for an album. +* It is now possible to configure a :ref:`max_rec` for any field that is used + to calculate the similarity score. The recommendation will be downgraded if + a penalty is being applied to the specified field. * Apply minor penalties across a range of fields to differentiate between nearly identical releases: ``disctotal``, ``label``, ``catalognum``, ``country`` and ``albumdisambig``. diff --git a/docs/reference/config.rst b/docs/reference/config.rst index 05ef16b4f..d320cd655 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -394,43 +394,80 @@ max_rec As mentioned above, autotagger matches have *recommendations* that control how the UI behaves for a certain quality of match. The recommendation for a certain -match is usually based on the distance calculation. But you can also control -the recommendation for certain specific situations by defining *maximum* -recommendations when: +match is based on the overall distance calculation. 
But you can also control +the recommendation when a distance penalty is being applied for a specific +field by defining *maximum* recommendations for each field. -* a match came from a source other than MusicBrainz (e.g., the - :doc:`Discogs ` plugin); -* a match has missing or extra tracks; -* the length (duration) of at least one track differs; or -* at least one track number differs. - -To define maxima, use keys under ``max_rec:`` in the ``match`` section:: +To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here +are the defaults:: match: max_rec: - non_mb_source: strong - partial: medium - tracklength: strong - tracknumber: strong + source: strong + artist: strong + album: strong + media: strong + mediums: strong + year: strong + country: strong + label: strong + catalognum: strong + albumdisambig: strong + album_id: strong + tracks: strong + missing_tracks: medium + unmatched_tracks: medium + track_title: strong + track_artist: strong + track_index: strong + track_length_grace: strong + track_length_max: strong + track_length: strong + track_id: strong -If a recommendation is higher than the configured maximum and the condition is -met, the recommendation will be downgraded. The maximum for each condition can -be one of ``none``, ``low``, ``medium`` or ``strong``. When the maximum -recommendation is ``strong``, no "downgrading" occurs for that situation. +If a recommendation is higher than the configured maximum and a penalty is +being applied, the recommendation will be downgraded. The maximum for each +field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the +maximum recommendation is ``strong``, no "downgrading" occurs. -The above example shows the default ``max_rec`` settings. +.. _preferred: -.. 
_preferred_media: +preferred +~~~~~~~~~ -preferred_media -~~~~~~~~~~~~~~~ +In addition to comparing the tagged metadata with the match metadata for +similarity, you can also specify an ordered list of preferred countries and +media types. -When an album has its ``media`` field set, it is compared against matches to -prefer releases of the same media type. But this option lets you control what -happens when an album *doesn't* have ``media`` set (which is the case for most -albums that haven't already been run through a MusicBrainz tagger). Set this -option to ``CD``, for example, to prefer CD releases. Defaults to ``null``, -indicating no preference. +A distance penalty will be applied if the country or media type from the match +metadata doesn't match any of the preferred values. The order is important: the first item is the most +preferred. Each item may be a regular expression, and will be matched +case-insensitively. The number of media will be stripped when matching preferred +media (e.g. "2x" in "2xCD"). + +You can also tell the autotagger to prefer matches that have a release year +closest to the original year for an album. + +Here's an example:: + + match: + preferred: + countries: ['US', 'GB|UK'] + media: ['CD', 'Digital Media|File'] + original_year: yes + +By default, none of these options are enabled. + +.. _ignored: + +ignored +~~~~~~~ + +You can completely avoid matches that have certain penalties applied by adding +the penalty name to the ``ignored`` setting:: + + match: + ignored: missing_tracks unmatched_tracks .. 
class DistanceTest(unittest.TestCase):
    """Tests for `Distance`: the `add*` helpers, the weighted totals and the
    operator overloads.

    Tests that check weighted values first set the weights they rely on
    under ``config['match']['distance_weights']`` so that the expected
    numbers can be derived from the test body alone.
    """
    def setUp(self):
        # Every test starts from an empty set of penalties.
        self.dist = Distance()

    def test_add(self):
        self.dist.add('add', 1.0)
        self.assertEqual(self.dist._penalties, {'add': [1.0]})

    def test_add_equality(self):
        # A value found in the options adds no penalty.
        self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi'])
        self.assertEqual(self.dist._penalties['equality'], [0.0])

        # A value missing from the options adds a full penalty.
        self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi'])
        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0])

        # A single (non-list) option may be a compiled regular expression.
        self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0])

    def test_add_expr(self):
        self.dist.add_expr('expr', True)
        self.assertEqual(self.dist._penalties['expr'], [1.0])

        self.dist.add_expr('expr', False)
        self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0])

    def test_add_number(self):
        # Add a full penalty for each number of difference between two
        # numbers.

        self.dist.add_number('number', 1, 1)
        self.assertEqual(self.dist._penalties['number'], [0.0])

        self.dist.add_number('number', 1, 2)
        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0])

        # The order of the arguments does not matter.
        self.dist.add_number('number', 2, 1)
        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0])

        # A difference of three adds three full penalties.
        self.dist.add_number('number', -1, 2)
        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0,
                                                          1.0, 1.0])

    def test_add_priority(self):
        # The first (here: only) option carries no penalty.
        self.dist.add_priority('priority', 'abc', 'abc')
        self.assertEqual(self.dist._penalties['priority'], [0.0])

        # Second of two options: 1 * (1.0 / 2).
        self.dist.add_priority('priority', 'def', ['abc', 'def'])
        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5])

        # Fourth of four options, matched through a regular expression:
        # 3 * (1.0 / 4).
        self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef',
                                                  re.compile('GH', re.I)])
        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75])

        # No matching option at all adds a full penalty.
        self.dist.add_priority('priority', 'xyz', ['abc', 'def'])
        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75,
                                                            1.0])

    def test_add_ratio(self):
        self.dist.add_ratio('ratio', 25, 100)
        self.assertEqual(self.dist._penalties['ratio'], [0.25])

        # The first number is capped at the second one.
        self.dist.add_ratio('ratio', 10, 5)
        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0])

        # A negative first number is treated as zero.
        self.dist.add_ratio('ratio', -5, 5)
        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0])

        # A zero denominator adds no penalty.
        self.dist.add_ratio('ratio', 5, 0)
        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0])

    def test_add_string(self):
        # The penalty is expected to equal what `string_dist` returns.
        dist = match.string_dist(u'abc', u'bcd')
        self.dist.add_string('string', u'abc', u'bcd')
        self.assertEqual(self.dist._penalties['string'], [dist])

    def test_distance(self):
        # Set the weight for every key used below. The penalties are added
        # under 'media', so that is the weight that has to be pinned.
        config['match']['distance_weights']['album'] = 2.0
        config['match']['distance_weights']['media'] = 1.0
        self.dist.add('album', 0.5)
        self.dist.add('media', 0.25)
        self.dist.add('media', 0.75)
        # (2.0 * 0.5 + 1.0 * (0.25 + 0.75)) / (2.0 * 1 + 1.0 * 2) == 0.5
        self.assertEqual(self.dist.distance, 0.5)

        # __getitem__()
        self.assertEqual(self.dist['album'], 0.25)
        self.assertEqual(self.dist['media'], 0.25)

    def test_max_distance(self):
        config['match']['distance_weights']['album'] = 3.0
        config['match']['distance_weights']['medium'] = 1.0
        self.dist.add('album', 0.5)
        self.dist.add('medium', 0.0)
        self.dist.add('medium', 0.0)
        # One 'album' penalty and two 'medium' penalties: 3.0 + 1.0 + 1.0.
        self.assertEqual(self.dist.max_distance, 5.0)

    def test_operators(self):
        config['match']['distance_weights']['source'] = 1.0
        config['match']['distance_weights']['album'] = 2.0
        config['match']['distance_weights']['medium'] = 1.0
        self.dist.add('source', 0.0)
        self.dist.add('album', 0.5)
        self.dist.add('medium', 0.25)
        self.dist.add('medium', 0.75)
        # The zero 'source' penalty is expected to be left out of both the
        # length and the iteration.
        self.assertEqual(len(self.dist), 2)
        self.assertEqual(list(self.dist), [(0.2, 'album'), (0.2, 'medium')])
        self.assertTrue(self.dist == 0.4)
        self.assertTrue(self.dist < 1.0)
        self.assertTrue(self.dist > 0.0)
        self.assertEqual(self.dist - 0.4, 0.0)
        self.assertEqual(0.4 - self.dist, 0.0)
        self.assertEqual(float(self.dist), 0.4)

    def test_raw_distance(self):
        config['match']['distance_weights']['album'] = 3.0
        config['match']['distance_weights']['medium'] = 1.0
        self.dist.add('album', 0.5)
        self.dist.add('medium', 0.25)
        self.dist.add('medium', 0.5)
        # 3.0 * 0.5 + 1.0 * (0.25 + 0.5)
        self.assertEqual(self.dist.raw_distance, 2.25)

    def test_sorted(self):
        config['match']['distance_weights']['album'] = 4.0
        config['match']['distance_weights']['medium'] = 2.0

        # Largest weighted distance first.
        self.dist.add('album', 0.1875)
        self.dist.add('medium', 0.75)
        self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')])

        # Sort by key if distance is equal.
        dist = Distance()
        dist.add('album', 0.375)
        dist.add('medium', 0.75)
        self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')])

    def test_update(self):
        self.dist.add('album', 0.5)
        self.dist.add('media', 1.0)

        # All of these penalties go on the *other* distance so that
        # `update()` has to merge both keys into `self.dist`.
        dist = Distance()
        dist.add('album', 0.75)
        dist.add('album', 0.25)
        dist.add('media', 0.05)

        self.dist.update(dist)

        self.assertEqual(self.dist._penalties, {'album': [0.5, 0.75, 0.25],
                                                'media': [1.0, 0.05]})
in msg) def test_item_data_change(self): - self.items[0].title = 'different' + self.items[0].title = u'different' msg = self._show_change() self.assertTrue('different -> the title' in msg) @@ -638,12 +641,12 @@ class ShowChangeTest(_common.TestCase): self.assertTrue('correcting tags from:' in msg) def test_item_data_change_title_missing(self): - self.items[0].title = '' + self.items[0].title = u'' msg = re.sub(r' +', ' ', self._show_change()) self.assertTrue('file.mp3 -> the title' in msg) def test_item_data_change_title_missing_with_unicode_filename(self): - self.items[0].title = '' + self.items[0].title = u'' self.items[0].path = u'/path/to/caf\xe9.mp3'.encode('utf8') msg = re.sub(r' +', ' ', self._show_change().decode('utf8')) self.assertTrue(u'caf\xe9.mp3 -> the title' in msg