diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 8935165f3..7f2f01c56 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -30,7 +30,7 @@ from beets.util.enumeration import enum from beets.autotag import hooks # A configuration view for the distance weights. -weights = config['match']['weight'] +weights = config['match']['distance_weights'] # Parameters for string distance function. # Words that can be moved to the end of a string using a comma. @@ -187,62 +187,202 @@ def track_index_changed(item, track_info): """ return item.track not in (track_info.medium_index, track_info.index) +class Distance(object): + """Keeps track of multiple distance penalties. Provides a single weighted + distance for all penalties as well as a weighted distance for each + individual penalty. + """ + def __cmp__(self, other): + return cmp(self.distance, other) + + def __float__(self): + return self.distance + + def __getitem__(self, key): + """Returns the weighted distance for a named penalty. + """ + dist = sum(self.penalties[key]) * weights[key].as_number() + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + def __init__(self): + self.penalties = {} + + def __sub__(self, other): + return self.distance - other + + def __rsub__(self, other): + return other - self.distance + + def _eq(self, value1, value2): + """Returns True if `value1` is equal to `value2`. `value1` may be a + compiled regular expression, in which case it will be matched against + `value2`. + """ + if isinstance(value1, re._pattern_type): + return bool(value1.match(value2)) + return value1 == value2 + + def add(self, key, dist): + """Adds a distance penalty. `key` must correspond with a configured + weight setting. `dist` must be a float between 0.0 and 1.0, and will be + added to any existing distance penalties for the same key. + """ + if not 0.0 <= dist <= 1.0: + raise ValueError( + '`dist` must be between 0.0 and 1.0. 
It is: %r' % dist) + self.penalties.setdefault(key, []).append(dist) + + def add_equality(self, key, value, options): + """Adds a distance penalty of 1.0 if `value` doesn't match any of the + values in `options`. If an option is a compiled regular expression, it + will be considered equal if it matches against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + for opt in options: + if self._eq(opt, value): + dist = 0.0 + break + else: + dist = 1.0 + self.add(key, dist) + + def add_expr(self, key, expr): + """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0. + """ + if expr: + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_number(self, key, number1, number2): + """Adds a distance penalty of 1.0 for each number of difference between + `number1` and `number2`, or 0.0 when there is no difference. Use this + when there is no upper limit on the difference between the two numbers. + """ + diff = abs(number1 - number2) + if diff: + for i in range(diff): + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_priority(self, key, value, options): + """Adds a distance penalty that corresponds to the position at which + `value` appears in `options`. A distance penalty of 0.0 for the first + option, or 1.0 if there is no matching option. If an option is a + compiled regular expression, it will be considered equal if it matches + against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + unit = 1.0 / (len(options) + 1) + for i, opt in enumerate(options): + if self._eq(opt, value): + dist = i * unit + break + else: + dist = 1.0 + self.add(key, dist) + + def add_ratio(self, key, number1, number2): + """Adds a distance penalty for `number1` as a ratio of `number2`. + `number1` is bound at 0 and `number2`. 
+ """ + number = float(max(min(number1, number2), 0)) + if number2: + dist = number / number2 + else: + dist = 0.0 + self.add(key, dist) + + def add_string(self, key, str1, str2): + """Adds a distance penalty based on the edit distance between `str1` + and `str2`. + """ + dist = string_dist(str1, str2) + self.add(key, dist) + + @property + def distance(self): + """Returns an overall weighted distance across all penalties. + """ + dist = 0.0 + for key, penalty in self.penalties.iteritems(): + dist += sum(penalty) * weights[key].as_number() + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + @property + def max_distance(self): + """Returns the maximum distance penalty. + """ + dist_max = 0.0 + for key, penalty in self.penalties.iteritems(): + dist_max += len(penalty) * weights[key].as_number() + return dist_max + + @property + def sorted(self): + """Returns a list of (dist, key) pairs, with `dist` being the weighted + distance, sorted from highest to lowest. + """ + list_ = [(self[key], key) for key in self.penalties] + return sorted(list_, key=lambda (dist, key): (0-dist, key)) + + def update(self, dist): + """Adds all the distance penalties from `dist`. + """ + if not isinstance(dist, Distance): + raise ValueError( + '`dist` must be a Distance object. It is: %r' % dist) + for key, penalties in dist.penalties.iteritems(): + self.penalties.setdefault(key, []).extend(penalties) + def track_distance(item, track_info, incl_artist=False): """Determines the significance of a track metadata change. Returns a - float in [0.0,1.0]. `incl_artist` indicates that a distance - component should be included for the track artist (i.e., for - various-artist releases). + Distance object. `incl_artist` indicates that a distance component should + be included for the track artist (i.e., for various-artist releases). """ - # Distance and normalization accumulators. - dist, dist_max = 0.0, 0.0 + dist = Distance() - # Check track length. 
- # If there's no length to check, apply no penalty. + # Length. if track_info.length: diff = abs(item.length - track_info.length) diff = max(diff - weights['track_length_grace'].as_number(), 0.0) diff = min(diff, weights['track_length_max'].as_number()) - dist += (diff / weights['track_length_max'].as_number()) * \ - weights['track_length'].as_number() - dist_max += weights['track_length'].as_number() + dist.add_ratio('track_length', diff, + weights['track_length_max'].as_number()) - # Track title. - dist += string_dist(item.title, track_info.title) * \ - weights['track_title'].as_number() - dist_max += weights['track_title'].as_number() + # Title. + dist.add_string('track_title', item.title, track_info.title) - # Track artist, if included. - # Attention: MB DB does not have artist info for all compilations, - # so only check artist distance if there is actually an artist in - # the MB track data. + # Artist. Only check if there is actually an artist in the track data. if incl_artist and track_info.artist and \ item.artist.lower() not in VA_ARTISTS: - dist += string_dist(item.artist, track_info.artist) * \ - weights['track_artist'].as_number() - dist_max += weights['track_artist'].as_number() + dist.add_string('track_artist', item.artist, track_info.artist) # Track index. if track_info.index and item.track: - if track_index_changed(item, track_info): - dist += weights['track_index'].as_number() - dist_max += weights['track_index'].as_number() + dist.add_expr('track_index', track_index_changed(item, track_info)) - # MusicBrainz track ID. + # Track ID. if item.mb_trackid: - if item.mb_trackid != track_info.track_id: - dist += weights['track_id'].as_number() - dist_max += weights['track_id'].as_number() + dist.add_expr('track_id', item.mb_trackid != track_info.track_id) - # Plugin distances. - plugin_d, plugin_dm = plugins.track_distance(item, track_info) - dist += plugin_d - dist_max += plugin_dm + # Plugins. 
+ dist.update(plugins.track_distance(item, track_info)) - return dist / dist_max + return dist def distance(items, album_info, mapping): """Determines how "significant" an album metadata change would be. - Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object + Returns a Distance object. `album_info` is an AlbumInfo object reflecting the album to be compared. `items` is a sequence of all Item objects that will be matched (order is not important). `mapping` is a dictionary mapping Items to TrackInfo objects; the @@ -251,100 +391,89 @@ def distance(items, album_info, mapping): """ likelies, _ = current_metadata(items) - # These accumulate the possible distance components. The final - # distance will be dist/dist_max. - dist = 0.0 - dist_max = 0.0 + dist = Distance() - # Artist/album metadata. + # Artist, if not various. if not album_info.va: - dist += string_dist(likelies['artist'], album_info.artist) * \ - weights['artist'].as_number() - dist_max += weights['artist'].as_number() - dist += string_dist(likelies['album'], album_info.album) * \ - weights['album'].as_number() - dist_max += weights['album'].as_number() + dist.add_string('artist', likelies['artist'], album_info.artist) - # Year. No penalty for matching release or original year. - if likelies['year'] and album_info.year: - if likelies['year'] not in (album_info.year, album_info.original_year): - diff = abs(album_info.year - likelies['year']) - if diff: - dist += (1.0 - 1.0 / diff) * weights['year'].as_number() - dist_max += weights['year'].as_number() + # Album. + dist.add_string('album', likelies['album'], album_info.album) - # Actual or preferred media. - preferred_media = config['match']['preferred_media'].get() + # Media. 
if likelies['media'] and album_info.media: - dist += string_dist(likelies['media'], album_info.media) * \ - weights['media'].as_number() - dist_max += weights['media'].as_number() - elif album_info.media and preferred_media: - dist += string_dist(album_info.media, preferred_media) * \ - weights['media'].as_number() - dist_max += weights['media'].as_number() + dist.add_string('media', likelies['media'], album_info.media) - # MusicBrainz album ID. - if likelies['mb_albumid']: - if likelies['mb_albumid'] != album_info.album_id: - dist += weights['album_id'].as_number() - dist_max += weights['album_id'].as_number() + # Preferred media. + preferred_media = [re.compile(r'(\d+x)?%s' % pattern, re.I) for pattern + in config['match']['preferred']['media'].get()] + if album_info.media and preferred_media: + dist.add_priority('media', album_info.media, preferred_media) - # Apply a small penalty for differences across many minor metadata. This - # helps prioritise releases that are nearly identical. + # Number of discs. + if likelies['disctotal'] and album_info.mediums: + dist.add_number('mediums', likelies['disctotal'], album_info.mediums) - if likelies['disctotal']: - if likelies['disctotal'] != album_info.mediums: - dist += weights['minor'].as_number() - dist_max += weights['minor'].as_number() + # Year. + if likelies['year'] and album_info.year: + # No penalty for matching release or original year. + if likelies['year'] in (album_info.year, album_info.original_year): + dist.add('year', 0.0) + else: + dist.add_number('year', likelies['year'], album_info.year) - if likelies['label'] and album_info.label: - dist += string_dist(likelies['label'], album_info.label) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() - - if likelies['catalognum'] and album_info.catalognum: - dist += string_dist(likelies['catalognum'], - album_info.catalognum) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + # Prefer earlier releases. 
+ if album_info.year and album_info.original_year and \ + config['match']['preferred']['original_year'].get(): + dist.add_number('year', album_info.year, album_info.original_year) + # Country. if likelies['country'] and album_info.country: - dist += string_dist(likelies['country'], - album_info.country) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('country', likelies['country'], album_info.country) + # Preferred countries. + preferred_countries = [re.compile(pattern, re.I) for pattern + in config['match']['preferred']['countries'].get()] + if album_info.country and preferred_countries: + dist.add_priority('country', album_info.country, preferred_countries) + + # Label. + if likelies['label'] and album_info.label: + dist.add_string('label', likelies['label'], album_info.label) + + # Catalog number. + if likelies['catalognum'] and album_info.catalognum: + dist.add_string('catalognum', likelies['catalognum'], + album_info.catalognum) + + # Disambiguation. if likelies['albumdisambig'] and album_info.albumdisambig: - dist += string_dist(likelies['albumdisambig'], - album_info.albumdisambig) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('albumdisambig', likelies['albumdisambig'], + album_info.albumdisambig) - # Matched track distances. + # Album ID. + if likelies['mb_albumid']: + dist.add_equality('album_id', likelies['mb_albumid'], + album_info.album_id) + + # Tracks. + dist.tracks = {} for item, track in mapping.iteritems(): - dist += track_distance(item, track, album_info.va) * \ - weights['track'].as_number() - dist_max += weights['track'].as_number() + dist.tracks[track] = track_distance(item, track, album_info.va) + dist.add('tracks', dist.tracks[track].distance) - # Extra and unmatched tracks. 
- for track in set(album_info.tracks) - set(mapping.values()): - dist += weights['missing'].as_number() - dist_max += weights['missing'].as_number() - for item in set(items) - set(mapping.keys()): - dist += weights['unmatched'].as_number() - dist_max += weights['unmatched'].as_number() + # Missing tracks. + for i in range(len(album_info.tracks) - len(mapping)): + dist.add('missing_tracks', 1.0) - # Plugin distances. - plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping) - dist += plugin_d - dist_max += plugin_dm + # Unmatched tracks. + for i in range(len(items) - len(mapping)): + dist.add('unmatched_tracks', 1.0) - # Normalize distance, avoiding divide-by-zero. - if dist_max == 0.0: - return 0.0 - else: - return dist / dist_max + # Plugins. + dist.update(plugins.album_distance(items, album_info, mapping)) + + return dist def match_by_id(items): """If the items are tagged with a MusicBrainz album ID, returns an @@ -370,8 +499,8 @@ def _recommendation(results): recommendation based on the results' distances. If the recommendation is higher than the configured maximum for - certain situations, the recommendation will be downgraded to the - configured maximum. + an applied penalty, the recommendation will be downgraded to the + configured maximum for that penalty. """ if not results: # No candidates: no recommendation. @@ -393,45 +522,20 @@ def _recommendation(results): # Gap between first two candidates is large. rec = recommendation.low else: - # No conclusion. - rec = recommendation.none + # No conclusion. Return immediately. Can't be downgraded any further. + return recommendation.none - # "Downgrades" in certain configured situations. - if isinstance(results[0], hooks.AlbumMatch): - # Load the configured recommendation maxima. 
- max_rec = {} - for trigger in 'non_mb_source', 'partial', 'tracklength', 'tracknumber': - max_rec[trigger] = \ - config['match']['max_rec'][trigger].as_choice({ - 'strong': recommendation.strong, - 'medium': recommendation.medium, - 'low': recommendation.low, - 'none': recommendation.none, - }) - - # Non-MusicBrainz source. - if rec > max_rec['non_mb_source'] and \ - results[0].info.data_source != 'MusicBrainz': - rec = max_rec['non_mb_source'] - - # Partial match. - if rec > max_rec['partial'] and \ - (results[0].extra_items or results[0].extra_tracks): - rec = max_rec['partial'] - - # Check track number and duration for each item. - for item, track_info in results[0].mapping.items(): - # Track length differs. - if rec > max_rec['tracklength'] and \ - item.length and track_info.length and \ - abs(item.length - track_info.length) > \ - weights['track_length_grace'].as_number(): - rec = max_rec['tracklength'] - - # Track number differs. - if rec > max_rec['tracknumber'] and \ - track_index_changed(item, track_info): - rec = max_rec['tracknumber'] + # Downgrade to the max rec if it is lower than the current rec for an + # applied penalty. + for dist, key in results[0].distance.sorted: + if dist: + max_rec = config['match']['max_rec'][key].as_choice({ + 'strong': recommendation.strong, + 'medium': recommendation.medium, + 'low': recommendation.low, + 'none': recommendation.none, + }) + rec = min(rec, max_rec) return rec @@ -465,7 +569,7 @@ def tag_album(items, search_artist=None, search_album=None, - The current artist. - The current album. - A list of AlbumMatch objects. The candidates are sorted by - distance (i.e., best match first). + distance (i.e., best match first). - A recommendation. If search_artist and search_album or search_id are provided, then they are used as search terms in place of the current metadata. 
diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 7bbb16a6b..7b9867813 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -68,22 +68,42 @@ match: medium_rec_thresh: 0.25 rec_gap_thresh: 0.25 max_rec: - non_mb_source: strong - partial: medium - tracklength: strong - tracknumber: strong - preferred_media: CD - weight: + source: strong + artist: strong + album: strong + media: strong + mediums: strong + year: strong + country: strong + label: strong + catalognum: strong + albumdisambig: strong + album_id: strong + tracks: strong + missing_tracks: medium + unmatched_tracks: medium + track_title: strong + track_artist: strong + track_index: strong + track_length_grace: strong + track_length_max: strong + track_length: strong + track_id: strong + distance_weights: source: 2.0 artist: 3.0 album: 3.0 - year: 1.0 media: 1.0 + mediums: 1.0 + year: 1.0 + country: 0.5 + label: 0.5 + catalognum: 0.5 + albumdisambig: 0.5 album_id: 5.0 - minor: 0.5 - track: 1.0 - missing: 0.9 - unmatched: 0.6 + tracks: 2.0 + missing_tracks: 0.9 + unmatched_tracks: 0.6 track_title: 3.0 track_artist: 2.0 track_index: 1.0 @@ -91,3 +111,7 @@ match: track_length_max: 30 track_length: 2.0 track_id: 5.0 + preferred: + countries: [] + media: [] + original_year: no diff --git a/beets/plugins.py b/beets/plugins.py index 7d49ad3aa..d0c0a9654 100755 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -64,16 +64,16 @@ class BeetsPlugin(object): return {} def track_distance(self, item, info): - """Should return a (distance, distance_max) pair to be added - to the distance value for every track comparison. + """Should return a Distance object to be added to the + distance for every track comparison. """ - return 0.0, 0.0 + return beets.autotag.match.Distance() def album_distance(self, items, album_info, mapping): - """Should return a (distance, distance_max) pair to be added - to the distance value for every album-level comparison. 
+ """Should return a Distance object to be added to the + distance for every album-level comparison. """ - return 0.0, 0.0 + return beets.autotag.match.Distance() def candidates(self, items, artist, album, va_likely): """Should return a sequence of AlbumInfo objects that match the @@ -242,25 +242,19 @@ def queries(): def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. - Returns a (distance, distance_max) pair. + Returns a Distance object. """ - dist = 0.0 - dist_max = 0.0 + dist = beets.autotag.match.Distance() for plugin in find_plugins(): - d, dm = plugin.track_distance(item, info) - dist += d - dist_max += dm - return dist, dist_max + dist.update(plugin.track_distance(item, info)) + return dist def album_distance(items, album_info, mapping): """Returns the album distance calculated by plugins.""" - dist = 0.0 - dist_max = 0.0 + dist = beets.autotag.match.Distance() for plugin in find_plugins(): - d, dm = plugin.album_distance(items, album_info, mapping) - dist += d - dist_max += dm - return dist, dist_max + dist.update(plugin.album_distance(items, album_info, mapping)) + return dist def candidates(items, artist, album, va_likely): """Gets MusicBrainz candidates for an album from each plugin. diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 9e42751ab..e306256d4 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -125,14 +125,14 @@ default_commands.append(fields_cmd) VARIOUS_ARTISTS = u'Various Artists' -PARTIAL_MATCH_MESSAGE = u'(partial match!)' - # Importer utilities and support. def disambig_string(info): - """Returns label, year and media disambiguation, if available. + """Returns source, media, year, country, and album disambiguation. 
""" disambig = [] + if info.data_source != 'MusicBrainz': + disambig.append(info.data_source) if info.media: if info.mediums > 1: disambig.append(u'{0}x{1}'.format( @@ -163,26 +163,35 @@ def dist_string(dist): out = ui.colorize('red', out) return out +def penalty_string(distance, limit=None): + """Returns a colorized string that indicates all the penalties applied to + a distance object. + """ + penalties = [] + for dist, key in distance.sorted: + if dist: + key = key.replace('album_', '') + key = key.replace('track_', '') + key = key.replace('_', ' ') + penalties.append(key) + if penalties: + if limit and len(penalties) > limit: + penalties = penalties[:limit] + ['...'] + return ui.colorize('yellow', '(%s)' % ', '.join(penalties)) + def show_change(cur_artist, cur_album, match): """Print out a representation of the changes that will be made if an album's tags are changed according to `match`, which must be an AlbumMatch object. """ - def show_album(artist, album, partial=False): + def show_album(artist, album): if artist: album_description = u' %s - %s' % (artist, album) elif album: album_description = u' %s' % album else: album_description = u' (unknown album)' - - out = album_description - - # Add a suffix if this is a partial match. - if partial: - out += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) - - print_(out) + print_(album_description) def format_index(track_info): """Return a string representing the track index of the given @@ -223,11 +232,7 @@ def show_change(cur_artist, cur_album, match): print_("To:") show_album(artist_r, album_r) else: - message = u"Tagging:\n %s - %s" % (match.info.artist, - match.info.album) - if match.extra_items or match.extra_tracks: - message += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) - print_(message) + print_(u"Tagging:\n %s - %s" % (match.info.artist, match.info.album)) # Data URL. if match.info.data_url: @@ -235,9 +240,13 @@ def show_change(cur_artist, cur_album, match): # Info line. 
info = [] + # Similarity. info.append('(Similarity: %s)' % dist_string(match.distance)) - if match.info.data_source != 'MusicBrainz': - info.append(ui.colorize('turquoise', '(%s)' % match.info.data_source)) + # Penalties. + penalties = penalty_string(match.distance) + if penalties: + info.append(penalties) + # Disambiguation. disambig = disambig_string(match.info) if disambig: info.append(ui.colorize('lightgray', '(%s)' % disambig)) @@ -315,18 +324,10 @@ def show_change(cur_artist, cur_album, match): rhs += templ.format(rhs_length) lhs_width += len(cur_length) + 3 - # Hidden penalties. No LHS/RHS diff is displayed, but we still want to - # indicate that a penalty has been applied to explain the similarity - # score. - penalties = [] - if match.info.va and track_info.artist and \ - item.artist.lower() not in VA_ARTISTS: - penalties.append('artist') - if item.mb_trackid and item.mb_trackid != track_info.track_id: - penalties.append('ID') + # Penalties. + penalties = penalty_string(match.distance.tracks[track_info]) if penalties: - rhs += ' %s' % ui.colorize('red', - '(%s)' % ', '.join(penalties)) + rhs += ' %s' % penalties if lhs != rhs: lines.append((' * %s' % lhs, rhs, lhs_width)) @@ -489,20 +490,17 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, (cur_artist, cur_album)) print_('Candidates:') for i, match in enumerate(candidates): + # Artist, album and distance. line = ['%i. %s - %s (%s)' % (i + 1, match.info.artist, match.info.album, dist_string(match.distance))] - # Point out the partial matches. - if match.extra_items or match.extra_tracks: - line.append(ui.colorize('yellow', - PARTIAL_MATCH_MESSAGE)) - - # Sources other than MusicBrainz. - source = match.info.data_source - if source != 'MusicBrainz': - line.append(ui.colorize('turquoise', '(%s)' % source)) + # Penalties. 
+ penalties = penalty_string(match.distance, 3) + if penalties: + line.append(penalties) + # Disambiguation disambig = disambig_string(match.info) if disambig: line.append(ui.colorize('lightgray', '(%s)' % disambig)) diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 08a78e3af..006f85db0 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -21,6 +21,7 @@ from beets import util from beets import config from beets.util import confit from beets.autotag import hooks +from beets.autotag.match import Distance import acoustid import logging from collections import defaultdict @@ -113,16 +114,14 @@ def _all_releases(items): class AcoustidPlugin(plugins.BeetsPlugin): def track_distance(self, item, info): + dist = Distance() if item.path not in _matches or not info.track_id: # Match failed or no track ID. - return 0.0, 0.0 + return dist recording_ids, _ = _matches[item.path] - if info.track_id in recording_ids: - dist = 0.0 - else: - dist = TRACK_ID_WEIGHT - return dist, TRACK_ID_WEIGHT + dist.add_expr('track_id', info.track_id not in recording_ids) + return dist def candidates(self, items, artist, album, va_likely): albums = [] diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index bb8d37146..822ed59e3 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -17,7 +17,7 @@ discogs-client library. """ from beets import config from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.autotag.match import current_metadata, VA_ARTISTS +from beets.autotag.match import current_metadata, Distance, VA_ARTISTS from beets.plugins import BeetsPlugin from discogs_client import Artist, DiscogsAPIError, Release, Search import beets @@ -44,14 +44,12 @@ class DiscogsPlugin(BeetsPlugin): }) def album_distance(self, items, album_info, mapping): - """Returns the discogs source weight and the maximum source weight. + """Returns the album distance. 
""" + dist = Distance() if album_info.data_source == 'Discogs': - return self.config['source_weight'].as_number() * \ - config['match']['weight']['source'].as_number(), \ - config['match']['weight']['source'].as_number() - else: - return 0.0, 0.0 + dist.add('source', self.config['source_weight'].as_number()) + return dist def candidates(self, items, artist, album, va_likely): """Returns a list of AlbumInfo objects for discogs search results diff --git a/docs/changelog.rst b/docs/changelog.rst index 0f8b08b51..527982190 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -53,30 +53,36 @@ Changelog None. * Various UI enhancements to the importer due to Tai Lee: - * More consistent format and colorization of album and track metadata. - * Display data source URL for :doc:`/plugins/discogs` matches. This should - make it easier for people who would rather import and correct data from - Discogs into MusicBrainz. + * Display data source URL and source name in album disambiguation for + non-MusicBrainz matches. This should make it easier for people who want to + import and correct data from other sources into MusicBrainz. + * The top 3 distance penalties are now displayed on the release listing, + and all album and track penalties are now displayed on the track changes + list. This should make it clear exactly which metadata is contributing to a + low similarity score. * Display album disambiguation and disc titles in the track listing, when available. - * Track changes highlighted in yellow indicate a change in format to or from - :ref:`per_disc_numbering`. No penalty is applied because the track number - is still "correct", just in a different format. + * More consistent format and colorization of album and track metadata. + * Track changes highlighted in turquoise indicate a change in format to or + from :ref:`per_disc_numbering`. No penalty is applied because the track + number is still "correct", just in a different format. 
* Sort missing and unmatched tracks by index and title and group them together for better readability. - * Indicate MusicBrainz ID mismatches. -* Improve calculation of similarity score: +* Improve calculation of similarity score and recommendation: + * It is now possible to configure a :ref:`max_rec` for any field that is used + to calculate the similarity score. The recommendation will be downgraded if + a penalty is being applied to the specified field. * Strongly prefer releases with a matching MusicBrainz album ID. This helps beets re-identify the same release when re-importing existing files. * Prefer releases that are closest to the tagged ``year``. Tolerate files tagged with release or original year. - * Prefer CD releases by default, when there is no ``media`` tagged in the - files being imported. This can be changed with the :ref:`preferred_media` - setting. - * Apply minor penalties across a range of fields to differentiate between - nearly identical releases: ``disctotal``, ``label``, ``catalognum``, + * Add a :ref:`preferred` collection of settings, which allow the user to + specify a sorted list of preferred countries and media types, or prefer + releases closest to the original year for an album. + * Apply minor distance penalties across a range of fields to differentiate + between nearly identical releases: ``mediums``, ``label``, ``catalognum``, ``country`` and ``albumdisambig``. .. _Discogs: http://discogs.com/ diff --git a/docs/reference/config.rst b/docs/reference/config.rst index d23db6b02..ec194afde 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -394,40 +394,65 @@ max_rec As mentioned above, autotagger matches have *recommendations* that control how the UI behaves for a certain quality of match. The recommendation for a certain -match is usually based on the distance calculation. 
But you can also control -the recommendation for certain specific situations by defining *maximum* -recommendations when: +match is based on the overall distance calculation. But you can also control +the recommendation when a distance penalty is being applied for a specific +field by defining *maximum* recommendations for each field: -* a match came from a source other than MusicBrainz (e.g., the - :doc:`Discogs ` plugin); -* a match has missing or extra tracks; -* the length (duration) of at least one track differs; or -* at least one track number differs. - -To define maxima, use keys under ``max_rec:`` in the ``match`` section:: +To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here +are the defaults:: match: max_rec: - non_mb_source: strong - partial: medium - tracklength: strong - tracknumber: strong + source: strong + artist: strong + album: strong + media: strong + mediums: strong + year: strong + country: strong + label: strong + catalognum: strong + albumdisambig: strong + album_id: strong + tracks: strong + missing_tracks: medium + unmatched_tracks: medium + track_title: strong + track_artist: strong + track_index: strong + track_length_grace: strong + track_length_max: strong + track_length: strong + track_id: strong -If a recommendation is higher than the configured maximum and the condition is -met, the recommendation will be downgraded. The maximum for each condition can -be one of ``none``, ``low``, ``medium`` or ``strong``. When the maximum -recommendation is ``strong``, no "downgrading" occurs for that situation. +If a recommendation is higher than the configured maximum and a penalty is +being applied, the recommendation will be downgraded. The maximum for each +field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the +maximum recommendation is ``strong``, no "downgrading" occurs. -The above example shows the default ``max_rec`` settings. +.. _preferred: -.. 
_preferred_media: +preferred +~~~~~~~~~ -preferred_media -~~~~~~~~~~~~~~~ +In addition to comparing the tagged metadata with the match metadata for +similarity, you can also specify an ordered list of preferred countries and +media types. A distance penalty will be applied if the country or media type +from the match metadata doesn't match. The order is important: the first item +will be most preferred. -When comparing files that have no ``media`` tagged, prefer releases that more -closely resemble this media (using a string distance). When files are already -tagged with media, this setting is ignored. Default: ``CD``. +You can also tell the autotagger to prefer matches that have a release year +closest to the original year for an album. + +Here's an example:: + + match: + preferred: + countries: ['US', 'GB', 'UK'] + media: ['CD', 'Digital Media'] + original_year: yes + +By default, none of these options are enabled. .. _path-format-config: diff --git a/test/test_autotag.py b/test/test_autotag.py index 1a6188e7c..92088a7b8 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -23,6 +23,7 @@ import _common from _common import unittest from beets import autotag from beets.autotag import match +from beets.autotag.match import Distance from beets.library import Item from beets.util import plurality from beets.autotag import AlbumInfo, TrackInfo @@ -105,6 +106,127 @@ def _make_trackinfo(): TrackInfo(u'three', None, u'some artist', length=1, index=3), ] +class DistanceTest(unittest.TestCase): + def setUp(self): + self.dist = Distance() + + def test_add(self): + self.dist.add('add', 1.0) + self.assertEqual(self.dist.penalties, {'add': [1.0]}) + + def test_add_equality(self): + self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi']) + self.assertEqual(self.dist.penalties['equality'], [0.0]) + + self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi']) + self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0]) + + 
self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I)) + self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0, 0.0]) + + def test_add_expr(self): + self.dist.add_expr('expr', True) + self.assertEqual(self.dist.penalties['expr'], [1.0]) + + self.dist.add_expr('expr', False) + self.assertEqual(self.dist.penalties['expr'], [1.0, 0.0]) + + def test_add_number(self): + # Add a full penalty for each number of difference between two numbers. + + self.dist.add_number('number', 1, 1) + self.assertEqual(self.dist.penalties['number'], [0.0]) + + self.dist.add_number('number', 1, 2) + self.assertEqual(self.dist.penalties['number'], [0.0, 1.0]) + + self.dist.add_number('number', 2, 1) + self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0]) + + self.dist.add_number('number', -1, 2) + self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0, 1.0, + 1.0, 1.0]) + + def test_add_priority(self): + self.dist.add_priority('priority', 'abc', 'abc') + self.assertEqual(self.dist.penalties['priority'], [0.0]) + + self.dist.add_priority('priority', 'def', ['abc', 'def', 'ghi']) + self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25]) + + self.dist.add_priority('priority', 'ghi', ['abc', 'def', + re.compile('GHI', re.I)]) + self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5]) + + self.dist.add_priority('priority', 'xyz', ['abc', 'def']) + self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5, 1.0]) + + def test_add_ratio(self): + self.dist.add_ratio('ratio', 25, 100) + self.assertEqual(self.dist.penalties['ratio'], [0.25]) + + self.dist.add_ratio('ratio', 10, 5) + self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0]) + + self.dist.add_ratio('ratio', -5, 5) + self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0]) + + self.dist.add_ratio('ratio', 5, 0) + self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) + + def test_add_string(self): + dist = match.string_dist(u'abc', u'bcd') + 
self.dist.add_string('string', u'abc', u'bcd') + self.assertEqual(self.dist.penalties['string'], [dist]) + + def test_distance(self): + config['match']['distance_weights']['album'] = 2.0 + config['match']['distance_weights']['media'] = 1.0 + self.dist.add('album', 0.5) + self.dist.add('media', 0.25) + self.dist.add('media', 0.75) + self.assertEqual(self.dist.distance, 0.5) + + # __getitem__() + self.assertEqual(self.dist['album'], 0.25) + self.assertEqual(self.dist['media'], 0.25) + + def test_max_distance(self): + config['match']['distance_weights']['album'] = 3.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('album', 0.5) + self.dist.add('medium', 0.0) + self.dist.add('medium', 0.0) + self.assertEqual(self.dist.max_distance, 5.0) + + def test_sorted(self): + config['match']['distance_weights']['album'] = 4.0 + config['match']['distance_weights']['medium'] = 2.0 + + self.dist.add('album', 0.1875) + self.dist.add('medium', 0.75) + self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')]) + + # Sort by key if distance is equal. 
+ dist = Distance() + dist.add('album', 0.375) + dist.add('medium', 0.75) + self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')]) + + def test_update(self): + self.dist.add('album', 0.5) + self.dist.add('media', 1.0) + + dist = Distance() + dist.add('album', 0.75) + dist.add('album', 0.25) + dist.add('media', 0.05) + + self.dist.update(dist) + + self.assertEqual(self.dist.penalties, {'album': [0.5, 0.75, 0.25], + 'media': [1.0, 0.05]}) + class TrackDistanceTest(unittest.TestCase): def test_identical_tracks(self): item = _make_item(u'one', 1) diff --git a/test/test_ui.py b/test/test_ui.py index b679021f7..bfdd53ddd 100644 --- a/test/test_ui.py +++ b/test/test_ui.py @@ -27,6 +27,7 @@ from beets import library from beets import ui from beets.ui import commands from beets import autotag +from beets.autotag.match import distance from beets import importer from beets.mediafile import MediaFile from beets import config @@ -594,21 +595,23 @@ class ShowChangeTest(_common.TestCase): self.items[0].track = 1 self.items[0].path = '/path/to/file.mp3' self.info = autotag.AlbumInfo( - 'the album', 'album id', 'the artist', 'artist id', [ - autotag.TrackInfo('the title', 'track id', index=1) + u'the album', u'album id', u'the artist', u'artist id', [ + autotag.TrackInfo(u'the title', u'track id', index=1) ]) def _show_change(self, items=None, info=None, - cur_artist='the artist', cur_album='the album', + cur_artist=u'the artist', cur_album=u'the album', dist=0.1): items = items or self.items info = info or self.info mapping = dict(zip(items, info.tracks)) config['color'] = False + album_dist = distance(items, info, mapping) + album_dist.penalties = {'album': [dist]} commands.show_change( cur_artist, cur_album, - autotag.AlbumMatch(0.1, info, mapping, set(), set()), + autotag.AlbumMatch(album_dist, info, mapping, set(), set()), ) return self.io.getoutput().lower() @@ -623,7 +626,7 @@ class ShowChangeTest(_common.TestCase): self.assertTrue('correcting tags from:' in 
msg) def test_item_data_change(self): - self.items[0].title = 'different' + self.items[0].title = u'different' msg = self._show_change() self.assertTrue('different -> the title' in msg) @@ -638,12 +641,12 @@ class ShowChangeTest(_common.TestCase): self.assertTrue('correcting tags from:' in msg) def test_item_data_change_title_missing(self): - self.items[0].title = '' + self.items[0].title = u'' msg = re.sub(r' +', ' ', self._show_change()) self.assertTrue('file.mp3 -> the title' in msg) def test_item_data_change_title_missing_with_unicode_filename(self): - self.items[0].title = '' + self.items[0].title = u'' self.items[0].path = u'/path/to/caf\xe9.mp3'.encode('utf8') msg = re.sub(r' +', ' ', self._show_change().decode('utf8')) self.assertTrue(u'caf\xe9.mp3 -> the title' in msg