Use a Distance object instead of floats for distance calculations.

The new Distance object knows how to perform various types of distance calculations (expression, equality, number, priority, string). It will keep track of each individual penalty that has been applied so that we can utilise that information in the UI and when making decisions about the recommendation level. We now display the top 3 penalties (sorted by weight) on the release list (and "..." if there are more than 3), and we display all penalties on the album info line and track change line. The implementation of the `max_rec` setting has been simplified by removing duplicate validation and instead looking at the penalties that have been applied to a distance. As a result, we can now configure a maximum recommendation for any penalty that might be applied. We have a few new checks when calculating album distance: `match: preferred: countries` and `match: preferred: media` can each be set to a list of countries and media in order of your preference. These are empty by default. A value that matches the first item will have no penalty, and a value that doesn't match any item will have an unweighted penalty of 1.0. If `match: preferred: original_year` is set to "yes", beets will apply an unweighted penalty of 1.0 for each year of difference between the release year and the original year. We now configure individual weights for `mediums` (disctotal), `label`, `catalognum`, `country` and `albumdisambig` instead of a single generic `minor` weight. This gives more control, but more importantly separates and names the applied penalties so that the UI can convey exactly which fields have contributed to the overall distance penalty. Likewise, `missing tracks` and `unmatched tracks` are penalised and displayed in the UI separately, instead of a combined `partial` penalty. Display non-MusicBrainz source in the disambiguation string, and "source" in the list of penalties if a release is penalised for being from a non-MusicBrainz source.
2026-02-25 16:53:17 +01:00 · 2013-06-02 16:33:07 +10:00 · 2013-06-02 16:33:07 +10:00 · e6ac8e1646
commit e6ac8e1646
parent cff06431cc
10 changed files with 553 additions and 280 deletions
--- a/beets/autotag/match.py
+++ b/beets/autotag/match.py
@ -30,7 +30,7 @@ from beets.util.enumeration import enum
 from beets.autotag import hooks

 # A configuration view for the distance weights.
-weights = config['match']['weight']
+weights = config['match']['distance_weights']

 # Parameters for string distance function.
 # Words that can be moved to the end of a string using a comma.
@ -187,62 +187,202 @@ def track_index_changed(item, track_info):
    """
    return item.track not in (track_info.medium_index, track_info.index)

+class Distance(object):
+    """Keeps track of multiple distance penalties. Provides a single weighted
+    distance for all penalties as well as a weighted distance for each
+    individual penalty.
+    """
+    def __cmp__(self, other):
+        return cmp(self.distance, other)
+
+    def __float__(self):
+        return self.distance
+
+    def __getitem__(self, key):
+        """Returns the weighted distance for a named penalty.
+        """
+        dist = sum(self.penalties[key]) * weights[key].as_number()
+        dist_max = self.max_distance
+        if dist_max:
+            return dist / dist_max
+        return 0.0
+
+    def __init__(self):
+        self.penalties = {}
+
+    def __sub__(self, other):
+        return self.distance - other
+
+    def __rsub__(self, other):
+        return other - self.distance
+
+    def _eq(self, value1, value2):
+        """Returns True if `value1` is equal to `value2`. `value1` may be a
+        compiled regular expression, in which case it will be matched against
+        `value2`.
+        """
+        if isinstance(value1, re._pattern_type):
+            return bool(value1.match(value2))
+        return value1 == value2
+
+    def add(self, key, dist):
+        """Adds a distance penalty. `key` must correspond with a configured
+        weight setting. `dist` must be a float between 0.0 and 1.0, and will be
+        added to any existing distance penalties for the same key.
+        """
+        if not 0.0 <= dist <= 1.0:
+            raise ValueError(
+                    '`dist` must be between 0.0 and 1.0. It is: %r' % dist)
+        self.penalties.setdefault(key, []).append(dist)
+
+    def add_equality(self, key, value, options):
+        """Adds a distance penalty of 1.0 if `value` doesn't match any of the
+        values in `options`. If an option is a compiled regular expression, it
+        will be considered equal if it matches against `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        for opt in options:
+            if self._eq(opt, value):
+                dist = 0.0
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_expr(self, key, expr):
+        """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0.
+        """
+        if expr:
+            self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_number(self, key, number1, number2):
+        """Adds a distance penalty of 1.0 for each number of difference between
+        `number1` and `number2`, or 0.0 when there is no difference. Use this
+        when there is no upper limit on the difference between the two numbers.
+        """
+        diff = abs(number1 - number2)
+        if diff:
+            for i in range(diff):
+                self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_priority(self, key, value, options):
+        """Adds a distance penalty that corresponds to the position at which
+        `value` appears in `options`. A distance penalty of 0.0 for the first
+        option, or 1.0 if there is no matching option. If an option is a
+        compiled regular expression, it will be considered equal if it matches
+        against `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        unit = 1.0 / (len(options) + 1)
+        for i, opt in enumerate(options):
+            if self._eq(opt, value):
+                dist = i * unit
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_ratio(self, key, number1, number2):
+        """Adds a distance penalty for `number1` as a ratio of `number2`.
+        `number1` is bound at 0 and `number2`.
+        """
+        number = float(max(min(number1, number2), 0))
+        if number2:
+            dist = number / number2
+        else:
+            dist = 0.0
+        self.add(key, dist)
+
+    def add_string(self, key, str1, str2):
+        """Adds a distance penalty based on the edit distance between `str1`
+        and `str2`.
+        """
+        dist = string_dist(str1, str2)
+        self.add(key, dist)
+
+    @property
+    def distance(self):
+        """Returns an overall weighted distance across all penalties.
+        """
+        dist = 0.0
+        for key, penalty in self.penalties.iteritems():
+            dist += sum(penalty) * weights[key].as_number()
+        dist_max = self.max_distance
+        if dist_max:
+            return dist / dist_max
+        return 0.0
+
+    @property
+    def max_distance(self):
+        """Returns the maximum distance penalty.
+        """
+        dist_max = 0.0
+        for key, penalty in self.penalties.iteritems():
+            dist_max += len(penalty) * weights[key].as_number()
+        return dist_max
+
+    @property
+    def sorted(self):
+        """Returns a list of (dist, key) pairs, with `dist` being the weighted
+        distance, sorted from highest to lowest.
+        """
+        list_ = [(self[key], key) for key in self.penalties]
+        return sorted(list_, key=lambda (dist, key): (0-dist, key))
+
+    def update(self, dist):
+        """Adds all the distance penalties from `dist`.
+        """
+        if not isinstance(dist, Distance):
+            raise ValueError(
+                    '`dist` must be a Distance object. It is: %r' % dist)
+        for key, penalties in dist.penalties.iteritems():
+            self.penalties.setdefault(key, []).extend(penalties)
+
 def track_distance(item, track_info, incl_artist=False):
    """Determines the significance of a track metadata change. Returns a
-    float in [0.0,1.0]. `incl_artist` indicates that a distance
-    component should be included for the track artist (i.e., for
-    various-artist releases).
+    Distance object. `incl_artist` indicates that a distance component should
+    be included for the track artist (i.e., for various-artist releases).
    """
-    # Distance and normalization accumulators.
-    dist, dist_max = 0.0, 0.0
+    dist = Distance()

-    # Check track length.
-    # If there's no length to check, apply no penalty.
+    # Length.
    if track_info.length:
        diff = abs(item.length - track_info.length)
        diff = max(diff - weights['track_length_grace'].as_number(), 0.0)
        diff = min(diff, weights['track_length_max'].as_number())
-        dist += (diff / weights['track_length_max'].as_number()) * \
-                weights['track_length'].as_number()
-    dist_max += weights['track_length'].as_number()
+        dist.add_ratio('track_length', diff,
+                       weights['track_length_max'].as_number())

-    # Track title.
-    dist += string_dist(item.title, track_info.title) * \
-        weights['track_title'].as_number()
-    dist_max += weights['track_title'].as_number()
+    # Title.
+    dist.add_string('track_title', item.title, track_info.title)

-    # Track artist, if included.
-    # Attention: MB DB does not have artist info for all compilations,
-    # so only check artist distance if there is actually an artist in
-    # the MB track data.
+    # Artist. Only check if there is actually an artist in the track data.
    if incl_artist and track_info.artist and \
            item.artist.lower() not in VA_ARTISTS:
-        dist += string_dist(item.artist, track_info.artist) * \
-                weights['track_artist'].as_number()
-        dist_max += weights['track_artist'].as_number()
+        dist.add_string('track_artist', item.artist, track_info.artist)

    # Track index.
    if track_info.index and item.track:
-        if track_index_changed(item, track_info):
-            dist += weights['track_index'].as_number()
-        dist_max += weights['track_index'].as_number()
+        dist.add_expr('track_index', track_index_changed(item, track_info))

-    # MusicBrainz track ID.
+    # Track ID.
    if item.mb_trackid:
-        if item.mb_trackid != track_info.track_id:
-            dist += weights['track_id'].as_number()
-        dist_max += weights['track_id'].as_number()
+        dist.add_expr('track_id', item.mb_trackid != track_info.track_id)

-    # Plugin distances.
-    plugin_d, plugin_dm = plugins.track_distance(item, track_info)
-    dist += plugin_d
-    dist_max += plugin_dm
+    # Plugins.
+    dist.update(plugins.track_distance(item, track_info))

-    return dist / dist_max
+    return dist

 def distance(items, album_info, mapping):
    """Determines how "significant" an album metadata change would be.
-    Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object
+    Returns a Distance object. `album_info` is an AlbumInfo object
    reflecting the album to be compared. `items` is a sequence of all
    Item objects that will be matched (order is not important).
    `mapping` is a dictionary mapping Items to TrackInfo objects; the
@ -251,100 +391,89 @@ def distance(items, album_info, mapping):
    """
    likelies, _ = current_metadata(items)

-    # These accumulate the possible distance components. The final
-    # distance will be dist/dist_max.
-    dist = 0.0
-    dist_max = 0.0
+    dist = Distance()

-    # Artist/album metadata.
+    # Artist, if not various.
    if not album_info.va:
-        dist += string_dist(likelies['artist'], album_info.artist) * \
-                weights['artist'].as_number()
-        dist_max += weights['artist'].as_number()
-    dist += string_dist(likelies['album'], album_info.album) * \
-            weights['album'].as_number()
-    dist_max += weights['album'].as_number()
+        dist.add_string('artist', likelies['artist'], album_info.artist)

-    # Year. No penalty for matching release or original year.
-    if likelies['year'] and album_info.year:
-        if likelies['year'] not in (album_info.year, album_info.original_year):
-            diff = abs(album_info.year - likelies['year'])
-            if diff:
-                dist += (1.0 - 1.0 / diff) * weights['year'].as_number()
-        dist_max += weights['year'].as_number()
+    # Album.
+    dist.add_string('album', likelies['album'], album_info.album)

-    # Actual or preferred media.
-    preferred_media = config['match']['preferred_media'].get()
+    # Media.
    if likelies['media'] and album_info.media:
-        dist += string_dist(likelies['media'], album_info.media) * \
-                weights['media'].as_number()
-        dist_max += weights['media'].as_number()
-    elif album_info.media and preferred_media:
-        dist += string_dist(album_info.media, preferred_media) * \
-                weights['media'].as_number()
-        dist_max += weights['media'].as_number()
+        dist.add_string('media', likelies['media'], album_info.media)

-    # MusicBrainz album ID.
-    if likelies['mb_albumid']:
-        if likelies['mb_albumid'] != album_info.album_id:
-            dist += weights['album_id'].as_number()
-        dist_max += weights['album_id'].as_number()
+    # Preferred media.
+    preferred_media = [re.compile(r'(\d+x)?%s' % pattern, re.I) for pattern
+                       in config['match']['preferred']['media'].get()]
+    if album_info.media and preferred_media:
+        dist.add_priority('media', album_info.media, preferred_media)

-    # Apply a small penalty for differences across many minor metadata. This
-    # helps prioritise releases that are nearly identical.
+    # Number of discs.
+    if likelies['disctotal'] and album_info.mediums:
+        dist.add_number('mediums', likelies['disctotal'], album_info.mediums)

-    if likelies['disctotal']:
-        if likelies['disctotal'] != album_info.mediums:
-            dist += weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
+    # Year.
+    if likelies['year'] and album_info.year:
+        # No penalty for matching release or original year.
+        if likelies['year'] in (album_info.year, album_info.original_year):
+            dist.add('year', 0.0)
+        else:
+            dist.add_number('year', likelies['year'], album_info.year)

-    if likelies['label'] and album_info.label:
-        dist += string_dist(likelies['label'], album_info.label) * \
-                weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
-
-    if likelies['catalognum'] and album_info.catalognum:
-        dist += string_dist(likelies['catalognum'],
-                            album_info.catalognum) * \
-                weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
+    # Prefer earlier releases.
+    if album_info.year and album_info.original_year and \
+            config['match']['preferred']['original_year'].get():
+        dist.add_number('year', album_info.year, album_info.original_year)

+    # Country.
    if likelies['country'] and album_info.country:
-        dist += string_dist(likelies['country'],
-                            album_info.country) * \
-                weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
+        dist.add_string('country', likelies['country'], album_info.country)

+    # Preferred countries.
+    preferred_countries = [re.compile(pattern, re.I) for pattern
+                           in config['match']['preferred']['countries'].get()]
+    if album_info.country and preferred_countries:
+        dist.add_priority('country', album_info.country, preferred_countries)
+
+    # Label.
+    if likelies['label'] and album_info.label:
+        dist.add_string('label', likelies['label'], album_info.label)
+
+    # Catalog number.
+    if likelies['catalognum'] and album_info.catalognum:
+        dist.add_string('catalognum', likelies['catalognum'],
+                        album_info.catalognum)
+
+    # Disambiguation.
    if likelies['albumdisambig'] and album_info.albumdisambig:
-        dist += string_dist(likelies['albumdisambig'],
-                            album_info.albumdisambig) * \
-                weights['minor'].as_number()
-        dist_max += weights['minor'].as_number()
+        dist.add_string('albumdisambig', likelies['albumdisambig'],
+                        album_info.albumdisambig)

-    # Matched track distances.
+    # Album ID.
+    if likelies['mb_albumid']:
+        dist.add_equality('album_id', likelies['mb_albumid'],
+                          album_info.album_id)
+
+    # Tracks.
+    dist.tracks = {}
    for item, track in mapping.iteritems():
-        dist += track_distance(item, track, album_info.va) * \
-                weights['track'].as_number()
-        dist_max += weights['track'].as_number()
+        dist.tracks[track] = track_distance(item, track, album_info.va)
+        dist.add('tracks', dist.tracks[track].distance)

-    # Extra and unmatched tracks.
-    for track in set(album_info.tracks) - set(mapping.values()):
-        dist += weights['missing'].as_number()
-        dist_max += weights['missing'].as_number()
-    for item in set(items) - set(mapping.keys()):
-        dist += weights['unmatched'].as_number()
-        dist_max += weights['unmatched'].as_number()
+    # Missing tracks.
+    for i in range(len(album_info.tracks) - len(mapping)):
+        dist.add('missing_tracks', 1.0)

-    # Plugin distances.
-    plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping)
-    dist += plugin_d
-    dist_max += plugin_dm
+    # Unmatched tracks.
+    for i in range(len(items) - len(mapping)):
+        dist.add('unmatched_tracks', 1.0)

-    # Normalize distance, avoiding divide-by-zero.
-    if dist_max == 0.0:
-        return 0.0
-    else:
-        return dist / dist_max
+    # Plugins.
+    dist.update(plugins.album_distance(items, album_info, mapping))
+
+    return dist

 def match_by_id(items):
    """If the items are tagged with a MusicBrainz album ID, returns an
@ -370,8 +499,8 @@ def _recommendation(results):
    recommendation based on the results' distances.

    If the recommendation is higher than the configured maximum for
-    certain situations, the recommendation will be downgraded to the
-    configured maximum.
+    an applied penalty, the recommendation will be downgraded to the
+    configured maximum for that penalty.
    """
    if not results:
        # No candidates: no recommendation.
@ -393,45 +522,20 @@ def _recommendation(results):
        # Gap between first two candidates is large.
        rec = recommendation.low
    else:
-        # No conclusion.
-        rec = recommendation.none
+        # No conclusion. Return immediately. Can't be downgraded any further.
+        return recommendation.none

-    # "Downgrades" in certain configured situations.
-    if isinstance(results[0], hooks.AlbumMatch):
-        # Load the configured recommendation maxima.
-        max_rec = {}
-        for trigger in 'non_mb_source', 'partial', 'tracklength', 'tracknumber':
-            max_rec[trigger] = \
-                config['match']['max_rec'][trigger].as_choice({
-                    'strong': recommendation.strong,
-                    'medium': recommendation.medium,
-                    'low': recommendation.low,
-                    'none': recommendation.none,
-                })
-
-        # Non-MusicBrainz source.
-        if rec > max_rec['non_mb_source'] and \
-                results[0].info.data_source != 'MusicBrainz':
-            rec = max_rec['non_mb_source']
-
-        # Partial match.
-        if rec > max_rec['partial'] and \
-                (results[0].extra_items or results[0].extra_tracks):
-            rec = max_rec['partial']
-
-        # Check track number and duration for each item.
-        for item, track_info in results[0].mapping.items():
-            # Track length differs.
-            if rec > max_rec['tracklength'] and \
-                    item.length and track_info.length and \
-                    abs(item.length - track_info.length) > \
-                    weights['track_length_grace'].as_number():
-                rec = max_rec['tracklength']
-
-            # Track number differs.
-            if rec > max_rec['tracknumber'] and \
-                    track_index_changed(item, track_info):
-                rec = max_rec['tracknumber']
+    # Downgrade to the max rec if it is lower than the current rec for an
+    # applied penalty.
+    for dist, key in results[0].distance.sorted:
+        if dist:
+            max_rec = config['match']['max_rec'][key].as_choice({
+                'strong': recommendation.strong,
+                'medium': recommendation.medium,
+                'low': recommendation.low,
+                'none': recommendation.none,
+            })
+            rec = min(rec, max_rec)

    return rec

@ -465,7 +569,7 @@ def tag_album(items, search_artist=None, search_album=None,
        - The current artist.
        - The current album.
        - A list of AlbumMatch objects. The candidates are sorted by
-        distance (i.e., best match first).
+          distance (i.e., best match first).
        - A recommendation.
    If search_artist and search_album or search_id are provided, then
    they are used as search terms in place of the current metadata.
--- a/beets/config_default.yaml
+++ b/beets/config_default.yaml
@ -68,22 +68,42 @@ match:
    medium_rec_thresh: 0.25
    rec_gap_thresh: 0.25
    max_rec:
-        non_mb_source: strong
-        partial: medium
-        tracklength: strong
-        tracknumber: strong
-    preferred_media: CD
-    weight:
+        source: strong
+        artist: strong
+        album: strong
+        media: strong
+        mediums: strong
+        year: strong
+        country: strong
+        label: strong
+        catalognum: strong
+        albumdisambig: strong
+        album_id: strong
+        tracks: strong
+        missing_tracks: medium
+        unmatched_tracks: medium
+        track_title: strong
+        track_artist: strong
+        track_index: strong
+        track_length_grace: strong
+        track_length_max: strong
+        track_length: strong
+        track_id: strong
+    distance_weights:
        source: 2.0
        artist: 3.0
        album: 3.0
-        year: 1.0
        media: 1.0
+        mediums: 1.0
+        year: 1.0
+        country: 0.5
+        label: 0.5
+        catalognum: 0.5
+        albumdisambig: 0.5
        album_id: 5.0
-        minor: 0.5
-        track: 1.0
-        missing: 0.9
-        unmatched: 0.6
+        tracks: 2.0
+        missing_tracks: 0.9
+        unmatched_tracks: 0.6
        track_title: 3.0
        track_artist: 2.0
        track_index: 1.0
@ -91,3 +111,7 @@ match:
        track_length_max: 30
        track_length: 2.0
        track_id: 5.0
+    preferred:
+        countries: []
+        media: []
+        original_year: no
--- a/beets/plugins.py
+++ b/beets/plugins.py
@ -64,16 +64,16 @@ class BeetsPlugin(object):
        return {}

    def track_distance(self, item, info):
-        """Should return a (distance, distance_max) pair to be added
-        to the distance value for every track comparison.
+        """Should return a Distance object to be added to the
+        distance for every track comparison.
        """
-        return 0.0, 0.0
+        return beets.autotag.match.Distance()

    def album_distance(self, items, album_info, mapping):
-        """Should return a (distance, distance_max) pair to be added
-        to the distance value for every album-level comparison.
+        """Should return a Distance object to be added to the
+        distance for every album-level comparison.
        """
-        return 0.0, 0.0
+        return beets.autotag.match.Distance()

    def candidates(self, items, artist, album, va_likely):
        """Should return a sequence of AlbumInfo objects that match the
@ -242,25 +242,19 @@ def queries():

 def track_distance(item, info):
    """Gets the track distance calculated by all loaded plugins.
-    Returns a (distance, distance_max) pair.
+    Returns a Distance object.
    """
-    dist = 0.0
-    dist_max = 0.0
+    dist = beets.autotag.match.Distance()
    for plugin in find_plugins():
-        d, dm = plugin.track_distance(item, info)
-        dist += d
-        dist_max += dm
-    return dist, dist_max
+        dist.update(plugin.track_distance(item, info))
+    return dist

 def album_distance(items, album_info, mapping):
    """Returns the album distance calculated by plugins."""
-    dist = 0.0
-    dist_max = 0.0
+    dist = beets.autotag.match.Distance()
    for plugin in find_plugins():
-        d, dm = plugin.album_distance(items, album_info, mapping)
-        dist += d
-        dist_max += dm
-    return dist, dist_max
+        dist.update(plugin.album_distance(items, album_info, mapping))
+    return dist

 def candidates(items, artist, album, va_likely):
    """Gets MusicBrainz candidates for an album from each plugin.
--- a/beets/ui/commands.py
+++ b/beets/ui/commands.py
@ -125,14 +125,14 @@ default_commands.append(fields_cmd)

 VARIOUS_ARTISTS = u'Various Artists'

-PARTIAL_MATCH_MESSAGE = u'(partial match!)'
-
 # Importer utilities and support.

 def disambig_string(info):
-    """Returns label, year and media disambiguation, if available.
+    """Returns source, media, year, country, and album disambiguation.
    """
    disambig = []
+    if info.data_source != 'MusicBrainz':
+        disambig.append(info.data_source)
    if info.media:
        if info.mediums > 1:
            disambig.append(u'{0}x{1}'.format(
@ -163,26 +163,35 @@ def dist_string(dist):
        out = ui.colorize('red', out)
    return out

+def penalty_string(distance, limit=None):
+    """Returns a colorized string that indicates all the penalties applied to
+    a distance object.
+    """
+    penalties = []
+    for dist, key in distance.sorted:
+        if dist:
+            key = key.replace('album_', '')
+            key = key.replace('track_', '')
+            key = key.replace('_', ' ')
+            penalties.append(key)
+    if penalties:
+        if limit and len(penalties) > limit:
+            penalties = penalties[:limit] + ['...']
+        return ui.colorize('yellow', '(%s)' % ', '.join(penalties))
+
 def show_change(cur_artist, cur_album, match):
    """Print out a representation of the changes that will be made if an
    album's tags are changed according to `match`, which must be an AlbumMatch
    object.
    """
-    def show_album(artist, album, partial=False):
+    def show_album(artist, album):
        if artist:
            album_description = u'    %s - %s' % (artist, album)
        elif album:
            album_description = u'    %s' % album
        else:
            album_description = u'    (unknown album)'
-
-        out = album_description
-
-        # Add a suffix if this is a partial match.
-        if partial:
-            out += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE)
-
-        print_(out)
+        print_(album_description)

    def format_index(track_info):
        """Return a string representing the track index of the given
@ -223,11 +232,7 @@ def show_change(cur_artist, cur_album, match):
        print_("To:")
        show_album(artist_r, album_r)
    else:
-        message = u"Tagging:\n    %s - %s" % (match.info.artist,
-                                              match.info.album)
-        if match.extra_items or match.extra_tracks:
-            message += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE)
-        print_(message)
+        print_(u"Tagging:\n    %s - %s" % (match.info.artist, match.info.album))

    # Data URL.
    if match.info.data_url:
@ -235,9 +240,13 @@ def show_change(cur_artist, cur_album, match):

    # Info line.
    info = []
+    # Similarity.
    info.append('(Similarity: %s)' % dist_string(match.distance))
-    if match.info.data_source != 'MusicBrainz':
-        info.append(ui.colorize('turquoise', '(%s)' % match.info.data_source))
+    # Penalties.
+    penalties = penalty_string(match.distance)
+    if penalties:
+        info.append(penalties)
+    # Disambiguation.
    disambig = disambig_string(match.info)
    if disambig:
        info.append(ui.colorize('lightgray', '(%s)' % disambig))
@ -315,18 +324,10 @@ def show_change(cur_artist, cur_album, match):
            rhs += templ.format(rhs_length)
            lhs_width += len(cur_length) + 3

-        # Hidden penalties. No LHS/RHS diff is displayed, but we still want to
-        # indicate that a penalty has been applied to explain the similarity
-        # score.
-        penalties = []
-        if match.info.va and track_info.artist and \
-                item.artist.lower() not in VA_ARTISTS:
-            penalties.append('artist')
-        if item.mb_trackid and item.mb_trackid != track_info.track_id:
-            penalties.append('ID')
+        # Penalties.
+        penalties = penalty_string(match.distance.tracks[track_info])
        if penalties:
-            rhs += ' %s' % ui.colorize('red',
-                                       '(%s)' % ', '.join(penalties))
+            rhs += ' %s' % penalties

        if lhs != rhs:
            lines.append((' * %s' % lhs, rhs, lhs_width))
@ -489,20 +490,17 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None,
                       (cur_artist, cur_album))
                print_('Candidates:')
                for i, match in enumerate(candidates):
+                    # Artist, album and distance.
                    line = ['%i. %s - %s (%s)' % (i + 1, match.info.artist,
                                                  match.info.album,
                                                  dist_string(match.distance))]

-                    # Point out the partial matches.
-                    if match.extra_items or match.extra_tracks:
-                        line.append(ui.colorize('yellow',
-                                                PARTIAL_MATCH_MESSAGE))
-
-                    # Sources other than MusicBrainz.
-                    source = match.info.data_source
-                    if source != 'MusicBrainz':
-                        line.append(ui.colorize('turquoise', '(%s)' % source))
+                    # Penalties.
+                    penalties = penalty_string(match.distance, 3)
+                    if penalties:
+                        line.append(penalties)

+                    # Disambiguation
                    disambig = disambig_string(match.info)
                    if disambig:
                        line.append(ui.colorize('lightgray', '(%s)' % disambig))
--- a/beetsplug/chroma.py
+++ b/beetsplug/chroma.py
@ -21,6 +21,7 @@ from beets import util
 from beets import config
 from beets.util import confit
 from beets.autotag import hooks
+from beets.autotag.match import Distance
 import acoustid
 import logging
 from collections import defaultdict
@ -113,16 +114,14 @@ def _all_releases(items):

 class AcoustidPlugin(plugins.BeetsPlugin):
    def track_distance(self, item, info):
+        dist = Distance()
        if item.path not in _matches or not info.track_id:
            # Match failed or no track ID.
-            return 0.0, 0.0
+            return dist

        recording_ids, _ = _matches[item.path]
-        if info.track_id in recording_ids:
-            dist = 0.0
-        else:
-            dist = TRACK_ID_WEIGHT
-        return dist, TRACK_ID_WEIGHT
+        dist.add_expr('track_id', info.track_id not in recording_ids)
+        return dist

    def candidates(self, items, artist, album, va_likely):
        albums = []
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@ -17,7 +17,7 @@ discogs-client library.
 """
 from beets import config
 from beets.autotag.hooks import AlbumInfo, TrackInfo
-from beets.autotag.match import current_metadata, VA_ARTISTS
+from beets.autotag.match import current_metadata, Distance, VA_ARTISTS
 from beets.plugins import BeetsPlugin
 from discogs_client import Artist, DiscogsAPIError, Release, Search
 import beets
@ -44,14 +44,12 @@ class DiscogsPlugin(BeetsPlugin):
        })

    def album_distance(self, items, album_info, mapping):
-        """Returns the discogs source weight and the maximum source weight.
+        """Returns the album distance, penalizing matches from Discogs.
        """
+        dist = Distance()
        if album_info.data_source == 'Discogs':
-            return self.config['source_weight'].as_number() * \
-                    config['match']['weight']['source'].as_number(), \
-                    config['match']['weight']['source'].as_number()
-        else:
-            return 0.0, 0.0
+            dist.add('source', self.config['source_weight'].as_number())
+        return dist

    def candidates(self, items, artist, album, va_likely):
        """Returns a list of AlbumInfo objects for discogs search results
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -53,30 +53,36 @@ Changelog
  None.
 * Various UI enhancements to the importer due to Tai Lee:

-  * More consistent format and colorization of album and track metadata.
-  * Display data source URL for :doc:`/plugins/discogs` matches. This should
-    make it easier for people who would rather import and correct data from
-    Discogs into MusicBrainz.
+  * Display data source URL and source name in album disambiguation for
+    non-MusicBrainz matches. This should make it easier for people who want to
+    import and correct data from other sources into MusicBrainz.
+  * The top 3 distance penalties are now displayed on the release listing,
+    and all album and track penalties are now displayed on the track changes
+    list. This should make it clear exactly which metadata is contributing to a
+    low similarity score.
  * Display album disambiguation and disc titles in the track listing, when
    available.
-  * Track changes highlighted in yellow indicate a change in format to or from
-    :ref:`per_disc_numbering`. No penalty is applied because the track number
-    is still "correct", just in a different format.
+  * More consistent format and colorization of album and track metadata.
+  * Track changes highlighted in turquoise indicate a change in format to or
+    from :ref:`per_disc_numbering`. No penalty is applied because the track
+    number is still "correct", just in a different format.
  * Sort missing and unmatched tracks by index and title and group them
    together for better readability.
-  * Indicate MusicBrainz ID mismatches.

-* Improve calculation of similarity score:
+* Improve calculation of similarity score and recommendation:

+  * It is now possible to configure a :ref:`max_rec` for any field that is used
+    to calculate the similarity score. The recommendation will be downgraded if
+    a penalty is being applied to the specified field.
  * Strongly prefer releases with a matching MusicBrainz album ID. This helps
    beets re-identify the same release when re-importing existing files.
  * Prefer releases that are closest to the tagged ``year``. Tolerate files
    tagged with release or original year.
-  * Prefer CD releases by default, when there is no ``media`` tagged in the
-    files being imported. This can be changed with the :ref:`preferred_media`
-    setting.
-  * Apply minor penalties across a range of fields to differentiate between
-    nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
+  * Add a :ref:`preferred` collection of settings, which allows the user to
+    specify an ordered list of preferred countries and media types, or prefer
+    releases closest to the original year for an album.
+  * Apply minor distance penalties across a range of fields to differentiate
+    between nearly identical releases: ``mediums``, ``label``, ``catalognum``,
    ``country`` and ``albumdisambig``.

 .. _Discogs: http://discogs.com/
--- a/docs/reference/config.rst
+++ b/docs/reference/config.rst
@ -394,40 +394,65 @@ max_rec

 As mentioned above, autotagger matches have *recommendations* that control how
 the UI behaves for a certain quality of match. The recommendation for a certain
-match is usually based on the distance calculation. But you can also control
-the recommendation for certain specific situations by defining *maximum*
-recommendations when:
+match is based on the overall distance calculation. But you can also control
+the recommendation when a distance penalty is being applied for a specific
+field by defining a *maximum* recommendation for each field.

-* a match came from a source other than MusicBrainz (e.g., the
-  :doc:`Discogs </plugins/discogs>` plugin);
-* a match has missing or extra tracks;
-* the length (duration) of at least one track differs; or
-* at least one track number differs.
-
-To define maxima, use keys under ``max_rec:`` in the ``match`` section::
+To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here
+are the defaults::

    match:
        max_rec:
-            non_mb_source: strong
-            partial: medium
-            tracklength: strong
-            tracknumber: strong
+            source: strong
+            artist: strong
+            album: strong
+            media: strong
+            mediums: strong
+            year: strong
+            country: strong
+            label: strong
+            catalognum: strong
+            albumdisambig: strong
+            album_id: strong
+            tracks: strong
+            missing_tracks: medium
+            unmatched_tracks: medium
+            track_title: strong
+            track_artist: strong
+            track_index: strong
+            track_length_grace: strong
+            track_length_max: strong
+            track_length: strong
+            track_id: strong

-If a recommendation is higher than the configured maximum and the condition is
-met, the recommendation will be downgraded. The maximum for each condition can
-be one of ``none``, ``low``, ``medium`` or ``strong``. When the maximum
-recommendation is ``strong``, no "downgrading" occurs for that situation.
+If a recommendation is higher than the configured maximum and a penalty is
+being applied, the recommendation will be downgraded. The maximum for each
+field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the
+maximum recommendation is ``strong``, no "downgrading" occurs.

-The above example shows the default ``max_rec`` settings.
+.. _preferred:

-.. _preferred_media:
+preferred
+~~~~~~~~~

-preferred_media
-~~~~~~~~~~~~~~~
+In addition to comparing the tagged metadata with the match metadata for
+similarity, you can also specify an ordered list of preferred countries and
+media types. A distance penalty will be applied if the country or media type
+from the match metadata doesn't match. The order is important: the first item
+is the most preferred.

-When comparing files that have no ``media`` tagged, prefer releases that more
-closely resemble this media (using a string distance). When files are already
-tagged with media, this setting is ignored. Default: ``CD``.
+You can also tell the autotagger to prefer matches that have a release year
+closest to the original year for an album.
+
+Here's an example::
+
+    match:
+        preferred:
+            countries: ['US', 'GB', 'UK']
+            media: ['CD', 'Digital Media']
+            original_year: yes
+
+By default, none of these options are enabled.

 .. _path-format-config:

--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@ -23,6 +23,7 @@ import _common
 from _common import unittest
 from beets import autotag
 from beets.autotag import match
+from beets.autotag.match import Distance
 from beets.library import Item
 from beets.util import plurality
 from beets.autotag import AlbumInfo, TrackInfo
@ -105,6 +106,136 @@ def _make_trackinfo():
        TrackInfo(u'three', None, u'some artist', length=1, index=3),
    ]

+class DistanceTest(unittest.TestCase):
+    """Exercise each penalty helper on ``Distance`` and the weighted totals
+    (``distance``, ``max_distance``, ``sorted``) derived from them.
+    """
+    def setUp(self):
+        self.dist = Distance()
+
+    def test_add(self):
+        self.dist.add('add', 1.0)
+        self.assertEqual(self.dist.penalties, {'add': [1.0]})
+
+    def test_add_equality(self):
+        self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi'])
+        self.assertEqual(self.dist.penalties['equality'], [0.0])
+
+        self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi'])
+        self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0])
+
+        self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
+        self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0, 0.0])
+
+    def test_add_expr(self):
+        self.dist.add_expr('expr', True)
+        self.assertEqual(self.dist.penalties['expr'], [1.0])
+
+        self.dist.add_expr('expr', False)
+        self.assertEqual(self.dist.penalties['expr'], [1.0, 0.0])
+
+    def test_add_number(self):
+        # Add a full penalty for each number of difference between two numbers.
+
+        self.dist.add_number('number', 1, 1)
+        self.assertEqual(self.dist.penalties['number'], [0.0])
+
+        self.dist.add_number('number', 1, 2)
+        self.assertEqual(self.dist.penalties['number'], [0.0, 1.0])
+
+        self.dist.add_number('number', 2, 1)
+        self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0])
+
+        # A difference of three adds three full penalties.
+        self.dist.add_number('number', -1, 2)
+        self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0, 1.0,
+                                                         1.0, 1.0])
+
+    def test_add_priority(self):
+        self.dist.add_priority('priority', 'abc', 'abc')
+        self.assertEqual(self.dist.penalties['priority'], [0.0])
+
+        self.dist.add_priority('priority', 'def', ['abc', 'def', 'ghi'])
+        self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25])
+
+        self.dist.add_priority('priority', 'ghi', ['abc', 'def',
+                                              re.compile('GHI', re.I)])
+        self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5])
+
+        self.dist.add_priority('priority', 'xyz', ['abc', 'def'])
+        self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5, 1.0])
+
+    def test_add_ratio(self):
+        self.dist.add_ratio('ratio', 25, 100)
+        self.assertEqual(self.dist.penalties['ratio'], [0.25])
+
+        self.dist.add_ratio('ratio', 10, 5)
+        self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0])
+
+        self.dist.add_ratio('ratio', -5, 5)
+        self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0])
+
+        self.dist.add_ratio('ratio', 5, 0)
+        self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0, 0.0])
+
+    def test_add_string(self):
+        dist = match.string_dist(u'abc', u'bcd')
+        self.dist.add_string('string', u'abc', u'bcd')
+        self.assertEqual(self.dist.penalties['string'], [dist])
+
+    def test_distance(self):
+        config['match']['distance_weights']['album'] = 2.0
+        config['match']['distance_weights']['medium'] = 1.0
+        # Use the keys configured above. Expected arithmetic, counting each
+        # penalty's weight towards the maximum as test_max_distance asserts:
+        # (0.5 * 2.0 + (0.25 + 0.75) * 1.0) / (2.0 + 1.0 + 1.0) = 0.5.
+        self.dist.add('album', 0.5)
+        self.dist.add('medium', 0.25)
+        self.dist.add('medium', 0.75)
+        self.assertEqual(self.dist.distance, 0.5)
+
+        # __getitem__()
+        self.assertEqual(self.dist['album'], 0.25)
+        self.assertEqual(self.dist['medium'], 0.25)
+
+    def test_max_distance(self):
+        config['match']['distance_weights']['album'] = 3.0
+        config['match']['distance_weights']['medium'] = 1.0
+        self.dist.add('album', 0.5)
+        self.dist.add('medium', 0.0)
+        self.dist.add('medium', 0.0)
+        self.assertEqual(self.dist.max_distance, 5.0)
+
+    def test_sorted(self):
+        config['match']['distance_weights']['album'] = 4.0
+        config['match']['distance_weights']['medium'] = 2.0
+
+        self.dist.add('album', 0.1875)
+        self.dist.add('medium', 0.75)
+        self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')])
+
+        # Sort by key if distance is equal.
+        dist = Distance()
+        dist.add('album', 0.375)
+        dist.add('medium', 0.75)
+        self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')])
+
+    def test_update(self):
+        self.dist.add('album', 0.5)
+        self.dist.add('media', 1.0)
+
+        # Everything below belongs to the incoming distance, so update() has
+        # to extend both penalty lists already held by self.dist.
+        dist = Distance()
+        dist.add('album', 0.75)
+        dist.add('album', 0.25)
+        dist.add('media', 0.05)
+
+        self.dist.update(dist)
+
+        self.assertEqual(self.dist.penalties, {'album': [0.5, 0.75, 0.25],
+                                               'media': [1.0, 0.05]})
+
 class TrackDistanceTest(unittest.TestCase):
    def test_identical_tracks(self):
        item = _make_item(u'one', 1)
--- a/test/test_ui.py
+++ b/test/test_ui.py
@ -27,6 +27,7 @@ from beets import library
 from beets import ui
 from beets.ui import commands
 from beets import autotag
+from beets.autotag.match import distance
 from beets import importer
 from beets.mediafile import MediaFile
 from beets import config
@ -594,21 +595,23 @@ class ShowChangeTest(_common.TestCase):
        self.items[0].track = 1
        self.items[0].path = '/path/to/file.mp3'
        self.info = autotag.AlbumInfo(
-            'the album', 'album id', 'the artist', 'artist id', [
-                autotag.TrackInfo('the title', 'track id', index=1)
+            u'the album', u'album id', u'the artist', u'artist id', [
+                autotag.TrackInfo(u'the title', u'track id', index=1)
        ])

    def _show_change(self, items=None, info=None,
-                     cur_artist='the artist', cur_album='the album',
+                     cur_artist=u'the artist', cur_album=u'the album',
                     dist=0.1):
        items = items or self.items
        info = info or self.info
        mapping = dict(zip(items, info.tracks))
        config['color'] = False
+        album_dist = distance(items, info, mapping)
+        album_dist.penalties = {'album': [dist]}
        commands.show_change(
            cur_artist,
            cur_album,
-            autotag.AlbumMatch(0.1, info, mapping, set(), set()),
+            autotag.AlbumMatch(album_dist, info, mapping, set(), set()),
        )
        return self.io.getoutput().lower()

@ -623,7 +626,7 @@ class ShowChangeTest(_common.TestCase):
        self.assertTrue('correcting tags from:' in msg)

    def test_item_data_change(self):
-        self.items[0].title = 'different'
+        self.items[0].title = u'different'
        msg = self._show_change()
        self.assertTrue('different -> the title' in msg)

@ -638,12 +641,12 @@ class ShowChangeTest(_common.TestCase):
        self.assertTrue('correcting tags from:' in msg)

    def test_item_data_change_title_missing(self):
-        self.items[0].title = ''
+        self.items[0].title = u''
        msg = re.sub(r'  +', ' ', self._show_change())
        self.assertTrue('file.mp3 -> the title' in msg)

    def test_item_data_change_title_missing_with_unicode_filename(self):
-        self.items[0].title = ''
+        self.items[0].title = u''
        self.items[0].path = u'/path/to/caf\xe9.mp3'.encode('utf8')
        msg = re.sub(r'  +', ' ', self._show_change().decode('utf8'))
        self.assertTrue(u'caf\xe9.mp3 -> the title' in msg