Merge branch 'master' of https://github.com/sampsyo/beets

2025-12-12 19:42:23 +01:00 · 2013-06-12 00:09:44 +02:00 · 2013-06-12 00:09:44 +02:00 · a64d277bae
commit a64d277bae
parent b3747189e5 2f053b0ecd
13 changed files with 490 additions and 460 deletions
--- a/beets/autotag/hooks.py
+++ b/beets/autotag/hooks.py
@ -15,9 +15,13 @@
 """Glue between metadata sources and the matching logic."""
 import logging
 from collections import namedtuple
+import re

 from beets import plugins
+from beets import config
 from beets.autotag import mb
+from beets.util import levenshtein
+from unidecode import unidecode

 log = logging.getLogger('beets')

@ -158,6 +162,296 @@ class TrackInfo(object):
            if isinstance(value, str):
                setattr(self, fld, value.decode(codec, 'ignore'))

+
+# Candidate distance scoring.
+
+# Parameters for string distance function.
+# Words that can be moved to the end of a string using a comma.
+SD_END_WORDS = ['the', 'a', 'an']
+# Reduced weights for certain portions of the string.
+SD_PATTERNS = [
+    (r'^the ', 0.1),
+    (r'[\[\(]?(ep|single)[\]\)]?', 0.0),
+    (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
+    (r'\(.*?\)', 0.3),
+    (r'\[.*?\]', 0.3),
+    (r'(, )?(pt\.|part) .+', 0.2),
+]
+# Replacements to use before testing distance.
+SD_REPLACE = [
+    (r'&', 'and'),
+]
+
+def _string_dist_basic(str1, str2):
+    """Basic edit distance between two strings, ignoring
+    non-alphanumeric characters and case. Comparisons are based on a
+    transliteration/lowering to ASCII characters. Normalized by string
+    length.
+    """
+    str1 = unidecode(str1)
+    str2 = unidecode(str2)
+    str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
+    str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
+    if not str1 and not str2:
+        return 0.0
+    return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
+
+def string_dist(str1, str2):
+    """Gives an "intuitive" edit distance between two strings. This is
+    an edit distance, normalized by the string length, with a number of
+    tweaks that reflect intuition about text.
+    """
+    str1 = str1.lower()
+    str2 = str2.lower()
+
+    # Don't penalize strings that move certain words to the end. For
+    # example, "the something" should be considered equal to
+    # "something, the".
+    for word in SD_END_WORDS:
+        if str1.endswith(', %s' % word):
+            str1 = '%s %s' % (word, str1[:-len(word)-2])
+        if str2.endswith(', %s' % word):
+            str2 = '%s %s' % (word, str2[:-len(word)-2])
+
+    # Perform a couple of basic normalizing substitutions.
+    for pat, repl in SD_REPLACE:
+        str1 = re.sub(pat, repl, str1)
+        str2 = re.sub(pat, repl, str2)
+
+    # Change the weight for certain string portions matched by a set
+    # of regular expressions. We gradually change the strings and build
+    # up penalties associated with parts of the string that were
+    # deleted.
+    base_dist = _string_dist_basic(str1, str2)
+    penalty = 0.0
+    for pat, weight in SD_PATTERNS:
+        # Get strings that drop the pattern.
+        case_str1 = re.sub(pat, '', str1)
+        case_str2 = re.sub(pat, '', str2)
+
+        if case_str1 != str1 or case_str2 != str2:
+            # If the pattern was present (i.e., it is deleted in the
+            # the current case), recalculate the distances for the
+            # modified strings.
+            case_dist = _string_dist_basic(case_str1, case_str2)
+            case_delta = max(0.0, base_dist - case_dist)
+            if case_delta == 0.0:
+                continue
+
+            # Shift our baseline strings down (to avoid rematching the
+            # same part of the string) and add a scaled distance
+            # amount to the penalties.
+            str1 = case_str1
+            str2 = case_str2
+            base_dist = case_dist
+            penalty += weight * case_delta
+    dist = base_dist + penalty
+
+    return dist
+
+class Distance(object):
+    """Keeps track of multiple distance penalties. Provides a single
+    weighted distance for all penalties as well as a weighted distance
+    for each individual penalty.
+    """
+    def __init__(self):
+        self._penalties = {}
+
+        weights_view = config['match']['distance_weights']
+        self._weights = {}
+        for key in weights_view.keys():
+            self._weights[key] = weights_view[key].as_number()
+
+
+    # Access the components and their aggregates.
+
+    @property
+    def distance(self):
+        """Return a weighted and normalized distance across all
+        penalties.
+        """
+        dist_max = self.max_distance
+        if dist_max:
+            return self.raw_distance / self.max_distance
+        return 0.0
+
+    @property
+    def max_distance(self):
+        """Return the maximum distance penalty (normalization factor).
+        """
+        dist_max = 0.0
+        for key, penalty in self._penalties.iteritems():
+            dist_max += len(penalty) * self._weights[key]
+        return dist_max
+
+    @property
+    def raw_distance(self):
+        """Return the raw (denormalized) distance.
+        """
+        dist_raw = 0.0
+        for key, penalty in self._penalties.iteritems():
+            dist_raw += sum(penalty) * self._weights[key]
+        return dist_raw
+
+    def items(self):
+        """Return a list of (key, dist) pairs, with `dist` being the
+        weighted distance, sorted from highest to lowest. Does not
+        include penalties with a zero value.
+        """
+        list_ = []
+        for key in self._penalties:
+            dist = self[key]
+            if dist:
+                list_.append((key, dist))
+        # Convert distance into a negative float we can sort items in
+        # ascending order (for keys, when the penalty is equal) and
+        # still get the items with the biggest distance first.
+        return sorted(list_, key=lambda (key, dist): (0-dist, key))
+
+
+    # Behave like a float.
+
+    def __cmp__(self, other):
+        return cmp(self.distance, other)
+
+    def __float__(self):
+        return self.distance
+    def __sub__(self, other):
+        return self.distance - other
+
+    def __rsub__(self, other):
+        return other - self.distance
+
+
+    # Behave like a dict.
+
+    def __getitem__(self, key):
+        """Returns the weighted distance for a named penalty.
+        """
+        dist = sum(self._penalties[key]) * self._weights[key]
+        dist_max = self.max_distance
+        if dist_max:
+            return dist / dist_max
+        return 0.0
+
+    def __iter__(self):
+        return iter(self.items())
+
+    def __len__(self):
+        return len(self.items())
+
+    def keys(self):
+        return [key for key, _ in self.items()]
+
+    def update(self, dist):
+        """Adds all the distance penalties from `dist`.
+        """
+        if not isinstance(dist, Distance):
+            raise ValueError(
+                    '`dist` must be a Distance object. It is: %r' % dist)
+        for key, penalties in dist._penalties.iteritems():
+            self._penalties.setdefault(key, []).extend(penalties)
+
+
+    # Adding components.
+
+    def _eq(self, value1, value2):
+        """Returns True if `value1` is equal to `value2`. `value1` may
+        be a compiled regular expression, in which case it will be
+        matched against `value2`.
+        """
+        if isinstance(value1, re._pattern_type):
+            return bool(value1.match(value2))
+        return value1 == value2
+
+    def add(self, key, dist):
+        """Adds a distance penalty. `key` must correspond with a
+        configured weight setting. `dist` must be a float between 0.0
+        and 1.0, and will be added to any existing distance penalties
+        for the same key.
+        """
+        if not 0.0 <= dist <= 1.0:
+            raise ValueError(
+                    '`dist` must be between 0.0 and 1.0. It is: %r' % dist)
+        self._penalties.setdefault(key, []).append(dist)
+
+    def add_equality(self, key, value, options):
+        """Adds a distance penalty of 1.0 if `value` doesn't match any
+        of the values in `options`. If an option is a compiled regular
+        expression, it will be considered equal if it matches against
+        `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        for opt in options:
+            if self._eq(opt, value):
+                dist = 0.0
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_expr(self, key, expr):
+        """Adds a distance penalty of 1.0 if `expr` evaluates to True,
+        or 0.0.
+        """
+        if expr:
+            self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_number(self, key, number1, number2):
+        """Adds a distance penalty of 1.0 for each number of difference
+        between `number1` and `number2`, or 0.0 when there is no
+        difference. Use this when there is no upper limit on the
+        difference between the two numbers.
+        """
+        diff = abs(number1 - number2)
+        if diff:
+            for i in range(diff):
+                self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_priority(self, key, value, options):
+        """Adds a distance penalty that corresponds to the position at
+        which `value` appears in `options`. A distance penalty of 0.0
+        for the first option, or 1.0 if there is no matching option. If
+        an option is a compiled regular expression, it will be
+        considered equal if it matches against `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        unit = 1.0 / (len(options) or 1)
+        for i, opt in enumerate(options):
+            if self._eq(opt, value):
+                dist = i * unit
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_ratio(self, key, number1, number2):
+        """Adds a distance penalty for `number1` as a ratio of `number2`.
+        `number1` is bound at 0 and `number2`.
+        """
+        number = float(max(min(number1, number2), 0))
+        if number2:
+            dist = number / number2
+        else:
+            dist = 0.0
+        self.add(key, dist)
+
+    def add_string(self, key, str1, str2):
+        """Adds a distance penalty based on the edit distance between
+        `str1` and `str2`.
+        """
+        dist = string_dist(str1, str2)
+        self.add(key, dist)
+
+
+# Structures that compose all the information for a candidate match.
+
 AlbumMatch = namedtuple('AlbumMatch', ['distance', 'info', 'mapping',
                                       'extra_items', 'extra_tracks'])

--- a/beets/autotag/match.py
+++ b/beets/autotag/match.py
@ -21,34 +21,13 @@ import datetime
 import logging
 import re
 from munkres import Munkres
-from unidecode import unidecode

 from beets import plugins
 from beets import config
-from beets.util import levenshtein, plurality
+from beets.util import plurality
 from beets.util.enumeration import enum
 from beets.autotag import hooks

-# A configuration view for the distance weights.
-weights = config['match']['distance_weights']
-
-# Parameters for string distance function.
-# Words that can be moved to the end of a string using a comma.
-SD_END_WORDS = ['the', 'a', 'an']
-# Reduced weights for certain portions of the string.
-SD_PATTERNS = [
-    (r'^the ', 0.1),
-    (r'[\[\(]?(ep|single)[\]\)]?', 0.0),
-    (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
-    (r'\(.*?\)', 0.3),
-    (r'\[.*?\]', 0.3),
-    (r'(, )?(pt\.|part) .+', 0.2),
-]
-# Replacements to use before testing distance.
-SD_REPLACE = [
-    (r'&', 'and'),
-]
-
 # Recommendation enumeration.
 recommendation = enum('none', 'low', 'medium', 'strong', name='recommendation')

@ -64,73 +43,6 @@ log = logging.getLogger('beets')

 # Primary matching functionality.

-def _string_dist_basic(str1, str2):
-    """Basic edit distance between two strings, ignoring
-    non-alphanumeric characters and case. Comparisons are based on a
-    transliteration/lowering to ASCII characters. Normalized by string
-    length.
-    """
-    str1 = unidecode(str1)
-    str2 = unidecode(str2)
-    str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
-    str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
-    if not str1 and not str2:
-        return 0.0
-    return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
-
-def string_dist(str1, str2):
-    """Gives an "intuitive" edit distance between two strings. This is
-    an edit distance, normalized by the string length, with a number of
-    tweaks that reflect intuition about text.
-    """
-    str1 = str1.lower()
-    str2 = str2.lower()
-
-    # Don't penalize strings that move certain words to the end. For
-    # example, "the something" should be considered equal to
-    # "something, the".
-    for word in SD_END_WORDS:
-        if str1.endswith(', %s' % word):
-            str1 = '%s %s' % (word, str1[:-len(word)-2])
-        if str2.endswith(', %s' % word):
-            str2 = '%s %s' % (word, str2[:-len(word)-2])
-
-    # Perform a couple of basic normalizing substitutions.
-    for pat, repl in SD_REPLACE:
-        str1 = re.sub(pat, repl, str1)
-        str2 = re.sub(pat, repl, str2)
-
-    # Change the weight for certain string portions matched by a set
-    # of regular expressions. We gradually change the strings and build
-    # up penalties associated with parts of the string that were
-    # deleted.
-    base_dist = _string_dist_basic(str1, str2)
-    penalty = 0.0
-    for pat, weight in SD_PATTERNS:
-        # Get strings that drop the pattern.
-        case_str1 = re.sub(pat, '', str1)
-        case_str2 = re.sub(pat, '', str2)
-
-        if case_str1 != str1 or case_str2 != str2:
-            # If the pattern was present (i.e., it is deleted in the
-            # the current case), recalculate the distances for the
-            # modified strings.
-            case_dist = _string_dist_basic(case_str1, case_str2)
-            case_delta = max(0.0, base_dist - case_dist)
-            if case_delta == 0.0:
-                continue
-
-            # Shift our baseline strings down (to avoid rematching the
-            # same part of the string) and add a scaled distance
-            # amount to the penalties.
-            str1 = case_str1
-            str2 = case_str2
-            base_dist = case_dist
-            penalty += weight * case_delta
-    dist = base_dist + penalty
-
-    return dist
-
 def current_metadata(items):
    """Extract the likely current metadata for an album given a list of its
    items. Return two dictionaries:
@ -187,196 +99,19 @@ def track_index_changed(item, track_info):
    """
    return item.track not in (track_info.medium_index, track_info.index)

-class Distance(object):
-    """Keeps track of multiple distance penalties. Provides a single weighted
-    distance for all penalties as well as a weighted distance for each
-    individual penalty.
-    """
-    def __cmp__(self, other):
-        return cmp(self.distance, other)
-
-    def __float__(self):
-        return self.distance
-
-    def __getitem__(self, key):
-        """Returns the weighted distance for a named penalty.
-        """
-        dist = sum(self._penalties[key]) * weights[key].as_number()
-        dist_max = self.max_distance
-        if dist_max:
-            return dist / dist_max
-        return 0.0
-
-    def __init__(self):
-        self._penalties = {}
-
-    def __iter__(self):
-        return iter(self.sorted)
-
-    def __len__(self):
-        return len(self.sorted)
-
-    def __sub__(self, other):
-        return self.distance - other
-
-    def __rsub__(self, other):
-        return other - self.distance
-
-    def _eq(self, value1, value2):
-        """Returns True if `value1` is equal to `value2`. `value1` may be a
-        compiled regular expression, in which case it will be matched against
-        `value2`.
-        """
-        if isinstance(value1, re._pattern_type):
-            return bool(value1.match(value2))
-        return value1 == value2
-
-    def add(self, key, dist):
-        """Adds a distance penalty. `key` must correspond with a configured
-        weight setting. `dist` must be a float between 0.0 and 1.0, and will be
-        added to any existing distance penalties for the same key.
-        """
-        if not 0.0 <= dist <= 1.0:
-            raise ValueError(
-                    '`dist` must be between 0.0 and 1.0. It is: %r' % dist)
-        self._penalties.setdefault(key, []).append(dist)
-
-    def add_equality(self, key, value, options):
-        """Adds a distance penalty of 1.0 if `value` doesn't match any of the
-        values in `options`. If an option is a compiled regular expression, it
-        will be considered equal if it matches against `value`.
-        """
-        if not isinstance(options, (list, tuple)):
-            options = [options]
-        for opt in options:
-            if self._eq(opt, value):
-                dist = 0.0
-                break
-        else:
-            dist = 1.0
-        self.add(key, dist)
-
-    def add_expr(self, key, expr):
-        """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0.
-        """
-        if expr:
-            self.add(key, 1.0)
-        else:
-            self.add(key, 0.0)
-
-    def add_number(self, key, number1, number2):
-        """Adds a distance penalty of 1.0 for each number of difference between
-        `number1` and `number2`, or 0.0 when there is no difference. Use this
-        when there is no upper limit on the difference between the two numbers.
-        """
-        diff = abs(number1 - number2)
-        if diff:
-            for i in range(diff):
-                self.add(key, 1.0)
-        else:
-            self.add(key, 0.0)
-
-    def add_priority(self, key, value, options):
-        """Adds a distance penalty that corresponds to the position at which
-        `value` appears in `options`. A distance penalty of 0.0 for the first
-        option, or 1.0 if there is no matching option. If an option is a
-        compiled regular expression, it will be considered equal if it matches
-        against `value`.
-        """
-        if not isinstance(options, (list, tuple)):
-            options = [options]
-        unit = 1.0 / (len(options) or 1)
-        for i, opt in enumerate(options):
-            if self._eq(opt, value):
-                dist = i * unit
-                break
-        else:
-            dist = 1.0
-        self.add(key, dist)
-
-    def add_ratio(self, key, number1, number2):
-        """Adds a distance penalty for `number1` as a ratio of `number2`.
-        `number1` is bound at 0 and `number2`.
-        """
-        number = float(max(min(number1, number2), 0))
-        if number2:
-            dist = number / number2
-        else:
-            dist = 0.0
-        self.add(key, dist)
-
-    def add_string(self, key, str1, str2):
-        """Adds a distance penalty based on the edit distance between `str1`
-        and `str2`.
-        """
-        dist = string_dist(str1, str2)
-        self.add(key, dist)
-
-    @property
-    def distance(self):
-        """Returns a weighted and normalised distance across all penalties.
-        """
-        dist_max = self.max_distance
-        if dist_max:
-            return self.raw_distance / self.max_distance
-        return 0.0
-
-    @property
-    def max_distance(self):
-        """Returns the maximum distance penalty.
-        """
-        dist_max = 0.0
-        for key, penalty in self._penalties.iteritems():
-            dist_max += len(penalty) * weights[key].as_number()
-        return dist_max
-
-    @property
-    def raw_distance(self):
-        """Returns the raw (denormalised) distance.
-        """
-        dist_raw = 0.0
-        for key, penalty in self._penalties.iteritems():
-            dist_raw += sum(penalty) * weights[key].as_number()
-        return dist_raw
-
-    @property
-    def sorted(self):
-        """Returns a list of (dist, key) pairs, with `dist` being the weighted
-        distance, sorted from highest to lowest. Does not include penalties
-        with a zero value.
-        """
-        list_ = []
-        for key in self._penalties:
-            dist = self[key]
-            if dist:
-                list_.append((dist, key))
-        # Convert distance into a negative float we can sort items in ascending
-        # order (for keys, when the penalty is equal) and still get the items
-        # with the biggest distance first.
-        return sorted(list_, key=lambda (dist, key): (0-dist, key))
-
-    def update(self, dist):
-        """Adds all the distance penalties from `dist`.
-        """
-        if not isinstance(dist, Distance):
-            raise ValueError(
-                    '`dist` must be a Distance object. It is: %r' % dist)
-        for key, penalties in dist._penalties.iteritems():
-            self._penalties.setdefault(key, []).extend(penalties)
-
 def track_distance(item, track_info, incl_artist=False):
    """Determines the significance of a track metadata change. Returns a
    Distance object. `incl_artist` indicates that a distance component should
    be included for the track artist (i.e., for various-artist releases).
    """
-    dist = Distance()
+    dist = hooks.Distance()

    # Length.
    if track_info.length:
        diff = abs(item.length - track_info.length) - \
-               weights['track_length_grace'].as_number()
+               config['match']['track_length_grace'].as_number()
        dist.add_ratio('track_length', diff,
-                       weights['track_length_max'].as_number())
+                       config['match']['track_length_max'].as_number())

    # Title.
    dist.add_string('track_title', item.title, track_info.title)
@ -410,7 +145,7 @@ def distance(items, album_info, mapping):
    """
    likelies, _ = current_metadata(items)

-    dist = Distance()
+    dist = hooks.Distance()

    # Artist, if not various.
    if not album_info.va:
@ -556,18 +291,20 @@ def _recommendation(results):

    # Downgrade to the max rec if it is lower than the current rec for an
    # applied penalty.
-    keys = set(key for _, key in min_dist)
+    keys = set(min_dist.keys())
    if isinstance(results[0], hooks.AlbumMatch):
        for track_dist in min_dist.tracks.values():
-            keys.update(key for _, key in track_dist)
+            keys.update(track_dist.keys())
+    max_rec_view = config['match']['max_rec']
    for key in keys:
-        max_rec = config['match']['max_rec'][key].as_choice({
-            'strong': recommendation.strong,
-            'medium': recommendation.medium,
-            'low': recommendation.low,
-            'none': recommendation.none,
-        })
-        rec = min(rec, max_rec)
+        if key in max_rec_view.keys():
+            max_rec = max_rec_view[key].as_choice({
+                'strong': recommendation.strong,
+                'medium': recommendation.medium,
+                'low': recommendation.low,
+                'none': recommendation.none,
+            })
+            rec = min(rec, max_rec)

    return rec

--- a/beets/config_default.yaml
+++ b/beets/config_default.yaml
@ -68,27 +68,8 @@ match:
    medium_rec_thresh: 0.25
    rec_gap_thresh: 0.25
    max_rec:
-        source: strong
-        artist: strong
-        album: strong
-        media: strong
-        mediums: strong
-        year: strong
-        country: strong
-        label: strong
-        catalognum: strong
-        albumdisambig: strong
-        album_id: strong
-        tracks: strong
        missing_tracks: medium
        unmatched_tracks: medium
-        track_title: strong
-        track_artist: strong
-        track_index: strong
-        track_length_grace: strong
-        track_length_max: strong
-        track_length: strong
-        track_id: strong
    distance_weights:
        source: 2.0
        artist: 3.0
@ -107,8 +88,6 @@ match:
        track_title: 3.0
        track_artist: 2.0
        track_index: 1.0
-        track_length_grace: 10
-        track_length_max: 30
        track_length: 2.0
        track_id: 5.0
    preferred:
@ -116,3 +95,5 @@ match:
        media: []
        original_year: no
    ignored: []
+    track_length_grace: 10
+    track_length_max: 30
--- a/beets/plugins.py
+++ b/beets/plugins.py
@ -67,13 +67,13 @@ class BeetsPlugin(object):
        """Should return a Distance object to be added to the
        distance for every track comparison.
        """
-        return beets.autotag.match.Distance()
+        return beets.autotag.hooks.Distance()

    def album_distance(self, items, album_info, mapping):
        """Should return a Distance object to be added to the
        distance for every album-level comparison.
        """
-        return beets.autotag.match.Distance()
+        return beets.autotag.hooks.Distance()

    def candidates(self, items, artist, album, va_likely):
        """Should return a sequence of AlbumInfo objects that match the
@ -244,14 +244,16 @@ def track_distance(item, info):
    """Gets the track distance calculated by all loaded plugins.
    Returns a Distance object.
    """
-    dist = beets.autotag.match.Distance()
+    from beets.autotag.hooks import Distance
+    dist = Distance()
    for plugin in find_plugins():
        dist.update(plugin.track_distance(item, info))
    return dist

 def album_distance(items, album_info, mapping):
    """Returns the album distance calculated by plugins."""
-    dist = beets.autotag.match.Distance()
+    from beets.autotag.hooks import Distance
+    dist = Distance()
    for plugin in find_plugins():
        dist.update(plugin.album_distance(items, album_info, mapping))
    return dist
--- a/beets/ui/init.py
+++ b/beets/ui/init.py
@ -366,7 +366,7 @@ def colorize(color, text):
    else:
        return text

-def _colordiff(a, b, highlight='red', second_highlight='lightgray'):
+def _colordiff(a, b, highlight='red', minor_highlight='lightgray'):
    """Given two values, return the same pair of strings except with
    their differences highlighted in the specified color. Strings are
    highlighted intelligently to show differences; other values are
@ -407,7 +407,7 @@ def _colordiff(a, b, highlight='red', second_highlight='lightgray'):
            if a[a_start:a_end].lower() != b[b_start:b_end].lower():
                color = highlight
            else:
-                color = second_highlight
+                color = minor_highlight
            a_out.append(colorize(color, a[a_start:a_end]))
            b_out.append(colorize(color, b[b_start:b_end]))
        else:
--- a/beets/ui/commands.py
+++ b/beets/ui/commands.py
@ -164,11 +164,11 @@ def dist_string(dist):
    return out

 def penalty_string(distance, limit=None):
-    """Returns a colorized string that indicates all the penalties applied to
-    a distance object.
+    """Returns a colorized string that indicates all the penalties
+    applied to a distance object.
    """
    penalties = []
-    for _, key in distance:
+    for key in distance.keys():
        key = key.replace('album_', '')
        key = key.replace('track_', '')
        key = key.replace('_', ' ')
--- a/beetsplug/beatport.py
+++ b/beetsplug/beatport.py
@ -20,8 +20,7 @@ from datetime import datetime, timedelta

 import requests

-from beets.autotag.hooks import AlbumInfo, TrackInfo
-from beets.autotag.match import Distance
+from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance
 from beets.plugins import BeetsPlugin

 log = logging.getLogger('beets')
--- a/beetsplug/chroma.py
+++ b/beetsplug/chroma.py
@ -21,7 +21,6 @@ from beets import util
 from beets import config
 from beets.util import confit
 from beets.autotag import hooks
-from beets.autotag.match import Distance
 import acoustid
 import logging
 from collections import defaultdict
@ -114,7 +113,7 @@ def _all_releases(items):

 class AcoustidPlugin(plugins.BeetsPlugin):
    def track_distance(self, item, info):
-        dist = Distance()
+        dist = hooks.Distance()
        if item.path not in _matches or not info.track_id:
            # Match failed or no track ID.
            return dist
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@ -15,11 +15,9 @@
 """Adds Discogs album search support to the autotagger. Requires the
 discogs-client library.
 """
-from beets import config
-from beets.autotag.hooks import AlbumInfo, TrackInfo
-from beets.autotag.match import current_metadata, Distance, VA_ARTISTS
+from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance
 from beets.plugins import BeetsPlugin
-from discogs_client import Artist, DiscogsAPIError, Release, Search
+from discogs_client import DiscogsAPIError, Release, Search
 import beets
 import discogs_client
 import logging
--- a/beetsplug/scrub.py
+++ b/beetsplug/scrub.py
@ -108,6 +108,11 @@ def _scrub(path):
            # remove them. In this case, we just remove all the tags.
            for tag in f.keys():
                del f[tag]
+        except IOError as exc:
+            log.error(u'could not scrub {0}: {1}'.format(
+                util.displayable_path(path),
+                exc,
+            ))
        f.save()

 # Automatically embed art into imported albums.
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -36,6 +36,7 @@ And some bug fixes:
  Intzoglou.
 * Fix an occasional crash in the :doc:`/plugins/beatport` when a length
  field was missing from the API response. Thanks to Timothy Appnel.
+* :doc:`/plugins/scrub`: Handle and log I/O errors.


 1.2.0 (June 5, 2013)
@ -103,7 +104,7 @@ same MusicBrainz release group:
  beets re-identify the same release when re-importing existing files.
 * Prefer releases that are closest to the tagged ``year``. Tolerate files
  tagged with release or original year.
-* The new :ref:`preferred_media` config option lets you prefer a certain media
+* The new ``preferred_media`` config option lets you prefer a certain media
  type when the ``media`` field is unset on an album.
 * Apply minor penalties across a range of fields to differentiate between
  nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
--- a/docs/reference/config.rst
+++ b/docs/reference/config.rst
@ -395,40 +395,43 @@ max_rec
 As mentioned above, autotagger matches have *recommendations* that control how
 the UI behaves for a certain quality of match. The recommendation for a certain
 match is based on the overall distance calculation. But you can also control
-the recommendation when a distance penalty is being applied for a specific
-field by defining *maximum* recommendations for each field:
+the recommendation when a specific distance penalty is applied by defining
+*maximum* recommendations for each field:

-To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here
-are the defaults::
+To define maxima, use keys under ``max_rec:`` in the ``match`` section. The
+defaults are "medium" for missing and unmatched tracks and "strong" (i.e., no
+maximum) for everything else::

    match:
        max_rec:
-            source: strong
-            artist: strong
-            album: strong
-            media: strong
-            mediums: strong
-            year: strong
-            country: strong
-            label: strong
-            catalognum: strong
-            albumdisambig: strong
-            album_id: strong
-            tracks: strong
            missing_tracks: medium
            unmatched_tracks: medium
-            track_title: strong
-            track_artist: strong
-            track_index: strong
-            track_length_grace: strong
-            track_length_max: strong
-            track_length: strong
-            track_id: strong

-If a recommendation is higher than the configured maximum and a penalty is
-being applied, the recommendation will be downgraded. The maximum for each
-field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the
-maximum recommendation is ``strong``, no "downgrading" occurs.
+If a recommendation is higher than the configured maximum and the indicated
+penalty is applied, the recommendation is downgraded. The setting for
+each field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the
+maximum recommendation is ``strong``, no "downgrading" occurs. The available
+penalty names here are:
+
+* source
+* artist
+* album
+* media
+* mediums
+* year
+* country
+* label
+* catalognum
+* albumdisambig
+* album_id
+* tracks
+* missing_tracks
+* unmatched_tracks
+* track_title
+* track_artist
+* track_index
+* track_length
+* track_id

 .. _preferred:

@ -440,10 +443,10 @@ similarity, you can also specify an ordered list of preferred countries and
 media types.

 A distance penalty will be applied if the country or media type from the match
-metadata doesn't match. The order is important, the first item will be most
-preferred. Each item may be a regular expression, and will be matched case
-insensitively. The number of media will be stripped when matching preferred
-media (e.g. "2x" in "2xCD").
+metadata doesn't match. The specified values are preferred in descending order
+(i.e., the first item will be most preferred). Each item may be a regular
+expression, and will be matched case insensitively. The number of media will
+be stripped when matching preferred media (e.g. "2x" in "2xCD").

 You can also tell the autotagger to prefer matches that have a release year
 closest to the original year for an album.
@ -469,6 +472,8 @@ the penalty name to the ``ignored`` setting::
    match:
        ignored: missing_tracks unmatched_tracks

+The available penalties are the same as those for the :ref:`max_rec` setting.
+
 .. _path-format-config:

 Path Format Configuration
--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@ -23,13 +23,13 @@ import _common
 from _common import unittest
 from beets import autotag
 from beets.autotag import match
-from beets.autotag.match import Distance
+from beets.autotag.hooks import Distance, string_dist
 from beets.library import Item
 from beets.util import plurality
 from beets.autotag import AlbumInfo, TrackInfo
 from beets import config

-class PluralityTest(unittest.TestCase):
+class PluralityTest(_common.TestCase):
    def test_plurality_consensus(self):
        objs = [1, 1, 1, 1]
        obj, freq = plurality(objs)
@ -106,154 +106,163 @@ def _make_trackinfo():
        TrackInfo(u'three', None, u'some artist', length=1, index=3),
    ]

-class DistanceTest(unittest.TestCase):
-    def setUp(self):
-        self.dist = Distance()
-
+class DistanceTest(_common.TestCase):
    def test_add(self):
-        self.dist.add('add', 1.0)
-        self.assertEqual(self.dist._penalties, {'add': [1.0]})
+        dist = Distance()
+        dist.add('add', 1.0)
+        self.assertEqual(dist._penalties, {'add': [1.0]})

    def test_add_equality(self):
-        self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi'])
-        self.assertEqual(self.dist._penalties['equality'], [0.0])
+        dist = Distance()
+        dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi'])
+        self.assertEqual(dist._penalties['equality'], [0.0])

-        self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi'])
-        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0])
+        dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi'])
+        self.assertEqual(dist._penalties['equality'], [0.0, 1.0])

-        self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
-        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0])
+        dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
+        self.assertEqual(dist._penalties['equality'], [0.0, 1.0, 0.0])

    def test_add_expr(self):
-        self.dist.add_expr('expr', True)
-        self.assertEqual(self.dist._penalties['expr'], [1.0])
+        dist = Distance()
+        dist.add_expr('expr', True)
+        self.assertEqual(dist._penalties['expr'], [1.0])

-        self.dist.add_expr('expr', False)
-        self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0])
+        dist.add_expr('expr', False)
+        self.assertEqual(dist._penalties['expr'], [1.0, 0.0])

    def test_add_number(self):
+        dist = Distance()
        # Add a full penalty for each number of difference between two numbers.

-        self.dist.add_number('number', 1, 1)
-        self.assertEqual(self.dist._penalties['number'], [0.0])
+        dist.add_number('number', 1, 1)
+        self.assertEqual(dist._penalties['number'], [0.0])

-        self.dist.add_number('number', 1, 2)
-        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0])
+        dist.add_number('number', 1, 2)
+        self.assertEqual(dist._penalties['number'], [0.0, 1.0])

-        self.dist.add_number('number', 2, 1)
-        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0])
+        dist.add_number('number', 2, 1)
+        self.assertEqual(dist._penalties['number'], [0.0, 1.0, 1.0])

-        self.dist.add_number('number', -1, 2)
-        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0,
+        dist.add_number('number', -1, 2)
+        self.assertEqual(dist._penalties['number'], [0.0, 1.0, 1.0, 1.0,
                                                          1.0, 1.0])

    def test_add_priority(self):
-        self.dist.add_priority('priority', 'abc', 'abc')
-        self.assertEqual(self.dist._penalties['priority'], [0.0])
+        dist = Distance()
+        dist.add_priority('priority', 'abc', 'abc')
+        self.assertEqual(dist._penalties['priority'], [0.0])

-        self.dist.add_priority('priority', 'def', ['abc', 'def'])
-        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5])
+        dist.add_priority('priority', 'def', ['abc', 'def'])
+        self.assertEqual(dist._penalties['priority'], [0.0, 0.5])

-        self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef',
+        dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef',
                                                  re.compile('GH', re.I)])
-        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75])
+        self.assertEqual(dist._penalties['priority'], [0.0, 0.5, 0.75])

-        self.dist.add_priority('priority', 'xyz', ['abc', 'def'])
-        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75,
+        dist.add_priority('priority', 'xyz', ['abc', 'def'])
+        self.assertEqual(dist._penalties['priority'], [0.0, 0.5, 0.75,
                                                            1.0])

    def test_add_ratio(self):
-        self.dist.add_ratio('ratio', 25, 100)
-        self.assertEqual(self.dist._penalties['ratio'], [0.25])
+        dist = Distance()
+        dist.add_ratio('ratio', 25, 100)
+        self.assertEqual(dist._penalties['ratio'], [0.25])

-        self.dist.add_ratio('ratio', 10, 5)
-        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0])
+        dist.add_ratio('ratio', 10, 5)
+        self.assertEqual(dist._penalties['ratio'], [0.25, 1.0])

-        self.dist.add_ratio('ratio', -5, 5)
-        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0])
+        dist.add_ratio('ratio', -5, 5)
+        self.assertEqual(dist._penalties['ratio'], [0.25, 1.0, 0.0])

-        self.dist.add_ratio('ratio', 5, 0)
-        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0])
+        dist.add_ratio('ratio', 5, 0)
+        self.assertEqual(dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0])

    def test_add_string(self):
-        dist = match.string_dist(u'abc', u'bcd')
-        self.dist.add_string('string', u'abc', u'bcd')
-        self.assertEqual(self.dist._penalties['string'], [dist])
+        dist = Distance()
+        sdist = string_dist(u'abc', u'bcd')
+        dist.add_string('string', u'abc', u'bcd')
+        self.assertEqual(dist._penalties['string'], [sdist])

    def test_distance(self):
        config['match']['distance_weights']['album'] = 2.0
        config['match']['distance_weights']['medium'] = 1.0
-        self.dist.add('album', 0.5)
-        self.dist.add('media', 0.25)
-        self.dist.add('media', 0.75)
-        self.assertEqual(self.dist.distance, 0.5)
+        dist = Distance()
+        dist.add('album', 0.5)
+        dist.add('media', 0.25)
+        dist.add('media', 0.75)
+        self.assertEqual(dist.distance, 0.5)

        # __getitem__()
-        self.assertEqual(self.dist['album'], 0.25)
-        self.assertEqual(self.dist['media'], 0.25)
+        self.assertEqual(dist['album'], 0.25)
+        self.assertEqual(dist['media'], 0.25)

    def test_max_distance(self):
        config['match']['distance_weights']['album'] = 3.0
        config['match']['distance_weights']['medium'] = 1.0
-        self.dist.add('album', 0.5)
-        self.dist.add('medium', 0.0)
-        self.dist.add('medium', 0.0)
-        self.assertEqual(self.dist.max_distance, 5.0)
+        dist = Distance()
+        dist.add('album', 0.5)
+        dist.add('medium', 0.0)
+        dist.add('medium', 0.0)
+        self.assertEqual(dist.max_distance, 5.0)

    def test_operators(self):
        config['match']['distance_weights']['source'] = 1.0
        config['match']['distance_weights']['album'] = 2.0
        config['match']['distance_weights']['medium'] = 1.0
-        self.dist.add('source', 0.0)
-        self.dist.add('album', 0.5)
-        self.dist.add('medium', 0.25)
-        self.dist.add('medium', 0.75)
-        self.assertEqual(len(self.dist), 2)
-        self.assertEqual(list(self.dist), [(0.2, 'album'), (0.2, 'medium')])
-        self.assertTrue(self.dist == 0.4)
-        self.assertTrue(self.dist < 1.0)
-        self.assertTrue(self.dist > 0.0)
-        self.assertEqual(self.dist - 0.4, 0.0)
-        self.assertEqual(0.4 - self.dist, 0.0)
-        self.assertEqual(float(self.dist), 0.4)
+        dist = Distance()
+        dist.add('source', 0.0)
+        dist.add('album', 0.5)
+        dist.add('medium', 0.25)
+        dist.add('medium', 0.75)
+        self.assertEqual(len(dist), 2)
+        self.assertEqual(list(dist), [('album', 0.2), ('medium', 0.2)])
+        self.assertTrue(dist == 0.4)
+        self.assertTrue(dist < 1.0)
+        self.assertTrue(dist > 0.0)
+        self.assertEqual(dist - 0.4, 0.0)
+        self.assertEqual(0.4 - dist, 0.0)
+        self.assertEqual(float(dist), 0.4)

    def test_raw_distance(self):
        config['match']['distance_weights']['album'] = 3.0
        config['match']['distance_weights']['medium'] = 1.0
-        self.dist.add('album', 0.5)
-        self.dist.add('medium', 0.25)
-        self.dist.add('medium', 0.5)
-        self.assertEqual(self.dist.raw_distance, 2.25)
+        dist = Distance()
+        dist.add('album', 0.5)
+        dist.add('medium', 0.25)
+        dist.add('medium', 0.5)
+        self.assertEqual(dist.raw_distance, 2.25)

-    def test_sorted(self):
+    def test_items(self):
        config['match']['distance_weights']['album'] = 4.0
        config['match']['distance_weights']['medium'] = 2.0
-
-        self.dist.add('album', 0.1875)
-        self.dist.add('medium', 0.75)
-        self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')])
+        dist = Distance()
+        dist.add('album', 0.1875)
+        dist.add('medium', 0.75)
+        self.assertEqual(dist.items(), [('medium', 0.25), ('album', 0.125)])

        # Sort by key if distance is equal.
        dist = Distance()
        dist.add('album', 0.375)
        dist.add('medium', 0.75)
-        self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')])
+        self.assertEqual(dist.items(), [('album', 0.25), ('medium', 0.25)])

    def test_update(self):
-        self.dist.add('album', 0.5)
-        self.dist.add('media', 1.0)
+        dist1 = Distance()
+        dist1.add('album', 0.5)
+        dist1.add('media', 1.0)

-        dist = Distance()
-        dist.add('album', 0.75)
-        dist.add('album', 0.25)
-        self.dist.add('media', 0.05)
+        dist2 = Distance()
+        dist2.add('album', 0.75)
+        dist2.add('album', 0.25)
+        dist2.add('media', 0.05)

-        self.dist.update(dist)
+        dist1.update(dist2)

-        self.assertEqual(self.dist._penalties, {'album': [0.5, 0.75, 0.25],
-                                                'media': [1.0, 0.05]})
+        self.assertEqual(dist1._penalties, {'album': [0.5, 0.75, 0.25],
+                                             'media': [1.0, 0.05]})

-class TrackDistanceTest(unittest.TestCase):
+class TrackDistanceTest(_common.TestCase):
    def test_identical_tracks(self):
        item = _make_item(u'one', 1)
        info = _make_trackinfo()[0]
@ -280,7 +289,7 @@ class TrackDistanceTest(unittest.TestCase):
        dist = match.track_distance(item, info, incl_artist=True)
        self.assertEqual(dist, 0.0)

-class AlbumDistanceTest(unittest.TestCase):
+class AlbumDistanceTest(_common.TestCase):
    def _mapping(self, items, info):
        out = {}
        for i, t in zip(items, info.tracks):
@ -863,77 +872,77 @@ class ApplyCompilationTest(_common.TestCase, ApplyTestUtil):

 class StringDistanceTest(unittest.TestCase):
    def test_equal_strings(self):
-        dist = match.string_dist(u'Some String', u'Some String')
+        dist = string_dist(u'Some String', u'Some String')
        self.assertEqual(dist, 0.0)

    def test_different_strings(self):
-        dist = match.string_dist(u'Some String', u'Totally Different')
+        dist = string_dist(u'Some String', u'Totally Different')
        self.assertNotEqual(dist, 0.0)

    def test_punctuation_ignored(self):
-        dist = match.string_dist(u'Some String', u'Some.String!')
+        dist = string_dist(u'Some String', u'Some.String!')
        self.assertEqual(dist, 0.0)

    def test_case_ignored(self):
-        dist = match.string_dist(u'Some String', u'sOME sTring')
+        dist = string_dist(u'Some String', u'sOME sTring')
        self.assertEqual(dist, 0.0)

    def test_leading_the_has_lower_weight(self):
-        dist1 = match.string_dist(u'XXX Band Name', u'Band Name')
-        dist2 = match.string_dist(u'The Band Name', u'Band Name')
+        dist1 = string_dist(u'XXX Band Name', u'Band Name')
+        dist2 = string_dist(u'The Band Name', u'Band Name')
        self.assert_(dist2 < dist1)

    def test_parens_have_lower_weight(self):
-        dist1 = match.string_dist(u'One .Two.', u'One')
-        dist2 = match.string_dist(u'One (Two)', u'One')
+        dist1 = string_dist(u'One .Two.', u'One')
+        dist2 = string_dist(u'One (Two)', u'One')
        self.assert_(dist2 < dist1)

    def test_brackets_have_lower_weight(self):
-        dist1 = match.string_dist(u'One .Two.', u'One')
-        dist2 = match.string_dist(u'One [Two]', u'One')
+        dist1 = string_dist(u'One .Two.', u'One')
+        dist2 = string_dist(u'One [Two]', u'One')
        self.assert_(dist2 < dist1)

    def test_ep_label_has_zero_weight(self):
-        dist = match.string_dist(u'My Song (EP)', u'My Song')
+        dist = string_dist(u'My Song (EP)', u'My Song')
        self.assertEqual(dist, 0.0)

    def test_featured_has_lower_weight(self):
-        dist1 = match.string_dist(u'My Song blah Someone', u'My Song')
-        dist2 = match.string_dist(u'My Song feat Someone', u'My Song')
+        dist1 = string_dist(u'My Song blah Someone', u'My Song')
+        dist2 = string_dist(u'My Song feat Someone', u'My Song')
        self.assert_(dist2 < dist1)

    def test_postfix_the(self):
-        dist = match.string_dist(u'The Song Title', u'Song Title, The')
+        dist = string_dist(u'The Song Title', u'Song Title, The')
        self.assertEqual(dist, 0.0)

    def test_postfix_a(self):
-        dist = match.string_dist(u'A Song Title', u'Song Title, A')
+        dist = string_dist(u'A Song Title', u'Song Title, A')
        self.assertEqual(dist, 0.0)

    def test_postfix_an(self):
-        dist = match.string_dist(u'An Album Title', u'Album Title, An')
+        dist = string_dist(u'An Album Title', u'Album Title, An')
        self.assertEqual(dist, 0.0)

    def test_empty_strings(self):
-        dist = match.string_dist(u'', u'')
+        dist = string_dist(u'', u'')
        self.assertEqual(dist, 0.0)

    def test_solo_pattern(self):
        # Just make sure these don't crash.
-        match.string_dist(u'The ', u'')
-        match.string_dist(u'(EP)', u'(EP)')
-        match.string_dist(u', An', u'')
+        string_dist(u'The ', u'')
+        string_dist(u'(EP)', u'(EP)')
+        string_dist(u', An', u'')

    def test_heuristic_does_not_harm_distance(self):
-        dist = match.string_dist(u'Untitled', u'[Untitled]')
+        dist = string_dist(u'Untitled', u'[Untitled]')
        self.assertEqual(dist, 0.0)

    def test_ampersand_expansion(self):
-        dist = match.string_dist(u'And', u'&')
+        dist = string_dist(u'And', u'&')
        self.assertEqual(dist, 0.0)

    def test_accented_characters(self):
-        dist = match.string_dist(u'\xe9\xe1\xf1', u'ean')
+        dist = string_dist(u'\xe9\xe1\xf1', u'ean')
        self.assertEqual(dist, 0.0)

 def suite():