From f6faf723288d7cf83ca30b946d6b9306aaa5537f Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 15:21:32 -0700
Subject: [PATCH 1/9] move Distance class to hooks module

---
 beets/autotag/hooks.py | 292 +++++++++++++++++++++++++++++++++++++++++
 beets/autotag/match.py | 268 +------------------------------------
 beets/plugins.py       |  10 +-
 beetsplug/beatport.py  |   3 +-
 beetsplug/chroma.py    |   3 +-
 beetsplug/discogs.py   |   6 +-
 test/test_autotag.py   | 142 ++++++++++----------
 7 files changed, 379 insertions(+), 345 deletions(-)

diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py
index 96de6b674..c29cf576f 100644
--- a/beets/autotag/hooks.py
+++ b/beets/autotag/hooks.py
@@ -15,9 +15,13 @@
 """Glue between metadata sources and the matching logic."""
 import logging
 from collections import namedtuple
+import re
 
 from beets import plugins
+from beets import config
 from beets.autotag import mb
+from beets.util import levenshtein
+from unidecode import unidecode
 
 log = logging.getLogger('beets')
 
@@ -158,6 +162,294 @@ class TrackInfo(object):
             if isinstance(value, str):
                 setattr(self, fld, value.decode(codec, 'ignore'))
 
+
+# Candidate distance scoring.
+
+# Parameters for string distance function.
+# Words that can be moved to the end of a string using a comma.
+SD_END_WORDS = ['the', 'a', 'an']
+# Reduced weights for certain portions of the string.
+SD_PATTERNS = [
+    (r'^the ', 0.1),
+    (r'[\[\(]?(ep|single)[\]\)]?', 0.0),
+    (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
+    (r'\(.*?\)', 0.3),
+    (r'\[.*?\]', 0.3),
+    (r'(, )?(pt\.|part) .+', 0.2),
+]
+# Replacements to use before testing distance.
+SD_REPLACE = [
+    (r'&', 'and'),
+]
+
+def _string_dist_basic(str1, str2):
+    """Basic edit distance between two strings, ignoring
+    non-alphanumeric characters and case. Comparisons are based on a
+    transliteration/lowering to ASCII characters. Normalized by string
+    length.
+    """
+    str1 = unidecode(str1)
+    str2 = unidecode(str2)
+    str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
+    str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
+    if not str1 and not str2:
+        return 0.0
+    return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
+
+def string_dist(str1, str2):
+    """Gives an "intuitive" edit distance between two strings. This is
+    an edit distance, normalized by the string length, with a number of
+    tweaks that reflect intuition about text.
+    """
+    str1 = str1.lower()
+    str2 = str2.lower()
+
+    # Don't penalize strings that move certain words to the end. For
+    # example, "the something" should be considered equal to
+    # "something, the".
+    for word in SD_END_WORDS:
+        if str1.endswith(', %s' % word):
+            str1 = '%s %s' % (word, str1[:-len(word)-2])
+        if str2.endswith(', %s' % word):
+            str2 = '%s %s' % (word, str2[:-len(word)-2])
+
+    # Perform a couple of basic normalizing substitutions.
+    for pat, repl in SD_REPLACE:
+        str1 = re.sub(pat, repl, str1)
+        str2 = re.sub(pat, repl, str2)
+
+    # Change the weight for certain string portions matched by a set
+    # of regular expressions. We gradually change the strings and build
+    # up penalties associated with parts of the string that were
+    # deleted.
+    base_dist = _string_dist_basic(str1, str2)
+    penalty = 0.0
+    for pat, weight in SD_PATTERNS:
+        # Get strings that drop the pattern.
+        case_str1 = re.sub(pat, '', str1)
+        case_str2 = re.sub(pat, '', str2)
+
+        if case_str1 != str1 or case_str2 != str2:
+            # If the pattern was present (i.e., it is deleted in the
+            # current case), recalculate the distances for the
+            # modified strings.
+            case_dist = _string_dist_basic(case_str1, case_str2)
+            case_delta = max(0.0, base_dist - case_dist)
+            if case_delta == 0.0:
+                continue
+
+            # Shift our baseline strings down (to avoid rematching the
+            # same part of the string) and add a scaled distance
+            # amount to the penalties.
+            str1 = case_str1
+            str2 = case_str2
+            base_dist = case_dist
+            penalty += weight * case_delta
+    dist = base_dist + penalty
+
+    return dist
+
+class Distance(object):
+    """Keeps track of multiple distance penalties. Provides a single
+    weighted distance for all penalties as well as a weighted distance
+    for each individual penalty.
+    """
+    def __init__(self):
+        self._penalties = {}
+
+        weights_view = config['match']['distance_weights']
+        self._weights = {}
+        for key in weights_view.keys():
+            self._weights[key] = weights_view[key].as_number()
+
+
+    # Access the components and their aggregates.
+
+    @property
+    def distance(self):
+        """Returns a weighted and normalized distance across all
+        penalties.
+        """
+        dist_max = self.max_distance
+        if dist_max:
+            return self.raw_distance / self.max_distance
+        return 0.0
+
+    @property
+    def max_distance(self):
+        """Returns the maximum distance penalty.
+        """
+        dist_max = 0.0
+        for key, penalty in self._penalties.iteritems():
+            dist_max += len(penalty) * self._weights[key]
+        return dist_max
+
+    @property
+    def raw_distance(self):
+        """Returns the raw (denormalized) distance.
+        """
+        dist_raw = 0.0
+        for key, penalty in self._penalties.iteritems():
+            dist_raw += sum(penalty) * self._weights[key]
+        return dist_raw
+
+    @property
+    def sorted(self):
+        """Returns a list of (dist, key) pairs, with `dist` being the
+        weighted distance, sorted from highest to lowest. Does not
+        include penalties with a zero value.
+        """
+        list_ = []
+        for key in self._penalties:
+            dist = self[key]
+            if dist:
+                list_.append((dist, key))
+        # Negate the distance so that we can sort items in ascending order
+        # (by key, when the penalties are equal) and still get the items
+        # with the biggest distance first.
+        return sorted(list_, key=lambda (dist, key): (0-dist, key))
+
+
+    # Behave like a float.
+
+    def __cmp__(self, other):
+        return cmp(self.distance, other)
+
+    def __float__(self):
+        return self.distance
+    def __sub__(self, other):
+        return self.distance - other
+
+    def __rsub__(self, other):
+        return other - self.distance
+
+
+    # Behave like a dict.
+
+    def __getitem__(self, key):
+        """Returns the weighted distance for a named penalty.
+        """
+        dist = sum(self._penalties[key]) * self._weights[key]
+        dist_max = self.max_distance
+        if dist_max:
+            return dist / dist_max
+        return 0.0
+
+    def __iter__(self):
+        return iter(self.sorted)
+
+    def __len__(self):
+        return len(self.sorted)
+
+    def update(self, dist):
+        """Adds all the distance penalties from `dist`.
+        """
+        if not isinstance(dist, Distance):
+            raise ValueError(
+                    '`dist` must be a Distance object. It is: %r' % dist)
+        for key, penalties in dist._penalties.iteritems():
+            self._penalties.setdefault(key, []).extend(penalties)
+
+
+    # Adding components.
+
+    def _eq(self, value1, value2):
+        """Returns True if `value1` is equal to `value2`. `value1` may
+        be a compiled regular expression, in which case it will be
+        matched against `value2`.
+        """
+        if isinstance(value1, re._pattern_type):
+            return bool(value1.match(value2))
+        return value1 == value2
+
+    def add(self, key, dist):
+        """Adds a distance penalty. `key` must correspond with a
+        configured weight setting. `dist` must be a float between 0.0
+        and 1.0, and will be added to any existing distance penalties
+        for the same key.
+        """
+        if not 0.0 <= dist <= 1.0:
+            raise ValueError(
+                    '`dist` must be between 0.0 and 1.0. It is: %r' % dist)
+        self._penalties.setdefault(key, []).append(dist)
+
+    def add_equality(self, key, value, options):
+        """Adds a distance penalty of 1.0 if `value` doesn't match any
+        of the values in `options`. If an option is a compiled regular
+        expression, it will be considered equal if it matches against
+        `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        for opt in options:
+            if self._eq(opt, value):
+                dist = 0.0
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_expr(self, key, expr):
+        """Adds a distance penalty of 1.0 if `expr` evaluates to True,
+        or 0.0 otherwise.
+        """
+        if expr:
+            self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_number(self, key, number1, number2):
+        """Adds a distance penalty of 1.0 for each unit of difference
+        between `number1` and `number2`, or 0.0 when there is no
+        difference. Use this when there is no upper limit on the
+        difference between the two numbers.
+        """
+        diff = abs(number1 - number2)
+        if diff:
+            for i in range(diff):
+                self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_priority(self, key, value, options):
+        """Adds a distance penalty that corresponds to the position at
+        which `value` appears in `options`. A distance penalty of 0.0
+        for the first option, or 1.0 if there is no matching option. If
+        an option is a compiled regular expression, it will be
+        considered equal if it matches against `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        unit = 1.0 / (len(options) or 1)
+        for i, opt in enumerate(options):
+            if self._eq(opt, value):
+                dist = i * unit
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_ratio(self, key, number1, number2):
+        """Adds a distance penalty for `number1` as a ratio of `number2`.
+        `number1` is clamped to the range from 0 to `number2`.
+        """
+        number = float(max(min(number1, number2), 0))
+        if number2:
+            dist = number / number2
+        else:
+            dist = 0.0
+        self.add(key, dist)
+
+    def add_string(self, key, str1, str2):
+        """Adds a distance penalty based on the edit distance between
+        `str1` and `str2`.
+        """
+        dist = string_dist(str1, str2)
+        self.add(key, dist)
+
+
+# Structures that compose all the information for a candidate match.
+
 AlbumMatch = namedtuple('AlbumMatch', ['distance', 'info', 'mapping',
                                        'extra_items', 'extra_tracks'])
 
diff --git a/beets/autotag/match.py b/beets/autotag/match.py
index cc32d6e8f..455d22be7 100644
--- a/beets/autotag/match.py
+++ b/beets/autotag/match.py
@@ -21,34 +21,16 @@ import datetime
 import logging
 import re
 from munkres import Munkres
-from unidecode import unidecode
 
 from beets import plugins
 from beets import config
-from beets.util import levenshtein, plurality
+from beets.util import plurality
 from beets.util.enumeration import enum
 from beets.autotag import hooks
 
 # A configuration view for the distance weights.
 weights = config['match']['distance_weights']
 
-# Parameters for string distance function.
-# Words that can be moved to the end of a string using a comma.
-SD_END_WORDS = ['the', 'a', 'an']
-# Reduced weights for certain portions of the string.
-SD_PATTERNS = [
-    (r'^the ', 0.1),
-    (r'[\[\(]?(ep|single)[\]\)]?', 0.0),
-    (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
-    (r'\(.*?\)', 0.3),
-    (r'\[.*?\]', 0.3),
-    (r'(, )?(pt\.|part) .+', 0.2),
-]
-# Replacements to use before testing distance.
-SD_REPLACE = [
-    (r'&', 'and'),
-]
-
 # Recommendation enumeration.
 recommendation = enum('none', 'low', 'medium', 'strong', name='recommendation')
 
@@ -64,73 +46,6 @@ log = logging.getLogger('beets')
 
 # Primary matching functionality.
 
-def _string_dist_basic(str1, str2):
-    """Basic edit distance between two strings, ignoring
-    non-alphanumeric characters and case. Comparisons are based on a
-    transliteration/lowering to ASCII characters. Normalized by string
-    length.
-    """
-    str1 = unidecode(str1)
-    str2 = unidecode(str2)
-    str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
-    str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
-    if not str1 and not str2:
-        return 0.0
-    return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
-
-def string_dist(str1, str2):
-    """Gives an "intuitive" edit distance between two strings. This is
-    an edit distance, normalized by the string length, with a number of
-    tweaks that reflect intuition about text.
-    """
-    str1 = str1.lower()
-    str2 = str2.lower()
-
-    # Don't penalize strings that move certain words to the end. For
-    # example, "the something" should be considered equal to
-    # "something, the".
-    for word in SD_END_WORDS:
-        if str1.endswith(', %s' % word):
-            str1 = '%s %s' % (word, str1[:-len(word)-2])
-        if str2.endswith(', %s' % word):
-            str2 = '%s %s' % (word, str2[:-len(word)-2])
-
-    # Perform a couple of basic normalizing substitutions.
-    for pat, repl in SD_REPLACE:
-        str1 = re.sub(pat, repl, str1)
-        str2 = re.sub(pat, repl, str2)
-
-    # Change the weight for certain string portions matched by a set
-    # of regular expressions. We gradually change the strings and build
-    # up penalties associated with parts of the string that were
-    # deleted.
-    base_dist = _string_dist_basic(str1, str2)
-    penalty = 0.0
-    for pat, weight in SD_PATTERNS:
-        # Get strings that drop the pattern.
-        case_str1 = re.sub(pat, '', str1)
-        case_str2 = re.sub(pat, '', str2)
-
-        if case_str1 != str1 or case_str2 != str2:
-            # If the pattern was present (i.e., it is deleted in the
-            # the current case), recalculate the distances for the
-            # modified strings.
-            case_dist = _string_dist_basic(case_str1, case_str2)
-            case_delta = max(0.0, base_dist - case_dist)
-            if case_delta == 0.0:
-                continue
-
-            # Shift our baseline strings down (to avoid rematching the
-            # same part of the string) and add a scaled distance
-            # amount to the penalties.
-            str1 = case_str1
-            str2 = case_str2
-            base_dist = case_dist
-            penalty += weight * case_delta
-    dist = base_dist + penalty
-
-    return dist
-
 def current_metadata(items):
     """Extract the likely current metadata for an album given a list of its
     items. Return two dictionaries:
@@ -187,189 +102,12 @@ def track_index_changed(item, track_info):
     """
     return item.track not in (track_info.medium_index, track_info.index)
 
-class Distance(object):
-    """Keeps track of multiple distance penalties. Provides a single weighted
-    distance for all penalties as well as a weighted distance for each
-    individual penalty.
-    """
-    def __cmp__(self, other):
-        return cmp(self.distance, other)
-
-    def __float__(self):
-        return self.distance
-
-    def __getitem__(self, key):
-        """Returns the weighted distance for a named penalty.
-        """
-        dist = sum(self._penalties[key]) * weights[key].as_number()
-        dist_max = self.max_distance
-        if dist_max:
-            return dist / dist_max
-        return 0.0
-
-    def __init__(self):
-        self._penalties = {}
-
-    def __iter__(self):
-        return iter(self.sorted)
-
-    def __len__(self):
-        return len(self.sorted)
-
-    def __sub__(self, other):
-        return self.distance - other
-
-    def __rsub__(self, other):
-        return other - self.distance
-
-    def _eq(self, value1, value2):
-        """Returns True if `value1` is equal to `value2`. `value1` may be a
-        compiled regular expression, in which case it will be matched against
-        `value2`.
-        """
-        if isinstance(value1, re._pattern_type):
-            return bool(value1.match(value2))
-        return value1 == value2
-
-    def add(self, key, dist):
-        """Adds a distance penalty. `key` must correspond with a configured
-        weight setting. `dist` must be a float between 0.0 and 1.0, and will be
-        added to any existing distance penalties for the same key.
-        """
-        if not 0.0 <= dist <= 1.0:
-            raise ValueError(
-                    '`dist` must be between 0.0 and 1.0. It is: %r' % dist)
-        self._penalties.setdefault(key, []).append(dist)
-
-    def add_equality(self, key, value, options):
-        """Adds a distance penalty of 1.0 if `value` doesn't match any of the
-        values in `options`. If an option is a compiled regular expression, it
-        will be considered equal if it matches against `value`.
-        """
-        if not isinstance(options, (list, tuple)):
-            options = [options]
-        for opt in options:
-            if self._eq(opt, value):
-                dist = 0.0
-                break
-        else:
-            dist = 1.0
-        self.add(key, dist)
-
-    def add_expr(self, key, expr):
-        """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0.
-        """
-        if expr:
-            self.add(key, 1.0)
-        else:
-            self.add(key, 0.0)
-
-    def add_number(self, key, number1, number2):
-        """Adds a distance penalty of 1.0 for each number of difference between
-        `number1` and `number2`, or 0.0 when there is no difference. Use this
-        when there is no upper limit on the difference between the two numbers.
-        """
-        diff = abs(number1 - number2)
-        if diff:
-            for i in range(diff):
-                self.add(key, 1.0)
-        else:
-            self.add(key, 0.0)
-
-    def add_priority(self, key, value, options):
-        """Adds a distance penalty that corresponds to the position at which
-        `value` appears in `options`. A distance penalty of 0.0 for the first
-        option, or 1.0 if there is no matching option. If an option is a
-        compiled regular expression, it will be considered equal if it matches
-        against `value`.
-        """
-        if not isinstance(options, (list, tuple)):
-            options = [options]
-        unit = 1.0 / (len(options) or 1)
-        for i, opt in enumerate(options):
-            if self._eq(opt, value):
-                dist = i * unit
-                break
-        else:
-            dist = 1.0
-        self.add(key, dist)
-
-    def add_ratio(self, key, number1, number2):
-        """Adds a distance penalty for `number1` as a ratio of `number2`.
-        `number1` is bound at 0 and `number2`.
-        """
-        number = float(max(min(number1, number2), 0))
-        if number2:
-            dist = number / number2
-        else:
-            dist = 0.0
-        self.add(key, dist)
-
-    def add_string(self, key, str1, str2):
-        """Adds a distance penalty based on the edit distance between `str1`
-        and `str2`.
-        """
-        dist = string_dist(str1, str2)
-        self.add(key, dist)
-
-    @property
-    def distance(self):
-        """Returns a weighted and normalised distance across all penalties.
-        """
-        dist_max = self.max_distance
-        if dist_max:
-            return self.raw_distance / self.max_distance
-        return 0.0
-
-    @property
-    def max_distance(self):
-        """Returns the maximum distance penalty.
-        """
-        dist_max = 0.0
-        for key, penalty in self._penalties.iteritems():
-            dist_max += len(penalty) * weights[key].as_number()
-        return dist_max
-
-    @property
-    def raw_distance(self):
-        """Returns the raw (denormalised) distance.
-        """
-        dist_raw = 0.0
-        for key, penalty in self._penalties.iteritems():
-            dist_raw += sum(penalty) * weights[key].as_number()
-        return dist_raw
-
-    @property
-    def sorted(self):
-        """Returns a list of (dist, key) pairs, with `dist` being the weighted
-        distance, sorted from highest to lowest. Does not include penalties
-        with a zero value.
-        """
-        list_ = []
-        for key in self._penalties:
-            dist = self[key]
-            if dist:
-                list_.append((dist, key))
-        # Convert distance into a negative float we can sort items in ascending
-        # order (for keys, when the penalty is equal) and still get the items
-        # with the biggest distance first.
-        return sorted(list_, key=lambda (dist, key): (0-dist, key))
-
-    def update(self, dist):
-        """Adds all the distance penalties from `dist`.
-        """
-        if not isinstance(dist, Distance):
-            raise ValueError(
-                    '`dist` must be a Distance object. It is: %r' % dist)
-        for key, penalties in dist._penalties.iteritems():
-            self._penalties.setdefault(key, []).extend(penalties)
-
 def track_distance(item, track_info, incl_artist=False):
     """Determines the significance of a track metadata change. Returns a
     Distance object. `incl_artist` indicates that a distance component should
     be included for the track artist (i.e., for various-artist releases).
     """
-    dist = Distance()
+    dist = hooks.Distance()
 
     # Length.
     if track_info.length:
@@ -410,7 +148,7 @@ def distance(items, album_info, mapping):
     """
     likelies, _ = current_metadata(items)
 
-    dist = Distance()
+    dist = hooks.Distance()
 
     # Artist, if not various.
     if not album_info.va:
diff --git a/beets/plugins.py b/beets/plugins.py
index d0c0a9654..1df98fac6 100755
--- a/beets/plugins.py
+++ b/beets/plugins.py
@@ -67,13 +67,13 @@ class BeetsPlugin(object):
         """Should return a Distance object to be added to the
         distance for every track comparison.
         """
-        return beets.autotag.match.Distance()
+        return beets.autotag.hooks.Distance()
 
     def album_distance(self, items, album_info, mapping):
         """Should return a Distance object to be added to the
         distance for every album-level comparison.
         """
-        return beets.autotag.match.Distance()
+        return beets.autotag.hooks.Distance()
 
     def candidates(self, items, artist, album, va_likely):
         """Should return a sequence of AlbumInfo objects that match the
@@ -244,14 +244,16 @@ def track_distance(item, info):
     """Gets the track distance calculated by all loaded plugins.
     Returns a Distance object.
     """
-    dist = beets.autotag.match.Distance()
+    from beets.autotag.hooks import Distance
+    dist = Distance()
     for plugin in find_plugins():
         dist.update(plugin.track_distance(item, info))
     return dist
 
 def album_distance(items, album_info, mapping):
     """Returns the album distance calculated by plugins."""
-    dist = beets.autotag.match.Distance()
+    from beets.autotag.hooks import Distance
+    dist = Distance()
     for plugin in find_plugins():
         dist.update(plugin.album_distance(items, album_info, mapping))
     return dist
diff --git a/beetsplug/beatport.py b/beetsplug/beatport.py
index c68901cc1..05e33637c 100644
--- a/beetsplug/beatport.py
+++ b/beetsplug/beatport.py
@@ -20,8 +20,7 @@ from datetime import datetime, timedelta
 
 import requests
 
-from beets.autotag.hooks import AlbumInfo, TrackInfo
-from beets.autotag.match import Distance
+from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance
 from beets.plugins import BeetsPlugin
 
 log = logging.getLogger('beets')
diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py
index 006f85db0..83f67c0a9 100644
--- a/beetsplug/chroma.py
+++ b/beetsplug/chroma.py
@@ -21,7 +21,6 @@ from beets import util
 from beets import config
 from beets.util import confit
 from beets.autotag import hooks
-from beets.autotag.match import Distance
 import acoustid
 import logging
 from collections import defaultdict
@@ -114,7 +113,7 @@ def _all_releases(items):
 
 class AcoustidPlugin(plugins.BeetsPlugin):
     def track_distance(self, item, info):
-        dist = Distance()
+        dist = hooks.Distance()
         if item.path not in _matches or not info.track_id:
             # Match failed or no track ID.
             return dist
diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index 822ed59e3..e9e6477b1 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -15,11 +15,9 @@
 """Adds Discogs album search support to the autotagger. Requires the
 discogs-client library.
 """
-from beets import config
-from beets.autotag.hooks import AlbumInfo, TrackInfo
-from beets.autotag.match import current_metadata, Distance, VA_ARTISTS
+from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance
 from beets.plugins import BeetsPlugin
-from discogs_client import Artist, DiscogsAPIError, Release, Search
+from discogs_client import DiscogsAPIError, Release, Search
 import beets
 import discogs_client
 import logging
diff --git a/test/test_autotag.py b/test/test_autotag.py
index dc75ee0ab..13c167fcf 100644
--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@@ -23,13 +23,13 @@ import _common
 from _common import unittest
 from beets import autotag
 from beets.autotag import match
-from beets.autotag.match import Distance
+from beets.autotag.hooks import Distance, string_dist
 from beets.library import Item
 from beets.util import plurality
 from beets.autotag import AlbumInfo, TrackInfo
 from beets import config
 
-class PluralityTest(unittest.TestCase):
+class PluralityTest(_common.TestCase):
     def test_plurality_consensus(self):
         objs = [1, 1, 1, 1]
         obj, freq = plurality(objs)
@@ -106,8 +106,9 @@ def _make_trackinfo():
         TrackInfo(u'three', None, u'some artist', length=1, index=3),
     ]
 
-class DistanceTest(unittest.TestCase):
+class DistanceTest(_common.TestCase):
     def setUp(self):
+        super(DistanceTest, self).setUp()
         self.dist = Distance()
 
     def test_add(self):
@@ -176,62 +177,66 @@ class DistanceTest(unittest.TestCase):
         self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0])
 
     def test_add_string(self):
-        dist = match.string_dist(u'abc', u'bcd')
+        dist = string_dist(u'abc', u'bcd')
         self.dist.add_string('string', u'abc', u'bcd')
         self.assertEqual(self.dist._penalties['string'], [dist])
 
     def test_distance(self):
         config['match']['distance_weights']['album'] = 2.0
         config['match']['distance_weights']['medium'] = 1.0
-        self.dist.add('album', 0.5)
-        self.dist.add('media', 0.25)
-        self.dist.add('media', 0.75)
-        self.assertEqual(self.dist.distance, 0.5)
+        dist = Distance()
+        dist.add('album', 0.5)
+        dist.add('media', 0.25)
+        dist.add('media', 0.75)
+        self.assertEqual(dist.distance, 0.5)
 
         # __getitem__()
-        self.assertEqual(self.dist['album'], 0.25)
-        self.assertEqual(self.dist['media'], 0.25)
+        self.assertEqual(dist['album'], 0.25)
+        self.assertEqual(dist['media'], 0.25)
 
     def test_max_distance(self):
         config['match']['distance_weights']['album'] = 3.0
         config['match']['distance_weights']['medium'] = 1.0
-        self.dist.add('album', 0.5)
-        self.dist.add('medium', 0.0)
-        self.dist.add('medium', 0.0)
-        self.assertEqual(self.dist.max_distance, 5.0)
+        dist = Distance()
+        dist.add('album', 0.5)
+        dist.add('medium', 0.0)
+        dist.add('medium', 0.0)
+        self.assertEqual(dist.max_distance, 5.0)
 
     def test_operators(self):
         config['match']['distance_weights']['source'] = 1.0
         config['match']['distance_weights']['album'] = 2.0
         config['match']['distance_weights']['medium'] = 1.0
-        self.dist.add('source', 0.0)
-        self.dist.add('album', 0.5)
-        self.dist.add('medium', 0.25)
-        self.dist.add('medium', 0.75)
-        self.assertEqual(len(self.dist), 2)
-        self.assertEqual(list(self.dist), [(0.2, 'album'), (0.2, 'medium')])
-        self.assertTrue(self.dist == 0.4)
-        self.assertTrue(self.dist < 1.0)
-        self.assertTrue(self.dist > 0.0)
-        self.assertEqual(self.dist - 0.4, 0.0)
-        self.assertEqual(0.4 - self.dist, 0.0)
-        self.assertEqual(float(self.dist), 0.4)
+        dist = Distance()
+        dist.add('source', 0.0)
+        dist.add('album', 0.5)
+        dist.add('medium', 0.25)
+        dist.add('medium', 0.75)
+        self.assertEqual(len(dist), 2)
+        self.assertEqual(list(dist), [(0.2, 'album'), (0.2, 'medium')])
+        self.assertTrue(dist == 0.4)
+        self.assertTrue(dist < 1.0)
+        self.assertTrue(dist > 0.0)
+        self.assertEqual(dist - 0.4, 0.0)
+        self.assertEqual(0.4 - dist, 0.0)
+        self.assertEqual(float(dist), 0.4)
 
     def test_raw_distance(self):
         config['match']['distance_weights']['album'] = 3.0
         config['match']['distance_weights']['medium'] = 1.0
-        self.dist.add('album', 0.5)
-        self.dist.add('medium', 0.25)
-        self.dist.add('medium', 0.5)
-        self.assertEqual(self.dist.raw_distance, 2.25)
+        dist = Distance()
+        dist.add('album', 0.5)
+        dist.add('medium', 0.25)
+        dist.add('medium', 0.5)
+        self.assertEqual(dist.raw_distance, 2.25)
 
     def test_sorted(self):
         config['match']['distance_weights']['album'] = 4.0
         config['match']['distance_weights']['medium'] = 2.0
-
-        self.dist.add('album', 0.1875)
-        self.dist.add('medium', 0.75)
-        self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')])
+        dist = Distance()
+        dist.add('album', 0.1875)
+        dist.add('medium', 0.75)
+        self.assertEqual(dist.sorted, [(0.25, 'medium'), (0.125, 'album')])
 
         # Sort by key if distance is equal.
         dist = Distance()
@@ -240,20 +245,21 @@ class DistanceTest(unittest.TestCase):
         self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')])
 
     def test_update(self):
-        self.dist.add('album', 0.5)
-        self.dist.add('media', 1.0)
+        dist1 = Distance()
+        dist1.add('album', 0.5)
+        dist1.add('media', 1.0)
 
-        dist = Distance()
-        dist.add('album', 0.75)
-        dist.add('album', 0.25)
-        self.dist.add('media', 0.05)
+        dist2 = Distance()
+        dist2.add('album', 0.75)
+        dist2.add('album', 0.25)
+        dist2.add('media', 0.05)
 
-        self.dist.update(dist)
+        dist1.update(dist2)
 
-        self.assertEqual(self.dist._penalties, {'album': [0.5, 0.75, 0.25],
-                                                'media': [1.0, 0.05]})
+        self.assertEqual(dist1._penalties, {'album': [0.5, 0.75, 0.25],
+                                             'media': [1.0, 0.05]})
 
-class TrackDistanceTest(unittest.TestCase):
+class TrackDistanceTest(_common.TestCase):
     def test_identical_tracks(self):
         item = _make_item(u'one', 1)
         info = _make_trackinfo()[0]
@@ -280,7 +286,7 @@ class TrackDistanceTest(unittest.TestCase):
         dist = match.track_distance(item, info, incl_artist=True)
         self.assertEqual(dist, 0.0)
 
-class AlbumDistanceTest(unittest.TestCase):
+class AlbumDistanceTest(_common.TestCase):
     def _mapping(self, items, info):
         out = {}
         for i, t in zip(items, info.tracks):
@@ -863,77 +869,77 @@ class ApplyCompilationTest(_common.TestCase, ApplyTestUtil):
 
 class StringDistanceTest(unittest.TestCase):
     def test_equal_strings(self):
-        dist = match.string_dist(u'Some String', u'Some String')
+        dist = string_dist(u'Some String', u'Some String')
         self.assertEqual(dist, 0.0)
 
     def test_different_strings(self):
-        dist = match.string_dist(u'Some String', u'Totally Different')
+        dist = string_dist(u'Some String', u'Totally Different')
         self.assertNotEqual(dist, 0.0)
 
     def test_punctuation_ignored(self):
-        dist = match.string_dist(u'Some String', u'Some.String!')
+        dist = string_dist(u'Some String', u'Some.String!')
         self.assertEqual(dist, 0.0)
 
     def test_case_ignored(self):
-        dist = match.string_dist(u'Some String', u'sOME sTring')
+        dist = string_dist(u'Some String', u'sOME sTring')
         self.assertEqual(dist, 0.0)
 
     def test_leading_the_has_lower_weight(self):
-        dist1 = match.string_dist(u'XXX Band Name', u'Band Name')
-        dist2 = match.string_dist(u'The Band Name', u'Band Name')
+        dist1 = string_dist(u'XXX Band Name', u'Band Name')
+        dist2 = string_dist(u'The Band Name', u'Band Name')
         self.assert_(dist2 < dist1)
 
     def test_parens_have_lower_weight(self):
-        dist1 = match.string_dist(u'One .Two.', u'One')
-        dist2 = match.string_dist(u'One (Two)', u'One')
+        dist1 = string_dist(u'One .Two.', u'One')
+        dist2 = string_dist(u'One (Two)', u'One')
         self.assert_(dist2 < dist1)
 
     def test_brackets_have_lower_weight(self):
-        dist1 = match.string_dist(u'One .Two.', u'One')
-        dist2 = match.string_dist(u'One [Two]', u'One')
+        dist1 = string_dist(u'One .Two.', u'One')
+        dist2 = string_dist(u'One [Two]', u'One')
         self.assert_(dist2 < dist1)
 
     def test_ep_label_has_zero_weight(self):
-        dist = match.string_dist(u'My Song (EP)', u'My Song')
+        dist = string_dist(u'My Song (EP)', u'My Song')
         self.assertEqual(dist, 0.0)
 
     def test_featured_has_lower_weight(self):
-        dist1 = match.string_dist(u'My Song blah Someone', u'My Song')
-        dist2 = match.string_dist(u'My Song feat Someone', u'My Song')
+        dist1 = string_dist(u'My Song blah Someone', u'My Song')
+        dist2 = string_dist(u'My Song feat Someone', u'My Song')
         self.assert_(dist2 < dist1)
 
     def test_postfix_the(self):
-        dist = match.string_dist(u'The Song Title', u'Song Title, The')
+        dist = string_dist(u'The Song Title', u'Song Title, The')
         self.assertEqual(dist, 0.0)
 
     def test_postfix_a(self):
-        dist = match.string_dist(u'A Song Title', u'Song Title, A')
+        dist = string_dist(u'A Song Title', u'Song Title, A')
         self.assertEqual(dist, 0.0)
 
     def test_postfix_an(self):
-        dist = match.string_dist(u'An Album Title', u'Album Title, An')
+        dist = string_dist(u'An Album Title', u'Album Title, An')
         self.assertEqual(dist, 0.0)
 
     def test_empty_strings(self):
-        dist = match.string_dist(u'', u'')
+        dist = string_dist(u'', u'')
         self.assertEqual(dist, 0.0)
 
     def test_solo_pattern(self):
         # Just make sure these don't crash.
-        match.string_dist(u'The ', u'')
-        match.string_dist(u'(EP)', u'(EP)')
-        match.string_dist(u', An', u'')
+        string_dist(u'The ', u'')
+        string_dist(u'(EP)', u'(EP)')
+        string_dist(u', An', u'')
 
     def test_heuristic_does_not_harm_distance(self):
-        dist = match.string_dist(u'Untitled', u'[Untitled]')
+        dist = string_dist(u'Untitled', u'[Untitled]')
         self.assertEqual(dist, 0.0)
 
     def test_ampersand_expansion(self):
-        dist = match.string_dist(u'And', u'&')
+        dist = string_dist(u'And', u'&')
         self.assertEqual(dist, 0.0)
 
     def test_accented_characters(self):
-        dist = match.string_dist(u'\xe9\xe1\xf1', u'ean')
+        dist = string_dist(u'\xe9\xe1\xf1', u'ean')
         self.assertEqual(dist, 0.0)
 
 def suite():

From c85e43ee2a6ea0ae70a899f3778cbadf104f799f Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 15:24:25 -0700
Subject: [PATCH 2/9] finish removing setUp from DistanceTest

I had to do this for the tests that use the config, so I thought I might as
well finish the job.
---
 test/test_autotag.py | 89 +++++++++++++++++++++++---------------------
 1 file changed, 46 insertions(+), 43 deletions(-)

diff --git a/test/test_autotag.py b/test/test_autotag.py
index 13c167fcf..4a1eba17b 100644
--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@@ -107,79 +107,82 @@ def _make_trackinfo():
     ]
 
 class DistanceTest(_common.TestCase):
-    def setUp(self):
-        super(DistanceTest, self).setUp()
-        self.dist = Distance()
-
     def test_add(self):
-        self.dist.add('add', 1.0)
-        self.assertEqual(self.dist._penalties, {'add': [1.0]})
+        dist = Distance()
+        dist.add('add', 1.0)
+        self.assertEqual(dist._penalties, {'add': [1.0]})
 
     def test_add_equality(self):
-        self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi'])
-        self.assertEqual(self.dist._penalties['equality'], [0.0])
+        dist = Distance()
+        dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi'])
+        self.assertEqual(dist._penalties['equality'], [0.0])
 
-        self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi'])
-        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0])
+        dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi'])
+        self.assertEqual(dist._penalties['equality'], [0.0, 1.0])
 
-        self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
-        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0])
+        dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
+        self.assertEqual(dist._penalties['equality'], [0.0, 1.0, 0.0])
 
     def test_add_expr(self):
-        self.dist.add_expr('expr', True)
-        self.assertEqual(self.dist._penalties['expr'], [1.0])
+        dist = Distance()
+        dist.add_expr('expr', True)
+        self.assertEqual(dist._penalties['expr'], [1.0])
 
-        self.dist.add_expr('expr', False)
-        self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0])
+        dist.add_expr('expr', False)
+        self.assertEqual(dist._penalties['expr'], [1.0, 0.0])
 
     def test_add_number(self):
+        dist = Distance()
         # Add a full penalty for each number of difference between two numbers.
 
-        self.dist.add_number('number', 1, 1)
-        self.assertEqual(self.dist._penalties['number'], [0.0])
+        dist.add_number('number', 1, 1)
+        self.assertEqual(dist._penalties['number'], [0.0])
 
-        self.dist.add_number('number', 1, 2)
-        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0])
+        dist.add_number('number', 1, 2)
+        self.assertEqual(dist._penalties['number'], [0.0, 1.0])
 
-        self.dist.add_number('number', 2, 1)
-        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0])
+        dist.add_number('number', 2, 1)
+        self.assertEqual(dist._penalties['number'], [0.0, 1.0, 1.0])
 
-        self.dist.add_number('number', -1, 2)
-        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0,
+        dist.add_number('number', -1, 2)
+        self.assertEqual(dist._penalties['number'], [0.0, 1.0, 1.0, 1.0,
                                                           1.0, 1.0])
 
     def test_add_priority(self):
-        self.dist.add_priority('priority', 'abc', 'abc')
-        self.assertEqual(self.dist._penalties['priority'], [0.0])
+        dist = Distance()
+        dist.add_priority('priority', 'abc', 'abc')
+        self.assertEqual(dist._penalties['priority'], [0.0])
 
-        self.dist.add_priority('priority', 'def', ['abc', 'def'])
-        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5])
+        dist.add_priority('priority', 'def', ['abc', 'def'])
+        self.assertEqual(dist._penalties['priority'], [0.0, 0.5])
 
-        self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef',
+        dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef',
                                                   re.compile('GH', re.I)])
-        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75])
+        self.assertEqual(dist._penalties['priority'], [0.0, 0.5, 0.75])
 
-        self.dist.add_priority('priority', 'xyz', ['abc', 'def'])
-        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75,
+        dist.add_priority('priority', 'xyz', ['abc', 'def'])
+        self.assertEqual(dist._penalties['priority'], [0.0, 0.5, 0.75,
                                                             1.0])
 
     def test_add_ratio(self):
-        self.dist.add_ratio('ratio', 25, 100)
-        self.assertEqual(self.dist._penalties['ratio'], [0.25])
+        dist = Distance()
+        dist.add_ratio('ratio', 25, 100)
+        self.assertEqual(dist._penalties['ratio'], [0.25])
 
-        self.dist.add_ratio('ratio', 10, 5)
-        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0])
+        dist.add_ratio('ratio', 10, 5)
+        self.assertEqual(dist._penalties['ratio'], [0.25, 1.0])
 
-        self.dist.add_ratio('ratio', -5, 5)
-        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0])
+        dist.add_ratio('ratio', -5, 5)
+        self.assertEqual(dist._penalties['ratio'], [0.25, 1.0, 0.0])
 
-        self.dist.add_ratio('ratio', 5, 0)
-        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0])
+        dist.add_ratio('ratio', 5, 0)
+        self.assertEqual(dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0])
 
     def test_add_string(self):
-        dist = string_dist(u'abc', u'bcd')
-        self.dist.add_string('string', u'abc', u'bcd')
-        self.assertEqual(self.dist._penalties['string'], [dist])
+        dist = Distance()
+        sdist = string_dist(u'abc', u'bcd')
+        dist.add_string('string', u'abc', u'bcd')
+        self.assertEqual(dist._penalties['string'], [sdist])
 
     def test_distance(self):
         config['match']['distance_weights']['album'] = 2.0

From c818663539e096f2a74e7d9a750fa91c74a80caa Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 15:31:25 -0700
Subject: [PATCH 3/9] replace Distance.sorted() with .items()

This is an effort to make the distance object feel slightly more dict-like.
The name changed and order of tuples is reversed: we now yield (key, value)
instead of (value, key), which I think is a little more intuitive.
---
 beets/autotag/hooks.py | 18 +++++++++---------
 beets/ui/commands.py   |  6 +++---
 test/test_autotag.py   |  8 ++++----
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py
index c29cf576f..6156abf2c 100644
--- a/beets/autotag/hooks.py
+++ b/beets/autotag/hooks.py
@@ -294,8 +294,8 @@ class Distance(object):
         return dist_raw
 
     @property
-    def sorted(self):
-        """Returns a list of (dist, key) pairs, with `dist` being the
+    def items(self):
+        """Returns a list of (key, dist) pairs, with `dist` being the
         weighted distance, sorted from highest to lowest. Does not
         include penalties with a zero value.
         """
@@ -303,11 +303,11 @@ class Distance(object):
         for key in self._penalties:
             dist = self[key]
             if dist:
-                list_.append((dist, key))
-        # Convert distance into a negative float we can sort items in ascending
-        # order (for keys, when the penalty is equal) and still get the items
-        # with the biggest distance first.
-        return sorted(list_, key=lambda (dist, key): (0-dist, key))
+                list_.append((key, dist))
+        # Negate the distance so that a single ascending sort puts the
+        # biggest distances first while still ordering keys
+        # alphabetically when penalties are equal.
+        return sorted(list_, key=lambda (key, dist): (0-dist, key))
 
 
     # Behave like a float.
@@ -336,10 +336,10 @@ class Distance(object):
         return 0.0
 
     def __iter__(self):
-        return iter(self.sorted)
+        return iter(self.items)
 
     def __len__(self):
-        return len(self.sorted)
+        return len(self.items)
 
     def update(self, dist):
         """Adds all the distance penalties from `dist`.
diff --git a/beets/ui/commands.py b/beets/ui/commands.py
index dfe3585c1..0512cb585 100644
--- a/beets/ui/commands.py
+++ b/beets/ui/commands.py
@@ -164,11 +164,11 @@ def dist_string(dist):
     return out
 
 def penalty_string(distance, limit=None):
-    """Returns a colorized string that indicates all the penalties applied to
-    a distance object.
+    """Returns a colorized string that indicates all the penalties
+    applied to a distance object.
     """
     penalties = []
-    for _, key in distance:
+    for key, _ in distance:
         key = key.replace('album_', '')
         key = key.replace('track_', '')
         key = key.replace('_', ' ')
diff --git a/test/test_autotag.py b/test/test_autotag.py
index 4a1eba17b..889584e0c 100644
--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@@ -216,7 +216,7 @@ class DistanceTest(_common.TestCase):
         dist.add('medium', 0.25)
         dist.add('medium', 0.75)
         self.assertEqual(len(dist), 2)
-        self.assertEqual(list(dist), [(0.2, 'album'), (0.2, 'medium')])
+        self.assertEqual(list(dist), [('album', 0.2), ('medium', 0.2)])
         self.assertTrue(dist == 0.4)
         self.assertTrue(dist < 1.0)
         self.assertTrue(dist > 0.0)
@@ -233,19 +233,19 @@ class DistanceTest(_common.TestCase):
         dist.add('medium', 0.5)
         self.assertEqual(dist.raw_distance, 2.25)
 
-    def test_sorted(self):
+    def test_items(self):
         config['match']['distance_weights']['album'] = 4.0
         config['match']['distance_weights']['medium'] = 2.0
         dist = Distance()
         dist.add('album', 0.1875)
         dist.add('medium', 0.75)
-        self.assertEqual(dist.sorted, [(0.25, 'medium'), (0.125, 'album')])
+        self.assertEqual(dist.items, [('medium', 0.25), ('album', 0.125)])
 
         # Sort by key if distance is equal.
         dist = Distance()
         dist.add('album', 0.375)
         dist.add('medium', 0.75)
-        self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')])
+        self.assertEqual(dist.items, [('album', 0.25), ('medium', 0.25)])
 
     def test_update(self):
         dist1 = Distance()

From 33ff001d0a6f3bc6802546c0fed5c6b6a03f8ae6 Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 15:35:05 -0700
Subject: [PATCH 4/9] move track length params out of weights section

These aren't really weights, so I'm moving them out of "distance_weights".
---
 beets/autotag/match.py    | 11 ++++-------
 beets/config_default.yaml |  6 ++----
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/beets/autotag/match.py b/beets/autotag/match.py
index 455d22be7..630a0de55 100644
--- a/beets/autotag/match.py
+++ b/beets/autotag/match.py
@@ -28,9 +28,6 @@ from beets.util import plurality
 from beets.util.enumeration import enum
 from beets.autotag import hooks
 
-# A configuration view for the distance weights.
-weights = config['match']['distance_weights']
-
 # Recommendation enumeration.
 recommendation = enum('none', 'low', 'medium', 'strong', name='recommendation')
 
@@ -112,9 +109,9 @@ def track_distance(item, track_info, incl_artist=False):
     # Length.
     if track_info.length:
         diff = abs(item.length - track_info.length) - \
-               weights['track_length_grace'].as_number()
+               config['match']['track_length_grace'].as_number()
         dist.add_ratio('track_length', diff,
-                       weights['track_length_max'].as_number())
+                       config['match']['track_length_max'].as_number())
 
     # Title.
     dist.add_string('track_title', item.title, track_info.title)
@@ -294,10 +291,10 @@ def _recommendation(results):
 
     # Downgrade to the max rec if it is lower than the current rec for an
     # applied penalty.
-    keys = set(key for _, key in min_dist)
+    keys = set(key for key, _ in min_dist)
     if isinstance(results[0], hooks.AlbumMatch):
         for track_dist in min_dist.tracks.values():
-            keys.update(key for _, key in track_dist)
+            keys.update(key for key, _ in track_dist)
     for key in keys:
         max_rec = config['match']['max_rec'][key].as_choice({
             'strong': recommendation.strong,
diff --git a/beets/config_default.yaml b/beets/config_default.yaml
index 44cb51051..05ceb6272 100644
--- a/beets/config_default.yaml
+++ b/beets/config_default.yaml
@@ -85,8 +85,6 @@ match:
         track_title: strong
         track_artist: strong
         track_index: strong
-        track_length_grace: strong
-        track_length_max: strong
         track_length: strong
         track_id: strong
     distance_weights:
@@ -107,8 +105,6 @@ match:
         track_title: 3.0
         track_artist: 2.0
         track_index: 1.0
-        track_length_grace: 10
-        track_length_max: 30
         track_length: 2.0
         track_id: 5.0
     preferred:
@@ -116,3 +112,5 @@ match:
         media: []
         original_year: no
     ignored: []
+    track_length_grace: 10
+    track_length_max: 30

From 7983c94ef825bd30969b6cf4cda7a330bfa202e1 Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 15:40:51 -0700
Subject: [PATCH 5/9] add keys() method to Distance

---
 beets/autotag/hooks.py | 16 +++++++++-------
 beets/autotag/match.py |  4 ++--
 beets/ui/commands.py   |  2 +-
 test/test_autotag.py   |  2 +-
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py
index 6156abf2c..5938c2308 100644
--- a/beets/autotag/hooks.py
+++ b/beets/autotag/hooks.py
@@ -267,7 +267,7 @@ class Distance(object):
 
     @property
     def distance(self):
-        """Returns a weighted and normalised distance across all
+        """Return a weighted and normalized distance across all
         penalties.
         """
         dist_max = self.max_distance
@@ -277,7 +277,7 @@ class Distance(object):
 
     @property
     def max_distance(self):
-        """Returns the maximum distance penalty.
+        """Return the maximum distance penalty (normalization factor).
         """
         dist_max = 0.0
         for key, penalty in self._penalties.iteritems():
@@ -286,16 +286,15 @@ class Distance(object):
 
     @property
     def raw_distance(self):
-        """Returns the raw (denormalized) distance.
+        """Return the raw (denormalized) distance.
         """
         dist_raw = 0.0
         for key, penalty in self._penalties.iteritems():
             dist_raw += sum(penalty) * self._weights[key]
         return dist_raw
 
-    @property
     def items(self):
-        """Returns a list of (key, dist) pairs, with `dist` being the
+        """Return a list of (key, dist) pairs, with `dist` being the
         weighted distance, sorted from highest to lowest. Does not
         include penalties with a zero value.
         """
@@ -336,10 +335,13 @@ class Distance(object):
         return 0.0
 
     def __iter__(self):
-        return iter(self.items)
+        return iter(self.items())
 
     def __len__(self):
-        return len(self.items)
+        return len(self.items())
+
+    def keys(self):
+        return [key for key, _ in self.items()]
 
     def update(self, dist):
         """Adds all the distance penalties from `dist`.
diff --git a/beets/autotag/match.py b/beets/autotag/match.py
index 630a0de55..1f1ba2d43 100644
--- a/beets/autotag/match.py
+++ b/beets/autotag/match.py
@@ -291,10 +291,10 @@ def _recommendation(results):
 
     # Downgrade to the max rec if it is lower than the current rec for an
     # applied penalty.
-    keys = set(key for key, _ in min_dist)
+    keys = set(min_dist.keys())
     if isinstance(results[0], hooks.AlbumMatch):
         for track_dist in min_dist.tracks.values():
-            keys.update(key for key, _ in track_dist)
+            keys.update(track_dist.keys())
     for key in keys:
         max_rec = config['match']['max_rec'][key].as_choice({
             'strong': recommendation.strong,
diff --git a/beets/ui/commands.py b/beets/ui/commands.py
index 0512cb585..f9e3bc6eb 100644
--- a/beets/ui/commands.py
+++ b/beets/ui/commands.py
@@ -168,7 +168,7 @@ def penalty_string(distance, limit=None):
     applied to a distance object.
     """
     penalties = []
-    for key, _ in distance:
+    for key in distance.keys():
         key = key.replace('album_', '')
         key = key.replace('track_', '')
         key = key.replace('_', ' ')
diff --git a/test/test_autotag.py b/test/test_autotag.py
index 889584e0c..4684f9719 100644
--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@@ -239,7 +239,7 @@ class DistanceTest(_common.TestCase):
         dist = Distance()
         dist.add('album', 0.1875)
         dist.add('medium', 0.75)
-        self.assertEqual(dist.items, [('medium', 0.25), ('album', 0.125)])
+        self.assertEqual(dist.items(), [('medium', 0.25), ('album', 0.125)])
 
         # Sort by key if distance is equal.
         dist = Distance()

From 5c4b17685e9dfd7ee214e844c79bbbe846486a85 Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 15:45:22 -0700
Subject: [PATCH 6/9] parameter name change

---
 beets/ui/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/beets/ui/__init__.py b/beets/ui/__init__.py
index 460320a34..f7288141f 100644
--- a/beets/ui/__init__.py
+++ b/beets/ui/__init__.py
@@ -366,7 +366,7 @@ def colorize(color, text):
     else:
         return text
 
-def _colordiff(a, b, highlight='red', second_highlight='lightgray'):
+def _colordiff(a, b, highlight='red', minor_highlight='lightgray'):
     """Given two values, return the same pair of strings except with
     their differences highlighted in the specified color. Strings are
     highlighted intelligently to show differences; other values are
@@ -407,7 +407,7 @@ def _colordiff(a, b, highlight='red', second_highlight='lightgray'):
             if a[a_start:a_end].lower() != b[b_start:b_end].lower():
                 color = highlight
             else:
-                color = second_highlight
+                color = minor_highlight
             a_out.append(colorize(color, a[a_start:a_end]))
             b_out.append(colorize(color, b[b_start:b_end]))
         else:

From 82991ce6145537bcf1eeddd9dc7d7f6de42ab3a9 Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 15:53:06 -0700
Subject: [PATCH 7/9] documentation enhancements for new config opts

---
 docs/changelog.rst        |  2 +-
 docs/reference/config.rst | 69 +++++++++++++++++++++------------------
 2 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 784d2eb88..5f7ce39cc 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -103,7 +103,7 @@ same MusicBrainz release group:
   beets re-identify the same release when re-importing existing files.
 * Prefer releases that are closest to the tagged ``year``. Tolerate files
   tagged with release or original year.
-* The new :ref:`preferred_media` config option lets you prefer a certain media
+* The new ``preferred_media`` config option lets you prefer a certain media
   type when the ``media`` field is unset on an album.
 * Apply minor penalties across a range of fields to differentiate between
   nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
diff --git a/docs/reference/config.rst b/docs/reference/config.rst
index d320cd655..617700c53 100644
--- a/docs/reference/config.rst
+++ b/docs/reference/config.rst
@@ -395,40 +395,45 @@ max_rec
 As mentioned above, autotagger matches have *recommendations* that control how
 the UI behaves for a certain quality of match. The recommendation for a certain
 match is based on the overall distance calculation. But you can also control
-the recommendation when a distance penalty is being applied for a specific
-field by defining *maximum* recommendations for each field:
+the recommendation when a specific distance penalty is applied by defining
+*maximum* recommendations for each field:
 
-To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here
-are the defaults::
+To define maxima, use keys under ``max_rec:`` in the ``match`` section. The
+defaults are "medium" for missing and unmatched tracks and "strong" (i.e., no
+maximum) for everything else::
 
     match:
         max_rec:
-            source: strong
-            artist: strong
-            album: strong
-            media: strong
-            mediums: strong
-            year: strong
-            country: strong
-            label: strong
-            catalognum: strong
-            albumdisambig: strong
-            album_id: strong
-            tracks: strong
             missing_tracks: medium
             unmatched_tracks: medium
-            track_title: strong
-            track_artist: strong
-            track_index: strong
-            track_length_grace: strong
-            track_length_max: strong
-            track_length: strong
-            track_id: strong
 
-If a recommendation is higher than the configured maximum and a penalty is
-being applied, the recommendation will be downgraded. The maximum for each
-field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the
-maximum recommendation is ``strong``, no "downgrading" occurs.
+If a recommendation is higher than the configured maximum and the indicated
+penalty is applied, the recommendation is downgraded. The setting for
+each field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the
+maximum recommendation is ``strong``, no "downgrading" occurs. The available
+penalty names here are:
+
+* source
+* artist
+* album
+* media
+* mediums
+* year
+* country
+* label
+* catalognum
+* albumdisambig
+* album_id
+* tracks
+* missing_tracks
+* unmatched_tracks
+* track_title
+* track_artist
+* track_index
+* track_length_grace
+* track_length_max
+* track_length
+* track_id
 
 .. _preferred:
 
@@ -440,10 +445,10 @@ similarity, you can also specify an ordered list of preferred countries and
 media types.
 
 A distance penalty will be applied if the country or media type from the match
-metadata doesn't match. The order is important, the first item will be most
-preferred. Each item may be a regular expression, and will be matched case
-insensitively. The number of media will be stripped when matching preferred
-media (e.g. "2x" in "2xCD").
+metadata doesn't match. The specified values are preferred in descending order
+(i.e., the first item will be most preferred). Each item may be a regular
+expression, and will be matched case insensitively. The number of media will
+be stripped when matching preferred media (e.g. "2x" in "2xCD").
 
 You can also tell the autotagger to prefer matches that have a release year
 closest to the original year for an album.
@@ -469,6 +474,8 @@ the penalty name to the ``ignored`` setting::
     match:
         ignored: missing_tracks unmatched_tracks
 
+The available penalties are the same as those for the :ref:`max_rec` setting.
+
 .. _path-format-config:
 
 Path Format Configuration

From b1ea90a724fd2117d60775493a80385902b2166c Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 15:59:41 -0700
Subject: [PATCH 8/9] don't duplicate penalty names in max_rec section

This is for maintainability: in the future, when we add new distance penalty
components, now we won't have to list them twice in the default config.
---
 beets/autotag/match.py    | 16 +++++++++-------
 beets/config_default.yaml | 17 -----------------
 docs/reference/config.rst |  2 --
 test/test_autotag.py      |  2 +-
 4 files changed, 10 insertions(+), 27 deletions(-)

diff --git a/beets/autotag/match.py b/beets/autotag/match.py
index 1f1ba2d43..a4bc47fa8 100644
--- a/beets/autotag/match.py
+++ b/beets/autotag/match.py
@@ -295,14 +295,16 @@ def _recommendation(results):
     if isinstance(results[0], hooks.AlbumMatch):
         for track_dist in min_dist.tracks.values():
             keys.update(track_dist.keys())
+    max_rec_view = config['match']['max_rec']
     for key in keys:
-        max_rec = config['match']['max_rec'][key].as_choice({
-            'strong': recommendation.strong,
-            'medium': recommendation.medium,
-            'low': recommendation.low,
-            'none': recommendation.none,
-        })
-        rec = min(rec, max_rec)
+        if key in max_rec_view.keys():
+            max_rec = max_rec_view[key].as_choice({
+                'strong': recommendation.strong,
+                'medium': recommendation.medium,
+                'low': recommendation.low,
+                'none': recommendation.none,
+            })
+            rec = min(rec, max_rec)
 
     return rec
 
diff --git a/beets/config_default.yaml b/beets/config_default.yaml
index 05ceb6272..113a10ed0 100644
--- a/beets/config_default.yaml
+++ b/beets/config_default.yaml
@@ -68,25 +68,8 @@ match:
     medium_rec_thresh: 0.25
     rec_gap_thresh: 0.25
     max_rec:
-        source: strong
-        artist: strong
-        album: strong
-        media: strong
-        mediums: strong
-        year: strong
-        country: strong
-        label: strong
-        catalognum: strong
-        albumdisambig: strong
-        album_id: strong
-        tracks: strong
         missing_tracks: medium
         unmatched_tracks: medium
-        track_title: strong
-        track_artist: strong
-        track_index: strong
-        track_length: strong
-        track_id: strong
     distance_weights:
         source: 2.0
         artist: 3.0
diff --git a/docs/reference/config.rst b/docs/reference/config.rst
index 617700c53..36844b668 100644
--- a/docs/reference/config.rst
+++ b/docs/reference/config.rst
@@ -430,8 +430,6 @@ penalty names here are:
 * track_title
 * track_artist
 * track_index
-* track_length_grace
-* track_length_max
 * track_length
 * track_id
 
diff --git a/test/test_autotag.py b/test/test_autotag.py
index 4684f9719..f0b637eb6 100644
--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@@ -245,7 +245,7 @@ class DistanceTest(_common.TestCase):
         dist = Distance()
         dist.add('album', 0.375)
         dist.add('medium', 0.75)
-        self.assertEqual(dist.items, [('album', 0.25), ('medium', 0.25)])
+        self.assertEqual(dist.items(), [('album', 0.25), ('medium', 0.25)])
 
     def test_update(self):
         dist1 = Distance()

From 2f053b0ecd681f7daf654081776b3b7703e59205 Mon Sep 17 00:00:00 2001
From: Adrian Sampson <adrian@radbox.org>
Date: Mon, 10 Jun 2013 16:06:43 -0700
Subject: [PATCH 9/9] scrub: handle IOError

Mutagen can raise this error when trying to truncate a file.
---
 beetsplug/scrub.py | 5 +++++
 docs/changelog.rst | 1 +
 2 files changed, 6 insertions(+)

diff --git a/beetsplug/scrub.py b/beetsplug/scrub.py
index f69e3abf1..1a95c222e 100644
--- a/beetsplug/scrub.py
+++ b/beetsplug/scrub.py
@@ -108,6 +108,11 @@ def _scrub(path):
             # remove them. In this case, we just remove all the tags.
             for tag in f.keys():
                 del f[tag]
+        except IOError as exc:
+            log.error(u'could not scrub {0}: {1}'.format(
+                util.displayable_path(path),
+                exc,
+            ))
         f.save()
 
 # Automatically embed art into imported albums.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 5f7ce39cc..3baf66585 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -36,6 +36,7 @@ And some bug fixes:
   Intzoglou.
 * Fix an occasional crash in the :doc:`/plugins/beatport` when a length
   field was missing from the API response. Thanks to Timothy Appnel.
+* :doc:`/plugins/scrub`: Handle and log I/O errors.
 
 
 1.2.0 (June 5, 2013)