From f6faf723288d7cf83ca30b946d6b9306aaa5537f Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 15:21:32 -0700 Subject: [PATCH 1/9] move Distance class to hooks module --- beets/autotag/hooks.py | 292 +++++++++++++++++++++++++++++++++++++++++ beets/autotag/match.py | 268 +------------------------------------ beets/plugins.py | 10 +- beetsplug/beatport.py | 3 +- beetsplug/chroma.py | 3 +- beetsplug/discogs.py | 6 +- test/test_autotag.py | 142 ++++++++++---------- 7 files changed, 379 insertions(+), 345 deletions(-) diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py index 96de6b674..c29cf576f 100644 --- a/beets/autotag/hooks.py +++ b/beets/autotag/hooks.py @@ -15,9 +15,13 @@ """Glue between metadata sources and the matching logic.""" import logging from collections import namedtuple +import re from beets import plugins +from beets import config from beets.autotag import mb +from beets.util import levenshtein +from unidecode import unidecode log = logging.getLogger('beets') @@ -158,6 +162,294 @@ class TrackInfo(object): if isinstance(value, str): setattr(self, fld, value.decode(codec, 'ignore')) + +# Candidate distance scoring. + +# Parameters for string distance function. +# Words that can be moved to the end of a string using a comma. +SD_END_WORDS = ['the', 'a', 'an'] +# Reduced weights for certain portions of the string. +SD_PATTERNS = [ + (r'^the ', 0.1), + (r'[\[\(]?(ep|single)[\]\)]?', 0.0), + (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1), + (r'\(.*?\)', 0.3), + (r'\[.*?\]', 0.3), + (r'(, )?(pt\.|part) .+', 0.2), +] +# Replacements to use before testing distance. +SD_REPLACE = [ + (r'&', 'and'), +] + +def _string_dist_basic(str1, str2): + """Basic edit distance between two strings, ignoring + non-alphanumeric characters and case. Comparisons are based on a + transliteration/lowering to ASCII characters. Normalized by string + length. 
+ """ + str1 = unidecode(str1) + str2 = unidecode(str2) + str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) + str2 = re.sub(r'[^a-z0-9]', '', str2.lower()) + if not str1 and not str2: + return 0.0 + return levenshtein(str1, str2) / float(max(len(str1), len(str2))) + +def string_dist(str1, str2): + """Gives an "intuitive" edit distance between two strings. This is + an edit distance, normalized by the string length, with a number of + tweaks that reflect intuition about text. + """ + str1 = str1.lower() + str2 = str2.lower() + + # Don't penalize strings that move certain words to the end. For + # example, "the something" should be considered equal to + # "something, the". + for word in SD_END_WORDS: + if str1.endswith(', %s' % word): + str1 = '%s %s' % (word, str1[:-len(word)-2]) + if str2.endswith(', %s' % word): + str2 = '%s %s' % (word, str2[:-len(word)-2]) + + # Perform a couple of basic normalizing substitutions. + for pat, repl in SD_REPLACE: + str1 = re.sub(pat, repl, str1) + str2 = re.sub(pat, repl, str2) + + # Change the weight for certain string portions matched by a set + # of regular expressions. We gradually change the strings and build + # up penalties associated with parts of the string that were + # deleted. + base_dist = _string_dist_basic(str1, str2) + penalty = 0.0 + for pat, weight in SD_PATTERNS: + # Get strings that drop the pattern. + case_str1 = re.sub(pat, '', str1) + case_str2 = re.sub(pat, '', str2) + + if case_str1 != str1 or case_str2 != str2: + # If the pattern was present (i.e., it is deleted in the + # the current case), recalculate the distances for the + # modified strings. + case_dist = _string_dist_basic(case_str1, case_str2) + case_delta = max(0.0, base_dist - case_dist) + if case_delta == 0.0: + continue + + # Shift our baseline strings down (to avoid rematching the + # same part of the string) and add a scaled distance + # amount to the penalties. 
+ str1 = case_str1 + str2 = case_str2 + base_dist = case_dist + penalty += weight * case_delta + dist = base_dist + penalty + + return dist + +class Distance(object): + """Keeps track of multiple distance penalties. Provides a single + weighted distance for all penalties as well as a weighted distance + for each individual penalty. + """ + def __init__(self): + self._penalties = {} + + weights_view = config['match']['distance_weights'] + self._weights = {} + for key in weights_view.keys(): + self._weights[key] = weights_view[key].as_number() + + + # Access the components and their aggregates. + + @property + def distance(self): + """Returns a weighted and normalised distance across all + penalties. + """ + dist_max = self.max_distance + if dist_max: + return self.raw_distance / self.max_distance + return 0.0 + + @property + def max_distance(self): + """Returns the maximum distance penalty. + """ + dist_max = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_max += len(penalty) * self._weights[key] + return dist_max + + @property + def raw_distance(self): + """Returns the raw (denormalized) distance. + """ + dist_raw = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_raw += sum(penalty) * self._weights[key] + return dist_raw + + @property + def sorted(self): + """Returns a list of (dist, key) pairs, with `dist` being the + weighted distance, sorted from highest to lowest. Does not + include penalties with a zero value. + """ + list_ = [] + for key in self._penalties: + dist = self[key] + if dist: + list_.append((dist, key)) + # Convert distance into a negative float we can sort items in ascending + # order (for keys, when the penalty is equal) and still get the items + # with the biggest distance first. + return sorted(list_, key=lambda (dist, key): (0-dist, key)) + + + # Behave like a float. 
+ + def __cmp__(self, other): + return cmp(self.distance, other) + + def __float__(self): + return self.distance + def __sub__(self, other): + return self.distance - other + + def __rsub__(self, other): + return other - self.distance + + + # Behave like a dict. + + def __getitem__(self, key): + """Returns the weighted distance for a named penalty. + """ + dist = sum(self._penalties[key]) * self._weights[key] + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + def __iter__(self): + return iter(self.sorted) + + def __len__(self): + return len(self.sorted) + + def update(self, dist): + """Adds all the distance penalties from `dist`. + """ + if not isinstance(dist, Distance): + raise ValueError( + '`dist` must be a Distance object. It is: %r' % dist) + for key, penalties in dist._penalties.iteritems(): + self._penalties.setdefault(key, []).extend(penalties) + + + # Adding components. + + def _eq(self, value1, value2): + """Returns True if `value1` is equal to `value2`. `value1` may + be a compiled regular expression, in which case it will be + matched against `value2`. + """ + if isinstance(value1, re._pattern_type): + return bool(value1.match(value2)) + return value1 == value2 + + def add(self, key, dist): + """Adds a distance penalty. `key` must correspond with a + configured weight setting. `dist` must be a float between 0.0 + and 1.0, and will be added to any existing distance penalties + for the same key. + """ + if not 0.0 <= dist <= 1.0: + raise ValueError( + '`dist` must be between 0.0 and 1.0. It is: %r' % dist) + self._penalties.setdefault(key, []).append(dist) + + def add_equality(self, key, value, options): + """Adds a distance penalty of 1.0 if `value` doesn't match any + of the values in `options`. If an option is a compiled regular + expression, it will be considered equal if it matches against + `value`. 
+ """ + if not isinstance(options, (list, tuple)): + options = [options] + for opt in options: + if self._eq(opt, value): + dist = 0.0 + break + else: + dist = 1.0 + self.add(key, dist) + + def add_expr(self, key, expr): + """Adds a distance penalty of 1.0 if `expr` evaluates to True, + or 0.0. + """ + if expr: + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_number(self, key, number1, number2): + """Adds a distance penalty of 1.0 for each number of difference + between `number1` and `number2`, or 0.0 when there is no + difference. Use this when there is no upper limit on the + difference between the two numbers. + """ + diff = abs(number1 - number2) + if diff: + for i in range(diff): + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_priority(self, key, value, options): + """Adds a distance penalty that corresponds to the position at + which `value` appears in `options`. A distance penalty of 0.0 + for the first option, or 1.0 if there is no matching option. If + an option is a compiled regular expression, it will be + considered equal if it matches against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + unit = 1.0 / (len(options) or 1) + for i, opt in enumerate(options): + if self._eq(opt, value): + dist = i * unit + break + else: + dist = 1.0 + self.add(key, dist) + + def add_ratio(self, key, number1, number2): + """Adds a distance penalty for `number1` as a ratio of `number2`. + `number1` is bound at 0 and `number2`. + """ + number = float(max(min(number1, number2), 0)) + if number2: + dist = number / number2 + else: + dist = 0.0 + self.add(key, dist) + + def add_string(self, key, str1, str2): + """Adds a distance penalty based on the edit distance between + `str1` and `str2`. + """ + dist = string_dist(str1, str2) + self.add(key, dist) + + +# Structures that compose all the information for a candidate match. 
+ AlbumMatch = namedtuple('AlbumMatch', ['distance', 'info', 'mapping', 'extra_items', 'extra_tracks']) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index cc32d6e8f..455d22be7 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -21,34 +21,16 @@ import datetime import logging import re from munkres import Munkres -from unidecode import unidecode from beets import plugins from beets import config -from beets.util import levenshtein, plurality +from beets.util import plurality from beets.util.enumeration import enum from beets.autotag import hooks # A configuration view for the distance weights. weights = config['match']['distance_weights'] -# Parameters for string distance function. -# Words that can be moved to the end of a string using a comma. -SD_END_WORDS = ['the', 'a', 'an'] -# Reduced weights for certain portions of the string. -SD_PATTERNS = [ - (r'^the ', 0.1), - (r'[\[\(]?(ep|single)[\]\)]?', 0.0), - (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1), - (r'\(.*?\)', 0.3), - (r'\[.*?\]', 0.3), - (r'(, )?(pt\.|part) .+', 0.2), -] -# Replacements to use before testing distance. -SD_REPLACE = [ - (r'&', 'and'), -] - # Recommendation enumeration. recommendation = enum('none', 'low', 'medium', 'strong', name='recommendation') @@ -64,73 +46,6 @@ log = logging.getLogger('beets') # Primary matching functionality. -def _string_dist_basic(str1, str2): - """Basic edit distance between two strings, ignoring - non-alphanumeric characters and case. Comparisons are based on a - transliteration/lowering to ASCII characters. Normalized by string - length. - """ - str1 = unidecode(str1) - str2 = unidecode(str2) - str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) - str2 = re.sub(r'[^a-z0-9]', '', str2.lower()) - if not str1 and not str2: - return 0.0 - return levenshtein(str1, str2) / float(max(len(str1), len(str2))) - -def string_dist(str1, str2): - """Gives an "intuitive" edit distance between two strings. 
This is - an edit distance, normalized by the string length, with a number of - tweaks that reflect intuition about text. - """ - str1 = str1.lower() - str2 = str2.lower() - - # Don't penalize strings that move certain words to the end. For - # example, "the something" should be considered equal to - # "something, the". - for word in SD_END_WORDS: - if str1.endswith(', %s' % word): - str1 = '%s %s' % (word, str1[:-len(word)-2]) - if str2.endswith(', %s' % word): - str2 = '%s %s' % (word, str2[:-len(word)-2]) - - # Perform a couple of basic normalizing substitutions. - for pat, repl in SD_REPLACE: - str1 = re.sub(pat, repl, str1) - str2 = re.sub(pat, repl, str2) - - # Change the weight for certain string portions matched by a set - # of regular expressions. We gradually change the strings and build - # up penalties associated with parts of the string that were - # deleted. - base_dist = _string_dist_basic(str1, str2) - penalty = 0.0 - for pat, weight in SD_PATTERNS: - # Get strings that drop the pattern. - case_str1 = re.sub(pat, '', str1) - case_str2 = re.sub(pat, '', str2) - - if case_str1 != str1 or case_str2 != str2: - # If the pattern was present (i.e., it is deleted in the - # the current case), recalculate the distances for the - # modified strings. - case_dist = _string_dist_basic(case_str1, case_str2) - case_delta = max(0.0, base_dist - case_dist) - if case_delta == 0.0: - continue - - # Shift our baseline strings down (to avoid rematching the - # same part of the string) and add a scaled distance - # amount to the penalties. - str1 = case_str1 - str2 = case_str2 - base_dist = case_dist - penalty += weight * case_delta - dist = base_dist + penalty - - return dist - def current_metadata(items): """Extract the likely current metadata for an album given a list of its items. 
Return two dictionaries: @@ -187,189 +102,12 @@ def track_index_changed(item, track_info): """ return item.track not in (track_info.medium_index, track_info.index) -class Distance(object): - """Keeps track of multiple distance penalties. Provides a single weighted - distance for all penalties as well as a weighted distance for each - individual penalty. - """ - def __cmp__(self, other): - return cmp(self.distance, other) - - def __float__(self): - return self.distance - - def __getitem__(self, key): - """Returns the weighted distance for a named penalty. - """ - dist = sum(self._penalties[key]) * weights[key].as_number() - dist_max = self.max_distance - if dist_max: - return dist / dist_max - return 0.0 - - def __init__(self): - self._penalties = {} - - def __iter__(self): - return iter(self.sorted) - - def __len__(self): - return len(self.sorted) - - def __sub__(self, other): - return self.distance - other - - def __rsub__(self, other): - return other - self.distance - - def _eq(self, value1, value2): - """Returns True if `value1` is equal to `value2`. `value1` may be a - compiled regular expression, in which case it will be matched against - `value2`. - """ - if isinstance(value1, re._pattern_type): - return bool(value1.match(value2)) - return value1 == value2 - - def add(self, key, dist): - """Adds a distance penalty. `key` must correspond with a configured - weight setting. `dist` must be a float between 0.0 and 1.0, and will be - added to any existing distance penalties for the same key. - """ - if not 0.0 <= dist <= 1.0: - raise ValueError( - '`dist` must be between 0.0 and 1.0. It is: %r' % dist) - self._penalties.setdefault(key, []).append(dist) - - def add_equality(self, key, value, options): - """Adds a distance penalty of 1.0 if `value` doesn't match any of the - values in `options`. If an option is a compiled regular expression, it - will be considered equal if it matches against `value`. 
- """ - if not isinstance(options, (list, tuple)): - options = [options] - for opt in options: - if self._eq(opt, value): - dist = 0.0 - break - else: - dist = 1.0 - self.add(key, dist) - - def add_expr(self, key, expr): - """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0. - """ - if expr: - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_number(self, key, number1, number2): - """Adds a distance penalty of 1.0 for each number of difference between - `number1` and `number2`, or 0.0 when there is no difference. Use this - when there is no upper limit on the difference between the two numbers. - """ - diff = abs(number1 - number2) - if diff: - for i in range(diff): - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_priority(self, key, value, options): - """Adds a distance penalty that corresponds to the position at which - `value` appears in `options`. A distance penalty of 0.0 for the first - option, or 1.0 if there is no matching option. If an option is a - compiled regular expression, it will be considered equal if it matches - against `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - unit = 1.0 / (len(options) or 1) - for i, opt in enumerate(options): - if self._eq(opt, value): - dist = i * unit - break - else: - dist = 1.0 - self.add(key, dist) - - def add_ratio(self, key, number1, number2): - """Adds a distance penalty for `number1` as a ratio of `number2`. - `number1` is bound at 0 and `number2`. - """ - number = float(max(min(number1, number2), 0)) - if number2: - dist = number / number2 - else: - dist = 0.0 - self.add(key, dist) - - def add_string(self, key, str1, str2): - """Adds a distance penalty based on the edit distance between `str1` - and `str2`. - """ - dist = string_dist(str1, str2) - self.add(key, dist) - - @property - def distance(self): - """Returns a weighted and normalised distance across all penalties. 
- """ - dist_max = self.max_distance - if dist_max: - return self.raw_distance / self.max_distance - return 0.0 - - @property - def max_distance(self): - """Returns the maximum distance penalty. - """ - dist_max = 0.0 - for key, penalty in self._penalties.iteritems(): - dist_max += len(penalty) * weights[key].as_number() - return dist_max - - @property - def raw_distance(self): - """Returns the raw (denormalised) distance. - """ - dist_raw = 0.0 - for key, penalty in self._penalties.iteritems(): - dist_raw += sum(penalty) * weights[key].as_number() - return dist_raw - - @property - def sorted(self): - """Returns a list of (dist, key) pairs, with `dist` being the weighted - distance, sorted from highest to lowest. Does not include penalties - with a zero value. - """ - list_ = [] - for key in self._penalties: - dist = self[key] - if dist: - list_.append((dist, key)) - # Convert distance into a negative float we can sort items in ascending - # order (for keys, when the penalty is equal) and still get the items - # with the biggest distance first. - return sorted(list_, key=lambda (dist, key): (0-dist, key)) - - def update(self, dist): - """Adds all the distance penalties from `dist`. - """ - if not isinstance(dist, Distance): - raise ValueError( - '`dist` must be a Distance object. It is: %r' % dist) - for key, penalties in dist._penalties.iteritems(): - self._penalties.setdefault(key, []).extend(penalties) - def track_distance(item, track_info, incl_artist=False): """Determines the significance of a track metadata change. Returns a Distance object. `incl_artist` indicates that a distance component should be included for the track artist (i.e., for various-artist releases). """ - dist = Distance() + dist = hooks.Distance() # Length. if track_info.length: @@ -410,7 +148,7 @@ def distance(items, album_info, mapping): """ likelies, _ = current_metadata(items) - dist = Distance() + dist = hooks.Distance() # Artist, if not various. 
if not album_info.va: diff --git a/beets/plugins.py b/beets/plugins.py index d0c0a9654..1df98fac6 100755 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -67,13 +67,13 @@ class BeetsPlugin(object): """Should return a Distance object to be added to the distance for every track comparison. """ - return beets.autotag.match.Distance() + return beets.autotag.hooks.Distance() def album_distance(self, items, album_info, mapping): """Should return a Distance object to be added to the distance for every album-level comparison. """ - return beets.autotag.match.Distance() + return beets.autotag.hooks.Distance() def candidates(self, items, artist, album, va_likely): """Should return a sequence of AlbumInfo objects that match the @@ -244,14 +244,16 @@ def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. Returns a Distance object. """ - dist = beets.autotag.match.Distance() + from beets.autotag.hooks import Distance + dist = Distance() for plugin in find_plugins(): dist.update(plugin.track_distance(item, info)) return dist def album_distance(items, album_info, mapping): """Returns the album distance calculated by plugins.""" - dist = beets.autotag.match.Distance() + from beets.autotag.hooks import Distance + dist = Distance() for plugin in find_plugins(): dist.update(plugin.album_distance(items, album_info, mapping)) return dist diff --git a/beetsplug/beatport.py b/beetsplug/beatport.py index c68901cc1..05e33637c 100644 --- a/beetsplug/beatport.py +++ b/beetsplug/beatport.py @@ -20,8 +20,7 @@ from datetime import datetime, timedelta import requests -from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.autotag.match import Distance +from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance from beets.plugins import BeetsPlugin log = logging.getLogger('beets') diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 006f85db0..83f67c0a9 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -21,7 +21,6 @@ from 
beets import util from beets import config from beets.util import confit from beets.autotag import hooks -from beets.autotag.match import Distance import acoustid import logging from collections import defaultdict @@ -114,7 +113,7 @@ def _all_releases(items): class AcoustidPlugin(plugins.BeetsPlugin): def track_distance(self, item, info): - dist = Distance() + dist = hooks.Distance() if item.path not in _matches or not info.track_id: # Match failed or no track ID. return dist diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 822ed59e3..e9e6477b1 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -15,11 +15,9 @@ """Adds Discogs album search support to the autotagger. Requires the discogs-client library. """ -from beets import config -from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.autotag.match import current_metadata, Distance, VA_ARTISTS +from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance from beets.plugins import BeetsPlugin -from discogs_client import Artist, DiscogsAPIError, Release, Search +from discogs_client import DiscogsAPIError, Release, Search import beets import discogs_client import logging diff --git a/test/test_autotag.py b/test/test_autotag.py index dc75ee0ab..13c167fcf 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -23,13 +23,13 @@ import _common from _common import unittest from beets import autotag from beets.autotag import match -from beets.autotag.match import Distance +from beets.autotag.hooks import Distance, string_dist from beets.library import Item from beets.util import plurality from beets.autotag import AlbumInfo, TrackInfo from beets import config -class PluralityTest(unittest.TestCase): +class PluralityTest(_common.TestCase): def test_plurality_consensus(self): objs = [1, 1, 1, 1] obj, freq = plurality(objs) @@ -106,8 +106,9 @@ def _make_trackinfo(): TrackInfo(u'three', None, u'some artist', length=1, index=3), ] -class DistanceTest(unittest.TestCase): +class 
DistanceTest(_common.TestCase): def setUp(self): + super(DistanceTest, self).setUp() self.dist = Distance() def test_add(self): @@ -176,62 +177,66 @@ class DistanceTest(unittest.TestCase): self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) def test_add_string(self): - dist = match.string_dist(u'abc', u'bcd') + dist = string_dist(u'abc', u'bcd') self.dist.add_string('string', u'abc', u'bcd') self.assertEqual(self.dist._penalties['string'], [dist]) def test_distance(self): config['match']['distance_weights']['album'] = 2.0 config['match']['distance_weights']['medium'] = 1.0 - self.dist.add('album', 0.5) - self.dist.add('media', 0.25) - self.dist.add('media', 0.75) - self.assertEqual(self.dist.distance, 0.5) + dist = Distance() + dist.add('album', 0.5) + dist.add('media', 0.25) + dist.add('media', 0.75) + self.assertEqual(dist.distance, 0.5) # __getitem__() - self.assertEqual(self.dist['album'], 0.25) - self.assertEqual(self.dist['media'], 0.25) + self.assertEqual(dist['album'], 0.25) + self.assertEqual(dist['media'], 0.25) def test_max_distance(self): config['match']['distance_weights']['album'] = 3.0 config['match']['distance_weights']['medium'] = 1.0 - self.dist.add('album', 0.5) - self.dist.add('medium', 0.0) - self.dist.add('medium', 0.0) - self.assertEqual(self.dist.max_distance, 5.0) + dist = Distance() + dist.add('album', 0.5) + dist.add('medium', 0.0) + dist.add('medium', 0.0) + self.assertEqual(dist.max_distance, 5.0) def test_operators(self): config['match']['distance_weights']['source'] = 1.0 config['match']['distance_weights']['album'] = 2.0 config['match']['distance_weights']['medium'] = 1.0 - self.dist.add('source', 0.0) - self.dist.add('album', 0.5) - self.dist.add('medium', 0.25) - self.dist.add('medium', 0.75) - self.assertEqual(len(self.dist), 2) - self.assertEqual(list(self.dist), [(0.2, 'album'), (0.2, 'medium')]) - self.assertTrue(self.dist == 0.4) - self.assertTrue(self.dist < 1.0) - self.assertTrue(self.dist > 0.0) - 
self.assertEqual(self.dist - 0.4, 0.0) - self.assertEqual(0.4 - self.dist, 0.0) - self.assertEqual(float(self.dist), 0.4) + dist = Distance() + dist.add('source', 0.0) + dist.add('album', 0.5) + dist.add('medium', 0.25) + dist.add('medium', 0.75) + self.assertEqual(len(dist), 2) + self.assertEqual(list(dist), [(0.2, 'album'), (0.2, 'medium')]) + self.assertTrue(dist == 0.4) + self.assertTrue(dist < 1.0) + self.assertTrue(dist > 0.0) + self.assertEqual(dist - 0.4, 0.0) + self.assertEqual(0.4 - dist, 0.0) + self.assertEqual(float(dist), 0.4) def test_raw_distance(self): config['match']['distance_weights']['album'] = 3.0 config['match']['distance_weights']['medium'] = 1.0 - self.dist.add('album', 0.5) - self.dist.add('medium', 0.25) - self.dist.add('medium', 0.5) - self.assertEqual(self.dist.raw_distance, 2.25) + dist = Distance() + dist.add('album', 0.5) + dist.add('medium', 0.25) + dist.add('medium', 0.5) + self.assertEqual(dist.raw_distance, 2.25) def test_sorted(self): config['match']['distance_weights']['album'] = 4.0 config['match']['distance_weights']['medium'] = 2.0 - - self.dist.add('album', 0.1875) - self.dist.add('medium', 0.75) - self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')]) + dist = Distance() + dist.add('album', 0.1875) + dist.add('medium', 0.75) + self.assertEqual(dist.sorted, [(0.25, 'medium'), (0.125, 'album')]) # Sort by key if distance is equal. 
dist = Distance() @@ -240,20 +245,21 @@ class DistanceTest(unittest.TestCase): self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')]) def test_update(self): - self.dist.add('album', 0.5) - self.dist.add('media', 1.0) + dist1 = Distance() + dist1.add('album', 0.5) + dist1.add('media', 1.0) - dist = Distance() - dist.add('album', 0.75) - dist.add('album', 0.25) - self.dist.add('media', 0.05) + dist2 = Distance() + dist2.add('album', 0.75) + dist2.add('album', 0.25) + dist2.add('media', 0.05) - self.dist.update(dist) + dist1.update(dist2) - self.assertEqual(self.dist._penalties, {'album': [0.5, 0.75, 0.25], - 'media': [1.0, 0.05]}) + self.assertEqual(dist1._penalties, {'album': [0.5, 0.75, 0.25], + 'media': [1.0, 0.05]}) -class TrackDistanceTest(unittest.TestCase): +class TrackDistanceTest(_common.TestCase): def test_identical_tracks(self): item = _make_item(u'one', 1) info = _make_trackinfo()[0] @@ -280,7 +286,7 @@ class TrackDistanceTest(unittest.TestCase): dist = match.track_distance(item, info, incl_artist=True) self.assertEqual(dist, 0.0) -class AlbumDistanceTest(unittest.TestCase): +class AlbumDistanceTest(_common.TestCase): def _mapping(self, items, info): out = {} for i, t in zip(items, info.tracks): @@ -863,77 +869,77 @@ class ApplyCompilationTest(_common.TestCase, ApplyTestUtil): class StringDistanceTest(unittest.TestCase): def test_equal_strings(self): - dist = match.string_dist(u'Some String', u'Some String') + dist = string_dist(u'Some String', u'Some String') self.assertEqual(dist, 0.0) def test_different_strings(self): - dist = match.string_dist(u'Some String', u'Totally Different') + dist = string_dist(u'Some String', u'Totally Different') self.assertNotEqual(dist, 0.0) def test_punctuation_ignored(self): - dist = match.string_dist(u'Some String', u'Some.String!') + dist = string_dist(u'Some String', u'Some.String!') self.assertEqual(dist, 0.0) def test_case_ignored(self): - dist = match.string_dist(u'Some String', u'sOME sTring') + dist = 
string_dist(u'Some String', u'sOME sTring') self.assertEqual(dist, 0.0) def test_leading_the_has_lower_weight(self): - dist1 = match.string_dist(u'XXX Band Name', u'Band Name') - dist2 = match.string_dist(u'The Band Name', u'Band Name') + dist1 = string_dist(u'XXX Band Name', u'Band Name') + dist2 = string_dist(u'The Band Name', u'Band Name') self.assert_(dist2 < dist1) def test_parens_have_lower_weight(self): - dist1 = match.string_dist(u'One .Two.', u'One') - dist2 = match.string_dist(u'One (Two)', u'One') + dist1 = string_dist(u'One .Two.', u'One') + dist2 = string_dist(u'One (Two)', u'One') self.assert_(dist2 < dist1) def test_brackets_have_lower_weight(self): - dist1 = match.string_dist(u'One .Two.', u'One') - dist2 = match.string_dist(u'One [Two]', u'One') + dist1 = string_dist(u'One .Two.', u'One') + dist2 = string_dist(u'One [Two]', u'One') self.assert_(dist2 < dist1) def test_ep_label_has_zero_weight(self): - dist = match.string_dist(u'My Song (EP)', u'My Song') + dist = string_dist(u'My Song (EP)', u'My Song') self.assertEqual(dist, 0.0) def test_featured_has_lower_weight(self): - dist1 = match.string_dist(u'My Song blah Someone', u'My Song') - dist2 = match.string_dist(u'My Song feat Someone', u'My Song') + dist1 = string_dist(u'My Song blah Someone', u'My Song') + dist2 = string_dist(u'My Song feat Someone', u'My Song') self.assert_(dist2 < dist1) def test_postfix_the(self): - dist = match.string_dist(u'The Song Title', u'Song Title, The') + dist = string_dist(u'The Song Title', u'Song Title, The') self.assertEqual(dist, 0.0) def test_postfix_a(self): - dist = match.string_dist(u'A Song Title', u'Song Title, A') + dist = string_dist(u'A Song Title', u'Song Title, A') self.assertEqual(dist, 0.0) def test_postfix_an(self): - dist = match.string_dist(u'An Album Title', u'Album Title, An') + dist = string_dist(u'An Album Title', u'Album Title, An') self.assertEqual(dist, 0.0) def test_empty_strings(self): - dist = match.string_dist(u'', u'') + dist = 
string_dist(u'', u'') self.assertEqual(dist, 0.0) def test_solo_pattern(self): # Just make sure these don't crash. - match.string_dist(u'The ', u'') - match.string_dist(u'(EP)', u'(EP)') - match.string_dist(u', An', u'') + string_dist(u'The ', u'') + string_dist(u'(EP)', u'(EP)') + string_dist(u', An', u'') def test_heuristic_does_not_harm_distance(self): - dist = match.string_dist(u'Untitled', u'[Untitled]') + dist = string_dist(u'Untitled', u'[Untitled]') self.assertEqual(dist, 0.0) def test_ampersand_expansion(self): - dist = match.string_dist(u'And', u'&') + dist = string_dist(u'And', u'&') self.assertEqual(dist, 0.0) def test_accented_characters(self): - dist = match.string_dist(u'\xe9\xe1\xf1', u'ean') + dist = string_dist(u'\xe9\xe1\xf1', u'ean') self.assertEqual(dist, 0.0) def suite(): From c85e43ee2a6ea0ae70a899f3778cbadf104f799f Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 15:24:25 -0700 Subject: [PATCH 2/9] finish removing setUp from DistanceTest I had to do this for the tests that use the config, so I thought I might as well finish the job. 
--- test/test_autotag.py | 89 +++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/test/test_autotag.py b/test/test_autotag.py index 13c167fcf..4a1eba17b 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -107,79 +107,82 @@ def _make_trackinfo(): ] class DistanceTest(_common.TestCase): - def setUp(self): - super(DistanceTest, self).setUp() - self.dist = Distance() - def test_add(self): - self.dist.add('add', 1.0) - self.assertEqual(self.dist._penalties, {'add': [1.0]}) + dist = Distance() + dist.add('add', 1.0) + self.assertEqual(dist._penalties, {'add': [1.0]}) def test_add_equality(self): - self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi']) - self.assertEqual(self.dist._penalties['equality'], [0.0]) + dist = Distance() + dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi']) + self.assertEqual(dist._penalties['equality'], [0.0]) - self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi']) - self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0]) + dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi']) + self.assertEqual(dist._penalties['equality'], [0.0, 1.0]) - self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I)) - self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0]) + dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I)) + self.assertEqual(dist._penalties['equality'], [0.0, 1.0, 0.0]) def test_add_expr(self): - self.dist.add_expr('expr', True) - self.assertEqual(self.dist._penalties['expr'], [1.0]) + dist = Distance() + dist.add_expr('expr', True) + self.assertEqual(dist._penalties['expr'], [1.0]) - self.dist.add_expr('expr', False) - self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0]) + dist.add_expr('expr', False) + self.assertEqual(dist._penalties['expr'], [1.0, 0.0]) def test_add_number(self): + dist = Distance() # Add a full penalty for each number of difference between two numbers. 
- self.dist.add_number('number', 1, 1) - self.assertEqual(self.dist._penalties['number'], [0.0]) + dist.add_number('number', 1, 1) + self.assertEqual(dist._penalties['number'], [0.0]) - self.dist.add_number('number', 1, 2) - self.assertEqual(self.dist._penalties['number'], [0.0, 1.0]) + dist.add_number('number', 1, 2) + self.assertEqual(dist._penalties['number'], [0.0, 1.0]) - self.dist.add_number('number', 2, 1) - self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0]) + dist.add_number('number', 2, 1) + self.assertEqual(dist._penalties['number'], [0.0, 1.0, 1.0]) - self.dist.add_number('number', -1, 2) - self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0, + dist.add_number('number', -1, 2) + self.assertEqual(dist._penalties['number'], [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]) def test_add_priority(self): - self.dist.add_priority('priority', 'abc', 'abc') - self.assertEqual(self.dist._penalties['priority'], [0.0]) + dist = Distance() + dist.add_priority('priority', 'abc', 'abc') + self.assertEqual(dist._penalties['priority'], [0.0]) - self.dist.add_priority('priority', 'def', ['abc', 'def']) - self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5]) + dist.add_priority('priority', 'def', ['abc', 'def']) + self.assertEqual(dist._penalties['priority'], [0.0, 0.5]) - self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef', + dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef', re.compile('GH', re.I)]) - self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75]) + self.assertEqual(dist._penalties['priority'], [0.0, 0.5, 0.75]) - self.dist.add_priority('priority', 'xyz', ['abc', 'def']) - self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75, + dist.add_priority('priority', 'xyz', ['abc', 'def']) + self.assertEqual(dist._penalties['priority'], [0.0, 0.5, 0.75, 1.0]) def test_add_ratio(self): - self.dist.add_ratio('ratio', 25, 100) - self.assertEqual(self.dist._penalties['ratio'], [0.25]) + dist = Distance() + 
dist.add_ratio('ratio', 25, 100) + self.assertEqual(dist._penalties['ratio'], [0.25]) - self.dist.add_ratio('ratio', 10, 5) - self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0]) + dist.add_ratio('ratio', 10, 5) + self.assertEqual(dist._penalties['ratio'], [0.25, 1.0]) - self.dist.add_ratio('ratio', -5, 5) - self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0]) + dist.add_ratio('ratio', -5, 5) + self.assertEqual(dist._penalties['ratio'], [0.25, 1.0, 0.0]) - self.dist.add_ratio('ratio', 5, 0) - self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) + dist.add_ratio('ratio', 5, 0) + self.assertEqual(dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) def test_add_string(self): - dist = string_dist(u'abc', u'bcd') - self.dist.add_string('string', u'abc', u'bcd') - self.assertEqual(self.dist._penalties['string'], [dist]) + dist = Distance() + sdist = string_dist(u'abc', u'bcd') + dist.add_string('string', u'abc', u'bcd') + self.assertEqual(dist._penalties['string'], [sdist]) def test_distance(self): config['match']['distance_weights']['album'] = 2.0 From c818663539e096f2a74e7d9a750fa91c74a80caa Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 15:31:25 -0700 Subject: [PATCH 3/9] replace Distance.sorted() with .items() This is an effort to make the distance object feel slightly more dict-like. The name changed and order of tuples is reversed: we now yield (key, value) instead of (value, key), which I think is a little more intuitive. 
--- beets/autotag/hooks.py | 18 +++++++++--------- beets/ui/commands.py | 6 +++--- test/test_autotag.py | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py index c29cf576f..6156abf2c 100644 --- a/beets/autotag/hooks.py +++ b/beets/autotag/hooks.py @@ -294,8 +294,8 @@ class Distance(object): return dist_raw @property - def sorted(self): - """Returns a list of (dist, key) pairs, with `dist` being the + def items(self): + """Returns a list of (key, dist) pairs, with `dist` being the weighted distance, sorted from highest to lowest. Does not include penalties with a zero value. """ @@ -303,11 +303,11 @@ class Distance(object): for key in self._penalties: dist = self[key] if dist: - list_.append((dist, key)) - # Convert distance into a negative float we can sort items in ascending - # order (for keys, when the penalty is equal) and still get the items - # with the biggest distance first. - return sorted(list_, key=lambda (dist, key): (0-dist, key)) + list_.append((key, dist)) + # Convert distance into a negative float we can sort items in + # ascending order (for keys, when the penalty is equal) and + # still get the items with the biggest distance first. + return sorted(list_, key=lambda (key, dist): (0-dist, key)) # Behave like a float. @@ -336,10 +336,10 @@ class Distance(object): return 0.0 def __iter__(self): - return iter(self.sorted) + return iter(self.items) def __len__(self): - return len(self.sorted) + return len(self.items) def update(self, dist): """Adds all the distance penalties from `dist`. diff --git a/beets/ui/commands.py b/beets/ui/commands.py index dfe3585c1..0512cb585 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -164,11 +164,11 @@ def dist_string(dist): return out def penalty_string(distance, limit=None): - """Returns a colorized string that indicates all the penalties applied to - a distance object. 
+ """Returns a colorized string that indicates all the penalties + applied to a distance object. """ penalties = [] - for _, key in distance: + for key, _ in distance: key = key.replace('album_', '') key = key.replace('track_', '') key = key.replace('_', ' ') diff --git a/test/test_autotag.py b/test/test_autotag.py index 4a1eba17b..889584e0c 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -216,7 +216,7 @@ class DistanceTest(_common.TestCase): dist.add('medium', 0.25) dist.add('medium', 0.75) self.assertEqual(len(dist), 2) - self.assertEqual(list(dist), [(0.2, 'album'), (0.2, 'medium')]) + self.assertEqual(list(dist), [('album', 0.2), ('medium', 0.2)]) self.assertTrue(dist == 0.4) self.assertTrue(dist < 1.0) self.assertTrue(dist > 0.0) @@ -233,19 +233,19 @@ class DistanceTest(_common.TestCase): dist.add('medium', 0.5) self.assertEqual(dist.raw_distance, 2.25) - def test_sorted(self): + def test_items(self): config['match']['distance_weights']['album'] = 4.0 config['match']['distance_weights']['medium'] = 2.0 dist = Distance() dist.add('album', 0.1875) dist.add('medium', 0.75) - self.assertEqual(dist.sorted, [(0.25, 'medium'), (0.125, 'album')]) + self.assertEqual(dist.items, [('medium', 0.25), ('album', 0.125)]) # Sort by key if distance is equal. dist = Distance() dist.add('album', 0.375) dist.add('medium', 0.75) - self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')]) + self.assertEqual(dist.items, [('album', 0.25), ('medium', 0.25)]) def test_update(self): dist1 = Distance() From 33ff001d0a6f3bc6802546c0fed5c6b6a03f8ae6 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 15:35:05 -0700 Subject: [PATCH 4/9] move track length params out of weights section These aren't really weights, so I'm moving them out of "distance_weights". 
--- beets/autotag/match.py | 11 ++++------- beets/config_default.yaml | 6 ++---- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 455d22be7..630a0de55 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -28,9 +28,6 @@ from beets.util import plurality from beets.util.enumeration import enum from beets.autotag import hooks -# A configuration view for the distance weights. -weights = config['match']['distance_weights'] - # Recommendation enumeration. recommendation = enum('none', 'low', 'medium', 'strong', name='recommendation') @@ -112,9 +109,9 @@ def track_distance(item, track_info, incl_artist=False): # Length. if track_info.length: diff = abs(item.length - track_info.length) - \ - weights['track_length_grace'].as_number() + config['match']['track_length_grace'].as_number() dist.add_ratio('track_length', diff, - weights['track_length_max'].as_number()) + config['match']['track_length_max'].as_number()) # Title. dist.add_string('track_title', item.title, track_info.title) @@ -294,10 +291,10 @@ def _recommendation(results): # Downgrade to the max rec if it is lower than the current rec for an # applied penalty. 
- keys = set(key for _, key in min_dist) + keys = set(key for key, _ in min_dist) if isinstance(results[0], hooks.AlbumMatch): for track_dist in min_dist.tracks.values(): - keys.update(key for _, key in track_dist) + keys.update(key for key, _ in track_dist) for key in keys: max_rec = config['match']['max_rec'][key].as_choice({ 'strong': recommendation.strong, diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 44cb51051..05ceb6272 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -85,8 +85,6 @@ match: track_title: strong track_artist: strong track_index: strong - track_length_grace: strong - track_length_max: strong track_length: strong track_id: strong distance_weights: @@ -107,8 +105,6 @@ match: track_title: 3.0 track_artist: 2.0 track_index: 1.0 - track_length_grace: 10 - track_length_max: 30 track_length: 2.0 track_id: 5.0 preferred: @@ -116,3 +112,5 @@ match: media: [] original_year: no ignored: [] + track_length_grace: 10 + track_length_max: 30 From 7983c94ef825bd30969b6cf4cda7a330bfa202e1 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 15:40:51 -0700 Subject: [PATCH 5/9] add keys() method to Distance --- beets/autotag/hooks.py | 16 +++++++++------- beets/autotag/match.py | 4 ++-- beets/ui/commands.py | 2 +- test/test_autotag.py | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py index 6156abf2c..5938c2308 100644 --- a/beets/autotag/hooks.py +++ b/beets/autotag/hooks.py @@ -267,7 +267,7 @@ class Distance(object): @property def distance(self): - """Returns a weighted and normalised distance across all + """Return a weighted and normalized distance across all penalties. """ dist_max = self.max_distance @@ -277,7 +277,7 @@ class Distance(object): @property def max_distance(self): - """Returns the maximum distance penalty. + """Return the maximum distance penalty (normalization factor). 
""" dist_max = 0.0 for key, penalty in self._penalties.iteritems(): @@ -286,16 +286,15 @@ class Distance(object): @property def raw_distance(self): - """Returns the raw (denormalized) distance. + """Return the raw (denormalized) distance. """ dist_raw = 0.0 for key, penalty in self._penalties.iteritems(): dist_raw += sum(penalty) * self._weights[key] return dist_raw - @property def items(self): - """Returns a list of (key, dist) pairs, with `dist` being the + """Return a list of (key, dist) pairs, with `dist` being the weighted distance, sorted from highest to lowest. Does not include penalties with a zero value. """ @@ -336,10 +335,13 @@ class Distance(object): return 0.0 def __iter__(self): - return iter(self.items) + return iter(self.items()) def __len__(self): - return len(self.items) + return len(self.items()) + + def keys(self): + return [key for key, _ in self.items()] def update(self, dist): """Adds all the distance penalties from `dist`. diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 630a0de55..1f1ba2d43 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -291,10 +291,10 @@ def _recommendation(results): # Downgrade to the max rec if it is lower than the current rec for an # applied penalty. - keys = set(key for key, _ in min_dist) + keys = set(min_dist.keys()) if isinstance(results[0], hooks.AlbumMatch): for track_dist in min_dist.tracks.values(): - keys.update(key for key, _ in track_dist) + keys.update(track_dist.keys()) for key in keys: max_rec = config['match']['max_rec'][key].as_choice({ 'strong': recommendation.strong, diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 0512cb585..f9e3bc6eb 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -168,7 +168,7 @@ def penalty_string(distance, limit=None): applied to a distance object. 
""" penalties = [] - for key, _ in distance: + for key in distance.keys(): key = key.replace('album_', '') key = key.replace('track_', '') key = key.replace('_', ' ') diff --git a/test/test_autotag.py b/test/test_autotag.py index 889584e0c..4684f9719 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -239,7 +239,7 @@ class DistanceTest(_common.TestCase): dist = Distance() dist.add('album', 0.1875) dist.add('medium', 0.75) - self.assertEqual(dist.items, [('medium', 0.25), ('album', 0.125)]) + self.assertEqual(dist.items(), [('medium', 0.25), ('album', 0.125)]) # Sort by key if distance is equal. dist = Distance() From 5c4b17685e9dfd7ee214e844c79bbbe846486a85 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 15:45:22 -0700 Subject: [PATCH 6/9] parameter name change --- beets/ui/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/ui/__init__.py b/beets/ui/__init__.py index 460320a34..f7288141f 100644 --- a/beets/ui/__init__.py +++ b/beets/ui/__init__.py @@ -366,7 +366,7 @@ def colorize(color, text): else: return text -def _colordiff(a, b, highlight='red', second_highlight='lightgray'): +def _colordiff(a, b, highlight='red', minor_highlight='lightgray'): """Given two values, return the same pair of strings except with their differences highlighted in the specified color. 
Strings are highlighted intelligently to show differences; other values are @@ -407,7 +407,7 @@ def _colordiff(a, b, highlight='red', second_highlight='lightgray'): if a[a_start:a_end].lower() != b[b_start:b_end].lower(): color = highlight else: - color = second_highlight + color = minor_highlight a_out.append(colorize(color, a[a_start:a_end])) b_out.append(colorize(color, b[b_start:b_end])) else: From 82991ce6145537bcf1eeddd9dc7d7f6de42ab3a9 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 15:53:06 -0700 Subject: [PATCH 7/9] documentation enhancements for new config opts --- docs/changelog.rst | 2 +- docs/reference/config.rst | 69 +++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 784d2eb88..5f7ce39cc 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -103,7 +103,7 @@ same MusicBrainz release group: beets re-identify the same release when re-importing existing files. * Prefer releases that are closest to the tagged ``year``. Tolerate files tagged with release or original year. -* The new :ref:`preferred_media` config option lets you prefer a certain media +* The new ``preferred_media`` config option lets you prefer a certain media type when the ``media`` field is unset on an album. * Apply minor penalties across a range of fields to differentiate between nearly identical releases: ``disctotal``, ``label``, ``catalognum``, diff --git a/docs/reference/config.rst b/docs/reference/config.rst index d320cd655..617700c53 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -395,40 +395,45 @@ max_rec As mentioned above, autotagger matches have *recommendations* that control how the UI behaves for a certain quality of match. The recommendation for a certain match is based on the overall distance calculation. 
But you can also control -the recommendation when a distance penalty is being applied for a specific -field by defining *maximum* recommendations for each field: +the recommendation when a specific distance penalty is applied by defining +*maximum* recommendations for each field: -To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here -are the defaults:: +To define maxima, use keys under ``max_rec:`` in the ``match`` section. The +defaults are "medium" for missing and unmatched tracks and "strong" (i.e., no +maximum) for everything else:: match: max_rec: - source: strong - artist: strong - album: strong - media: strong - mediums: strong - year: strong - country: strong - label: strong - catalognum: strong - albumdisambig: strong - album_id: strong - tracks: strong missing_tracks: medium unmatched_tracks: medium - track_title: strong - track_artist: strong - track_index: strong - track_length_grace: strong - track_length_max: strong - track_length: strong - track_id: strong -If a recommendation is higher than the configured maximum and a penalty is -being applied, the recommendation will be downgraded. The maximum for each -field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the -maximum recommendation is ``strong``, no "downgrading" occurs. +If a recommendation is higher than the configured maximum and the indicated +penalty is applied, the recommendation is downgraded. The setting for +each field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the +maximum recommendation is ``strong``, no "downgrading" occurs. The available +penalty names here are: + +* source +* artist +* album +* media +* mediums +* year +* country +* label +* catalognum +* albumdisambig +* album_id +* tracks +* missing_tracks +* unmatched_tracks +* track_title +* track_artist +* track_index +* track_length_grace +* track_length_max +* track_length +* track_id .. 
_preferred: @@ -440,10 +445,10 @@ similarity, you can also specify an ordered list of preferred countries and media types. A distance penalty will be applied if the country or media type from the match -metadata doesn't match. The order is important, the first item will be most -preferred. Each item may be a regular expression, and will be matched case -insensitively. The number of media will be stripped when matching preferred -media (e.g. "2x" in "2xCD"). +metadata doesn't match. The specified values are preferred in descending order +(i.e., the first item will be most preferred). Each item may be a regular +expression, and will be matched case insensitively. The number of media will +be stripped when matching preferred media (e.g. "2x" in "2xCD"). You can also tell the autotagger to prefer matches that have a release year closest to the original year for an album. @@ -469,6 +474,8 @@ the penalty name to the ``ignored`` setting:: match: ignored: missing_tracks unmatched_tracks +The available penalties are the same as those for the :ref:`max_rec` setting. + .. _path-format-config: Path Format Configuration From b1ea90a724fd2117d60775493a80385902b2166c Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 15:59:41 -0700 Subject: [PATCH 8/9] don't duplicate penalty names in max_rec section This is for maintainability: in the future, when we add new distance penalty components, now we won't have to list them twice in the default config.
--- beets/autotag/match.py | 16 +++++++++------- beets/config_default.yaml | 17 ----------------- docs/reference/config.rst | 2 -- test/test_autotag.py | 2 +- 4 files changed, 10 insertions(+), 27 deletions(-) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 1f1ba2d43..a4bc47fa8 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -295,14 +295,16 @@ def _recommendation(results): if isinstance(results[0], hooks.AlbumMatch): for track_dist in min_dist.tracks.values(): keys.update(track_dist.keys()) + max_rec_view = config['match']['max_rec'] for key in keys: - max_rec = config['match']['max_rec'][key].as_choice({ - 'strong': recommendation.strong, - 'medium': recommendation.medium, - 'low': recommendation.low, - 'none': recommendation.none, - }) - rec = min(rec, max_rec) + if key in max_rec_view.keys(): + max_rec = max_rec_view[key].as_choice({ + 'strong': recommendation.strong, + 'medium': recommendation.medium, + 'low': recommendation.low, + 'none': recommendation.none, + }) + rec = min(rec, max_rec) return rec diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 05ceb6272..113a10ed0 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -68,25 +68,8 @@ match: medium_rec_thresh: 0.25 rec_gap_thresh: 0.25 max_rec: - source: strong - artist: strong - album: strong - media: strong - mediums: strong - year: strong - country: strong - label: strong - catalognum: strong - albumdisambig: strong - album_id: strong - tracks: strong missing_tracks: medium unmatched_tracks: medium - track_title: strong - track_artist: strong - track_index: strong - track_length: strong - track_id: strong distance_weights: source: 2.0 artist: 3.0 diff --git a/docs/reference/config.rst b/docs/reference/config.rst index 617700c53..36844b668 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -430,8 +430,6 @@ penalty names here are: * track_title * track_artist * track_index -* track_length_grace -* 
track_length_max * track_length * track_id diff --git a/test/test_autotag.py b/test/test_autotag.py index 4684f9719..f0b637eb6 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -245,7 +245,7 @@ class DistanceTest(_common.TestCase): dist = Distance() dist.add('album', 0.375) dist.add('medium', 0.75) - self.assertEqual(dist.items, [('album', 0.25), ('medium', 0.25)]) + self.assertEqual(dist.items(), [('album', 0.25), ('medium', 0.25)]) def test_update(self): dist1 = Distance() From 2f053b0ecd681f7daf654081776b3b7703e59205 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Mon, 10 Jun 2013 16:06:43 -0700 Subject: [PATCH 9/9] scrub: handle IOError Mutagen can raise this error when trying to truncate a file. --- beetsplug/scrub.py | 5 +++++ docs/changelog.rst | 1 + 2 files changed, 6 insertions(+) diff --git a/beetsplug/scrub.py b/beetsplug/scrub.py index f69e3abf1..1a95c222e 100644 --- a/beetsplug/scrub.py +++ b/beetsplug/scrub.py @@ -108,6 +108,11 @@ def _scrub(path): # remove them. In this case, we just remove all the tags. for tag in f.keys(): del f[tag] + except IOError as exc: + log.error(u'could not scrub {0}: {1}'.format( + util.displayable_path(path), + exc, + )) f.save() # Automatically embed art into imported albums. diff --git a/docs/changelog.rst b/docs/changelog.rst index 5f7ce39cc..3baf66585 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -36,6 +36,7 @@ And some bug fixes: Intzoglou. * Fix an occasional crash in the :doc:`/plugins/beatport` when a length field was missing from the API response. Thanks to Timothy Appnel. +* :doc:`/plugins/scrub`: Handle and log I/O errors. 1.2.0 (June 5, 2013)