diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py index 96de6b674..5938c2308 100644 --- a/beets/autotag/hooks.py +++ b/beets/autotag/hooks.py @@ -15,9 +15,13 @@ """Glue between metadata sources and the matching logic.""" import logging from collections import namedtuple +import re from beets import plugins +from beets import config from beets.autotag import mb +from beets.util import levenshtein +from unidecode import unidecode log = logging.getLogger('beets') @@ -158,6 +162,296 @@ class TrackInfo(object): if isinstance(value, str): setattr(self, fld, value.decode(codec, 'ignore')) + +# Candidate distance scoring. + +# Parameters for string distance function. +# Words that can be moved to the end of a string using a comma. +SD_END_WORDS = ['the', 'a', 'an'] +# Reduced weights for certain portions of the string. +SD_PATTERNS = [ + (r'^the ', 0.1), + (r'[\[\(]?(ep|single)[\]\)]?', 0.0), + (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1), + (r'\(.*?\)', 0.3), + (r'\[.*?\]', 0.3), + (r'(, )?(pt\.|part) .+', 0.2), +] +# Replacements to use before testing distance. +SD_REPLACE = [ + (r'&', 'and'), +] + +def _string_dist_basic(str1, str2): + """Basic edit distance between two strings, ignoring + non-alphanumeric characters and case. Comparisons are based on a + transliteration/lowering to ASCII characters. Normalized by string + length. + """ + str1 = unidecode(str1) + str2 = unidecode(str2) + str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) + str2 = re.sub(r'[^a-z0-9]', '', str2.lower()) + if not str1 and not str2: + return 0.0 + return levenshtein(str1, str2) / float(max(len(str1), len(str2))) + +def string_dist(str1, str2): + """Gives an "intuitive" edit distance between two strings. This is + an edit distance, normalized by the string length, with a number of + tweaks that reflect intuition about text. + """ + str1 = str1.lower() + str2 = str2.lower() + + # Don't penalize strings that move certain words to the end. 
For + # example, "the something" should be considered equal to + # "something, the". + for word in SD_END_WORDS: + if str1.endswith(', %s' % word): + str1 = '%s %s' % (word, str1[:-len(word)-2]) + if str2.endswith(', %s' % word): + str2 = '%s %s' % (word, str2[:-len(word)-2]) + + # Perform a couple of basic normalizing substitutions. + for pat, repl in SD_REPLACE: + str1 = re.sub(pat, repl, str1) + str2 = re.sub(pat, repl, str2) + + # Change the weight for certain string portions matched by a set + # of regular expressions. We gradually change the strings and build + # up penalties associated with parts of the string that were + # deleted. + base_dist = _string_dist_basic(str1, str2) + penalty = 0.0 + for pat, weight in SD_PATTERNS: + # Get strings that drop the pattern. + case_str1 = re.sub(pat, '', str1) + case_str2 = re.sub(pat, '', str2) + + if case_str1 != str1 or case_str2 != str2: + # If the pattern was present (i.e., it is deleted in the + # current case), recalculate the distances for the + # modified strings. + case_dist = _string_dist_basic(case_str1, case_str2) + case_delta = max(0.0, base_dist - case_dist) + if case_delta == 0.0: + continue + + # Shift our baseline strings down (to avoid rematching the + # same part of the string) and add a scaled distance + # amount to the penalties. + str1 = case_str1 + str2 = case_str2 + base_dist = case_dist + penalty += weight * case_delta + dist = base_dist + penalty + + return dist + +class Distance(object): + """Keeps track of multiple distance penalties. Provides a single + weighted distance for all penalties as well as a weighted distance + for each individual penalty. + """ + def __init__(self): + self._penalties = {} + + weights_view = config['match']['distance_weights'] + self._weights = {} + for key in weights_view.keys(): + self._weights[key] = weights_view[key].as_number() + + + # Access the components and their aggregates.
+ + @property + def distance(self): + """Return a weighted and normalized distance across all + penalties. + """ + dist_max = self.max_distance + if dist_max: + return self.raw_distance / self.max_distance + return 0.0 + + @property + def max_distance(self): + """Return the maximum distance penalty (normalization factor). + """ + dist_max = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_max += len(penalty) * self._weights[key] + return dist_max + + @property + def raw_distance(self): + """Return the raw (denormalized) distance. + """ + dist_raw = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_raw += sum(penalty) * self._weights[key] + return dist_raw + + def items(self): + """Return a list of (key, dist) pairs, with `dist` being the + weighted distance, sorted from highest to lowest. Does not + include penalties with a zero value. + """ + list_ = [] + for key in self._penalties: + dist = self[key] + if dist: + list_.append((key, dist)) + # Convert distance into a negative float so we can sort items in + # ascending order (for keys, when the penalty is equal) and + # still get the items with the biggest distance first. + return sorted(list_, key=lambda (key, dist): (0-dist, key)) + + + # Behave like a float. + + def __cmp__(self, other): + return cmp(self.distance, other) + + def __float__(self): + return self.distance + def __sub__(self, other): + return self.distance - other + + def __rsub__(self, other): + return other - self.distance + + + # Behave like a dict. + + def __getitem__(self, key): + """Returns the weighted distance for a named penalty. + """ + dist = sum(self._penalties[key]) * self._weights[key] + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + def __iter__(self): + return iter(self.items()) + + def __len__(self): + return len(self.items()) + + def keys(self): + return [key for key, _ in self.items()] + + def update(self, dist): + """Adds all the distance penalties from `dist`.
+ """ + if not isinstance(dist, Distance): + raise ValueError( + '`dist` must be a Distance object. It is: %r' % dist) + for key, penalties in dist._penalties.iteritems(): + self._penalties.setdefault(key, []).extend(penalties) + + + # Adding components. + + def _eq(self, value1, value2): + """Returns True if `value1` is equal to `value2`. `value1` may + be a compiled regular expression, in which case it will be + matched against `value2`. + """ + if isinstance(value1, re._pattern_type): + return bool(value1.match(value2)) + return value1 == value2 + + def add(self, key, dist): + """Adds a distance penalty. `key` must correspond with a + configured weight setting. `dist` must be a float between 0.0 + and 1.0, and will be added to any existing distance penalties + for the same key. + """ + if not 0.0 <= dist <= 1.0: + raise ValueError( + '`dist` must be between 0.0 and 1.0. It is: %r' % dist) + self._penalties.setdefault(key, []).append(dist) + + def add_equality(self, key, value, options): + """Adds a distance penalty of 1.0 if `value` doesn't match any + of the values in `options`. If an option is a compiled regular + expression, it will be considered equal if it matches against + `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + for opt in options: + if self._eq(opt, value): + dist = 0.0 + break + else: + dist = 1.0 + self.add(key, dist) + + def add_expr(self, key, expr): + """Adds a distance penalty of 1.0 if `expr` evaluates to True, + or 0.0. + """ + if expr: + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_number(self, key, number1, number2): + """Adds a distance penalty of 1.0 for each number of difference + between `number1` and `number2`, or 0.0 when there is no + difference. Use this when there is no upper limit on the + difference between the two numbers. 
+ """ + diff = abs(number1 - number2) + if diff: + for i in range(diff): + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_priority(self, key, value, options): + """Adds a distance penalty that corresponds to the position at + which `value` appears in `options`. A distance penalty of 0.0 + for the first option, or 1.0 if there is no matching option. If + an option is a compiled regular expression, it will be + considered equal if it matches against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + unit = 1.0 / (len(options) or 1) + for i, opt in enumerate(options): + if self._eq(opt, value): + dist = i * unit + break + else: + dist = 1.0 + self.add(key, dist) + + def add_ratio(self, key, number1, number2): + """Adds a distance penalty for `number1` as a ratio of `number2`. + `number1` is bound at 0 and `number2`. + """ + number = float(max(min(number1, number2), 0)) + if number2: + dist = number / number2 + else: + dist = 0.0 + self.add(key, dist) + + def add_string(self, key, str1, str2): + """Adds a distance penalty based on the edit distance between + `str1` and `str2`. + """ + dist = string_dist(str1, str2) + self.add(key, dist) + + +# Structures that compose all the information for a candidate match. + AlbumMatch = namedtuple('AlbumMatch', ['distance', 'info', 'mapping', 'extra_items', 'extra_tracks']) diff --git a/beets/autotag/match.py b/beets/autotag/match.py index cc32d6e8f..a4bc47fa8 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -21,34 +21,13 @@ import datetime import logging import re from munkres import Munkres -from unidecode import unidecode from beets import plugins from beets import config -from beets.util import levenshtein, plurality +from beets.util import plurality from beets.util.enumeration import enum from beets.autotag import hooks -# A configuration view for the distance weights. -weights = config['match']['distance_weights'] - -# Parameters for string distance function. 
-# Words that can be moved to the end of a string using a comma. -SD_END_WORDS = ['the', 'a', 'an'] -# Reduced weights for certain portions of the string. -SD_PATTERNS = [ - (r'^the ', 0.1), - (r'[\[\(]?(ep|single)[\]\)]?', 0.0), - (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1), - (r'\(.*?\)', 0.3), - (r'\[.*?\]', 0.3), - (r'(, )?(pt\.|part) .+', 0.2), -] -# Replacements to use before testing distance. -SD_REPLACE = [ - (r'&', 'and'), -] - # Recommendation enumeration. recommendation = enum('none', 'low', 'medium', 'strong', name='recommendation') @@ -64,73 +43,6 @@ log = logging.getLogger('beets') # Primary matching functionality. -def _string_dist_basic(str1, str2): - """Basic edit distance between two strings, ignoring - non-alphanumeric characters and case. Comparisons are based on a - transliteration/lowering to ASCII characters. Normalized by string - length. - """ - str1 = unidecode(str1) - str2 = unidecode(str2) - str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) - str2 = re.sub(r'[^a-z0-9]', '', str2.lower()) - if not str1 and not str2: - return 0.0 - return levenshtein(str1, str2) / float(max(len(str1), len(str2))) - -def string_dist(str1, str2): - """Gives an "intuitive" edit distance between two strings. This is - an edit distance, normalized by the string length, with a number of - tweaks that reflect intuition about text. - """ - str1 = str1.lower() - str2 = str2.lower() - - # Don't penalize strings that move certain words to the end. For - # example, "the something" should be considered equal to - # "something, the". - for word in SD_END_WORDS: - if str1.endswith(', %s' % word): - str1 = '%s %s' % (word, str1[:-len(word)-2]) - if str2.endswith(', %s' % word): - str2 = '%s %s' % (word, str2[:-len(word)-2]) - - # Perform a couple of basic normalizing substitutions. - for pat, repl in SD_REPLACE: - str1 = re.sub(pat, repl, str1) - str2 = re.sub(pat, repl, str2) - - # Change the weight for certain string portions matched by a set - # of regular expressions. 
We gradually change the strings and build - # up penalties associated with parts of the string that were - # deleted. - base_dist = _string_dist_basic(str1, str2) - penalty = 0.0 - for pat, weight in SD_PATTERNS: - # Get strings that drop the pattern. - case_str1 = re.sub(pat, '', str1) - case_str2 = re.sub(pat, '', str2) - - if case_str1 != str1 or case_str2 != str2: - # If the pattern was present (i.e., it is deleted in the - # the current case), recalculate the distances for the - # modified strings. - case_dist = _string_dist_basic(case_str1, case_str2) - case_delta = max(0.0, base_dist - case_dist) - if case_delta == 0.0: - continue - - # Shift our baseline strings down (to avoid rematching the - # same part of the string) and add a scaled distance - # amount to the penalties. - str1 = case_str1 - str2 = case_str2 - base_dist = case_dist - penalty += weight * case_delta - dist = base_dist + penalty - - return dist - def current_metadata(items): """Extract the likely current metadata for an album given a list of its items. Return two dictionaries: @@ -187,196 +99,19 @@ def track_index_changed(item, track_info): """ return item.track not in (track_info.medium_index, track_info.index) -class Distance(object): - """Keeps track of multiple distance penalties. Provides a single weighted - distance for all penalties as well as a weighted distance for each - individual penalty. - """ - def __cmp__(self, other): - return cmp(self.distance, other) - - def __float__(self): - return self.distance - - def __getitem__(self, key): - """Returns the weighted distance for a named penalty. 
- """ - dist = sum(self._penalties[key]) * weights[key].as_number() - dist_max = self.max_distance - if dist_max: - return dist / dist_max - return 0.0 - - def __init__(self): - self._penalties = {} - - def __iter__(self): - return iter(self.sorted) - - def __len__(self): - return len(self.sorted) - - def __sub__(self, other): - return self.distance - other - - def __rsub__(self, other): - return other - self.distance - - def _eq(self, value1, value2): - """Returns True if `value1` is equal to `value2`. `value1` may be a - compiled regular expression, in which case it will be matched against - `value2`. - """ - if isinstance(value1, re._pattern_type): - return bool(value1.match(value2)) - return value1 == value2 - - def add(self, key, dist): - """Adds a distance penalty. `key` must correspond with a configured - weight setting. `dist` must be a float between 0.0 and 1.0, and will be - added to any existing distance penalties for the same key. - """ - if not 0.0 <= dist <= 1.0: - raise ValueError( - '`dist` must be between 0.0 and 1.0. It is: %r' % dist) - self._penalties.setdefault(key, []).append(dist) - - def add_equality(self, key, value, options): - """Adds a distance penalty of 1.0 if `value` doesn't match any of the - values in `options`. If an option is a compiled regular expression, it - will be considered equal if it matches against `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - for opt in options: - if self._eq(opt, value): - dist = 0.0 - break - else: - dist = 1.0 - self.add(key, dist) - - def add_expr(self, key, expr): - """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0. - """ - if expr: - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_number(self, key, number1, number2): - """Adds a distance penalty of 1.0 for each number of difference between - `number1` and `number2`, or 0.0 when there is no difference. 
Use this - when there is no upper limit on the difference between the two numbers. - """ - diff = abs(number1 - number2) - if diff: - for i in range(diff): - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_priority(self, key, value, options): - """Adds a distance penalty that corresponds to the position at which - `value` appears in `options`. A distance penalty of 0.0 for the first - option, or 1.0 if there is no matching option. If an option is a - compiled regular expression, it will be considered equal if it matches - against `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - unit = 1.0 / (len(options) or 1) - for i, opt in enumerate(options): - if self._eq(opt, value): - dist = i * unit - break - else: - dist = 1.0 - self.add(key, dist) - - def add_ratio(self, key, number1, number2): - """Adds a distance penalty for `number1` as a ratio of `number2`. - `number1` is bound at 0 and `number2`. - """ - number = float(max(min(number1, number2), 0)) - if number2: - dist = number / number2 - else: - dist = 0.0 - self.add(key, dist) - - def add_string(self, key, str1, str2): - """Adds a distance penalty based on the edit distance between `str1` - and `str2`. - """ - dist = string_dist(str1, str2) - self.add(key, dist) - - @property - def distance(self): - """Returns a weighted and normalised distance across all penalties. - """ - dist_max = self.max_distance - if dist_max: - return self.raw_distance / self.max_distance - return 0.0 - - @property - def max_distance(self): - """Returns the maximum distance penalty. - """ - dist_max = 0.0 - for key, penalty in self._penalties.iteritems(): - dist_max += len(penalty) * weights[key].as_number() - return dist_max - - @property - def raw_distance(self): - """Returns the raw (denormalised) distance. 
- """ - dist_raw = 0.0 - for key, penalty in self._penalties.iteritems(): - dist_raw += sum(penalty) * weights[key].as_number() - return dist_raw - - @property - def sorted(self): - """Returns a list of (dist, key) pairs, with `dist` being the weighted - distance, sorted from highest to lowest. Does not include penalties - with a zero value. - """ - list_ = [] - for key in self._penalties: - dist = self[key] - if dist: - list_.append((dist, key)) - # Convert distance into a negative float we can sort items in ascending - # order (for keys, when the penalty is equal) and still get the items - # with the biggest distance first. - return sorted(list_, key=lambda (dist, key): (0-dist, key)) - - def update(self, dist): - """Adds all the distance penalties from `dist`. - """ - if not isinstance(dist, Distance): - raise ValueError( - '`dist` must be a Distance object. It is: %r' % dist) - for key, penalties in dist._penalties.iteritems(): - self._penalties.setdefault(key, []).extend(penalties) - def track_distance(item, track_info, incl_artist=False): """Determines the significance of a track metadata change. Returns a Distance object. `incl_artist` indicates that a distance component should be included for the track artist (i.e., for various-artist releases). """ - dist = Distance() + dist = hooks.Distance() # Length. if track_info.length: diff = abs(item.length - track_info.length) - \ - weights['track_length_grace'].as_number() + config['match']['track_length_grace'].as_number() dist.add_ratio('track_length', diff, - weights['track_length_max'].as_number()) + config['match']['track_length_max'].as_number()) # Title. dist.add_string('track_title', item.title, track_info.title) @@ -410,7 +145,7 @@ def distance(items, album_info, mapping): """ likelies, _ = current_metadata(items) - dist = Distance() + dist = hooks.Distance() # Artist, if not various. 
if not album_info.va: @@ -556,18 +291,20 @@ def _recommendation(results): # Downgrade to the max rec if it is lower than the current rec for an # applied penalty. - keys = set(key for _, key in min_dist) + keys = set(min_dist.keys()) if isinstance(results[0], hooks.AlbumMatch): for track_dist in min_dist.tracks.values(): - keys.update(key for _, key in track_dist) + keys.update(track_dist.keys()) + max_rec_view = config['match']['max_rec'] for key in keys: - max_rec = config['match']['max_rec'][key].as_choice({ - 'strong': recommendation.strong, - 'medium': recommendation.medium, - 'low': recommendation.low, - 'none': recommendation.none, - }) - rec = min(rec, max_rec) + if key in max_rec_view.keys(): + max_rec = max_rec_view[key].as_choice({ + 'strong': recommendation.strong, + 'medium': recommendation.medium, + 'low': recommendation.low, + 'none': recommendation.none, + }) + rec = min(rec, max_rec) return rec diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 44cb51051..113a10ed0 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -68,27 +68,8 @@ match: medium_rec_thresh: 0.25 rec_gap_thresh: 0.25 max_rec: - source: strong - artist: strong - album: strong - media: strong - mediums: strong - year: strong - country: strong - label: strong - catalognum: strong - albumdisambig: strong - album_id: strong - tracks: strong missing_tracks: medium unmatched_tracks: medium - track_title: strong - track_artist: strong - track_index: strong - track_length_grace: strong - track_length_max: strong - track_length: strong - track_id: strong distance_weights: source: 2.0 artist: 3.0 @@ -107,8 +88,6 @@ match: track_title: 3.0 track_artist: 2.0 track_index: 1.0 - track_length_grace: 10 - track_length_max: 30 track_length: 2.0 track_id: 5.0 preferred: @@ -116,3 +95,5 @@ match: media: [] original_year: no ignored: [] + track_length_grace: 10 + track_length_max: 30 diff --git a/beets/plugins.py b/beets/plugins.py index d0c0a9654..1df98fac6 
100755 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -67,13 +67,13 @@ class BeetsPlugin(object): """Should return a Distance object to be added to the distance for every track comparison. """ - return beets.autotag.match.Distance() + return beets.autotag.hooks.Distance() def album_distance(self, items, album_info, mapping): """Should return a Distance object to be added to the distance for every album-level comparison. """ - return beets.autotag.match.Distance() + return beets.autotag.hooks.Distance() def candidates(self, items, artist, album, va_likely): """Should return a sequence of AlbumInfo objects that match the @@ -244,14 +244,16 @@ def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. Returns a Distance object. """ - dist = beets.autotag.match.Distance() + from beets.autotag.hooks import Distance + dist = Distance() for plugin in find_plugins(): dist.update(plugin.track_distance(item, info)) return dist def album_distance(items, album_info, mapping): """Returns the album distance calculated by plugins.""" - dist = beets.autotag.match.Distance() + from beets.autotag.hooks import Distance + dist = Distance() for plugin in find_plugins(): dist.update(plugin.album_distance(items, album_info, mapping)) return dist diff --git a/beets/ui/__init__.py b/beets/ui/__init__.py index 460320a34..f7288141f 100644 --- a/beets/ui/__init__.py +++ b/beets/ui/__init__.py @@ -366,7 +366,7 @@ def colorize(color, text): else: return text -def _colordiff(a, b, highlight='red', second_highlight='lightgray'): +def _colordiff(a, b, highlight='red', minor_highlight='lightgray'): """Given two values, return the same pair of strings except with their differences highlighted in the specified color. 
Strings are highlighted intelligently to show differences; other values are @@ -407,7 +407,7 @@ def _colordiff(a, b, highlight='red', second_highlight='lightgray'): if a[a_start:a_end].lower() != b[b_start:b_end].lower(): color = highlight else: - color = second_highlight + color = minor_highlight a_out.append(colorize(color, a[a_start:a_end])) b_out.append(colorize(color, b[b_start:b_end])) else: diff --git a/beets/ui/commands.py b/beets/ui/commands.py index dfe3585c1..f9e3bc6eb 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -164,11 +164,11 @@ def dist_string(dist): return out def penalty_string(distance, limit=None): - """Returns a colorized string that indicates all the penalties applied to - a distance object. + """Returns a colorized string that indicates all the penalties + applied to a distance object. """ penalties = [] - for _, key in distance: + for key in distance.keys(): key = key.replace('album_', '') key = key.replace('track_', '') key = key.replace('_', ' ') diff --git a/beetsplug/beatport.py b/beetsplug/beatport.py index c68901cc1..05e33637c 100644 --- a/beetsplug/beatport.py +++ b/beetsplug/beatport.py @@ -20,8 +20,7 @@ from datetime import datetime, timedelta import requests -from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.autotag.match import Distance +from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance from beets.plugins import BeetsPlugin log = logging.getLogger('beets') diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 006f85db0..83f67c0a9 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -21,7 +21,6 @@ from beets import util from beets import config from beets.util import confit from beets.autotag import hooks -from beets.autotag.match import Distance import acoustid import logging from collections import defaultdict @@ -114,7 +113,7 @@ def _all_releases(items): class AcoustidPlugin(plugins.BeetsPlugin): def track_distance(self, item, info): - dist = Distance() + dist = 
hooks.Distance() if item.path not in _matches or not info.track_id: # Match failed or no track ID. return dist diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 822ed59e3..e9e6477b1 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -15,11 +15,9 @@ """Adds Discogs album search support to the autotagger. Requires the discogs-client library. """ -from beets import config -from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.autotag.match import current_metadata, Distance, VA_ARTISTS +from beets.autotag.hooks import AlbumInfo, TrackInfo, Distance from beets.plugins import BeetsPlugin -from discogs_client import Artist, DiscogsAPIError, Release, Search +from discogs_client import DiscogsAPIError, Release, Search import beets import discogs_client import logging diff --git a/beetsplug/scrub.py b/beetsplug/scrub.py index f69e3abf1..1a95c222e 100644 --- a/beetsplug/scrub.py +++ b/beetsplug/scrub.py @@ -108,6 +108,11 @@ def _scrub(path): # remove them. In this case, we just remove all the tags. for tag in f.keys(): del f[tag] + except IOError as exc: + log.error(u'could not scrub {0}: {1}'.format( + util.displayable_path(path), + exc, + )) f.save() # Automatically embed art into imported albums. diff --git a/docs/changelog.rst b/docs/changelog.rst index 784d2eb88..3baf66585 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -36,6 +36,7 @@ And some bug fixes: Intzoglou. * Fix an occasional crash in the :doc:`/plugins/beatport` when a length field was missing from the API response. Thanks to Timothy Appnel. +* :doc:`/plugins/scrub`: Handle and log I/O errors. 1.2.0 (June 5, 2013) @@ -103,7 +104,7 @@ same MusicBrainz release group: beets re-identify the same release when re-importing existing files. * Prefer releases that are closest to the tagged ``year``. Tolerate files tagged with release or original year. 
-* The new :ref:`preferred_media` config option lets you prefer a certain media +* The new ``preferred_media`` config option lets you prefer a certain media type when the ``media`` field is unset on an album. * Apply minor penalties across a range of fields to differentiate between nearly identical releases: ``disctotal``, ``label``, ``catalognum``, diff --git a/docs/reference/config.rst b/docs/reference/config.rst index d320cd655..36844b668 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -395,40 +395,43 @@ max_rec As mentioned above, autotagger matches have *recommendations* that control how the UI behaves for a certain quality of match. The recommendation for a certain match is based on the overall distance calculation. But you can also control -the recommendation when a distance penalty is being applied for a specific -field by defining *maximum* recommendations for each field: +the recommendation when a specific distance penalty is applied by defining +*maximum* recommendations for each field: -To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here -are the defaults:: +To define maxima, use keys under ``max_rec:`` in the ``match`` section. The +defaults are "medium" for missing and unmatched tracks and "strong" (i.e., no +maximum) for everything else:: match: max_rec: - source: strong - artist: strong - album: strong - media: strong - mediums: strong - year: strong - country: strong - label: strong - catalognum: strong - albumdisambig: strong - album_id: strong - tracks: strong missing_tracks: medium unmatched_tracks: medium - track_title: strong - track_artist: strong - track_index: strong - track_length_grace: strong - track_length_max: strong - track_length: strong - track_id: strong -If a recommendation is higher than the configured maximum and a penalty is -being applied, the recommendation will be downgraded. The maximum for each -field can be one of ``none``, ``low``, ``medium`` or ``strong``. 
When the -maximum recommendation is ``strong``, no "downgrading" occurs. +If a recommendation is higher than the configured maximum and the indicated +penalty is applied, the recommendation is downgraded. The setting for +each field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the +maximum recommendation is ``strong``, no "downgrading" occurs. The available +penalty names here are: + +* source +* artist +* album +* media +* mediums +* year +* country +* label +* catalognum +* albumdisambig +* album_id +* tracks +* missing_tracks +* unmatched_tracks +* track_title +* track_artist +* track_index +* track_length +* track_id .. _preferred: @@ -440,10 +443,10 @@ similarity, you can also specify an ordered list of preferred countries and media types. A distance penalty will be applied if the country or media type from the match -metadata doesn't match. The order is important, the first item will be most -preferred. Each item may be a regular expression, and will be matched case -insensitively. The number of media will be stripped when matching preferred -media (e.g. "2x" in "2xCD"). +metadata doesn't match. The specified values are preferred in descending order +(i.e., the first item will be most preferred). Each item may be a regular +expression, and will be matched case insensitively. The number of media will +be stripped when matching preferred media (e.g. "2x" in "2xCD"). You can also tell the autotagger to prefer matches that have a release year closest to the original year for an album. @@ -469,6 +472,8 @@ the penalty name to the ``ignored`` setting:: match: ignored: missing_tracks unmatched_tracks +The available penalties are the same as those for the :ref:`max_rec` setting. + .. 
_path-format-config: Path Format Configuration diff --git a/test/test_autotag.py b/test/test_autotag.py index dc75ee0ab..f0b637eb6 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -23,13 +23,13 @@ import _common from _common import unittest from beets import autotag from beets.autotag import match -from beets.autotag.match import Distance +from beets.autotag.hooks import Distance, string_dist from beets.library import Item from beets.util import plurality from beets.autotag import AlbumInfo, TrackInfo from beets import config -class PluralityTest(unittest.TestCase): +class PluralityTest(_common.TestCase): def test_plurality_consensus(self): objs = [1, 1, 1, 1] obj, freq = plurality(objs) @@ -106,154 +106,163 @@ def _make_trackinfo(): TrackInfo(u'three', None, u'some artist', length=1, index=3), ] -class DistanceTest(unittest.TestCase): - def setUp(self): - self.dist = Distance() - +class DistanceTest(_common.TestCase): def test_add(self): - self.dist.add('add', 1.0) - self.assertEqual(self.dist._penalties, {'add': [1.0]}) + dist = Distance() + dist.add('add', 1.0) + self.assertEqual(dist._penalties, {'add': [1.0]}) def test_add_equality(self): - self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi']) - self.assertEqual(self.dist._penalties['equality'], [0.0]) + dist = Distance() + dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi']) + self.assertEqual(dist._penalties['equality'], [0.0]) - self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi']) - self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0]) + dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi']) + self.assertEqual(dist._penalties['equality'], [0.0, 1.0]) - self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I)) - self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0]) + dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I)) + self.assertEqual(dist._penalties['equality'], [0.0, 1.0, 0.0]) def test_add_expr(self): - 
self.dist.add_expr('expr', True) - self.assertEqual(self.dist._penalties['expr'], [1.0]) + dist = Distance() + dist.add_expr('expr', True) + self.assertEqual(dist._penalties['expr'], [1.0]) - self.dist.add_expr('expr', False) - self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0]) + dist.add_expr('expr', False) + self.assertEqual(dist._penalties['expr'], [1.0, 0.0]) def test_add_number(self): + dist = Distance() # Add a full penalty for each number of difference between two numbers. - self.dist.add_number('number', 1, 1) - self.assertEqual(self.dist._penalties['number'], [0.0]) + dist.add_number('number', 1, 1) + self.assertEqual(dist._penalties['number'], [0.0]) - self.dist.add_number('number', 1, 2) - self.assertEqual(self.dist._penalties['number'], [0.0, 1.0]) + dist.add_number('number', 1, 2) + self.assertEqual(dist._penalties['number'], [0.0, 1.0]) - self.dist.add_number('number', 2, 1) - self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0]) + dist.add_number('number', 2, 1) + self.assertEqual(dist._penalties['number'], [0.0, 1.0, 1.0]) - self.dist.add_number('number', -1, 2) - self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0, + dist.add_number('number', -1, 2) + self.assertEqual(dist._penalties['number'], [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]) def test_add_priority(self): - self.dist.add_priority('priority', 'abc', 'abc') - self.assertEqual(self.dist._penalties['priority'], [0.0]) + dist = Distance() + dist.add_priority('priority', 'abc', 'abc') + self.assertEqual(dist._penalties['priority'], [0.0]) - self.dist.add_priority('priority', 'def', ['abc', 'def']) - self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5]) + dist.add_priority('priority', 'def', ['abc', 'def']) + self.assertEqual(dist._penalties['priority'], [0.0, 0.5]) - self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef', + dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef', re.compile('GH', re.I)]) - self.assertEqual(self.dist._penalties['priority'], 
[0.0, 0.5, 0.75]) + self.assertEqual(dist._penalties['priority'], [0.0, 0.5, 0.75]) - self.dist.add_priority('priority', 'xyz', ['abc', 'def']) - self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75, + dist.add_priority('priority', 'xyz', ['abc', 'def']) + self.assertEqual(dist._penalties['priority'], [0.0, 0.5, 0.75, 1.0]) def test_add_ratio(self): - self.dist.add_ratio('ratio', 25, 100) - self.assertEqual(self.dist._penalties['ratio'], [0.25]) + dist = Distance() + dist.add_ratio('ratio', 25, 100) + self.assertEqual(dist._penalties['ratio'], [0.25]) - self.dist.add_ratio('ratio', 10, 5) - self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0]) + dist.add_ratio('ratio', 10, 5) + self.assertEqual(dist._penalties['ratio'], [0.25, 1.0]) - self.dist.add_ratio('ratio', -5, 5) - self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0]) + dist.add_ratio('ratio', -5, 5) + self.assertEqual(dist._penalties['ratio'], [0.25, 1.0, 0.0]) - self.dist.add_ratio('ratio', 5, 0) - self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) + dist.add_ratio('ratio', 5, 0) + self.assertEqual(dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) def test_add_string(self): - dist = match.string_dist(u'abc', u'bcd') - self.dist.add_string('string', u'abc', u'bcd') - self.assertEqual(self.dist._penalties['string'], [dist]) + dist = Distance() + sdist = string_dist(u'abc', u'bcd') + dist.add_string('string', u'abc', u'bcd') + self.assertEqual(dist._penalties['string'], [sdist]) def test_distance(self): config['match']['distance_weights']['album'] = 2.0 config['match']['distance_weights']['medium'] = 1.0 - self.dist.add('album', 0.5) - self.dist.add('media', 0.25) - self.dist.add('media', 0.75) - self.assertEqual(self.dist.distance, 0.5) + dist = Distance() + dist.add('album', 0.5) + dist.add('media', 0.25) + dist.add('media', 0.75) + self.assertEqual(dist.distance, 0.5) # __getitem__() - self.assertEqual(self.dist['album'], 0.25) - 
self.assertEqual(self.dist['media'], 0.25) + self.assertEqual(dist['album'], 0.25) + self.assertEqual(dist['media'], 0.25) def test_max_distance(self): config['match']['distance_weights']['album'] = 3.0 config['match']['distance_weights']['medium'] = 1.0 - self.dist.add('album', 0.5) - self.dist.add('medium', 0.0) - self.dist.add('medium', 0.0) - self.assertEqual(self.dist.max_distance, 5.0) + dist = Distance() + dist.add('album', 0.5) + dist.add('medium', 0.0) + dist.add('medium', 0.0) + self.assertEqual(dist.max_distance, 5.0) def test_operators(self): config['match']['distance_weights']['source'] = 1.0 config['match']['distance_weights']['album'] = 2.0 config['match']['distance_weights']['medium'] = 1.0 - self.dist.add('source', 0.0) - self.dist.add('album', 0.5) - self.dist.add('medium', 0.25) - self.dist.add('medium', 0.75) - self.assertEqual(len(self.dist), 2) - self.assertEqual(list(self.dist), [(0.2, 'album'), (0.2, 'medium')]) - self.assertTrue(self.dist == 0.4) - self.assertTrue(self.dist < 1.0) - self.assertTrue(self.dist > 0.0) - self.assertEqual(self.dist - 0.4, 0.0) - self.assertEqual(0.4 - self.dist, 0.0) - self.assertEqual(float(self.dist), 0.4) + dist = Distance() + dist.add('source', 0.0) + dist.add('album', 0.5) + dist.add('medium', 0.25) + dist.add('medium', 0.75) + self.assertEqual(len(dist), 2) + self.assertEqual(list(dist), [('album', 0.2), ('medium', 0.2)]) + self.assertTrue(dist == 0.4) + self.assertTrue(dist < 1.0) + self.assertTrue(dist > 0.0) + self.assertEqual(dist - 0.4, 0.0) + self.assertEqual(0.4 - dist, 0.0) + self.assertEqual(float(dist), 0.4) def test_raw_distance(self): config['match']['distance_weights']['album'] = 3.0 config['match']['distance_weights']['medium'] = 1.0 - self.dist.add('album', 0.5) - self.dist.add('medium', 0.25) - self.dist.add('medium', 0.5) - self.assertEqual(self.dist.raw_distance, 2.25) + dist = Distance() + dist.add('album', 0.5) + dist.add('medium', 0.25) + dist.add('medium', 0.5) + 
self.assertEqual(dist.raw_distance, 2.25) - def test_sorted(self): + def test_items(self): config['match']['distance_weights']['album'] = 4.0 config['match']['distance_weights']['medium'] = 2.0 - - self.dist.add('album', 0.1875) - self.dist.add('medium', 0.75) - self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')]) + dist = Distance() + dist.add('album', 0.1875) + dist.add('medium', 0.75) + self.assertEqual(dist.items(), [('medium', 0.25), ('album', 0.125)]) # Sort by key if distance is equal. dist = Distance() dist.add('album', 0.375) dist.add('medium', 0.75) - self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')]) + self.assertEqual(dist.items(), [('album', 0.25), ('medium', 0.25)]) def test_update(self): - self.dist.add('album', 0.5) - self.dist.add('media', 1.0) + dist1 = Distance() + dist1.add('album', 0.5) + dist1.add('media', 1.0) - dist = Distance() - dist.add('album', 0.75) - dist.add('album', 0.25) - self.dist.add('media', 0.05) + dist2 = Distance() + dist2.add('album', 0.75) + dist2.add('album', 0.25) + dist2.add('media', 0.05) - self.dist.update(dist) + dist1.update(dist2) - self.assertEqual(self.dist._penalties, {'album': [0.5, 0.75, 0.25], - 'media': [1.0, 0.05]}) + self.assertEqual(dist1._penalties, {'album': [0.5, 0.75, 0.25], + 'media': [1.0, 0.05]}) -class TrackDistanceTest(unittest.TestCase): +class TrackDistanceTest(_common.TestCase): def test_identical_tracks(self): item = _make_item(u'one', 1) info = _make_trackinfo()[0] @@ -280,7 +289,7 @@ class TrackDistanceTest(unittest.TestCase): dist = match.track_distance(item, info, incl_artist=True) self.assertEqual(dist, 0.0) -class AlbumDistanceTest(unittest.TestCase): +class AlbumDistanceTest(_common.TestCase): def _mapping(self, items, info): out = {} for i, t in zip(items, info.tracks): @@ -863,77 +872,77 @@ class ApplyCompilationTest(_common.TestCase, ApplyTestUtil): class StringDistanceTest(unittest.TestCase): def test_equal_strings(self): - dist = 
match.string_dist(u'Some String', u'Some String') + dist = string_dist(u'Some String', u'Some String') self.assertEqual(dist, 0.0) def test_different_strings(self): - dist = match.string_dist(u'Some String', u'Totally Different') + dist = string_dist(u'Some String', u'Totally Different') self.assertNotEqual(dist, 0.0) def test_punctuation_ignored(self): - dist = match.string_dist(u'Some String', u'Some.String!') + dist = string_dist(u'Some String', u'Some.String!') self.assertEqual(dist, 0.0) def test_case_ignored(self): - dist = match.string_dist(u'Some String', u'sOME sTring') + dist = string_dist(u'Some String', u'sOME sTring') self.assertEqual(dist, 0.0) def test_leading_the_has_lower_weight(self): - dist1 = match.string_dist(u'XXX Band Name', u'Band Name') - dist2 = match.string_dist(u'The Band Name', u'Band Name') + dist1 = string_dist(u'XXX Band Name', u'Band Name') + dist2 = string_dist(u'The Band Name', u'Band Name') self.assert_(dist2 < dist1) def test_parens_have_lower_weight(self): - dist1 = match.string_dist(u'One .Two.', u'One') - dist2 = match.string_dist(u'One (Two)', u'One') + dist1 = string_dist(u'One .Two.', u'One') + dist2 = string_dist(u'One (Two)', u'One') self.assert_(dist2 < dist1) def test_brackets_have_lower_weight(self): - dist1 = match.string_dist(u'One .Two.', u'One') - dist2 = match.string_dist(u'One [Two]', u'One') + dist1 = string_dist(u'One .Two.', u'One') + dist2 = string_dist(u'One [Two]', u'One') self.assert_(dist2 < dist1) def test_ep_label_has_zero_weight(self): - dist = match.string_dist(u'My Song (EP)', u'My Song') + dist = string_dist(u'My Song (EP)', u'My Song') self.assertEqual(dist, 0.0) def test_featured_has_lower_weight(self): - dist1 = match.string_dist(u'My Song blah Someone', u'My Song') - dist2 = match.string_dist(u'My Song feat Someone', u'My Song') + dist1 = string_dist(u'My Song blah Someone', u'My Song') + dist2 = string_dist(u'My Song feat Someone', u'My Song') self.assert_(dist2 < dist1) def 
test_postfix_the(self): - dist = match.string_dist(u'The Song Title', u'Song Title, The') + dist = string_dist(u'The Song Title', u'Song Title, The') self.assertEqual(dist, 0.0) def test_postfix_a(self): - dist = match.string_dist(u'A Song Title', u'Song Title, A') + dist = string_dist(u'A Song Title', u'Song Title, A') self.assertEqual(dist, 0.0) def test_postfix_an(self): - dist = match.string_dist(u'An Album Title', u'Album Title, An') + dist = string_dist(u'An Album Title', u'Album Title, An') self.assertEqual(dist, 0.0) def test_empty_strings(self): - dist = match.string_dist(u'', u'') + dist = string_dist(u'', u'') self.assertEqual(dist, 0.0) def test_solo_pattern(self): # Just make sure these don't crash. - match.string_dist(u'The ', u'') - match.string_dist(u'(EP)', u'(EP)') - match.string_dist(u', An', u'') + string_dist(u'The ', u'') + string_dist(u'(EP)', u'(EP)') + string_dist(u', An', u'') def test_heuristic_does_not_harm_distance(self): - dist = match.string_dist(u'Untitled', u'[Untitled]') + dist = string_dist(u'Untitled', u'[Untitled]') self.assertEqual(dist, 0.0) def test_ampersand_expansion(self): - dist = match.string_dist(u'And', u'&') + dist = string_dist(u'And', u'&') self.assertEqual(dist, 0.0) def test_accented_characters(self): - dist = match.string_dist(u'\xe9\xe1\xf1', u'ean') + dist = string_dist(u'\xe9\xe1\xf1', u'ean') self.assertEqual(dist, 0.0) def suite():