mirror of
https://github.com/beetbox/beets.git
synced 2026-01-03 22:42:44 +01:00
Merge pull request #302 from mrmachine/distance-refactor
Use a Distance object instead of floats for distance calculations.
This commit is contained in:
commit
40dadd4c59
11 changed files with 640 additions and 279 deletions
|
|
@ -30,7 +30,7 @@ from beets.util.enumeration import enum
|
|||
from beets.autotag import hooks
|
||||
|
||||
# A configuration view for the distance weights.
|
||||
weights = config['match']['weight']
|
||||
weights = config['match']['distance_weights']
|
||||
|
||||
# Parameters for string distance function.
|
||||
# Words that can be moved to the end of a string using a comma.
|
||||
|
|
@ -187,62 +187,221 @@ def track_index_changed(item, track_info):
|
|||
"""
|
||||
return item.track not in (track_info.medium_index, track_info.index)
|
||||
|
||||
class Distance(object):
    """Keeps track of multiple distance penalties. Provides a single weighted
    distance for all penalties as well as a weighted distance for each
    individual penalty.

    Penalties are stored per key as a list of floats between 0.0 and 1.0.
    Each key is weighted by the module-level `weights` configuration view
    (`weights[key].as_number()`).
    """

    # Type of compiled regular expressions. Derived from `re.compile` rather
    # than read from a private attribute of the `re` module.
    _PATTERN_TYPE = type(re.compile(''))

    def __init__(self):
        # Maps a penalty key to the list of raw penalties added for that key.
        self._penalties = {}

    # Comparison and arithmetic all operate on the normalised `distance`
    # value, so a Distance can be compared against floats or other Distance
    # objects.

    def __cmp__(self, other):
        """Three-way comparison of the normalised distance with `other`.
        """
        dist = self.distance
        # Equivalent to the `cmp` builtin without depending on it.
        return (dist > other) - (dist < other)

    def __eq__(self, other):
        return self.distance == other

    def __ne__(self, other):
        return self.distance != other

    def __lt__(self, other):
        return self.distance < other

    def __le__(self, other):
        return self.distance <= other

    def __gt__(self, other):
        return self.distance > other

    def __ge__(self, other):
        return self.distance >= other

    def __float__(self):
        return self.distance

    def __sub__(self, other):
        return self.distance - other

    def __rsub__(self, other):
        return other - self.distance

    def __getitem__(self, key):
        """Returns the weighted distance for a named penalty, normalised by
        the maximum distance across all penalties. Returns 0.0 when the
        maximum distance is zero. Raises KeyError if no penalty has been
        added for `key`.
        """
        dist = sum(self._penalties[key]) * weights[key].as_number()
        dist_max = self.max_distance
        if dist_max:
            return dist / dist_max
        return 0.0

    def __iter__(self):
        """Iterates over the (dist, key) pairs of `sorted`.
        """
        return iter(self.sorted)

    def __len__(self):
        """Returns the number of penalties with a non-zero weighted distance.
        """
        return len(self.sorted)

    def _eq(self, value1, value2):
        """Returns True if `value1` is equal to `value2`. `value1` may be a
        compiled regular expression, in which case it will be matched against
        `value2`.
        """
        if isinstance(value1, self._PATTERN_TYPE):
            return bool(value1.match(value2))
        return value1 == value2

    def add(self, key, dist):
        """Adds a distance penalty. `key` must correspond with a configured
        weight setting. `dist` must be a float between 0.0 and 1.0, and will be
        added to any existing distance penalties for the same key. Raises
        ValueError if `dist` is outside that range.
        """
        if not 0.0 <= dist <= 1.0:
            raise ValueError(
                '`dist` must be between 0.0 and 1.0. It is: %r' % dist)
        self._penalties.setdefault(key, []).append(dist)

    def add_equality(self, key, value, options):
        """Adds a distance penalty of 1.0 if `value` doesn't match any of the
        values in `options`, or 0.0 if it does. `options` may be a single
        value or a list/tuple of values. If an option is a compiled regular
        expression, it will be considered equal if it matches against `value`.
        """
        if not isinstance(options, (list, tuple)):
            options = [options]
        for opt in options:
            if self._eq(opt, value):
                dist = 0.0
                break
        else:
            # No option matched.
            dist = 1.0
        self.add(key, dist)

    def add_expr(self, key, expr):
        """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0
        otherwise.
        """
        if expr:
            self.add(key, 1.0)
        else:
            self.add(key, 0.0)

    def add_number(self, key, number1, number2):
        """Adds a distance penalty of 1.0 for each number of difference between
        `number1` and `number2`, or 0.0 when there is no difference. Use this
        when there is no upper limit on the difference between the two numbers.
        """
        diff = abs(number1 - number2)
        if diff:
            for _ in range(diff):
                self.add(key, 1.0)
        else:
            self.add(key, 0.0)

    def add_priority(self, key, value, options):
        """Adds a distance penalty that corresponds to the position at which
        `value` appears in `options`. A distance penalty of 0.0 for the first
        option, or 1.0 if there is no matching option. If an option is a
        compiled regular expression, it will be considered equal if it matches
        against `value`.
        """
        if not isinstance(options, (list, tuple)):
            options = [options]
        # Each position further down the list costs one `unit`; the `or 1`
        # avoids dividing by zero for an empty list.
        unit = 1.0 / (len(options) or 1)
        for i, opt in enumerate(options):
            if self._eq(opt, value):
                dist = i * unit
                break
        else:
            dist = 1.0
        self.add(key, dist)

    def add_ratio(self, key, number1, number2):
        """Adds a distance penalty for `number1` as a ratio of `number2`.
        `number1` is bound at 0 and `number2`. Adds 0.0 when `number2` is
        zero.
        """
        number = float(max(min(number1, number2), 0))
        if number2:
            dist = number / number2
        else:
            dist = 0.0
        self.add(key, dist)

    def add_string(self, key, str1, str2):
        """Adds a distance penalty based on the edit distance between `str1`
        and `str2`, as computed by `string_dist`.
        """
        dist = string_dist(str1, str2)
        self.add(key, dist)

    @property
    def distance(self):
        """Returns a weighted and normalised distance across all penalties,
        or 0.0 when the maximum distance is zero.
        """
        dist_max = self.max_distance
        if dist_max:
            return self.raw_distance / dist_max
        return 0.0

    @property
    def max_distance(self):
        """Returns the maximum distance penalty: the weight of each key
        multiplied by the number of penalties added for it.
        """
        dist_max = 0.0
        for key, penalty in self._penalties.items():
            dist_max += len(penalty) * weights[key].as_number()
        return dist_max

    @property
    def raw_distance(self):
        """Returns the raw (denormalised) distance: the sum of each key's
        penalties multiplied by its weight.
        """
        dist_raw = 0.0
        for key, penalty in self._penalties.items():
            dist_raw += sum(penalty) * weights[key].as_number()
        return dist_raw

    @property
    def sorted(self):
        """Returns a list of (dist, key) pairs, with `dist` being the weighted
        distance, sorted from highest to lowest. Does not include penalties
        with a zero value.
        """
        list_ = []
        for key in self._penalties:
            dist = self[key]
            if dist:
                list_.append((dist, key))
        # Negate the distance so that an ascending sort puts the biggest
        # distance first while keys still sort ascending when the penalty is
        # equal.
        return sorted(list_, key=lambda pair: (0 - pair[0], pair[1]))

    def update(self, dist):
        """Adds all the distance penalties from `dist`, which must be a
        Distance object. Raises ValueError otherwise.
        """
        if not isinstance(dist, Distance):
            raise ValueError(
                '`dist` must be a Distance object. It is: %r' % dist)
        for key, penalties in dist._penalties.items():
            self._penalties.setdefault(key, []).extend(penalties)
|
||||
|
||||
def track_distance(item, track_info, incl_artist=False):
|
||||
"""Determines the significance of a track metadata change. Returns a
|
||||
float in [0.0,1.0]. `incl_artist` indicates that a distance
|
||||
component should be included for the track artist (i.e., for
|
||||
various-artist releases).
|
||||
Distance object. `incl_artist` indicates that a distance component should
|
||||
be included for the track artist (i.e., for various-artist releases).
|
||||
"""
|
||||
# Distance and normalization accumulators.
|
||||
dist, dist_max = 0.0, 0.0
|
||||
dist = Distance()
|
||||
|
||||
# Check track length.
|
||||
# If there's no length to check, apply no penalty.
|
||||
# Length.
|
||||
if track_info.length:
|
||||
diff = abs(item.length - track_info.length)
|
||||
diff = max(diff - weights['track_length_grace'].as_number(), 0.0)
|
||||
diff = min(diff, weights['track_length_max'].as_number())
|
||||
dist += (diff / weights['track_length_max'].as_number()) * \
|
||||
weights['track_length'].as_number()
|
||||
dist_max += weights['track_length'].as_number()
|
||||
diff = abs(item.length - track_info.length) - \
|
||||
weights['track_length_grace'].as_number()
|
||||
dist.add_ratio('track_length', diff,
|
||||
weights['track_length_max'].as_number())
|
||||
|
||||
# Track title.
|
||||
dist += string_dist(item.title, track_info.title) * \
|
||||
weights['track_title'].as_number()
|
||||
dist_max += weights['track_title'].as_number()
|
||||
# Title.
|
||||
dist.add_string('track_title', item.title, track_info.title)
|
||||
|
||||
# Track artist, if included.
|
||||
# Attention: MB DB does not have artist info for all compilations,
|
||||
# so only check artist distance if there is actually an artist in
|
||||
# the MB track data.
|
||||
# Artist. Only check if there is actually an artist in the track data.
|
||||
if incl_artist and track_info.artist and \
|
||||
item.artist.lower() not in VA_ARTISTS:
|
||||
dist += string_dist(item.artist, track_info.artist) * \
|
||||
weights['track_artist'].as_number()
|
||||
dist_max += weights['track_artist'].as_number()
|
||||
dist.add_string('track_artist', item.artist, track_info.artist)
|
||||
|
||||
# Track index.
|
||||
if track_info.index and item.track:
|
||||
if track_index_changed(item, track_info):
|
||||
dist += weights['track_index'].as_number()
|
||||
dist_max += weights['track_index'].as_number()
|
||||
dist.add_expr('track_index', track_index_changed(item, track_info))
|
||||
|
||||
# MusicBrainz track ID.
|
||||
# Track ID.
|
||||
if item.mb_trackid:
|
||||
if item.mb_trackid != track_info.track_id:
|
||||
dist += weights['track_id'].as_number()
|
||||
dist_max += weights['track_id'].as_number()
|
||||
dist.add_expr('track_id', item.mb_trackid != track_info.track_id)
|
||||
|
||||
# Plugin distances.
|
||||
plugin_d, plugin_dm = plugins.track_distance(item, track_info)
|
||||
dist += plugin_d
|
||||
dist_max += plugin_dm
|
||||
# Plugins.
|
||||
dist.update(plugins.track_distance(item, track_info))
|
||||
|
||||
return dist / dist_max
|
||||
return dist
|
||||
|
||||
def distance(items, album_info, mapping):
|
||||
"""Determines how "significant" an album metadata change would be.
|
||||
Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object
|
||||
Returns a Distance object. `album_info` is an AlbumInfo object
|
||||
reflecting the album to be compared. `items` is a sequence of all
|
||||
Item objects that will be matched (order is not important).
|
||||
`mapping` is a dictionary mapping Items to TrackInfo objects; the
|
||||
|
|
@ -251,97 +410,97 @@ def distance(items, album_info, mapping):
|
|||
"""
|
||||
likelies, _ = current_metadata(items)
|
||||
|
||||
# These accumulate the possible distance components. The final
|
||||
# distance will be dist/dist_max.
|
||||
dist = 0.0
|
||||
dist_max = 0.0
|
||||
dist = Distance()
|
||||
|
||||
# Artist/album metadata.
|
||||
# Artist, if not various.
|
||||
if not album_info.va:
|
||||
dist += string_dist(likelies['artist'], album_info.artist) * \
|
||||
weights['artist'].as_number()
|
||||
dist_max += weights['artist'].as_number()
|
||||
dist += string_dist(likelies['album'], album_info.album) * \
|
||||
weights['album'].as_number()
|
||||
dist_max += weights['album'].as_number()
|
||||
dist.add_string('artist', likelies['artist'], album_info.artist)
|
||||
|
||||
# Year. No penalty for matching release or original year.
|
||||
if likelies['year'] and album_info.year:
|
||||
if likelies['year'] not in (album_info.year, album_info.original_year):
|
||||
diff = abs(album_info.year - likelies['year'])
|
||||
if diff:
|
||||
dist += (1.0 - 1.0 / diff) * weights['year'].as_number()
|
||||
dist_max += weights['year'].as_number()
|
||||
# Album.
|
||||
dist.add_string('album', likelies['album'], album_info.album)
|
||||
|
||||
# Actual or preferred media.
|
||||
if album_info.media:
|
||||
compare_media = likelies['media'] or \
|
||||
config['match']['preferred_media'].get()
|
||||
if compare_media and compare_media.lower() != album_info.media.lower():
|
||||
dist += weights['media'].as_number()
|
||||
dist_max += weights['media'].as_number()
|
||||
# Preferred media.
|
||||
patterns = config['match']['preferred']['media'].as_str_seq()
|
||||
options = [re.compile(r'(\d+x)?(%s)' % pat, re.I) for pat in patterns]
|
||||
if album_info.media and options:
|
||||
dist.add_priority('media', album_info.media, options)
|
||||
# Media.
|
||||
elif likelies['media'] and album_info.media:
|
||||
dist.add_string('media', likelies['media'], album_info.media)
|
||||
|
||||
# MusicBrainz album ID.
|
||||
if likelies['mb_albumid']:
|
||||
if likelies['mb_albumid'] != album_info.album_id:
|
||||
dist += weights['album_id'].as_number()
|
||||
dist_max += weights['album_id'].as_number()
|
||||
# Mediums.
|
||||
if likelies['disctotal'] and album_info.mediums:
|
||||
dist.add_number('mediums', likelies['disctotal'], album_info.mediums)
|
||||
|
||||
# Apply a small penalty for differences across many minor metadata. This
|
||||
# helps prioritise releases that are nearly identical.
|
||||
# Prefer earliest release.
|
||||
if album_info.year and config['match']['preferred']['original_year']:
|
||||
# Assume 1889 (earliest first gramophone discs) if we don't know the
|
||||
# original year.
|
||||
original = album_info.original_year or 1889
|
||||
diff = abs(album_info.year - original)
|
||||
diff_max = abs(datetime.date.today().year - original)
|
||||
dist.add_ratio('year', diff, diff_max)
|
||||
# Year.
|
||||
elif likelies['year'] and album_info.year:
|
||||
if likelies['year'] in (album_info.year, album_info.original_year):
|
||||
# No penalty for matching release or original year.
|
||||
dist.add('year', 0.0)
|
||||
elif album_info.original_year:
|
||||
# Prefer matches closest to the release year.
|
||||
diff = abs(likelies['year'] - album_info.year)
|
||||
diff_max = abs(datetime.date.today().year -
|
||||
album_info.original_year)
|
||||
dist.add_ratio('year', diff, diff_max)
|
||||
else:
|
||||
# Full penalty when there is no original year.
|
||||
dist.add('year', 1.0)
|
||||
|
||||
if likelies['disctotal']:
|
||||
if likelies['disctotal'] != album_info.mediums:
|
||||
dist += weights['minor'].as_number()
|
||||
dist_max += weights['minor'].as_number()
|
||||
# Preferred countries.
|
||||
patterns = config['match']['preferred']['countries'].as_str_seq()
|
||||
options = [re.compile(pat, re.I) for pat in patterns]
|
||||
if album_info.country and options:
|
||||
dist.add_priority('country', album_info.country, options)
|
||||
# Country.
|
||||
elif likelies['country'] and album_info.country:
|
||||
dist.add_string('country', likelies['country'], album_info.country)
|
||||
|
||||
# Label.
|
||||
if likelies['label'] and album_info.label:
|
||||
dist += string_dist(likelies['label'], album_info.label) * \
|
||||
weights['minor'].as_number()
|
||||
dist_max += weights['minor'].as_number()
|
||||
dist.add_string('label', likelies['label'], album_info.label)
|
||||
|
||||
# Catalog number.
|
||||
if likelies['catalognum'] and album_info.catalognum:
|
||||
dist += string_dist(likelies['catalognum'],
|
||||
album_info.catalognum) * \
|
||||
weights['minor'].as_number()
|
||||
dist_max += weights['minor'].as_number()
|
||||
|
||||
if likelies['country'] and album_info.country:
|
||||
dist += string_dist(likelies['country'],
|
||||
album_info.country) * \
|
||||
weights['minor'].as_number()
|
||||
dist_max += weights['minor'].as_number()
|
||||
dist.add_string('catalognum', likelies['catalognum'],
|
||||
album_info.catalognum)
|
||||
|
||||
# Disambiguation.
|
||||
if likelies['albumdisambig'] and album_info.albumdisambig:
|
||||
dist += string_dist(likelies['albumdisambig'],
|
||||
album_info.albumdisambig) * \
|
||||
weights['minor'].as_number()
|
||||
dist_max += weights['minor'].as_number()
|
||||
dist.add_string('albumdisambig', likelies['albumdisambig'],
|
||||
album_info.albumdisambig)
|
||||
|
||||
# Matched track distances.
|
||||
# Album ID.
|
||||
if likelies['mb_albumid']:
|
||||
dist.add_equality('album_id', likelies['mb_albumid'],
|
||||
album_info.album_id)
|
||||
|
||||
# Tracks.
|
||||
dist.tracks = {}
|
||||
for item, track in mapping.iteritems():
|
||||
dist += track_distance(item, track, album_info.va) * \
|
||||
weights['track'].as_number()
|
||||
dist_max += weights['track'].as_number()
|
||||
dist.tracks[track] = track_distance(item, track, album_info.va)
|
||||
dist.add('tracks', dist.tracks[track].distance)
|
||||
|
||||
# Extra and unmatched tracks.
|
||||
for track in set(album_info.tracks) - set(mapping.values()):
|
||||
dist += weights['missing'].as_number()
|
||||
dist_max += weights['missing'].as_number()
|
||||
for item in set(items) - set(mapping.keys()):
|
||||
dist += weights['unmatched'].as_number()
|
||||
dist_max += weights['unmatched'].as_number()
|
||||
# Missing tracks.
|
||||
for i in range(len(album_info.tracks) - len(mapping)):
|
||||
dist.add('missing_tracks', 1.0)
|
||||
|
||||
# Plugin distances.
|
||||
plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping)
|
||||
dist += plugin_d
|
||||
dist_max += plugin_dm
|
||||
# Unmatched tracks.
|
||||
for i in range(len(items) - len(mapping)):
|
||||
dist.add('unmatched_tracks', 1.0)
|
||||
|
||||
# Normalize distance, avoiding divide-by-zero.
|
||||
if dist_max == 0.0:
|
||||
return 0.0
|
||||
else:
|
||||
return dist / dist_max
|
||||
# Plugins.
|
||||
dist.update(plugins.album_distance(items, album_info, mapping))
|
||||
|
||||
return dist
|
||||
|
||||
def match_by_id(items):
|
||||
"""If the items are tagged with a MusicBrainz album ID, returns an
|
||||
|
|
@ -367,8 +526,8 @@ def _recommendation(results):
|
|||
recommendation based on the results' distances.
|
||||
|
||||
If the recommendation is higher than the configured maximum for
|
||||
certain situations, the recommendation will be downgraded to the
|
||||
configured maximum.
|
||||
an applied penalty, the recommendation will be downgraded to the
|
||||
configured maximum for that penalty.
|
||||
"""
|
||||
if not results:
|
||||
# No candidates: no recommendation.
|
||||
|
|
@ -390,45 +549,23 @@ def _recommendation(results):
|
|||
# Gap between first two candidates is large.
|
||||
rec = recommendation.low
|
||||
else:
|
||||
# No conclusion.
|
||||
rec = recommendation.none
|
||||
# No conclusion. Return immediately. Can't be downgraded any further.
|
||||
return recommendation.none
|
||||
|
||||
# "Downgrades" in certain configured situations.
|
||||
# Downgrade to the max rec if it is lower than the current rec for an
|
||||
# applied penalty.
|
||||
keys = set(key for _, key in min_dist)
|
||||
if isinstance(results[0], hooks.AlbumMatch):
|
||||
# Load the configured recommendation maxima.
|
||||
max_rec = {}
|
||||
for trigger in 'non_mb_source', 'partial', 'tracklength', 'tracknumber':
|
||||
max_rec[trigger] = \
|
||||
config['match']['max_rec'][trigger].as_choice({
|
||||
'strong': recommendation.strong,
|
||||
'medium': recommendation.medium,
|
||||
'low': recommendation.low,
|
||||
'none': recommendation.none,
|
||||
})
|
||||
|
||||
# Non-MusicBrainz source.
|
||||
if rec > max_rec['non_mb_source'] and \
|
||||
results[0].info.data_source != 'MusicBrainz':
|
||||
rec = max_rec['non_mb_source']
|
||||
|
||||
# Partial match.
|
||||
if rec > max_rec['partial'] and \
|
||||
(results[0].extra_items or results[0].extra_tracks):
|
||||
rec = max_rec['partial']
|
||||
|
||||
# Check track number and duration for each item.
|
||||
for item, track_info in results[0].mapping.items():
|
||||
# Track length differs.
|
||||
if rec > max_rec['tracklength'] and \
|
||||
item.length and track_info.length and \
|
||||
abs(item.length - track_info.length) > \
|
||||
weights['track_length_grace'].as_number():
|
||||
rec = max_rec['tracklength']
|
||||
|
||||
# Track number differs.
|
||||
if rec > max_rec['tracknumber'] and \
|
||||
track_index_changed(item, track_info):
|
||||
rec = max_rec['tracknumber']
|
||||
for track_dist in min_dist.tracks.values():
|
||||
keys.update(key for _, key in track_dist)
|
||||
for key in keys:
|
||||
max_rec = config['match']['max_rec'][key].as_choice({
|
||||
'strong': recommendation.strong,
|
||||
'medium': recommendation.medium,
|
||||
'low': recommendation.low,
|
||||
'none': recommendation.none,
|
||||
})
|
||||
rec = min(rec, max_rec)
|
||||
|
||||
return rec
|
||||
|
||||
|
|
@ -450,8 +587,15 @@ def _add_candidate(items, results, info):
|
|||
|
||||
# Get the change distance.
|
||||
dist = distance(items, info, mapping)
|
||||
log.debug('Success. Distance: %f' % dist)
|
||||
|
||||
# Skip matches with ignored penalties.
|
||||
penalties = [key for _, key in dist]
|
||||
for penalty in config['match']['ignored'].as_str_seq():
|
||||
if penalty in penalties:
|
||||
log.debug('Ignored. Penalty: %s' % penalty)
|
||||
return
|
||||
|
||||
log.debug('Success. Distance: %f' % dist)
|
||||
results[info.album_id] = hooks.AlbumMatch(dist, info, mapping,
|
||||
extra_items, extra_tracks)
|
||||
|
||||
|
|
@ -462,7 +606,7 @@ def tag_album(items, search_artist=None, search_album=None,
|
|||
- The current artist.
|
||||
- The current album.
|
||||
- A list of AlbumMatch objects. The candidates are sorted by
|
||||
distance (i.e., best match first).
|
||||
distance (i.e., best match first).
|
||||
- A recommendation.
|
||||
If search_artist and search_album or search_id are provided, then
|
||||
they are used as search terms in place of the current metadata.
|
||||
|
|
|
|||
|
|
@ -68,22 +68,42 @@ match:
|
|||
medium_rec_thresh: 0.25
|
||||
rec_gap_thresh: 0.25
|
||||
max_rec:
|
||||
non_mb_source: strong
|
||||
partial: medium
|
||||
tracklength: strong
|
||||
tracknumber: strong
|
||||
preferred_media: null
|
||||
weight:
|
||||
source: strong
|
||||
artist: strong
|
||||
album: strong
|
||||
media: strong
|
||||
mediums: strong
|
||||
year: strong
|
||||
country: strong
|
||||
label: strong
|
||||
catalognum: strong
|
||||
albumdisambig: strong
|
||||
album_id: strong
|
||||
tracks: strong
|
||||
missing_tracks: medium
|
||||
unmatched_tracks: medium
|
||||
track_title: strong
|
||||
track_artist: strong
|
||||
track_index: strong
|
||||
track_length_grace: strong
|
||||
track_length_max: strong
|
||||
track_length: strong
|
||||
track_id: strong
|
||||
distance_weights:
|
||||
source: 2.0
|
||||
artist: 3.0
|
||||
album: 3.0
|
||||
year: 1.0
|
||||
media: 1.0
|
||||
mediums: 1.0
|
||||
year: 1.0
|
||||
country: 0.5
|
||||
label: 0.5
|
||||
catalognum: 0.5
|
||||
albumdisambig: 0.5
|
||||
album_id: 5.0
|
||||
minor: 0.5
|
||||
track: 1.0
|
||||
missing: 0.9
|
||||
unmatched: 0.6
|
||||
tracks: 2.0
|
||||
missing_tracks: 0.9
|
||||
unmatched_tracks: 0.6
|
||||
track_title: 3.0
|
||||
track_artist: 2.0
|
||||
track_index: 1.0
|
||||
|
|
@ -91,3 +111,8 @@ match:
|
|||
track_length_max: 30
|
||||
track_length: 2.0
|
||||
track_id: 5.0
|
||||
preferred:
|
||||
countries: []
|
||||
media: []
|
||||
original_year: no
|
||||
ignored: []
|
||||
|
|
|
|||
|
|
@ -64,16 +64,16 @@ class BeetsPlugin(object):
|
|||
return {}
|
||||
|
||||
def track_distance(self, item, info):
|
||||
"""Should return a (distance, distance_max) pair to be added
|
||||
to the distance value for every track comparison.
|
||||
"""Should return a Distance object to be added to the
|
||||
distance for every track comparison.
|
||||
"""
|
||||
return 0.0, 0.0
|
||||
return beets.autotag.match.Distance()
|
||||
|
||||
def album_distance(self, items, album_info, mapping):
|
||||
"""Should return a (distance, distance_max) pair to be added
|
||||
to the distance value for every album-level comparison.
|
||||
"""Should return a Distance object to be added to the
|
||||
distance for every album-level comparison.
|
||||
"""
|
||||
return 0.0, 0.0
|
||||
return beets.autotag.match.Distance()
|
||||
|
||||
def candidates(self, items, artist, album, va_likely):
|
||||
"""Should return a sequence of AlbumInfo objects that match the
|
||||
|
|
@ -242,25 +242,19 @@ def queries():
|
|||
|
||||
def track_distance(item, info):
|
||||
"""Gets the track distance calculated by all loaded plugins.
|
||||
Returns a (distance, distance_max) pair.
|
||||
Returns a Distance object.
|
||||
"""
|
||||
dist = 0.0
|
||||
dist_max = 0.0
|
||||
dist = beets.autotag.match.Distance()
|
||||
for plugin in find_plugins():
|
||||
d, dm = plugin.track_distance(item, info)
|
||||
dist += d
|
||||
dist_max += dm
|
||||
return dist, dist_max
|
||||
dist.update(plugin.track_distance(item, info))
|
||||
return dist
|
||||
|
||||
def album_distance(items, album_info, mapping):
|
||||
"""Returns the album distance calculated by plugins."""
|
||||
dist = 0.0
|
||||
dist_max = 0.0
|
||||
dist = beets.autotag.match.Distance()
|
||||
for plugin in find_plugins():
|
||||
d, dm = plugin.album_distance(items, album_info, mapping)
|
||||
dist += d
|
||||
dist_max += dm
|
||||
return dist, dist_max
|
||||
dist.update(plugin.album_distance(items, album_info, mapping))
|
||||
return dist
|
||||
|
||||
def candidates(items, artist, album, va_likely):
|
||||
"""Gets MusicBrainz candidates for an album from each plugin.
|
||||
|
|
|
|||
|
|
@ -366,7 +366,7 @@ def colorize(color, text):
|
|||
else:
|
||||
return text
|
||||
|
||||
def _colordiff(a, b, highlight='red'):
|
||||
def _colordiff(a, b, highlight='red', second_highlight='lightgray'):
|
||||
"""Given two values, return the same pair of strings except with
|
||||
their differences highlighted in the specified color. Strings are
|
||||
highlighted intelligently to show differences; other values are
|
||||
|
|
@ -402,9 +402,14 @@ def _colordiff(a, b, highlight='red'):
|
|||
# Left only.
|
||||
a_out.append(colorize(highlight, a[a_start:a_end]))
|
||||
elif op == 'replace':
|
||||
# Right and left differ.
|
||||
a_out.append(colorize(highlight, a[a_start:a_end]))
|
||||
b_out.append(colorize(highlight, b[b_start:b_end]))
|
||||
# Right and left differ. Colorise with second highlight if
|
||||
# it's just a case change.
|
||||
if a[a_start:a_end].lower() != b[b_start:b_end].lower():
|
||||
color = highlight
|
||||
else:
|
||||
color = second_highlight
|
||||
a_out.append(colorize(color, a[a_start:a_end]))
|
||||
b_out.append(colorize(color, b[b_start:b_end]))
|
||||
else:
|
||||
assert(False)
|
||||
|
||||
|
|
|
|||
|
|
@ -125,14 +125,14 @@ default_commands.append(fields_cmd)
|
|||
|
||||
VARIOUS_ARTISTS = u'Various Artists'
|
||||
|
||||
PARTIAL_MATCH_MESSAGE = u'(partial match!)'
|
||||
|
||||
# Importer utilities and support.
|
||||
|
||||
def disambig_string(info):
|
||||
"""Returns label, year and media disambiguation, if available.
|
||||
"""Returns source, media, year, country, label and album disambiguation.
|
||||
"""
|
||||
disambig = []
|
||||
if info.data_source != 'MusicBrainz':
|
||||
disambig.append(info.data_source)
|
||||
if info.media:
|
||||
if info.mediums > 1:
|
||||
disambig.append(u'{0}x{1}'.format(
|
||||
|
|
@ -163,26 +163,34 @@ def dist_string(dist):
|
|||
out = ui.colorize('red', out)
|
||||
return out
|
||||
|
||||
def penalty_string(distance, limit=None):
|
||||
"""Returns a colorized string that indicates all the penalties applied to
|
||||
a distance object.
|
||||
"""
|
||||
penalties = []
|
||||
for _, key in distance:
|
||||
key = key.replace('album_', '')
|
||||
key = key.replace('track_', '')
|
||||
key = key.replace('_', ' ')
|
||||
penalties.append(key)
|
||||
if penalties:
|
||||
if limit and len(penalties) > limit:
|
||||
penalties = penalties[:limit] + ['...']
|
||||
return ui.colorize('yellow', '(%s)' % ', '.join(penalties))
|
||||
|
||||
def show_change(cur_artist, cur_album, match):
|
||||
"""Print out a representation of the changes that will be made if an
|
||||
album's tags are changed according to `match`, which must be an AlbumMatch
|
||||
object.
|
||||
"""
|
||||
def show_album(artist, album, partial=False):
|
||||
def show_album(artist, album):
|
||||
if artist:
|
||||
album_description = u' %s - %s' % (artist, album)
|
||||
elif album:
|
||||
album_description = u' %s' % album
|
||||
else:
|
||||
album_description = u' (unknown album)'
|
||||
|
||||
out = album_description
|
||||
|
||||
# Add a suffix if this is a partial match.
|
||||
if partial:
|
||||
out += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE)
|
||||
|
||||
print_(out)
|
||||
print_(album_description)
|
||||
|
||||
def format_index(track_info):
|
||||
"""Return a string representing the track index of the given
|
||||
|
|
@ -223,11 +231,7 @@ def show_change(cur_artist, cur_album, match):
|
|||
print_("To:")
|
||||
show_album(artist_r, album_r)
|
||||
else:
|
||||
message = u"Tagging:\n %s - %s" % (match.info.artist,
|
||||
match.info.album)
|
||||
if match.extra_items or match.extra_tracks:
|
||||
message += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE)
|
||||
print_(message)
|
||||
print_(u"Tagging:\n %s - %s" % (match.info.artist, match.info.album))
|
||||
|
||||
# Data URL.
|
||||
if match.info.data_url:
|
||||
|
|
@ -235,9 +239,13 @@ def show_change(cur_artist, cur_album, match):
|
|||
|
||||
# Info line.
|
||||
info = []
|
||||
# Similarity.
|
||||
info.append('(Similarity: %s)' % dist_string(match.distance))
|
||||
if match.info.data_source != 'MusicBrainz':
|
||||
info.append(ui.colorize('turquoise', '(%s)' % match.info.data_source))
|
||||
# Penalties.
|
||||
penalties = penalty_string(match.distance)
|
||||
if penalties:
|
||||
info.append(penalties)
|
||||
# Disambiguation.
|
||||
disambig = disambig_string(match.info)
|
||||
if disambig:
|
||||
info.append(ui.colorize('lightgray', '(%s)' % disambig))
|
||||
|
|
@ -285,7 +293,7 @@ def show_change(cur_artist, cur_album, match):
|
|||
cur_track, new_track = format_index(item), format_index(track_info)
|
||||
if cur_track != new_track:
|
||||
if item.track in (track_info.index, track_info.medium_index):
|
||||
color = 'yellow'
|
||||
color = 'lightgray'
|
||||
else:
|
||||
color = 'red'
|
||||
if (cur_track + new_track).count('-') == 1:
|
||||
|
|
@ -315,18 +323,10 @@ def show_change(cur_artist, cur_album, match):
|
|||
rhs += templ.format(rhs_length)
|
||||
lhs_width += len(cur_length) + 3
|
||||
|
||||
# Hidden penalties. No LHS/RHS diff is displayed, but we still want to
|
||||
# indicate that a penalty has been applied to explain the similarity
|
||||
# score.
|
||||
penalties = []
|
||||
if match.info.va and track_info.artist and \
|
||||
item.artist.lower() not in VA_ARTISTS:
|
||||
penalties.append('artist')
|
||||
if item.mb_trackid and item.mb_trackid != track_info.track_id:
|
||||
penalties.append('ID')
|
||||
# Penalties.
|
||||
penalties = penalty_string(match.distance.tracks[track_info])
|
||||
if penalties:
|
||||
rhs += ' %s' % ui.colorize('red',
|
||||
'(%s)' % ', '.join(penalties))
|
||||
rhs += ' %s' % penalties
|
||||
|
||||
if lhs != rhs:
|
||||
lines.append((' * %s' % lhs, rhs, lhs_width))
|
||||
|
|
@ -489,20 +489,17 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None,
|
|||
(cur_artist, cur_album))
|
||||
print_('Candidates:')
|
||||
for i, match in enumerate(candidates):
|
||||
# Artist, album and distance.
|
||||
line = ['%i. %s - %s (%s)' % (i + 1, match.info.artist,
|
||||
match.info.album,
|
||||
dist_string(match.distance))]
|
||||
|
||||
# Point out the partial matches.
|
||||
if match.extra_items or match.extra_tracks:
|
||||
line.append(ui.colorize('yellow',
|
||||
PARTIAL_MATCH_MESSAGE))
|
||||
|
||||
# Sources other than MusicBrainz.
|
||||
source = match.info.data_source
|
||||
if source != 'MusicBrainz':
|
||||
line.append(ui.colorize('turquoise', '(%s)' % source))
|
||||
# Penalties.
|
||||
penalties = penalty_string(match.distance, 3)
|
||||
if penalties:
|
||||
line.append(penalties)
|
||||
|
||||
# Disambiguation
|
||||
disambig = disambig_string(match.info)
|
||||
if disambig:
|
||||
line.append(ui.colorize('lightgray', '(%s)' % disambig))
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from beets import util
|
|||
from beets import config
|
||||
from beets.util import confit
|
||||
from beets.autotag import hooks
|
||||
from beets.autotag.match import Distance
|
||||
import acoustid
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
|
|
@ -113,16 +114,14 @@ def _all_releases(items):
|
|||
|
||||
class AcoustidPlugin(plugins.BeetsPlugin):
|
||||
def track_distance(self, item, info):
|
||||
dist = Distance()
|
||||
if item.path not in _matches or not info.track_id:
|
||||
# Match failed or no track ID.
|
||||
return 0.0, 0.0
|
||||
return dist
|
||||
|
||||
recording_ids, _ = _matches[item.path]
|
||||
if info.track_id in recording_ids:
|
||||
dist = 0.0
|
||||
else:
|
||||
dist = TRACK_ID_WEIGHT
|
||||
return dist, TRACK_ID_WEIGHT
|
||||
dist.add_expr('track_id', info.track_id not in recording_ids)
|
||||
return dist
|
||||
|
||||
def candidates(self, items, artist, album, va_likely):
|
||||
albums = []
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ discogs-client library.
|
|||
"""
|
||||
from beets import config
|
||||
from beets.autotag.hooks import AlbumInfo, TrackInfo
|
||||
from beets.autotag.match import current_metadata, VA_ARTISTS
|
||||
from beets.autotag.match import current_metadata, Distance, VA_ARTISTS
|
||||
from beets.plugins import BeetsPlugin
|
||||
from discogs_client import Artist, DiscogsAPIError, Release, Search
|
||||
import beets
|
||||
|
|
@ -44,14 +44,12 @@ class DiscogsPlugin(BeetsPlugin):
|
|||
})
|
||||
|
||||
def album_distance(self, items, album_info, mapping):
|
||||
"""Returns the discogs source weight and the maximum source weight.
|
||||
"""Returns the album distance.
|
||||
"""
|
||||
dist = Distance()
|
||||
if album_info.data_source == 'Discogs':
|
||||
return self.config['source_weight'].as_number() * \
|
||||
config['match']['weight']['source'].as_number(), \
|
||||
config['match']['weight']['source'].as_number()
|
||||
else:
|
||||
return 0.0, 0.0
|
||||
dist.add('source', self.config['source_weight'].as_number())
|
||||
return dist
|
||||
|
||||
def candidates(self, items, artist, album, va_likely):
|
||||
"""Returns a list of AlbumInfo objects for discogs search results
|
||||
|
|
|
|||
|
|
@ -49,22 +49,29 @@ There are also three more big features added to beets core:
|
|||
|
||||
In addition, the importer saw various UI enhancements, thanks to Tai Lee:
|
||||
|
||||
* More consistent format and colorization of album and track metadata.
|
||||
* Display data source URL for matches from the new data source plugins. This
|
||||
should make it easier to migrate data from Discogs or Beatport into
|
||||
MusicBrainz.
|
||||
* The top 3 distance penalties are now displayed on the release listing,
|
||||
and all album and track penalties are now displayed on the track changes
|
||||
list. This should make it clear exactly which metadata is contributing to a
|
||||
low similarity score.
|
||||
* Display album disambiguation and disc titles in the track listing, when
|
||||
available.
|
||||
* More consistent format and colorization of album and track metadata. Red
|
||||
for an actual difference, yellow to indicate that a distance penalty is being
|
||||
applied, and light gray for no-penalty or disambiguation data.
|
||||
* Track changes are highlighted in yellow when they indicate a change in
|
||||
format to or from the style of :ref:`per_disc_numbering`. (As before, no
|
||||
penalty is applied because the track number is still "correct", just in a
|
||||
different format.)
|
||||
* Sort missing and unmatched tracks by index and title and group them
|
||||
together for better readability.
|
||||
* Indicate MusicBrainz ID mismatches.
|
||||
* Don't show potential matches that have specific penalties applied, as
|
||||
configured by the :ref:`ignored` setting.
|
||||
|
||||
The calculation of the similarity score for autotagger matches was also
|
||||
approved, again thanks to Tai Lee. These changes, in general, help deal with
|
||||
improved, again thanks to Tai Lee. These changes, in general, help deal with
|
||||
the new metadata sources and help disambiguate between similar releases in the
|
||||
same MusicBrainz release group:
|
||||
|
||||
|
|
@ -72,8 +79,12 @@ same MusicBrainz release group:
|
|||
beets re-identify the same release when re-importing existing files.
|
||||
* Prefer releases that are closest to the tagged ``year``. Tolerate files
|
||||
tagged with release or original year.
|
||||
* The new :ref:`preferred_media` config option lets you prefer a certain media
|
||||
type when the ``media`` field is unset on an album.
|
||||
* Add a :ref:`preferred` collection of settings, which allow the user to
|
||||
specify a sorted list of preferred countries and media types, or prefer
|
||||
releases closest to the original year for an album.
|
||||
* It is now possible to configure a :ref:`max_rec` for any field that is used
|
||||
to calculate the similarity score. The recommendation will be downgraded if
|
||||
a penalty is being applied to the specified field.
|
||||
* Apply minor penalties across a range of fields to differentiate between
|
||||
nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
|
||||
``country`` and ``albumdisambig``.
|
||||
|
|
|
|||
|
|
@ -394,43 +394,80 @@ max_rec
|
|||
|
||||
As mentioned above, autotagger matches have *recommendations* that control how
|
||||
the UI behaves for a certain quality of match. The recommendation for a certain
|
||||
match is usually based on the distance calculation. But you can also control
|
||||
the recommendation for certain specific situations by defining *maximum*
|
||||
recommendations when:
|
||||
match is based on the overall distance calculation. But you can also control
|
||||
the recommendation when a distance penalty is being applied for a specific
|
||||
field by defining *maximum* recommendations for each field:
|
||||
|
||||
* a match came from a source other than MusicBrainz (e.g., the
|
||||
:doc:`Discogs </plugins/discogs>` plugin);
|
||||
* a match has missing or extra tracks;
|
||||
* the length (duration) of at least one track differs; or
|
||||
* at least one track number differs.
|
||||
|
||||
To define maxima, use keys under ``max_rec:`` in the ``match`` section::
|
||||
To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here
|
||||
are the defaults::
|
||||
|
||||
match:
|
||||
max_rec:
|
||||
non_mb_source: strong
|
||||
partial: medium
|
||||
tracklength: strong
|
||||
tracknumber: strong
|
||||
source: strong
|
||||
artist: strong
|
||||
album: strong
|
||||
media: strong
|
||||
mediums: strong
|
||||
year: strong
|
||||
country: strong
|
||||
label: strong
|
||||
catalognum: strong
|
||||
albumdisambig: strong
|
||||
album_id: strong
|
||||
tracks: strong
|
||||
missing_tracks: medium
|
||||
unmatched_tracks: medium
|
||||
track_title: strong
|
||||
track_artist: strong
|
||||
track_index: strong
|
||||
track_length_grace: strong
|
||||
track_length_max: strong
|
||||
track_length: strong
|
||||
track_id: strong
|
||||
|
||||
If a recommendation is higher than the configured maximum and the condition is
|
||||
met, the recommendation will be downgraded. The maximum for each condition can
|
||||
be one of ``none``, ``low``, ``medium`` or ``strong``. When the maximum
|
||||
recommendation is ``strong``, no "downgrading" occurs for that situation.
|
||||
If a recommendation is higher than the configured maximum and a penalty is
|
||||
being applied, the recommendation will be downgraded. The maximum for each
|
||||
field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the
|
||||
maximum recommendation is ``strong``, no "downgrading" occurs.
|
||||
|
||||
The above example shows the default ``max_rec`` settings.
|
||||
.. _preferred:
|
||||
|
||||
.. _preferred_media:
|
||||
preferred
|
||||
~~~~~~~~~
|
||||
|
||||
preferred_media
|
||||
~~~~~~~~~~~~~~~
|
||||
In addition to comparing the tagged metadata with the match metadata for
|
||||
similarity, you can also specify an ordered list of preferred countries and
|
||||
media types.
|
||||
|
||||
When an album has its ``media`` field set, it is compared against matches to
|
||||
prefer releases of the same media type. But this option lets you control what
|
||||
happens when an album *doesn't* have ``media`` set (which is the case for most
|
||||
albums that haven't already been run through a MusicBrainz tagger). Set this
|
||||
option to ``CD``, for example, to prefer CD releases. Defaults to ``null``,
|
||||
indicating no preference.
|
||||
A distance penalty will be applied if the country or media type from the match
|
||||
metadata doesn't match. The order is important: the first item will be most
|
||||
preferred. Each item may be a regular expression, and will be matched case
|
||||
insensitively. The number of media will be stripped when matching preferred
|
||||
media (e.g. "2x" in "2xCD").
|
||||
|
||||
You can also tell the autotagger to prefer matches that have a release year
|
||||
closest to the original year for an album.
|
||||
|
||||
Here's an example::
|
||||
|
||||
match:
|
||||
preferred:
|
||||
countries: ['US', 'GB|UK']
|
||||
media: ['CD', 'Digital Media|File']
|
||||
original_year: yes
|
||||
|
||||
By default, none of these options are enabled.
|
||||
|
||||
.. _ignored:
|
||||
|
||||
ignored
|
||||
~~~~~~~
|
||||
|
||||
You can completely avoid matches that have certain penalties applied by adding
|
||||
the penalty name to the ``ignored`` setting::
|
||||
|
||||
match:
|
||||
ignored: missing_tracks unmatched_tracks
|
||||
|
||||
.. _path-format-config:
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ import _common
|
|||
from _common import unittest
|
||||
from beets import autotag
|
||||
from beets.autotag import match
|
||||
from beets.autotag.match import Distance
|
||||
from beets.library import Item
|
||||
from beets.util import plurality
|
||||
from beets.autotag import AlbumInfo, TrackInfo
|
||||
|
|
@ -105,6 +106,153 @@ def _make_trackinfo():
|
|||
TrackInfo(u'three', None, u'some artist', length=1, index=3),
|
||||
]
|
||||
|
||||
class DistanceTest(unittest.TestCase):
    """Tests for `Distance`: recording penalties under named keys and
    combining them into weighted distances.

    Several tests assign weights in the global
    ``config['match']['distance_weights']`` view before adding penalties.
    NOTE(review): those assignments are not undone here, so they may leak
    into later tests -- confirm whether the suite resets `config`.
    """
    def setUp(self):
        # Every test starts from an empty set of penalties.
        self.dist = Distance()

    def test_add(self):
        """A raw penalty is stored in a list under its key."""
        self.dist.add('add', 1.0)
        self.assertEqual(self.dist._penalties, {'add': [1.0]})

    def test_add_equality(self):
        """A value found among the options scores 0.0, otherwise 1.0. The
        options may also be a compiled regular expression.
        """
        self.dist.add_equality('equality', 'ghi', ['abc', 'def', 'ghi'])
        self.assertEqual(self.dist._penalties['equality'], [0.0])

        self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi'])
        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0])

        # Case-insensitive pattern matches the lowercase value.
        self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
        self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0])

    def test_add_expr(self):
        """A true expression scores 1.0 and a false one scores 0.0."""
        self.dist.add_expr('expr', True)
        self.assertEqual(self.dist._penalties['expr'], [1.0])

        self.dist.add_expr('expr', False)
        self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0])

    def test_add_number(self):
        """One full penalty is recorded per unit of difference between the
        two numbers, regardless of which one is larger.
        """
        # Add a full penalty for each number of difference between two numbers.

        self.dist.add_number('number', 1, 1)
        self.assertEqual(self.dist._penalties['number'], [0.0])

        self.dist.add_number('number', 1, 2)
        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0])

        self.dist.add_number('number', 2, 1)
        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0])

        # A difference of three appends three separate 1.0 penalties.
        self.dist.add_number('number', -1, 2)
        self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0,
                                                          1.0, 1.0])

    def test_add_priority(self):
        """The penalty grows with the position of the first matching option
        and is 1.0 when nothing matches.
        """
        self.dist.add_priority('priority', 'abc', 'abc')
        self.assertEqual(self.dist._penalties['priority'], [0.0])

        # Second of two options.
        self.dist.add_priority('priority', 'def', ['abc', 'def'])
        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5])

        # Fourth of four options, matched through a compiled pattern.
        self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef',
                                                  re.compile('GH', re.I)])
        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75])

        # No option matches at all.
        self.dist.add_priority('priority', 'xyz', ['abc', 'def'])
        self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75,
                                                            1.0])

    def test_add_ratio(self):
        """The ratio of the two numbers is recorded, limited to the range
        0.0-1.0, and a zero denominator yields 0.0.
        """
        self.dist.add_ratio('ratio', 25, 100)
        self.assertEqual(self.dist._penalties['ratio'], [0.25])

        # Ratios above one are capped.
        self.dist.add_ratio('ratio', 10, 5)
        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0])

        # Negative ratios are raised to zero.
        self.dist.add_ratio('ratio', -5, 5)
        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0])

        # Division by zero is avoided.
        self.dist.add_ratio('ratio', 5, 0)
        self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0])

    def test_add_string(self):
        """The recorded penalty equals `match.string_dist` of the strings."""
        dist = match.string_dist(u'abc', u'bcd')
        self.dist.add_string('string', u'abc', u'bcd')
        self.assertEqual(self.dist._penalties['string'], [dist])

    def test_distance(self):
        """The overall distance is the weighted sum of penalties divided by
        the maximum possible weighted sum; indexing by key gives that key's
        share of it.
        """
        config['match']['distance_weights']['album'] = 2.0
        config['match']['distance_weights']['medium'] = 1.0
        self.dist.add('album', 0.5)
        # Penalties go under 'medium', the key whose weight is set above, so
        # the expected values do not depend on any default weight.
        self.dist.add('medium', 0.25)
        self.dist.add('medium', 0.75)
        # (0.5 * 2.0 + (0.25 + 0.75) * 1.0) / (2.0 * 1 + 1.0 * 2) == 0.5
        self.assertEqual(self.dist.distance, 0.5)

        # __getitem__()
        self.assertEqual(self.dist['album'], 0.25)
        self.assertEqual(self.dist['medium'], 0.25)

    def test_max_distance(self):
        """The maximum distance counts each key's weight once per recorded
        penalty, whatever the penalty values are.
        """
        config['match']['distance_weights']['album'] = 3.0
        config['match']['distance_weights']['medium'] = 1.0
        self.dist.add('album', 0.5)
        self.dist.add('medium', 0.0)
        self.dist.add('medium', 0.0)
        # 3.0 * 1 + 1.0 * 2
        self.assertEqual(self.dist.max_distance, 5.0)

    def test_operators(self):
        """Length, iteration, comparison, subtraction and float conversion
        all operate on the weighted distance.
        """
        config['match']['distance_weights']['source'] = 1.0
        config['match']['distance_weights']['album'] = 2.0
        config['match']['distance_weights']['medium'] = 1.0
        self.dist.add('source', 0.0)
        self.dist.add('album', 0.5)
        self.dist.add('medium', 0.25)
        self.dist.add('medium', 0.75)
        # The zero-valued 'source' penalty is left out of len() and iteration.
        self.assertEqual(len(self.dist), 2)
        self.assertEqual(list(self.dist), [(0.2, 'album'), (0.2, 'medium')])
        self.assertTrue(self.dist == 0.4)
        self.assertTrue(self.dist < 1.0)
        self.assertTrue(self.dist > 0.0)
        self.assertEqual(self.dist - 0.4, 0.0)
        self.assertEqual(0.4 - self.dist, 0.0)
        self.assertEqual(float(self.dist), 0.4)

    def test_raw_distance(self):
        """The raw distance is the weighted sum of all penalties."""
        config['match']['distance_weights']['album'] = 3.0
        config['match']['distance_weights']['medium'] = 1.0
        self.dist.add('album', 0.5)
        self.dist.add('medium', 0.25)
        self.dist.add('medium', 0.5)
        # 0.5 * 3.0 + (0.25 + 0.5) * 1.0
        self.assertEqual(self.dist.raw_distance, 2.25)

    def test_sorted(self):
        """Penalties are listed from largest to smallest weighted distance,
        with ties ordered by key.
        """
        config['match']['distance_weights']['album'] = 4.0
        config['match']['distance_weights']['medium'] = 2.0

        self.dist.add('album', 0.1875)
        self.dist.add('medium', 0.75)
        self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')])

        # Sort by key if distance is equal.
        dist = Distance()
        dist.add('album', 0.375)
        dist.add('medium', 0.75)
        self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')])

    def test_update(self):
        """Updating from another `Distance` appends its penalties to the
        existing lists, key by key.
        """
        self.dist.add('album', 0.5)
        self.dist.add('media', 1.0)

        dist = Distance()
        dist.add('album', 0.75)
        dist.add('album', 0.25)
        # Added to the *other* object so that update() has to merge it.
        dist.add('media', 0.05)

        self.dist.update(dist)

        self.assertEqual(self.dist._penalties, {'album': [0.5, 0.75, 0.25],
                                                'media': [1.0, 0.05]})
|
||||
|
||||
class TrackDistanceTest(unittest.TestCase):
|
||||
def test_identical_tracks(self):
|
||||
item = _make_item(u'one', 1)
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ from beets import library
|
|||
from beets import ui
|
||||
from beets.ui import commands
|
||||
from beets import autotag
|
||||
from beets.autotag.match import distance
|
||||
from beets import importer
|
||||
from beets.mediafile import MediaFile
|
||||
from beets import config
|
||||
|
|
@ -594,21 +595,23 @@ class ShowChangeTest(_common.TestCase):
|
|||
self.items[0].track = 1
|
||||
self.items[0].path = '/path/to/file.mp3'
|
||||
self.info = autotag.AlbumInfo(
|
||||
'the album', 'album id', 'the artist', 'artist id', [
|
||||
autotag.TrackInfo('the title', 'track id', index=1)
|
||||
u'the album', u'album id', u'the artist', u'artist id', [
|
||||
autotag.TrackInfo(u'the title', u'track id', index=1)
|
||||
])
|
||||
|
||||
def _show_change(self, items=None, info=None,
|
||||
cur_artist='the artist', cur_album='the album',
|
||||
cur_artist=u'the artist', cur_album=u'the album',
|
||||
dist=0.1):
|
||||
items = items or self.items
|
||||
info = info or self.info
|
||||
mapping = dict(zip(items, info.tracks))
|
||||
config['color'] = False
|
||||
album_dist = distance(items, info, mapping)
|
||||
album_dist._penalties = {'album': [dist]}
|
||||
commands.show_change(
|
||||
cur_artist,
|
||||
cur_album,
|
||||
autotag.AlbumMatch(0.1, info, mapping, set(), set()),
|
||||
autotag.AlbumMatch(album_dist, info, mapping, set(), set()),
|
||||
)
|
||||
return self.io.getoutput().lower()
|
||||
|
||||
|
|
@ -623,7 +626,7 @@ class ShowChangeTest(_common.TestCase):
|
|||
self.assertTrue('correcting tags from:' in msg)
|
||||
|
||||
def test_item_data_change(self):
|
||||
self.items[0].title = 'different'
|
||||
self.items[0].title = u'different'
|
||||
msg = self._show_change()
|
||||
self.assertTrue('different -> the title' in msg)
|
||||
|
||||
|
|
@ -638,12 +641,12 @@ class ShowChangeTest(_common.TestCase):
|
|||
self.assertTrue('correcting tags from:' in msg)
|
||||
|
||||
def test_item_data_change_title_missing(self):
|
||||
self.items[0].title = ''
|
||||
self.items[0].title = u''
|
||||
msg = re.sub(r' +', ' ', self._show_change())
|
||||
self.assertTrue('file.mp3 -> the title' in msg)
|
||||
|
||||
def test_item_data_change_title_missing_with_unicode_filename(self):
|
||||
self.items[0].title = ''
|
||||
self.items[0].title = u''
|
||||
self.items[0].path = u'/path/to/caf\xe9.mp3'.encode('utf8')
|
||||
msg = re.sub(r' +', ' ', self._show_change().decode('utf8'))
|
||||
self.assertTrue(u'caf\xe9.mp3 -> the title' in msg
|
||||
|
|
|
|||
Loading…
Reference in a new issue