Use a Distance object instead of floats for distance calculations.

The new Distance object knows how to perform various types of distance
calculations (expression, equality, number, priority, string).

It will keep track of each individual penalty that has been applied so
that we can utilise that information in the UI and when making decisions
about the recommendation level.

We now display the top 3 penalties (sorted by weight) on the release
list (and "..." if there are more than 3), and we display all penalties
on the album info line and track change line.

The implementation of the `max_rec` setting has been simplified by
removing duplicate validation and instead looking at the penalties that
have been applied to a distance. As a result, we can now configure a
maximum recommendation for any penalty that might be applied.

We have a few new checks when calculating album distance:

`match: preferred: countries` and `match: preferred: media` can each be
set to a list of countries and media in order of your preference. These
are empty by default. A value that matches the first item will have no
penalty, and a value that doesn't match any item will have an unweighted
penalty of 1.0.

If `match: preferred: original_year` is set to "yes", beets will apply
an unweighted penalty of 1.0 for each year of difference between the
release year and the original year.

We now configure individual weights for `mediums` (disctotal), `label`,
`catalognum`, `country` and `albumdisambig` instead of a single generic
`minor` weight. This gives more control, but more importantly separates
and names the applied penalties so that the UI can convey exactly which
fields have contributed to the overall distance penalty.

Likewise, `missing tracks` and `unmatched tracks` are penalised and
displayed in the UI separately, instead of a combined `partial` penalty.

Display non-MusicBrainz source in the disambiguation string, and
"source" in the list of penalties if a release is penalised for coming
from a non-MusicBrainz source.
This commit is contained in:
Tai Lee 2013-06-02 16:33:07 +10:00
parent cff06431cc
commit e6ac8e1646
10 changed files with 553 additions and 280 deletions

View file

@ -30,7 +30,7 @@ from beets.util.enumeration import enum
from beets.autotag import hooks
# A configuration view for the distance weights.
weights = config['match']['weight']
weights = config['match']['distance_weights']
# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma.
@ -187,62 +187,202 @@ def track_index_changed(item, track_info):
"""
return item.track not in (track_info.medium_index, track_info.index)
class Distance(object):
    """Keeps track of multiple distance penalties. Provides a single weighted
    distance for all penalties as well as a weighted distance for each
    individual penalty.

    `penalties` maps each penalty key to the list of raw (unweighted) values
    in [0.0, 1.0] that were added under that key. Weights are looked up in
    the module-level `weights` configuration view only when a weighted
    distance is requested. `tracks` starts out empty; the album-level
    distance calculation stores the per-track Distance objects in it.

    A Distance compares, subtracts and converts to float as its overall
    weighted `distance`.
    """
    # Public way to obtain the compiled-pattern type; avoids relying on the
    # private `re._pattern_type`, which no longer exists on Python 3.7+.
    _PATTERN_TYPE = type(re.compile(''))

    def __init__(self):
        self.penalties = {}
        # Always present, so code that reads `distance.tracks` does not fail
        # on a Distance that was not built by the album-level calculation.
        self.tracks = {}

    # Comparison and arithmetic. Every operation delegates to the overall
    # weighted distance, so `other` may be a number or another Distance.

    def __cmp__(self, other):
        # Python 2 protocol, kept for callers that invoke it explicitly.
        # Written without the `cmp` builtin so it also works on Python 3.
        return (self.distance > other) - (self.distance < other)

    def __lt__(self, other):
        return self.distance < other

    def __le__(self, other):
        return self.distance <= other

    def __gt__(self, other):
        return self.distance > other

    def __ge__(self, other):
        return self.distance >= other

    def __eq__(self, other):
        return self.distance == other

    def __ne__(self, other):
        return self.distance != other

    # Defining `__eq__` would otherwise make instances unhashable on
    # Python 3; keep the identity-based hash the class had before.
    __hash__ = object.__hash__

    def __float__(self):
        return self.distance

    def __sub__(self, other):
        return self.distance - other

    def __rsub__(self, other):
        return other - self.distance

    def __getitem__(self, key):
        """Returns the weighted distance for a named penalty, as a fraction
        of the maximum distance across all penalties. Returns 0.0 when the
        maximum distance is zero. Raises KeyError if no penalty has been
        added for `key`.
        """
        dist = sum(self.penalties[key]) * weights[key].as_number()
        dist_max = self.max_distance
        if dist_max:
            return dist / dist_max
        return 0.0

    def _eq(self, value1, value2):
        """Returns True if `value1` is equal to `value2`. `value1` may be a
        compiled regular expression, in which case it will be matched against
        the beginning of `value2`.
        """
        if isinstance(value1, self._PATTERN_TYPE):
            return bool(value1.match(value2))
        return value1 == value2

    def add(self, key, dist):
        """Adds a distance penalty. `key` must correspond with a configured
        weight setting. `dist` must be a float between 0.0 and 1.0, and will be
        added to any existing distance penalties for the same key. Raises
        ValueError if `dist` is out of range.
        """
        if not 0.0 <= dist <= 1.0:
            raise ValueError(
                '`dist` must be between 0.0 and 1.0. It is: %r' % dist)
        self.penalties.setdefault(key, []).append(dist)

    def add_equality(self, key, value, options):
        """Adds a distance penalty of 1.0 if `value` doesn't match any of the
        values in `options`, or 0.0 if it does. `options` may be a single
        value or a list/tuple of values. If an option is a compiled regular
        expression, it will be considered equal if it matches against `value`.
        """
        if not isinstance(options, (list, tuple)):
            options = [options]
        matched = any(self._eq(opt, value) for opt in options)
        self.add(key, 0.0 if matched else 1.0)

    def add_expr(self, key, expr):
        """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0
        otherwise.
        """
        self.add(key, 1.0 if expr else 0.0)

    def add_number(self, key, number1, number2):
        """Adds a distance penalty of 1.0 for each number of difference between
        `number1` and `number2`, or 0.0 when there is no difference. Use this
        when there is no upper limit on the difference between the two numbers.
        The difference is passed to `range()`, so both numbers are expected
        to be integers.
        """
        diff = abs(number1 - number2)
        if diff:
            for _ in range(diff):
                self.add(key, 1.0)
        else:
            self.add(key, 0.0)

    def add_priority(self, key, value, options):
        """Adds a distance penalty that corresponds to the position at which
        `value` appears in `options`. A distance penalty of 0.0 for the first
        option, growing by 1 / (len(options) + 1) for each later position, or
        1.0 if there is no matching option. If an option is a compiled regular
        expression, it will be considered equal if it matches against `value`.
        """
        if not isinstance(options, (list, tuple)):
            options = [options]
        unit = 1.0 / (len(options) + 1)
        for i, opt in enumerate(options):
            if self._eq(opt, value):
                dist = i * unit
                break
        else:
            # No option matched.
            dist = 1.0
        self.add(key, dist)

    def add_ratio(self, key, number1, number2):
        """Adds a distance penalty for `number1` as a ratio of `number2`.
        `number1` is bound at 0 and `number2`. A penalty of 0.0 is added when
        `number2` is zero.
        """
        number = float(max(min(number1, number2), 0))
        if number2:
            dist = number / number2
        else:
            # Avoid dividing by zero.
            dist = 0.0
        self.add(key, dist)

    def add_string(self, key, str1, str2):
        """Adds a distance penalty based on the edit distance between `str1`
        and `str2`, as computed by `string_dist`.
        """
        dist = string_dist(str1, str2)
        self.add(key, dist)

    @property
    def distance(self):
        """Returns an overall weighted distance across all penalties: the
        weighted sum of all penalty values divided by `max_distance`, or 0.0
        when `max_distance` is zero.
        """
        dist = 0.0
        for key, penalty in self.penalties.items():
            dist += sum(penalty) * weights[key].as_number()
        dist_max = self.max_distance
        if dist_max:
            return dist / dist_max
        return 0.0

    @property
    def max_distance(self):
        """Returns the maximum distance penalty, i.e. the weighted distance
        that would result if every added penalty value were 1.0.
        """
        dist_max = 0.0
        for key, penalty in self.penalties.items():
            dist_max += len(penalty) * weights[key].as_number()
        return dist_max

    @property
    def sorted(self):
        """Returns a list of (dist, key) pairs, with `dist` being the weighted
        distance, sorted from highest to lowest. Pairs with an equal distance
        are ordered by key.
        """
        list_ = [(self[key], key) for key in self.penalties]
        # Negate the distance so that larger distances sort first while keys
        # still sort in ascending order.
        return sorted(list_, key=lambda pair: (0 - pair[0], pair[1]))

    def update(self, dist):
        """Adds all the distance penalties from `dist`. Raises ValueError if
        `dist` is not a Distance object.
        """
        if not isinstance(dist, Distance):
            raise ValueError(
                '`dist` must be a Distance object. It is: %r' % dist)
        for key, penalties in dist.penalties.items():
            self.penalties.setdefault(key, []).extend(penalties)
def track_distance(item, track_info, incl_artist=False):
"""Determines the significance of a track metadata change. Returns a
float in [0.0,1.0]. `incl_artist` indicates that a distance
component should be included for the track artist (i.e., for
various-artist releases).
Distance object. `incl_artist` indicates that a distance component should
be included for the track artist (i.e., for various-artist releases).
"""
# Distance and normalization accumulators.
dist, dist_max = 0.0, 0.0
dist = Distance()
# Check track length.
# If there's no length to check, apply no penalty.
# Length.
if track_info.length:
diff = abs(item.length - track_info.length)
diff = max(diff - weights['track_length_grace'].as_number(), 0.0)
diff = min(diff, weights['track_length_max'].as_number())
dist += (diff / weights['track_length_max'].as_number()) * \
weights['track_length'].as_number()
dist_max += weights['track_length'].as_number()
dist.add_ratio('track_length', diff,
weights['track_length_max'].as_number())
# Track title.
dist += string_dist(item.title, track_info.title) * \
weights['track_title'].as_number()
dist_max += weights['track_title'].as_number()
# Title.
dist.add_string('track_title', item.title, track_info.title)
# Track artist, if included.
# Attention: MB DB does not have artist info for all compilations,
# so only check artist distance if there is actually an artist in
# the MB track data.
# Artist. Only check if there is actually an artist in the track data.
if incl_artist and track_info.artist and \
item.artist.lower() not in VA_ARTISTS:
dist += string_dist(item.artist, track_info.artist) * \
weights['track_artist'].as_number()
dist_max += weights['track_artist'].as_number()
dist.add_string('track_artist', item.artist, track_info.artist)
# Track index.
if track_info.index and item.track:
if track_index_changed(item, track_info):
dist += weights['track_index'].as_number()
dist_max += weights['track_index'].as_number()
dist.add_expr('track_index', track_index_changed(item, track_info))
# MusicBrainz track ID.
# Track ID.
if item.mb_trackid:
if item.mb_trackid != track_info.track_id:
dist += weights['track_id'].as_number()
dist_max += weights['track_id'].as_number()
dist.add_expr('track_id', item.mb_trackid != track_info.track_id)
# Plugin distances.
plugin_d, plugin_dm = plugins.track_distance(item, track_info)
dist += plugin_d
dist_max += plugin_dm
# Plugins.
dist.update(plugins.track_distance(item, track_info))
return dist / dist_max
return dist
def distance(items, album_info, mapping):
"""Determines how "significant" an album metadata change would be.
Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object
Returns a Distance object. `album_info` is an AlbumInfo object
reflecting the album to be compared. `items` is a sequence of all
Item objects that will be matched (order is not important).
`mapping` is a dictionary mapping Items to TrackInfo objects; the
@ -251,100 +391,89 @@ def distance(items, album_info, mapping):
"""
likelies, _ = current_metadata(items)
# These accumulate the possible distance components. The final
# distance will be dist/dist_max.
dist = 0.0
dist_max = 0.0
dist = Distance()
# Artist/album metadata.
# Artist, if not various.
if not album_info.va:
dist += string_dist(likelies['artist'], album_info.artist) * \
weights['artist'].as_number()
dist_max += weights['artist'].as_number()
dist += string_dist(likelies['album'], album_info.album) * \
weights['album'].as_number()
dist_max += weights['album'].as_number()
dist.add_string('artist', likelies['artist'], album_info.artist)
# Year. No penalty for matching release or original year.
if likelies['year'] and album_info.year:
if likelies['year'] not in (album_info.year, album_info.original_year):
diff = abs(album_info.year - likelies['year'])
if diff:
dist += (1.0 - 1.0 / diff) * weights['year'].as_number()
dist_max += weights['year'].as_number()
# Album.
dist.add_string('album', likelies['album'], album_info.album)
# Actual or preferred media.
preferred_media = config['match']['preferred_media'].get()
# Media.
if likelies['media'] and album_info.media:
dist += string_dist(likelies['media'], album_info.media) * \
weights['media'].as_number()
dist_max += weights['media'].as_number()
elif album_info.media and preferred_media:
dist += string_dist(album_info.media, preferred_media) * \
weights['media'].as_number()
dist_max += weights['media'].as_number()
dist.add_string('media', likelies['media'], album_info.media)
# MusicBrainz album ID.
if likelies['mb_albumid']:
if likelies['mb_albumid'] != album_info.album_id:
dist += weights['album_id'].as_number()
dist_max += weights['album_id'].as_number()
# Preferred media.
preferred_media = [re.compile(r'(\d+x)?%s' % pattern, re.I) for pattern
in config['match']['preferred']['media'].get()]
if album_info.media and preferred_media:
dist.add_priority('media', album_info.media, preferred_media)
# Apply a small penalty for differences across many minor metadata. This
# helps prioritise releases that are nearly identical.
# Number of discs.
if likelies['disctotal'] and album_info.mediums:
dist.add_number('mediums', likelies['disctotal'], album_info.mediums)
if likelies['disctotal']:
if likelies['disctotal'] != album_info.mediums:
dist += weights['minor'].as_number()
dist_max += weights['minor'].as_number()
# Year.
if likelies['year'] and album_info.year:
# No penalty for matching release or original year.
if likelies['year'] in (album_info.year, album_info.original_year):
dist.add('year', 0.0)
else:
dist.add_number('year', likelies['year'], album_info.year)
if likelies['label'] and album_info.label:
dist += string_dist(likelies['label'], album_info.label) * \
weights['minor'].as_number()
dist_max += weights['minor'].as_number()
if likelies['catalognum'] and album_info.catalognum:
dist += string_dist(likelies['catalognum'],
album_info.catalognum) * \
weights['minor'].as_number()
dist_max += weights['minor'].as_number()
# Prefer earlier releases.
if album_info.year and album_info.original_year and \
config['match']['preferred']['original_year'].get():
dist.add_number('year', album_info.year, album_info.original_year)
# Country.
if likelies['country'] and album_info.country:
dist += string_dist(likelies['country'],
album_info.country) * \
weights['minor'].as_number()
dist_max += weights['minor'].as_number()
dist.add_string('country', likelies['country'], album_info.country)
# Preferred countries.
preferred_countries = [re.compile(pattern, re.I) for pattern
in config['match']['preferred']['countries'].get()]
if album_info.country and preferred_countries:
dist.add_priority('country', album_info.country, preferred_countries)
# Label.
if likelies['label'] and album_info.label:
dist.add_string('label', likelies['label'], album_info.label)
# Catalog number.
if likelies['catalognum'] and album_info.catalognum:
dist.add_string('catalognum', likelies['catalognum'],
album_info.catalognum)
# Disambiguation.
if likelies['albumdisambig'] and album_info.albumdisambig:
dist += string_dist(likelies['albumdisambig'],
album_info.albumdisambig) * \
weights['minor'].as_number()
dist_max += weights['minor'].as_number()
dist.add_string('albumdisambig', likelies['albumdisambig'],
album_info.albumdisambig)
# Matched track distances.
# Album ID.
if likelies['mb_albumid']:
dist.add_equality('album_id', likelies['mb_albumid'],
album_info.album_id)
# Tracks.
dist.tracks = {}
for item, track in mapping.iteritems():
dist += track_distance(item, track, album_info.va) * \
weights['track'].as_number()
dist_max += weights['track'].as_number()
dist.tracks[track] = track_distance(item, track, album_info.va)
dist.add('tracks', dist.tracks[track].distance)
# Extra and unmatched tracks.
for track in set(album_info.tracks) - set(mapping.values()):
dist += weights['missing'].as_number()
dist_max += weights['missing'].as_number()
for item in set(items) - set(mapping.keys()):
dist += weights['unmatched'].as_number()
dist_max += weights['unmatched'].as_number()
# Missing tracks.
for i in range(len(album_info.tracks) - len(mapping)):
dist.add('missing_tracks', 1.0)
# Plugin distances.
plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping)
dist += plugin_d
dist_max += plugin_dm
# Unmatched tracks.
for i in range(len(items) - len(mapping)):
dist.add('unmatched_tracks', 1.0)
# Normalize distance, avoiding divide-by-zero.
if dist_max == 0.0:
return 0.0
else:
return dist / dist_max
# Plugins.
dist.update(plugins.album_distance(items, album_info, mapping))
return dist
def match_by_id(items):
"""If the items are tagged with a MusicBrainz album ID, returns an
@ -370,8 +499,8 @@ def _recommendation(results):
recommendation based on the results' distances.
If the recommendation is higher than the configured maximum for
certain situations, the recommendation will be downgraded to the
configured maximum.
an applied penalty, the recommendation will be downgraded to the
configured maximum for that penalty.
"""
if not results:
# No candidates: no recommendation.
@ -393,45 +522,20 @@ def _recommendation(results):
# Gap between first two candidates is large.
rec = recommendation.low
else:
# No conclusion.
rec = recommendation.none
# No conclusion. Return immediately. Can't be downgraded any further.
return recommendation.none
# "Downgrades" in certain configured situations.
if isinstance(results[0], hooks.AlbumMatch):
# Load the configured recommendation maxima.
max_rec = {}
for trigger in 'non_mb_source', 'partial', 'tracklength', 'tracknumber':
max_rec[trigger] = \
config['match']['max_rec'][trigger].as_choice({
'strong': recommendation.strong,
'medium': recommendation.medium,
'low': recommendation.low,
'none': recommendation.none,
})
# Non-MusicBrainz source.
if rec > max_rec['non_mb_source'] and \
results[0].info.data_source != 'MusicBrainz':
rec = max_rec['non_mb_source']
# Partial match.
if rec > max_rec['partial'] and \
(results[0].extra_items or results[0].extra_tracks):
rec = max_rec['partial']
# Check track number and duration for each item.
for item, track_info in results[0].mapping.items():
# Track length differs.
if rec > max_rec['tracklength'] and \
item.length and track_info.length and \
abs(item.length - track_info.length) > \
weights['track_length_grace'].as_number():
rec = max_rec['tracklength']
# Track number differs.
if rec > max_rec['tracknumber'] and \
track_index_changed(item, track_info):
rec = max_rec['tracknumber']
# Downgrade to the max rec if it is lower than the current rec for an
# applied penalty.
for dist, key in results[0].distance.sorted:
if dist:
max_rec = config['match']['max_rec'][key].as_choice({
'strong': recommendation.strong,
'medium': recommendation.medium,
'low': recommendation.low,
'none': recommendation.none,
})
rec = min(rec, max_rec)
return rec
@ -465,7 +569,7 @@ def tag_album(items, search_artist=None, search_album=None,
- The current artist.
- The current album.
- A list of AlbumMatch objects. The candidates are sorted by
distance (i.e., best match first).
distance (i.e., best match first).
- A recommendation.
If search_artist and search_album or search_id are provided, then
they are used as search terms in place of the current metadata.

View file

@ -68,22 +68,42 @@ match:
medium_rec_thresh: 0.25
rec_gap_thresh: 0.25
max_rec:
non_mb_source: strong
partial: medium
tracklength: strong
tracknumber: strong
preferred_media: CD
weight:
source: strong
artist: strong
album: strong
media: strong
mediums: strong
year: strong
country: strong
label: strong
catalognum: strong
albumdisambig: strong
album_id: strong
tracks: strong
missing_tracks: medium
unmatched_tracks: medium
track_title: strong
track_artist: strong
track_index: strong
track_length_grace: strong
track_length_max: strong
track_length: strong
track_id: strong
distance_weights:
source: 2.0
artist: 3.0
album: 3.0
year: 1.0
media: 1.0
mediums: 1.0
year: 1.0
country: 0.5
label: 0.5
catalognum: 0.5
albumdisambig: 0.5
album_id: 5.0
minor: 0.5
track: 1.0
missing: 0.9
unmatched: 0.6
tracks: 2.0
missing_tracks: 0.9
unmatched_tracks: 0.6
track_title: 3.0
track_artist: 2.0
track_index: 1.0
@ -91,3 +111,7 @@ match:
track_length_max: 30
track_length: 2.0
track_id: 5.0
preferred:
countries: []
media: []
original_year: no

View file

@ -64,16 +64,16 @@ class BeetsPlugin(object):
return {}
def track_distance(self, item, info):
"""Should return a (distance, distance_max) pair to be added
to the distance value for every track comparison.
"""Should return a Distance object to be added to the
distance for every track comparison.
"""
return 0.0, 0.0
return beets.autotag.match.Distance()
def album_distance(self, items, album_info, mapping):
"""Should return a (distance, distance_max) pair to be added
to the distance value for every album-level comparison.
"""Should return a Distance object to be added to the
distance for every album-level comparison.
"""
return 0.0, 0.0
return beets.autotag.match.Distance()
def candidates(self, items, artist, album, va_likely):
"""Should return a sequence of AlbumInfo objects that match the
@ -242,25 +242,19 @@ def queries():
def track_distance(item, info):
"""Gets the track distance calculated by all loaded plugins.
Returns a (distance, distance_max) pair.
Returns a Distance object.
"""
dist = 0.0
dist_max = 0.0
dist = beets.autotag.match.Distance()
for plugin in find_plugins():
d, dm = plugin.track_distance(item, info)
dist += d
dist_max += dm
return dist, dist_max
dist.update(plugin.track_distance(item, info))
return dist
def album_distance(items, album_info, mapping):
"""Returns the album distance calculated by plugins."""
dist = 0.0
dist_max = 0.0
dist = beets.autotag.match.Distance()
for plugin in find_plugins():
d, dm = plugin.album_distance(items, album_info, mapping)
dist += d
dist_max += dm
return dist, dist_max
dist.update(plugin.album_distance(items, album_info, mapping))
return dist
def candidates(items, artist, album, va_likely):
"""Gets MusicBrainz candidates for an album from each plugin.

View file

@ -125,14 +125,14 @@ default_commands.append(fields_cmd)
VARIOUS_ARTISTS = u'Various Artists'
PARTIAL_MATCH_MESSAGE = u'(partial match!)'
# Importer utilities and support.
def disambig_string(info):
"""Returns label, year and media disambiguation, if available.
"""Returns source, media, year, country, and album disambiguation.
"""
disambig = []
if info.data_source != 'MusicBrainz':
disambig.append(info.data_source)
if info.media:
if info.mediums > 1:
disambig.append(u'{0}x{1}'.format(
@ -163,26 +163,35 @@ def dist_string(dist):
out = ui.colorize('red', out)
return out
def penalty_string(distance, limit=None):
    """Returns a colorized string that indicates all the penalties applied to
    a distance object, or None if no penalty has a non-zero distance.

    Penalty names are taken from `distance.sorted`, so they appear in that
    order. A leading `album_` or `track_` prefix is dropped from each name
    and underscores are shown as spaces. If `limit` is given and there are
    more than `limit` penalties, only the first `limit` are listed, followed
    by "...".
    """
    penalties = []
    for dist, key in distance.sorted:
        if not dist:
            continue
        # Strip the prefixes only from the start of the key, so that a key
        # which merely contains "album_" or "track_" further in is not
        # mangled.
        for prefix in ('album_', 'track_'):
            if key.startswith(prefix):
                key = key[len(prefix):]
        penalties.append(key.replace('_', ' '))
    if not penalties:
        return None
    if limit and len(penalties) > limit:
        penalties = penalties[:limit] + ['...']
    return ui.colorize('yellow', '(%s)' % ', '.join(penalties))
def show_change(cur_artist, cur_album, match):
"""Print out a representation of the changes that will be made if an
album's tags are changed according to `match`, which must be an AlbumMatch
object.
"""
def show_album(artist, album, partial=False):
def show_album(artist, album):
if artist:
album_description = u' %s - %s' % (artist, album)
elif album:
album_description = u' %s' % album
else:
album_description = u' (unknown album)'
out = album_description
# Add a suffix if this is a partial match.
if partial:
out += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE)
print_(out)
print_(album_description)
def format_index(track_info):
"""Return a string representing the track index of the given
@ -223,11 +232,7 @@ def show_change(cur_artist, cur_album, match):
print_("To:")
show_album(artist_r, album_r)
else:
message = u"Tagging:\n %s - %s" % (match.info.artist,
match.info.album)
if match.extra_items or match.extra_tracks:
message += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE)
print_(message)
print_(u"Tagging:\n %s - %s" % (match.info.artist, match.info.album))
# Data URL.
if match.info.data_url:
@ -235,9 +240,13 @@ def show_change(cur_artist, cur_album, match):
# Info line.
info = []
# Similarity.
info.append('(Similarity: %s)' % dist_string(match.distance))
if match.info.data_source != 'MusicBrainz':
info.append(ui.colorize('turquoise', '(%s)' % match.info.data_source))
# Penalties.
penalties = penalty_string(match.distance)
if penalties:
info.append(penalties)
# Disambiguation.
disambig = disambig_string(match.info)
if disambig:
info.append(ui.colorize('lightgray', '(%s)' % disambig))
@ -315,18 +324,10 @@ def show_change(cur_artist, cur_album, match):
rhs += templ.format(rhs_length)
lhs_width += len(cur_length) + 3
# Hidden penalties. No LHS/RHS diff is displayed, but we still want to
# indicate that a penalty has been applied to explain the similarity
# score.
penalties = []
if match.info.va and track_info.artist and \
item.artist.lower() not in VA_ARTISTS:
penalties.append('artist')
if item.mb_trackid and item.mb_trackid != track_info.track_id:
penalties.append('ID')
# Penalties.
penalties = penalty_string(match.distance.tracks[track_info])
if penalties:
rhs += ' %s' % ui.colorize('red',
'(%s)' % ', '.join(penalties))
rhs += ' %s' % penalties
if lhs != rhs:
lines.append((' * %s' % lhs, rhs, lhs_width))
@ -489,20 +490,17 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None,
(cur_artist, cur_album))
print_('Candidates:')
for i, match in enumerate(candidates):
# Artist, album and distance.
line = ['%i. %s - %s (%s)' % (i + 1, match.info.artist,
match.info.album,
dist_string(match.distance))]
# Point out the partial matches.
if match.extra_items or match.extra_tracks:
line.append(ui.colorize('yellow',
PARTIAL_MATCH_MESSAGE))
# Sources other than MusicBrainz.
source = match.info.data_source
if source != 'MusicBrainz':
line.append(ui.colorize('turquoise', '(%s)' % source))
# Penalties.
penalties = penalty_string(match.distance, 3)
if penalties:
line.append(penalties)
# Disambiguation
disambig = disambig_string(match.info)
if disambig:
line.append(ui.colorize('lightgray', '(%s)' % disambig))

View file

@ -21,6 +21,7 @@ from beets import util
from beets import config
from beets.util import confit
from beets.autotag import hooks
from beets.autotag.match import Distance
import acoustid
import logging
from collections import defaultdict
@ -113,16 +114,14 @@ def _all_releases(items):
class AcoustidPlugin(plugins.BeetsPlugin):
def track_distance(self, item, info):
dist = Distance()
if item.path not in _matches or not info.track_id:
# Match failed or no track ID.
return 0.0, 0.0
return dist
recording_ids, _ = _matches[item.path]
if info.track_id in recording_ids:
dist = 0.0
else:
dist = TRACK_ID_WEIGHT
return dist, TRACK_ID_WEIGHT
dist.add_expr('track_id', info.track_id not in recording_ids)
return dist
def candidates(self, items, artist, album, va_likely):
albums = []

View file

@ -17,7 +17,7 @@ discogs-client library.
"""
from beets import config
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.autotag.match import current_metadata, VA_ARTISTS
from beets.autotag.match import current_metadata, Distance, VA_ARTISTS
from beets.plugins import BeetsPlugin
from discogs_client import Artist, DiscogsAPIError, Release, Search
import beets
@ -44,14 +44,12 @@ class DiscogsPlugin(BeetsPlugin):
})
def album_distance(self, items, album_info, mapping):
"""Returns the discogs source weight and the maximum source weight.
"""Returns the album distance.
"""
dist = Distance()
if album_info.data_source == 'Discogs':
return self.config['source_weight'].as_number() * \
config['match']['weight']['source'].as_number(), \
config['match']['weight']['source'].as_number()
else:
return 0.0, 0.0
dist.add('source', self.config['source_weight'].as_number())
return dist
def candidates(self, items, artist, album, va_likely):
"""Returns a list of AlbumInfo objects for discogs search results

View file

@ -53,30 +53,36 @@ Changelog
None.
* Various UI enhancements to the importer due to Tai Lee:
* More consistent format and colorization of album and track metadata.
* Display data source URL for :doc:`/plugins/discogs` matches. This should
make it easier for people who would rather import and correct data from
Discogs into MusicBrainz.
* Display data source URL and source name in album disambiguation for
non-MusicBrainz matches. This should make it easier for people who want to
import and correct data from other sources into MusicBrainz.
* The top 3 distance penalties are now displayed on the release listing,
and all album and track penalties are now displayed on the track changes
list. This should make it clear exactly which metadata is contributing to a
low similarity score.
* Display album disambiguation and disc titles in the track listing, when
available.
* Track changes highlighted in yellow indicate a change in format to or from
:ref:`per_disc_numbering`. No penalty is applied because the track number
is still "correct", just in a different format.
* More consistent format and colorization of album and track metadata.
* Track changes highlighted in turquoise indicate a change in format to or
from :ref:`per_disc_numbering`. No penalty is applied because the track
number is still "correct", just in a different format.
* Sort missing and unmatched tracks by index and title and group them
together for better readability.
* Indicate MusicBrainz ID mismatches.
* Improve calculation of similarity score:
* Improve calculation of similarity score and recommendation:
* It is now possible to configure a :ref:`max_rec` for any field that is used
to calculate the similarity score. The recommendation will be downgraded if
a penalty is being applied to the specified field.
* Strongly prefer releases with a matching MusicBrainz album ID. This helps
beets re-identify the same release when re-importing existing files.
* Prefer releases that are closest to the tagged ``year``. Tolerate files
tagged with release or original year.
* Prefer CD releases by default, when there is no ``media`` tagged in the
files being imported. This can be changed with the :ref:`preferred_media`
setting.
* Apply minor penalties across a range of fields to differentiate between
nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
* Add a :ref:`preferred` collection of settings, which allow the user to
specify a sorted list of preferred countries and media types, or prefer
releases closest to the original year for an album.
* Apply minor distance penalties across a range of fields to differentiate
between nearly identical releases: ``mediums``, ``label``, ``catalognum``,
``country`` and ``albumdisambig``.
.. _Discogs: http://discogs.com/

View file

@ -394,40 +394,65 @@ max_rec
As mentioned above, autotagger matches have *recommendations* that control how
the UI behaves for a certain quality of match. The recommendation for a certain
match is usually based on the distance calculation. But you can also control
the recommendation for certain specific situations by defining *maximum*
recommendations when:
match is based on the overall distance calculation. But you can also control
the recommendation when a distance penalty is being applied for a specific
field by defining *maximum* recommendations for each field:
* a match came from a source other than MusicBrainz (e.g., the
:doc:`Discogs </plugins/discogs>` plugin);
* a match has missing or extra tracks;
* the length (duration) of at least one track differs; or
* at least one track number differs.
To define maxima, use keys under ``max_rec:`` in the ``match`` section::
To define maxima, use keys under ``max_rec:`` in the ``match`` section. Here
are the defaults::
match:
max_rec:
non_mb_source: strong
partial: medium
tracklength: strong
tracknumber: strong
source: strong
artist: strong
album: strong
media: strong
mediums: strong
year: strong
country: strong
label: strong
catalognum: strong
albumdisambig: strong
album_id: strong
tracks: strong
missing_tracks: medium
unmatched_tracks: medium
track_title: strong
track_artist: strong
track_index: strong
track_length_grace: strong
track_length_max: strong
track_length: strong
track_id: strong
If a recommendation is higher than the configured maximum and the condition is
met, the recommendation will be downgraded. The maximum for each condition can
be one of ``none``, ``low``, ``medium`` or ``strong``. When the maximum
recommendation is ``strong``, no "downgrading" occurs for that situation.
If a recommendation is higher than the configured maximum and a penalty is
being applied, the recommendation will be downgraded. The maximum for each
field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the
maximum recommendation is ``strong``, no "downgrading" occurs.
The above example shows the default ``max_rec`` settings.
.. _preferred:
.. _preferred_media:
preferred
~~~~~~~~~
preferred_media
~~~~~~~~~~~~~~~
In addition to comparing the tagged metadata with the match metadata for
similarity, you can also specify an ordered list of preferred countries and
media types. A distance penalty will be applied if the country or media type
from the match metadata doesn't match any item in the list. The order is
important: the first item is the most preferred.
When comparing files that have no ``media`` tagged, prefer releases that more
closely resemble this media (using a string distance). When files are already
tagged with media, this setting is ignored. Default: ``CD``.
You can also tell the autotagger to prefer matches that have a release year
closest to the original year for an album.
Here's an example::
match:
preferred:
countries: ['US', 'GB', 'UK']
media: ['CD', 'Digital Media']
original_year: yes
By default, none of these options are enabled.
.. _path-format-config:

View file

@ -23,6 +23,7 @@ import _common
from _common import unittest
from beets import autotag
from beets.autotag import match
from beets.autotag.match import Distance
from beets.library import Item
from beets.util import plurality
from beets.autotag import AlbumInfo, TrackInfo
@ -105,6 +106,127 @@ def _make_trackinfo():
TrackInfo(u'three', None, u'some artist', length=1, index=3),
]
class DistanceTest(unittest.TestCase):
    """Exercise the penalty bookkeeping and arithmetic of ``Distance``.

    Each ``test_add_*`` method calls one ``add_*`` helper and checks the raw
    (unweighted) penalties recorded under the given key. The remaining tests
    check the weighted results derived from those penalties.

    NOTE(review): several tests assign into the global
    ``config['match']['distance_weights']`` and never restore it, so the
    weights leak into later tests -- confirm whether that is intended.
    """

    def setUp(self):
        # Every test starts from a fresh, empty distance.
        self.dist = Distance()

    def test_add(self):
        """``add`` stores the raw penalty in a list under its key."""
        self.dist.add('add', 1.0)
        self.assertEqual(self.dist.penalties, {'add': [1.0]})

    def test_add_equality(self):
        """``add_equality`` records 0.0 on a match and 1.0 otherwise."""
        options = ['abc', 'def', 'ghi']

        # The value is one of the options: no penalty.
        self.dist.add_equality('equality', 'ghi', options)
        self.assertEqual(self.dist.penalties['equality'], [0.0])

        # The value is not among the options: full penalty.
        self.dist.add_equality('equality', 'xyz', options)
        self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0])

        # A compiled (here case-insensitive) pattern is accepted as well.
        self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I))
        self.assertEqual(self.dist.penalties['equality'], [0.0, 1.0, 0.0])

    def test_add_expr(self):
        """``add_expr`` records 1.0 for a true expression, 0.0 for false."""
        self.dist.add_expr('expr', True)
        self.assertEqual(self.dist.penalties['expr'], [1.0])

        self.dist.add_expr('expr', False)
        self.assertEqual(self.dist.penalties['expr'], [1.0, 0.0])

    def test_add_number(self):
        """``add_number`` records one full penalty per unit of difference."""
        # Equal numbers: a single zero penalty.
        self.dist.add_number('number', 1, 1)
        self.assertEqual(self.dist.penalties['number'], [0.0])

        # A difference of one, in either direction: one full penalty.
        self.dist.add_number('number', 1, 2)
        self.assertEqual(self.dist.penalties['number'], [0.0, 1.0])

        self.dist.add_number('number', 2, 1)
        self.assertEqual(self.dist.penalties['number'], [0.0, 1.0, 1.0])

        # A difference of three (-1 vs. 2): three full penalties.
        self.dist.add_number('number', -1, 2)
        self.assertEqual(self.dist.penalties['number'],
                         [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0][:6])

    def test_add_priority(self):
        """``add_priority`` penalises by position in the option list."""
        # A single option that matches: no penalty.
        self.dist.add_priority('priority', 'abc', 'abc')
        self.assertEqual(self.dist.penalties['priority'], [0.0])

        # Matches further down the list are penalised more heavily.
        self.dist.add_priority('priority', 'def', ['abc', 'def', 'ghi'])
        self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25])

        # Options may be compiled patterns as well as plain strings.
        self.dist.add_priority('priority', 'ghi',
                               ['abc', 'def', re.compile('GHI', re.I)])
        self.assertEqual(self.dist.penalties['priority'], [0.0, 0.25, 0.5])

        # No option matches: full penalty.
        self.dist.add_priority('priority', 'xyz', ['abc', 'def'])
        self.assertEqual(self.dist.penalties['priority'],
                         [0.0, 0.25, 0.5, 1.0])

    def test_add_ratio(self):
        """``add_ratio`` records ``number1 / number2`` limited to [0, 1]."""
        self.dist.add_ratio('ratio', 25, 100)
        self.assertEqual(self.dist.penalties['ratio'], [0.25])

        # A ratio above one yields the full penalty.
        self.dist.add_ratio('ratio', 10, 5)
        self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0])

        # A negative ratio yields no penalty.
        self.dist.add_ratio('ratio', -5, 5)
        self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0])

        # A zero denominator yields no penalty rather than an error.
        self.dist.add_ratio('ratio', 5, 0)
        self.assertEqual(self.dist.penalties['ratio'], [0.25, 1.0, 0.0, 0.0])

    def test_add_string(self):
        """``add_string`` records the result of ``match.string_dist``."""
        expected = match.string_dist(u'abc', u'bcd')
        self.dist.add_string('string', u'abc', u'bcd')
        self.assertEqual(self.dist.penalties['string'], [expected])

    def test_distance(self):
        """``distance`` and ``__getitem__`` apply the configured weights."""
        # Configure weights for exactly the keys used below, so the expected
        # values do not depend on the shipped default for ``media``.
        config['match']['distance_weights']['album'] = 2.0
        config['match']['distance_weights']['media'] = 1.0

        self.dist.add('album', 0.5)
        self.dist.add('media', 0.25)
        self.dist.add('media', 0.75)

        # Expected arithmetic: weighted penalties are 0.5 * 2.0 = 1.0 for
        # album and (0.25 + 0.75) * 1.0 = 1.0 for media, out of a maximum of
        # 2.0 + 1.0 + 1.0 = 4.0.
        self.assertEqual(self.dist.distance, 0.5)

        # __getitem__()
        self.assertEqual(self.dist['album'], 0.25)
        self.assertEqual(self.dist['media'], 0.25)

    def test_max_distance(self):
        """``max_distance`` counts the weight of every recorded penalty."""
        config['match']['distance_weights']['album'] = 3.0
        config['match']['distance_weights']['medium'] = 1.0

        self.dist.add('album', 0.5)
        self.dist.add('medium', 0.0)
        self.dist.add('medium', 0.0)

        # One album penalty (3.0) plus two medium penalties (1.0 each).
        self.assertEqual(self.dist.max_distance, 5.0)

    def test_sorted(self):
        """``sorted`` lists (distance, key) pairs, largest distance first."""
        config['match']['distance_weights']['album'] = 4.0
        config['match']['distance_weights']['medium'] = 2.0

        self.dist.add('album', 0.1875)
        self.dist.add('medium', 0.75)
        self.assertEqual(self.dist.sorted,
                         [(0.25, 'medium'), (0.125, 'album')])

        # Sort by key if distance is equal.
        tied = Distance()
        tied.add('album', 0.375)
        tied.add('medium', 0.75)
        self.assertEqual(tied.sorted, [(0.25, 'album'), (0.25, 'medium')])

    def test_update(self):
        """``update`` appends another distance's penalties key by key."""
        self.dist.add('album', 0.5)
        self.dist.add('media', 1.0)

        other = Distance()
        other.add('album', 0.75)
        other.add('album', 0.25)

        self.dist.add('media', 0.05)
        self.dist.update(other)

        # Penalties from ``other`` follow the ones already recorded.
        self.assertEqual(self.dist.penalties, {'album': [0.5, 0.75, 0.25],
                                               'media': [1.0, 0.05]})
class TrackDistanceTest(unittest.TestCase):
def test_identical_tracks(self):
item = _make_item(u'one', 1)

View file

@ -27,6 +27,7 @@ from beets import library
from beets import ui
from beets.ui import commands
from beets import autotag
from beets.autotag.match import distance
from beets import importer
from beets.mediafile import MediaFile
from beets import config
@ -594,21 +595,23 @@ class ShowChangeTest(_common.TestCase):
self.items[0].track = 1
self.items[0].path = '/path/to/file.mp3'
self.info = autotag.AlbumInfo(
'the album', 'album id', 'the artist', 'artist id', [
autotag.TrackInfo('the title', 'track id', index=1)
u'the album', u'album id', u'the artist', u'artist id', [
autotag.TrackInfo(u'the title', u'track id', index=1)
])
def _show_change(self, items=None, info=None,
cur_artist='the artist', cur_album='the album',
cur_artist=u'the artist', cur_album=u'the album',
dist=0.1):
items = items or self.items
info = info or self.info
mapping = dict(zip(items, info.tracks))
config['color'] = False
album_dist = distance(items, info, mapping)
album_dist.penalties = {'album': [dist]}
commands.show_change(
cur_artist,
cur_album,
autotag.AlbumMatch(0.1, info, mapping, set(), set()),
autotag.AlbumMatch(album_dist, info, mapping, set(), set()),
)
return self.io.getoutput().lower()
@ -623,7 +626,7 @@ class ShowChangeTest(_common.TestCase):
self.assertTrue('correcting tags from:' in msg)
def test_item_data_change(self):
self.items[0].title = 'different'
self.items[0].title = u'different'
msg = self._show_change()
self.assertTrue('different -> the title' in msg)
@ -638,12 +641,12 @@ class ShowChangeTest(_common.TestCase):
self.assertTrue('correcting tags from:' in msg)
def test_item_data_change_title_missing(self):
self.items[0].title = ''
self.items[0].title = u''
msg = re.sub(r' +', ' ', self._show_change())
self.assertTrue('file.mp3 -> the title' in msg)
def test_item_data_change_title_missing_with_unicode_filename(self):
self.items[0].title = ''
self.items[0].title = u''
self.items[0].path = u'/path/to/caf\xe9.mp3'.encode('utf8')
msg = re.sub(r' +', ' ', self._show_change().decode('utf8'))
self.assertTrue(u'caf\xe9.mp3 -> the title' in msg