configurable match thresholds (GC-290)

This commit is contained in:
Adrian Sampson 2013-01-11 14:46:15 -08:00
parent c418e6d379
commit 9e6b1db02d
6 changed files with 44 additions and 13 deletions

View file

@ -26,7 +26,6 @@ from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch
from .match import AutotagError
from .match import tag_item, tag_album
from .match import RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE
from .match import STRONG_REC_THRESH, MEDIUM_REC_THRESH, REC_GAP_THRESH
# Global logger.
log = logging.getLogger('beets')

View file

@ -75,10 +75,6 @@ SD_REPLACE = [
RECOMMEND_STRONG = 'RECOMMEND_STRONG'
RECOMMEND_MEDIUM = 'RECOMMEND_MEDIUM'
RECOMMEND_NONE = 'RECOMMEND_NONE'
# Thresholds for recommendations.
STRONG_REC_THRESH = 0.04
MEDIUM_REC_THRESH = 0.25
REC_GAP_THRESH = 0.25
# Artist signals that indicate "various artists". These are used at the
# album level to determine whether a given release is likely a VA
@ -335,16 +331,17 @@ def recommendation(results):
rec = RECOMMEND_NONE
else:
min_dist = results[0].distance
if min_dist < STRONG_REC_THRESH:
if min_dist < config['match']['strong_rec_thresh'].as_number():
# Strong recommendation level.
rec = RECOMMEND_STRONG
elif len(results) == 1:
# Only a single candidate. Medium recommendation.
rec = RECOMMEND_MEDIUM
elif min_dist <= MEDIUM_REC_THRESH:
elif min_dist <= config['match']['medium_rec_thresh'].as_number():
# Medium recommendation level.
rec = RECOMMEND_MEDIUM
elif results[1].distance - min_dist >= REC_GAP_THRESH:
elif results[1].distance - min_dist >= \
config['match']['rec_gap_thresh'].as_number():
# Gap between first two candidates is large.
rec = RECOMMEND_MEDIUM
else:

View file

@ -48,3 +48,8 @@ musicbrainz:
host: musicbrainz.org
ratelimit: 1
ratelimit_interval: 1.0
match:
strong_rec_thresh: 0.04
medium_rec_thresh: 0.25
rec_gap_thresh: 0.25

View file

@ -106,13 +106,13 @@ PARTIAL_MATCH_MESSAGE = u'(partial match!)'
# Importer utilities and support.
def dist_string(dist):
"""Formats a distance (a float) as a similarity percentage string.
The string is colorized if color is True.
"""Formats a distance (a float) as a colorized similarity percentage
string.
"""
out = '%.1f%%' % ((1 - dist) * 100)
if dist <= autotag.STRONG_REC_THRESH:
if dist <= config['match']['strong_rec_thresh'].as_number():
out = ui.colorize('green', out)
elif dist <= autotag.MEDIUM_REC_THRESH:
elif dist <= config['match']['medium_rec_thresh'].as_number():
out = ui.colorize('yellow', out)
else:
out = ui.colorize('red', out)

View file

@ -26,6 +26,8 @@ It also adds some new features:
encoding used to print messages to standard output.
* The MusicBrainz hostname (and rate limiting) are now configurable. See
:ref:`musicbrainz-config`.
* You can now configure the similarity thresholds used to determine when the
autotagger automatically accepts a metadata match. See :ref:`match-config`.
* :doc:`/plugins/lyrics`: Fix an issue that failed to find lyrics when metadata
contained "real" apostrophes.

View file

@ -268,7 +268,7 @@ no log is written. This can be overridden with the ``-l`` flag to
.. _musicbrainz-config:
MusicBrainz Options
~~~~~~~~~~~~~~~~~~~
-------------------
If you run your own `MusicBrainz`_ server, you can instruct beets to use it
instead of the main server. Use the ``host`` and ``ratelimit`` options under a
@ -287,6 +287,34 @@ server---on this public server, you're `limited`_ to one request per second.
.. _limited: http://musicbrainz.org/doc/XML_Web_Service/Rate_Limiting
.. _MusicBrainz: http://musicbrainz.org/
.. _match-config:
Autotagger Matching Options
---------------------------
Under the ``match:`` section, you can configure some aspects of the logic
beets uses when automatically matching MusicBrainz results. To control how
*tolerant* the autotagger is of differences, use the ``strong_rec_thresh``
option, which reflects the distance threshold below which beets will make a
"strong recommendation" that the metadata be used. Strong recommendations
are accepted automatically (except in "timid" mode), so you can use this to
make beets ask your opinion more or less often.
The threshold is a *distance* value between 0.0 and 1.0, so you can think of it
as the opposite of a *similarity* value. For example, if you want to
automatically accept any matches above 90% similarity, use::
match:
strong_rec_thresh: 0.10
The default strong recommendation threshold is 0.04.
The ``medium_rec_thresh`` and ``rec_gap_thresh`` options work similarly. When a
match's distance is at or below the *medium* recommendation threshold, or the
distance between it and the next-best match is at or above the *gap* threshold,
the importer will suggest that match but not automatically confirm it.
Otherwise, you'll see a list of options to choose from.
.. _path-format-config:
Path Format Configuration