autotag refactoring in preparation for interface changes

Adrian Sampson 2011-10-10 18:19:24 -07:00
parent c891dac3ec
commit ee78391f4f
4 changed files with 623 additions and 520 deletions

beets/autotag/__init__.py

@@ -15,79 +15,18 @@
"""Facilities for automatically determining files' correct metadata.
"""
import os
import logging
import re
from munkres import Munkres
from unidecode import unidecode
from beets.autotag import mb
from beets import library, mediafile, plugins
from beets.util import levenshtein, sorted_walk, plurality
from beets import library, mediafile
from beets.util import sorted_walk
# Try 5 releases. In the future, this should be more dynamic: let the
# probability of continuing to the next release be inversely
# proportional to how good our current best is and how long we've
# already taken.
MAX_CANDIDATES = 5
# Parts of external interface.
from .model import AlbumInfo, TrackInfo
from .match import tag_item, tag_album
from .match import RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE
from .match import STRONG_REC_THRESH, MEDIUM_REC_THRESH, REC_GAP_THRESH
# Distance parameters.
# Text distance weights: proportions on the normalized intuitive edit
# distance.
ARTIST_WEIGHT = 3.0
ALBUM_WEIGHT = 3.0
# The weight of the entire distance calculated for a given track.
TRACK_WEIGHT = 1.0
# These distances are components of the track distance (that is, they
# compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT;
# the overall TRACK_WEIGHT does that).
TRACK_TITLE_WEIGHT = 3.0
# Used instead of a global artist penalty for various-artist matches.
TRACK_ARTIST_WEIGHT = 2.0
# Added when the indices of tracks don't match.
TRACK_INDEX_WEIGHT = 1.0
# Track length weights: no penalty before GRACE, maximum (WEIGHT)
# penalty at GRACE+MAX discrepancy.
TRACK_LENGTH_GRACE = 10
TRACK_LENGTH_MAX = 30
TRACK_LENGTH_WEIGHT = 2.0
# MusicBrainz track ID matches.
TRACK_ID_WEIGHT = 5.0
# Recommendation constants.
RECOMMEND_STRONG = 'RECOMMEND_STRONG'
RECOMMEND_MEDIUM = 'RECOMMEND_MEDIUM'
RECOMMEND_NONE = 'RECOMMEND_NONE'
# Thresholds for recommendations.
STRONG_REC_THRESH = 0.04
MEDIUM_REC_THRESH = 0.25
REC_GAP_THRESH = 0.25
# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma.
SD_END_WORDS = ['the', 'a', 'an']
# Reduced weights for certain portions of the string.
SD_PATTERNS = [
(r'^the ', 0.1),
(r'[\[\(]?(ep|single)[\]\)]?', 0.0),
(r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
(r'\(.*?\)', 0.3),
(r'\[.*?\]', 0.3),
(r'(, )?(pt\.|part) .+', 0.2),
]
# Replacements to use before testing distance.
SD_REPLACE = [
(r'&', 'and'),
]
# Artist signals that indicate "various artists".
VA_ARTISTS = (u'', u'various artists', u'va', u'unknown')
# Autotagging exceptions.
class AutotagError(Exception):
pass
# Global logger.
log = logging.getLogger('beets')
# Main interface.
def albums_in_dir(path):
"""Recursively searches the given directory and returns an iterable
@@ -112,204 +51,6 @@ def albums_in_dir(path):
if items:
yield root, items
def _string_dist_basic(str1, str2):
"""Basic edit distance between two strings, ignoring
non-alphanumeric characters and case. Comparisons are based on a
transliteration/lowering to ASCII characters. Normalized by string
length.
"""
str1 = unidecode(str1)
str2 = unidecode(str2)
str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
if not str1 and not str2:
return 0.0
return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
def string_dist(str1, str2):
"""Gives an "intuitive" edit distance between two strings. This is
an edit distance, normalized by the string length, with a number of
tweaks that reflect intuition about text.
"""
str1 = str1.lower()
str2 = str2.lower()
# Don't penalize strings that move certain words to the end. For
# example, "the something" should be considered equal to
# "something, the".
for word in SD_END_WORDS:
if str1.endswith(', %s' % word):
str1 = '%s %s' % (word, str1[:-len(word)-2])
if str2.endswith(', %s' % word):
str2 = '%s %s' % (word, str2[:-len(word)-2])
# Perform a couple of basic normalizing substitutions.
for pat, repl in SD_REPLACE:
str1 = re.sub(pat, repl, str1)
str2 = re.sub(pat, repl, str2)
# Change the weight for certain string portions matched by a set
# of regular expressions. We gradually change the strings and build
# up penalties associated with parts of the string that were
# deleted.
base_dist = _string_dist_basic(str1, str2)
penalty = 0.0
for pat, weight in SD_PATTERNS:
# Get strings that drop the pattern.
case_str1 = re.sub(pat, '', str1)
case_str2 = re.sub(pat, '', str2)
if case_str1 != str1 or case_str2 != str2:
# If the pattern was present (i.e., it is deleted in
# the current case), recalculate the distances for the
# modified strings.
case_dist = _string_dist_basic(case_str1, case_str2)
case_delta = max(0.0, base_dist - case_dist)
if case_delta == 0.0:
continue
# Shift our baseline strings down (to avoid rematching the
# same part of the string) and add a scaled distance
# amount to the penalties.
str1 = case_str1
str2 = case_str2
base_dist = case_dist
penalty += weight * case_delta
dist = base_dist + penalty
return dist
def current_metadata(items):
"""Returns the most likely artist and album for a set of Items.
Each is determined by the tag value held by a plurality of the Items.
"""
keys = 'artist', 'album'
likelies = {}
consensus = {}
for key in keys:
values = [getattr(item, key) for item in items]
likelies[key], freq = plurality(values)
consensus[key] = (freq == len(values))
return likelies['artist'], likelies['album'], consensus['artist']
def order_items(items, trackinfo):
"""Orders the items based on how they match some canonical track
information. This always produces a result if the numbers of tracks
match.
"""
# Make sure lengths match.
if len(items) != len(trackinfo):
return None
# Construct the cost matrix.
costs = []
for cur_item in items:
row = []
for i, canon_item in enumerate(trackinfo):
row.append(track_distance(cur_item, canon_item, i+1))
costs.append(row)
# Find a minimum-cost bipartite matching.
matching = Munkres().compute(costs)
# Order items based on the matching.
ordered_items = [None]*len(items)
for cur_idx, canon_idx in matching:
ordered_items[canon_idx] = items[cur_idx]
return ordered_items
def track_distance(item, track_data, track_index=None, incl_artist=False):
"""Determines the significance of a track metadata change. Returns
a float in [0.0,1.0]. `track_index` is the track number of the
`track_data` metadata set. If `track_index` is provided and
item.track is set, then these indices are used as a component of
the distance calculation. `incl_artist` indicates that a distance
component should be included for the track artist (i.e., for
various-artist releases).
"""
# Distance and normalization accumulators.
dist, dist_max = 0.0, 0.0
# Check track length.
if 'length' not in track_data:
# If there's no length to check, assume the worst.
dist += TRACK_LENGTH_WEIGHT
else:
diff = abs(item.length - track_data['length'])
diff = max(diff - TRACK_LENGTH_GRACE, 0.0)
diff = min(diff, TRACK_LENGTH_MAX)
dist += (diff / TRACK_LENGTH_MAX) * TRACK_LENGTH_WEIGHT
dist_max += TRACK_LENGTH_WEIGHT
# Track title.
dist += string_dist(item.title, track_data['title']) * TRACK_TITLE_WEIGHT
dist_max += TRACK_TITLE_WEIGHT
# Track artist, if included.
# Attention: MB DB does not have artist info for all compilations,
# so only check artist distance if there is actually an artist in
# the MB track data.
if incl_artist and 'artist' in track_data:
dist += string_dist(item.artist, track_data['artist']) * \
TRACK_ARTIST_WEIGHT
dist_max += TRACK_ARTIST_WEIGHT
# Track index.
if track_index and item.track:
if track_index != item.track:
dist += TRACK_INDEX_WEIGHT
dist_max += TRACK_INDEX_WEIGHT
# MusicBrainz track ID.
if item.mb_trackid:
if item.mb_trackid != track_data['id']:
dist += TRACK_ID_WEIGHT
dist_max += TRACK_ID_WEIGHT
# Plugin distances.
plugin_d, plugin_dm = plugins.track_distance(item, track_data)
dist += plugin_d
dist_max += plugin_dm
return dist / dist_max
def distance(items, info):
"""Determines how "significant" an album metadata change would be.
Returns a float in [0.0,1.0]. The list of items must be ordered.
"""
cur_artist, cur_album, _ = current_metadata(items)
cur_artist = cur_artist or ''
cur_album = cur_album or ''
# These accumulate the possible distance components. The final
# distance will be dist/dist_max.
dist = 0.0
dist_max = 0.0
# Artist/album metadata.
if not info['va']:
dist += string_dist(cur_artist, info['artist']) * ARTIST_WEIGHT
dist_max += ARTIST_WEIGHT
dist += string_dist(cur_album, info['album']) * ALBUM_WEIGHT
dist_max += ALBUM_WEIGHT
# Track distances.
for i, (item, track_data) in enumerate(zip(items, info['tracks'])):
dist += track_distance(item, track_data, i+1, info['va']) * \
TRACK_WEIGHT
dist_max += TRACK_WEIGHT
# Plugin distances.
plugin_d, plugin_dm = plugins.album_distance(items, info)
dist += plugin_d
dist_max += plugin_dm
# Normalize distance, avoiding divide-by-zero.
if dist_max == 0.0:
return 0.0
else:
return dist/dist_max
def apply_item_metadata(item, track_data):
"""Set an item's metadata from its matched info dictionary.
"""
@@ -361,222 +102,3 @@ def apply_metadata(items, info):
# Compilation flag.
item.comp = info['va']
def match_by_id(items):
"""If the items are tagged with a MusicBrainz album ID, returns an
info dict for the corresponding album. Otherwise, returns None.
"""
# Is there a consensus on the MB album ID?
albumids = [item.mb_albumid for item in items if item.mb_albumid]
if not albumids:
log.debug('No album IDs found.')
return None
# If all album IDs are equal, look up the album.
if bool(reduce(lambda x,y: x if x==y else (), albumids)):
albumid = albumids[0]
log.debug('Searching for discovered album ID: ' + albumid)
return mb.album_for_id(albumid)
else:
log.debug('No album ID consensus.')
return None
#fixme In the future, at the expense of performance, we could use
# other IDs (i.e., track and artist) in case the album tag isn't
# present, but that event seems very unlikely.
def recommendation(results):
"""Given a sorted list of result tuples, returns a recommendation
flag (RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE) based
on the results' distances.
"""
if not results:
# No candidates: no recommendation.
rec = RECOMMEND_NONE
else:
min_dist = results[0][0]
if min_dist < STRONG_REC_THRESH:
# Strong recommendation level.
rec = RECOMMEND_STRONG
elif len(results) == 1:
# Only a single candidate. Medium recommendation.
rec = RECOMMEND_MEDIUM
elif min_dist <= MEDIUM_REC_THRESH:
# Medium recommendation level.
rec = RECOMMEND_MEDIUM
elif results[1][0] - min_dist >= REC_GAP_THRESH:
# Gap between first two candidates is large.
rec = RECOMMEND_MEDIUM
else:
# No conclusion.
rec = RECOMMEND_NONE
return rec
def validate_candidate(items, tuple_dict, info):
"""Given a candidate info dict, attempt to add the candidate to
the output dictionary of result tuples. This involves checking
the track count, ordering the items, checking for duplicates, and
calculating the distance.
"""
log.debug('Candidate: %s - %s' % (info['artist'], info['album']))
# Don't duplicate.
if info['album_id'] in tuple_dict:
log.debug('Duplicate.')
return
# Make sure the album has the correct number of tracks.
if len(items) != len(info['tracks']):
log.debug('Track count mismatch.')
return
# Put items in order.
ordered = order_items(items, info['tracks'])
if not ordered:
log.debug('Not orderable.')
return
# Get the change distance.
dist = distance(ordered, info)
log.debug('Success. Distance: %f' % dist)
tuple_dict[info['album_id']] = dist, ordered, info
def tag_album(items, timid=False, search_artist=None, search_album=None,
search_id=None):
"""Bundles together the functionality used to infer tags for a
set of items that make up an album. Returns everything relevant:
- The current artist.
- The current album.
- A list of (distance, items, info) tuples where info is a
dictionary containing the inferred tags and items is a
reordered version of the input items list. The candidates are
sorted by distance (i.e., best match first).
- A recommendation, one of RECOMMEND_STRONG, RECOMMEND_MEDIUM,
or RECOMMEND_NONE, indicating that the first candidate is
very likely correct, somewhat likely correct, or that no
conclusion could be reached.
If search_artist and search_album or search_id are provided, then
they are used as search terms in place of the current metadata.
May raise an AutotagError if existing metadata is insufficient.
"""
# Get current metadata.
cur_artist, cur_album, artist_consensus = current_metadata(items)
log.debug('Tagging %s - %s' % (cur_artist, cur_album))
# The output result tuples (keyed by MB album ID).
out_tuples = {}
# Try to find album indicated by MusicBrainz IDs.
if search_id:
log.debug('Searching for album ID: ' + search_id)
id_info = mb.album_for_id(search_id)
else:
id_info = match_by_id(items)
if id_info:
validate_candidate(items, out_tuples, id_info)
rec = recommendation(out_tuples.values())
log.debug('Album ID match recommendation is ' + str(rec))
if out_tuples and not timid:
# If we have a very good MBID match, return immediately.
# Otherwise, this match will compete against metadata-based
# matches.
if rec == RECOMMEND_STRONG:
log.debug('ID match.')
return cur_artist, cur_album, out_tuples.values(), rec
# If searching by ID, don't continue to metadata search.
if search_id is not None:
if out_tuples:
return cur_artist, cur_album, out_tuples.values(), rec
else:
return cur_artist, cur_album, [], RECOMMEND_NONE
# Search terms.
if not (search_artist and search_album):
# No explicit search terms -- use current metadata.
search_artist, search_album = cur_artist, cur_album
log.debug(u'Search terms: %s - %s' % (search_artist, search_album))
# Get candidate metadata from search.
if search_artist and search_album:
candidates = mb.match_album(search_artist, search_album,
len(items), MAX_CANDIDATES)
candidates = list(candidates)
else:
candidates = []
# Possibly add "various artists" search.
if search_album and ((not artist_consensus) or \
(search_artist.lower() in VA_ARTISTS) or \
any(item.comp for item in items)):
log.debug(u'Possibly Various Artists; adding matches.')
candidates.extend(mb.match_album(None, search_album, len(items),
MAX_CANDIDATES))
# Get candidates from plugins.
candidates.extend(plugins.candidates(items))
# Get the distance to each candidate.
log.debug(u'Evaluating %i candidates.' % len(candidates))
for info in candidates:
validate_candidate(items, out_tuples, info)
# Sort by distance.
out_tuples = out_tuples.values()
out_tuples.sort()
rec = recommendation(out_tuples)
return cur_artist, cur_album, out_tuples, rec
def tag_item(item, timid=False, search_artist=None, search_title=None,
search_id=None):
"""Attempts to find metadata for a single track. Returns a
`(candidates, recommendation)` pair where `candidates` is a list
of `(distance, track_info)` pairs. `search_artist` and
`search_title` may be used to override the current metadata for
the purposes of the MusicBrainz title search; likewise `search_id`.
"""
candidates = []
# First, try matching by MusicBrainz ID.
trackid = search_id or item.mb_trackid
if trackid:
log.debug('Searching for track ID: ' + trackid)
track_info = mb.track_for_id(trackid)
if track_info:
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# If this is a good match, then don't keep searching.
rec = recommendation(candidates)
if rec == RECOMMEND_STRONG and not timid:
log.debug('Track ID match.')
return candidates, rec
# If we're searching by ID, don't proceed.
if search_id is not None:
if candidates:
return candidates, rec
else:
return [], RECOMMEND_NONE
# Search terms.
if not (search_artist and search_title):
search_artist, search_title = item.artist, item.title
log.debug(u'Item search terms: %s - %s' % (search_artist, search_title))
# Candidate metadata from search.
for track_info in mb.match_track(search_artist, search_title):
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# Add candidates from plugins.
for track_info in plugins.item_candidates(item):
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# Sort by distance and return with recommendation.
log.debug('Found %i candidates.' % len(candidates))
candidates.sort()
rec = recommendation(candidates)
return candidates, rec
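For orientation, and again outside the diff itself: the re-exports near the top of the new __init__.py keep the package-level interface stable, so calling code can continue to look roughly like the hedged sketch below. Building the Item list (normally done by the importer) is omitted, and the handling is deliberately minimal.

from beets import autotag

def retag_album(items):
    # `items` is assumed to be a list of beets Item objects from one album.
    cur_artist, cur_album, candidates, rec = autotag.tag_album(items)
    if candidates and rec == autotag.RECOMMEND_STRONG:
        # Candidates are (distance, reordered_items, info) tuples, best match first.
        dist, ordered_items, info = candidates[0]
        autotag.apply_metadata(ordered_items, info)
    return rec

These names were previously defined directly in this module; the imports from .model and .match preserve that contract while the implementation moves out.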

beets/autotag/match.py (new file, 510 lines added)

@@ -0,0 +1,510 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Matches existing metadata with canonical information to identify
releases and tracks.
"""
import logging
import re
from munkres import Munkres
from unidecode import unidecode
from beets.autotag import mb
from beets import plugins
from beets.util import levenshtein, plurality
# Distance parameters.
# Text distance weights: proportions on the normalized intuitive edit
# distance.
ARTIST_WEIGHT = 3.0
ALBUM_WEIGHT = 3.0
# The weight of the entire distance calculated for a given track.
TRACK_WEIGHT = 1.0
# These distances are components of the track distance (that is, they
# compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT;
# the overall TRACK_WEIGHT does that).
TRACK_TITLE_WEIGHT = 3.0
# Used instead of a global artist penalty for various-artist matches.
TRACK_ARTIST_WEIGHT = 2.0
# Added when the indices of tracks don't match.
TRACK_INDEX_WEIGHT = 1.0
# Track length weights: no penalty before GRACE, maximum (WEIGHT)
# penalty at GRACE+MAX discrepancy.
TRACK_LENGTH_GRACE = 10
TRACK_LENGTH_MAX = 30
TRACK_LENGTH_WEIGHT = 2.0
# MusicBrainz track ID matches.
TRACK_ID_WEIGHT = 5.0
# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma.
SD_END_WORDS = ['the', 'a', 'an']
# Reduced weights for certain portions of the string.
SD_PATTERNS = [
(r'^the ', 0.1),
(r'[\[\(]?(ep|single)[\]\)]?', 0.0),
(r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
(r'\(.*?\)', 0.3),
(r'\[.*?\]', 0.3),
(r'(, )?(pt\.|part) .+', 0.2),
]
# Replacements to use before testing distance.
SD_REPLACE = [
(r'&', 'and'),
]
# Try 5 releases. In the future, this should be more dynamic: let the
# probability of continuing to the next release be inversely
# proportional to how good our current best is and how long we've
# already taken.
MAX_CANDIDATES = 5
# Recommendation constants.
RECOMMEND_STRONG = 'RECOMMEND_STRONG'
RECOMMEND_MEDIUM = 'RECOMMEND_MEDIUM'
RECOMMEND_NONE = 'RECOMMEND_NONE'
# Thresholds for recommendations.
STRONG_REC_THRESH = 0.04
MEDIUM_REC_THRESH = 0.25
REC_GAP_THRESH = 0.25
# Artist signals that indicate "various artists".
VA_ARTISTS = (u'', u'various artists', u'va', u'unknown')
# Autotagging exceptions.
class AutotagError(Exception):
pass
# Global logger.
log = logging.getLogger('beets')
# Primary matching functionality.
def _string_dist_basic(str1, str2):
"""Basic edit distance between two strings, ignoring
non-alphanumeric characters and case. Comparisons are based on a
transliteration/lowering to ASCII characters. Normalized by string
length.
"""
str1 = unidecode(str1)
str2 = unidecode(str2)
str1 = re.sub(r'[^a-z0-9]', '', str1.lower())
str2 = re.sub(r'[^a-z0-9]', '', str2.lower())
if not str1 and not str2:
return 0.0
return levenshtein(str1, str2) / float(max(len(str1), len(str2)))
def string_dist(str1, str2):
"""Gives an "intuitive" edit distance between two strings. This is
an edit distance, normalized by the string length, with a number of
tweaks that reflect intuition about text.
"""
str1 = str1.lower()
str2 = str2.lower()
# Don't penalize strings that move certain words to the end. For
# example, "the something" should be considered equal to
# "something, the".
for word in SD_END_WORDS:
if str1.endswith(', %s' % word):
str1 = '%s %s' % (word, str1[:-len(word)-2])
if str2.endswith(', %s' % word):
str2 = '%s %s' % (word, str2[:-len(word)-2])
# Perform a couple of basic normalizing substitutions.
for pat, repl in SD_REPLACE:
str1 = re.sub(pat, repl, str1)
str2 = re.sub(pat, repl, str2)
# Change the weight for certain string portions matched by a set
# of regular expressions. We gradually change the strings and build
# up penalties associated with parts of the string that were
# deleted.
base_dist = _string_dist_basic(str1, str2)
penalty = 0.0
for pat, weight in SD_PATTERNS:
# Get strings that drop the pattern.
case_str1 = re.sub(pat, '', str1)
case_str2 = re.sub(pat, '', str2)
if case_str1 != str1 or case_str2 != str2:
# If the pattern was present (i.e., it is deleted in
# the current case), recalculate the distances for the
# modified strings.
case_dist = _string_dist_basic(case_str1, case_str2)
case_delta = max(0.0, base_dist - case_dist)
if case_delta == 0.0:
continue
# Shift our baseline strings down (to avoid rematching the
# same part of the string) and add a scaled distance
# amount to the penalties.
str1 = case_str1
str2 = case_str2
base_dist = case_dist
penalty += weight * case_delta
dist = base_dist + penalty
return dist
def current_metadata(items):
"""Returns the most likely artist and album for a set of Items.
Each is determined by the tag value held by a plurality of the Items.
"""
keys = 'artist', 'album'
likelies = {}
consensus = {}
for key in keys:
values = [getattr(item, key) for item in items]
likelies[key], freq = plurality(values)
consensus[key] = (freq == len(values))
return likelies['artist'], likelies['album'], consensus['artist']
def order_items(items, trackinfo):
"""Orders the items based on how they match some canonical track
information. This always produces a result if the numbers of tracks
match.
"""
# Make sure lengths match.
if len(items) != len(trackinfo):
return None
# Construct the cost matrix.
costs = []
for cur_item in items:
row = []
for i, canon_item in enumerate(trackinfo):
row.append(track_distance(cur_item, canon_item, i+1))
costs.append(row)
# Find a minimum-cost bipartite matching.
matching = Munkres().compute(costs)
# Order items based on the matching.
ordered_items = [None]*len(items)
for cur_idx, canon_idx in matching:
ordered_items[canon_idx] = items[cur_idx]
return ordered_items
def track_distance(item, track_data, track_index=None, incl_artist=False):
"""Determines the significance of a track metadata change. Returns
a float in [0.0,1.0]. `track_index` is the track number of the
`track_data` metadata set. If `track_index` is provided and
item.track is set, then these indices are used as a component of
the distance calculation. `incl_artist` indicates that a distance
component should be included for the track artist (i.e., for
various-artist releases).
"""
# Distance and normalization accumulators.
dist, dist_max = 0.0, 0.0
# Check track length.
if 'length' not in track_data:
# If there's no length to check, assume the worst.
dist += TRACK_LENGTH_WEIGHT
else:
diff = abs(item.length - track_data['length'])
diff = max(diff - TRACK_LENGTH_GRACE, 0.0)
diff = min(diff, TRACK_LENGTH_MAX)
dist += (diff / TRACK_LENGTH_MAX) * TRACK_LENGTH_WEIGHT
dist_max += TRACK_LENGTH_WEIGHT
# Track title.
dist += string_dist(item.title, track_data['title']) * TRACK_TITLE_WEIGHT
dist_max += TRACK_TITLE_WEIGHT
# Track artist, if included.
# Attention: MB DB does not have artist info for all compilations,
# so only check artist distance if there is actually an artist in
# the MB track data.
if incl_artist and 'artist' in track_data:
dist += string_dist(item.artist, track_data['artist']) * \
TRACK_ARTIST_WEIGHT
dist_max += TRACK_ARTIST_WEIGHT
# Track index.
if track_index and item.track:
if track_index != item.track:
dist += TRACK_INDEX_WEIGHT
dist_max += TRACK_INDEX_WEIGHT
# MusicBrainz track ID.
if item.mb_trackid:
if item.mb_trackid != track_data['id']:
dist += TRACK_ID_WEIGHT
dist_max += TRACK_ID_WEIGHT
# Plugin distances.
plugin_d, plugin_dm = plugins.track_distance(item, track_data)
dist += plugin_d
dist_max += plugin_dm
return dist / dist_max
def distance(items, info):
"""Determines how "significant" an album metadata change would be.
Returns a float in [0.0,1.0]. The list of items must be ordered.
"""
cur_artist, cur_album, _ = current_metadata(items)
cur_artist = cur_artist or ''
cur_album = cur_album or ''
# These accumulate the possible distance components. The final
# distance will be dist/dist_max.
dist = 0.0
dist_max = 0.0
# Artist/album metadata.
if not info['va']:
dist += string_dist(cur_artist, info['artist']) * ARTIST_WEIGHT
dist_max += ARTIST_WEIGHT
dist += string_dist(cur_album, info['album']) * ALBUM_WEIGHT
dist_max += ALBUM_WEIGHT
# Track distances.
for i, (item, track_data) in enumerate(zip(items, info['tracks'])):
dist += track_distance(item, track_data, i+1, info['va']) * \
TRACK_WEIGHT
dist_max += TRACK_WEIGHT
# Plugin distances.
plugin_d, plugin_dm = plugins.album_distance(items, info)
dist += plugin_d
dist_max += plugin_dm
# Normalize distance, avoiding divide-by-zero.
if dist_max == 0.0:
return 0.0
else:
return dist/dist_max
def match_by_id(items):
"""If the items are tagged with a MusicBrainz album ID, returns an
info dict for the corresponding album. Otherwise, returns None.
"""
# Is there a consensus on the MB album ID?
albumids = [item.mb_albumid for item in items if item.mb_albumid]
if not albumids:
log.debug('No album IDs found.')
return None
# If all album IDs are equal, look up the album.
if bool(reduce(lambda x,y: x if x==y else (), albumids)):
albumid = albumids[0]
log.debug('Searching for discovered album ID: ' + albumid)
return mb.album_for_id(albumid)
else:
log.debug('No album ID consensus.')
return None
#fixme In the future, at the expense of performance, we could use
# other IDs (i.e., track and artist) in case the album tag isn't
# present, but that event seems very unlikely.
def recommendation(results):
"""Given a sorted list of result tuples, returns a recommendation
flag (RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE) based
on the results' distances.
"""
if not results:
# No candidates: no recommendation.
rec = RECOMMEND_NONE
else:
min_dist = results[0][0]
if min_dist < STRONG_REC_THRESH:
# Strong recommendation level.
rec = RECOMMEND_STRONG
elif len(results) == 1:
# Only a single candidate. Medium recommendation.
rec = RECOMMEND_MEDIUM
elif min_dist <= MEDIUM_REC_THRESH:
# Medium recommendation level.
rec = RECOMMEND_MEDIUM
elif results[1][0] - min_dist >= REC_GAP_THRESH:
# Gap between first two candidates is large.
rec = RECOMMEND_MEDIUM
else:
# No conclusion.
rec = RECOMMEND_NONE
return rec
def validate_candidate(items, tuple_dict, info):
"""Given a candidate info dict, attempt to add the candidate to
the output dictionary of result tuples. This involves checking
the track count, ordering the items, checking for duplicates, and
calculating the distance.
"""
log.debug('Candidate: %s - %s' % (info['artist'], info['album']))
# Don't duplicate.
if info['album_id'] in tuple_dict:
log.debug('Duplicate.')
return
# Make sure the album has the correct number of tracks.
if len(items) != len(info['tracks']):
log.debug('Track count mismatch.')
return
# Put items in order.
ordered = order_items(items, info['tracks'])
if not ordered:
log.debug('Not orderable.')
return
# Get the change distance.
dist = distance(ordered, info)
log.debug('Success. Distance: %f' % dist)
tuple_dict[info['album_id']] = dist, ordered, info
def tag_album(items, timid=False, search_artist=None, search_album=None,
search_id=None):
"""Bundles together the functionality used to infer tags for a
set of items that make up an album. Returns everything relevant:
- The current artist.
- The current album.
- A list of (distance, items, info) tuples where info is a
dictionary containing the inferred tags and items is a
reordered version of the input items list. The candidates are
sorted by distance (i.e., best match first).
- A recommendation, one of RECOMMEND_STRONG, RECOMMEND_MEDIUM,
or RECOMMEND_NONE, indicating that the first candidate is
very likely correct, somewhat likely correct, or that no
conclusion could be reached.
If search_artist and search_album or search_id are provided, then
they are used as search terms in place of the current metadata.
May raise an AutotagError if existing metadata is insufficient.
"""
# Get current metadata.
cur_artist, cur_album, artist_consensus = current_metadata(items)
log.debug('Tagging %s - %s' % (cur_artist, cur_album))
# The output result tuples (keyed by MB album ID).
out_tuples = {}
# Try to find album indicated by MusicBrainz IDs.
if search_id:
log.debug('Searching for album ID: ' + search_id)
id_info = mb.album_for_id(search_id)
else:
id_info = match_by_id(items)
if id_info:
validate_candidate(items, out_tuples, id_info)
rec = recommendation(out_tuples.values())
log.debug('Album ID match recommendation is ' + str(rec))
if out_tuples and not timid:
# If we have a very good MBID match, return immediately.
# Otherwise, this match will compete against metadata-based
# matches.
if rec == RECOMMEND_STRONG:
log.debug('ID match.')
return cur_artist, cur_album, out_tuples.values(), rec
# If searching by ID, don't continue to metadata search.
if search_id is not None:
if out_tuples:
return cur_artist, cur_album, out_tuples.values(), rec
else:
return cur_artist, cur_album, [], RECOMMEND_NONE
# Search terms.
if not (search_artist and search_album):
# No explicit search terms -- use current metadata.
search_artist, search_album = cur_artist, cur_album
log.debug(u'Search terms: %s - %s' % (search_artist, search_album))
# Get candidate metadata from search.
if search_artist and search_album:
candidates = mb.match_album(search_artist, search_album,
len(items), MAX_CANDIDATES)
candidates = list(candidates)
else:
candidates = []
# Possibly add "various artists" search.
if search_album and ((not artist_consensus) or \
(search_artist.lower() in VA_ARTISTS) or \
any(item.comp for item in items)):
log.debug(u'Possibly Various Artists; adding matches.')
candidates.extend(mb.match_album(None, search_album, len(items),
MAX_CANDIDATES))
# Get candidates from plugins.
candidates.extend(plugins.candidates(items))
# Get the distance to each candidate.
log.debug(u'Evaluating %i candidates.' % len(candidates))
for info in candidates:
validate_candidate(items, out_tuples, info)
# Sort by distance.
out_tuples = out_tuples.values()
out_tuples.sort()
rec = recommendation(out_tuples)
return cur_artist, cur_album, out_tuples, rec
def tag_item(item, timid=False, search_artist=None, search_title=None,
search_id=None):
"""Attempts to find metadata for a single track. Returns a
`(candidates, recommendation)` pair where `candidates` is a list
of `(distance, track_info)` pairs. `search_artist` and
`search_title` may be used to override the current metadata for
the purposes of the MusicBrainz title search; likewise `search_id`.
"""
candidates = []
# First, try matching by MusicBrainz ID.
trackid = search_id or item.mb_trackid
if trackid:
log.debug('Searching for track ID: ' + trackid)
track_info = mb.track_for_id(trackid)
if track_info:
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# If this is a good match, then don't keep searching.
rec = recommendation(candidates)
if rec == RECOMMEND_STRONG and not timid:
log.debug('Track ID match.')
return candidates, rec
# If we're searching by ID, don't proceed.
if search_id is not None:
if candidates:
return candidates, rec
else:
return [], RECOMMEND_NONE
# Search terms.
if not (search_artist and search_title):
search_artist, search_title = item.artist, item.title
log.debug(u'Item search terms: %s - %s' % (search_artist, search_title))
# Candidate metadata from search.
for track_info in mb.match_track(search_artist, search_title):
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# Add candidates from plugins.
for track_info in plugins.item_candidates(item):
dist = track_distance(item, track_info, incl_artist=True)
candidates.append((dist, track_info))
# Sort by distance and return with recommendation.
log.debug('Found %i candidates.' % len(candidates))
candidates.sort()
rec = recommendation(candidates)
return candidates, rec
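To make the recommendation thresholds concrete, here is an illustrative check (not part of the commit) using hypothetical, pre-sorted distances. recommendation() only inspects the leading distance of each result tuple, so None placeholders stand in for the items and info.

from beets.autotag.match import (recommendation, RECOMMEND_STRONG,
                                 RECOMMEND_MEDIUM, RECOMMEND_NONE)

assert recommendation([]) == RECOMMEND_NONE                        # no candidates
assert recommendation([(0.03, None, None)]) == RECOMMEND_STRONG    # below 0.04
assert recommendation([(0.20, None, None)]) == RECOMMEND_MEDIUM    # lone candidate
assert recommendation([(0.30, None, None),
                       (0.60, None, None)]) == RECOMMEND_MEDIUM    # gap of 0.30 >= 0.25
assert recommendation([(0.30, None, None),
                       (0.35, None, None)]) == RECOMMEND_NONE      # no conclusion

The same function serves tag_item, whose candidates are (distance, track_info) pairs; only the first element matters here.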

beets/autotag/model.py (new file, 70 lines added)

@@ -0,0 +1,70 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Classes used by metadata sources and the matching logic."""
class AlbumInfo(object):
"""Describes a canonical release that may be used to match a release
in the library. Consists of these data members:
- ``album``: the release title
- ``album_id``: MusicBrainz ID; UUID fragment only
- ``artist``: name of the release's primary artist
- ``artist_id``
- ``tracks``: list of TrackInfo objects making up the release
- ``asin``: Amazon ASIN
- ``albumtype``: string describing the kind of release
- ``va``: boolean: whether this is a "various artists" release
- ``year``: release year
- ``month``: release month
- ``day``: release day
- ``label``: music label responsible for the release
The fields up through ``tracks`` are required. The others are
optional and may be None.
"""
def __init__(self, album, album_id, artist, artist_id, tracks, asin=None,
albumtype=None, va=False, year=None, month=None, day=None):
self.album = album
self.album_id = album_id
self.artist = artist
self.artist_id = artist_id
self.tracks = tracks
self.asin = asin
self.albumtype = albumtype
self.va = va
self.year = year
self.month = month
self.day = day
class TrackInfo(object):
"""Describes a canonical track present on a release. Appears as part
of an AlbumInfo's ``tracks`` list. Consists of these data members:
- ``title``: name of the track
- ``track_id``: MusicBrainz ID; UUID fragment only
- ``artist``: individual track artist name
- ``artist_id``
- ``length``: float: duration of the track in seconds
Only ``title`` and ``track_id`` are required. The rest of the fields
may be None.
"""
def __init__(self, title, track_id, artist=None, artist_id=None,
length=None):
self.title = title
self.track_id = track_id
self.artist = artist
self.artist_id = artist_id
self.length = length
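A brief usage sketch for the new data model (illustrative only; the titles, lengths, and IDs below are made-up placeholders rather than real MusicBrainz values):

from beets.autotag.model import AlbumInfo, TrackInfo

tracks = [
    TrackInfo('First Song', 'aaaaaaaa-1111', artist='Some Artist',
              artist_id='bbbbbbbb-2222', length=214.0),
    TrackInfo('Second Song', 'aaaaaaaa-3333', artist='Some Artist',
              artist_id='bbbbbbbb-2222', length=187.5),
]
album = AlbumInfo('Some Album', 'cccccccc-4444', 'Some Artist',
                  'bbbbbbbb-2222', tracks, va=False, year=2011)

Note that the matching code in this commit still passes plain dictionaries (info['tracks'], track_data['title']); these classes appear to be the interface that the "interface changes" named in the commit message will move to.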

test/test_autotag.py

@@ -21,6 +21,7 @@ import re
import _common
from beets import autotag
from beets.autotag import match
from beets.library import Item
from beets.util import plurality
@@ -47,7 +48,7 @@ class PluralityTest(unittest.TestCase):
items = [Item({'artist': 'The Beetles', 'album': 'The White Album'}),
Item({'artist': 'The Beatles', 'album': 'The White Album'}),
Item({'artist': 'The Beatles', 'album': 'Teh White Album'})]
l_artist, l_album, artist_consensus = autotag.current_metadata(items)
l_artist, l_album, artist_consensus = match.current_metadata(items)
self.assertEqual(l_artist, 'The Beatles')
self.assertEqual(l_album, 'The White Album')
self.assertFalse(artist_consensus)
@@ -56,7 +57,7 @@ class PluralityTest(unittest.TestCase):
items = [Item({'artist': 'The Beatles', 'album': 'The White Album'}),
Item({'artist': 'The Beatles', 'album': 'The White Album'}),
Item({'artist': 'The Beatles', 'album': 'Teh White Album'})]
l_artist, l_album, artist_consensus = autotag.current_metadata(items)
l_artist, l_album, artist_consensus = match.current_metadata(items)
self.assertEqual(l_artist, 'The Beatles')
self.assertEqual(l_album, 'The White Album')
self.assertTrue(artist_consensus)
@@ -91,7 +92,7 @@ class AlbumDistanceTest(unittest.TestCase):
'tracks': self.trackinfo(),
'va': False,
}
self.assertEqual(autotag.distance(items, info), 0)
self.assertEqual(match.distance(items, info), 0)
def test_global_artists_differ(self):
items = []
@@ -104,7 +105,7 @@ class AlbumDistanceTest(unittest.TestCase):
'tracks': self.trackinfo(),
'va': False,
}
self.assertNotEqual(autotag.distance(items, info), 0)
self.assertNotEqual(match.distance(items, info), 0)
def test_comp_track_artists_match(self):
items = []
@@ -117,7 +118,7 @@ class AlbumDistanceTest(unittest.TestCase):
'tracks': self.trackinfo(),
'va': True,
}
self.assertEqual(autotag.distance(items, info), 0)
self.assertEqual(match.distance(items, info), 0)
def test_comp_no_track_artists(self):
# Some VA releases don't have track artists (incomplete metadata).
@@ -134,7 +135,7 @@ class AlbumDistanceTest(unittest.TestCase):
del info['tracks'][0]['artist']
del info['tracks'][1]['artist']
del info['tracks'][2]['artist']
self.assertEqual(autotag.distance(items, info), 0)
self.assertEqual(match.distance(items, info), 0)
def test_comp_track_artists_do_not_match(self):
items = []
@@ -147,7 +148,7 @@ class AlbumDistanceTest(unittest.TestCase):
'tracks': self.trackinfo(),
'va': True,
}
self.assertNotEqual(autotag.distance(items, info), 0)
self.assertNotEqual(match.distance(items, info), 0)
def _mkmp3(path):
shutil.copyfile(os.path.join(_common.RSRC, 'min.mp3'), path)
@@ -208,7 +209,7 @@ class OrderingTest(unittest.TestCase):
trackinfo.append({'title': 'one', 'track': 1})
trackinfo.append({'title': 'two', 'track': 2})
trackinfo.append({'title': 'three', 'track': 3})
ordered = autotag.order_items(items, trackinfo)
ordered = match.order_items(items, trackinfo)
self.assertEqual(ordered[0].title, 'one')
self.assertEqual(ordered[1].title, 'two')
self.assertEqual(ordered[2].title, 'three')
@@ -222,7 +223,7 @@ class OrderingTest(unittest.TestCase):
trackinfo.append({'title': 'one', 'track': 1})
trackinfo.append({'title': 'two', 'track': 2})
trackinfo.append({'title': 'three', 'track': 3})
ordered = autotag.order_items(items, trackinfo)
ordered = match.order_items(items, trackinfo)
self.assertEqual(ordered[0].title, 'one')
self.assertEqual(ordered[1].title, 'two')
self.assertEqual(ordered[2].title, 'three')
@@ -233,7 +234,7 @@ class OrderingTest(unittest.TestCase):
items.append(self.item('two', 2))
trackinfo = []
trackinfo.append({'title': 'one', 'track': 1})
ordered = autotag.order_items(items, trackinfo)
ordered = match.order_items(items, trackinfo)
self.assertEqual(ordered, None)
def test_order_corrects_when_track_names_are_entirely_wrong(self):
@@ -280,7 +281,7 @@ class OrderingTest(unittest.TestCase):
trackinfo.append(info('Beloved One', 243.733))
trackinfo.append(info('In the Lord\'s Arms', 186.13300000000001))
ordered = autotag.order_items(items, trackinfo)
ordered = match.order_items(items, trackinfo)
for i, item in enumerate(ordered):
self.assertEqual(i+1, item.track)
@@ -426,77 +427,77 @@ class ApplyCompilationTest(unittest.TestCase):
class StringDistanceTest(unittest.TestCase):
def test_equal_strings(self):
dist = autotag.string_dist('Some String', 'Some String')
dist = match.string_dist('Some String', 'Some String')
self.assertEqual(dist, 0.0)
def test_different_strings(self):
dist = autotag.string_dist('Some String', 'Totally Different')
dist = match.string_dist('Some String', 'Totally Different')
self.assertNotEqual(dist, 0.0)
def test_punctuation_ignored(self):
dist = autotag.string_dist('Some String', 'Some.String!')
dist = match.string_dist('Some String', 'Some.String!')
self.assertEqual(dist, 0.0)
def test_case_ignored(self):
dist = autotag.string_dist('Some String', 'sOME sTring')
dist = match.string_dist('Some String', 'sOME sTring')
self.assertEqual(dist, 0.0)
def test_leading_the_has_lower_weight(self):
dist1 = autotag.string_dist('XXX Band Name', 'Band Name')
dist2 = autotag.string_dist('The Band Name', 'Band Name')
dist1 = match.string_dist('XXX Band Name', 'Band Name')
dist2 = match.string_dist('The Band Name', 'Band Name')
self.assert_(dist2 < dist1)
def test_parens_have_lower_weight(self):
dist1 = autotag.string_dist('One .Two.', 'One')
dist2 = autotag.string_dist('One (Two)', 'One')
dist1 = match.string_dist('One .Two.', 'One')
dist2 = match.string_dist('One (Two)', 'One')
self.assert_(dist2 < dist1)
def test_brackets_have_lower_weight(self):
dist1 = autotag.string_dist('One .Two.', 'One')
dist2 = autotag.string_dist('One [Two]', 'One')
dist1 = match.string_dist('One .Two.', 'One')
dist2 = match.string_dist('One [Two]', 'One')
self.assert_(dist2 < dist1)
def test_ep_label_has_zero_weight(self):
dist = autotag.string_dist('My Song (EP)', 'My Song')
dist = match.string_dist('My Song (EP)', 'My Song')
self.assertEqual(dist, 0.0)
def test_featured_has_lower_weight(self):
dist1 = autotag.string_dist('My Song blah Someone', 'My Song')
dist2 = autotag.string_dist('My Song feat Someone', 'My Song')
dist1 = match.string_dist('My Song blah Someone', 'My Song')
dist2 = match.string_dist('My Song feat Someone', 'My Song')
self.assert_(dist2 < dist1)
def test_postfix_the(self):
dist = autotag.string_dist('The Song Title', 'Song Title, The')
dist = match.string_dist('The Song Title', 'Song Title, The')
self.assertEqual(dist, 0.0)
def test_postfix_a(self):
dist = autotag.string_dist('A Song Title', 'Song Title, A')
dist = match.string_dist('A Song Title', 'Song Title, A')
self.assertEqual(dist, 0.0)
def test_postfix_an(self):
dist = autotag.string_dist('An Album Title', 'Album Title, An')
dist = match.string_dist('An Album Title', 'Album Title, An')
self.assertEqual(dist, 0.0)
def test_empty_strings(self):
dist = autotag.string_dist('', '')
dist = match.string_dist('', '')
self.assertEqual(dist, 0.0)
def test_solo_pattern(self):
# Just make sure these don't crash.
autotag.string_dist('The ', '')
autotag.string_dist('(EP)', '(EP)')
autotag.string_dist(', An', '')
match.string_dist('The ', '')
match.string_dist('(EP)', '(EP)')
match.string_dist(', An', '')
def test_heuristic_does_not_harm_distance(self):
dist = autotag.string_dist('Untitled', '[Untitled]')
dist = match.string_dist('Untitled', '[Untitled]')
self.assertEqual(dist, 0.0)
def test_ampersand_expansion(self):
dist = autotag.string_dist('And', '&')
dist = match.string_dist('And', '&')
self.assertEqual(dist, 0.0)
def test_accented_characters(self):
dist = autotag.string_dist(u'\xe9\xe1\xf1', u'ean')
dist = match.string_dist(u'\xe9\xe1\xf1', u'ean')
self.assertEqual(dist, 0.0)
def suite():