diff --git a/.hgtags b/.hgtags index 7fa4eb224..2d04bb17f 100644 --- a/.hgtags +++ b/.hgtags @@ -19,3 +19,6 @@ f3cd4c138c6f40dc324a23bf01c4c7d97766477e 1.0rc2 f28ea9e2ef8d39913d79dbba73db280ff0740c50 v1.1.0-beta.2 8f070ce28a7b33d8509b29a8dbe937109bbdbd21 v1.1.0-beta.3 97f04ce252332dbda013cbc478d702d54a8fc1bd v1.1.0 +b3f7b5267a2f7b46b826d087421d7f4569211240 v1.2.0 +b3f7b5267a2f7b46b826d087421d7f4569211240 v1.2.0 +ecff182221ec32a9f6549ad3ce8d2ab4c3e5568a v1.2.0 diff --git a/beets/__init__.py b/beets/__init__.py index 151b46994..7a1cc1b92 100644 --- a/beets/__init__.py +++ b/beets/__init__.py @@ -12,7 +12,7 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -__version__ = '1.1.1' +__version__ = '1.2.1' __author__ = 'Adrian Sampson ' import beets.library diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py index cedaa3d90..96de6b674 100644 --- a/beets/autotag/hooks.py +++ b/beets/autotag/hooks.py @@ -166,37 +166,37 @@ TrackMatch = namedtuple('TrackMatch', ['distance', 'info']) # Aggregation of sources. -def _album_for_id(album_id): - """Get a list of albums corresponding to a release ID.""" - candidates = [] - - # Candidates from MusicBrainz. +def album_for_mbid(release_id): + """Get an AlbumInfo object for a MusicBrainz release ID. Return None + if the ID is not found. + """ try: - candidates.append(mb.album_for_id(album_id)) + return mb.album_for_id(release_id) except mb.MusicBrainzAPIError as exc: exc.log(log) - # From plugins. +def track_for_mbid(recording_id): + """Get a TrackInfo object for a MusicBrainz recording ID. Return None + if the ID is not found. 
+ """ + try: + return mb.track_for_id(recording_id) + except mb.MusicBrainzAPIError as exc: + exc.log(log) + +def albums_for_id(album_id): + """Get a list of albums for an ID.""" + candidates = [album_for_mbid(album_id)] candidates.extend(plugins.album_for_id(album_id)) - return filter(None, candidates) -def _track_for_id(track_id): - """Get an item for a recording ID.""" - candidates = [] - - # From MusicBrainz. - try: - candidates.append(mb.track_for_id(track_id)) - except mb.MusicBrainzAPIError as exc: - exc.log(log) - - # From plugins. +def tracks_for_id(track_id): + """Get a list of tracks for an ID.""" + candidates = [track_for_mbid(track_id)] candidates.extend(plugins.track_for_id(track_id)) - return filter(None, candidates) -def _album_candidates(items, artist, album, va_likely): +def album_candidates(items, artist, album, va_likely): """Search for album matches. ``items`` is a list of Item objects that make up the album. ``artist`` and ``album`` are the respective names (strings), which may be derived from the item list or may be @@ -224,7 +224,7 @@ def _album_candidates(items, artist, album, va_likely): return out -def _item_candidates(item, artist, title): +def item_candidates(item, artist, title): """Search for item matches. ``item`` is the Item to be matched. ``artist`` and ``title`` are strings and either reflect the item or are specified by the user. diff --git a/beets/autotag/match.py b/beets/autotag/match.py index bcd3d040d..cc32d6e8f 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -30,7 +30,7 @@ from beets.util.enumeration import enum from beets.autotag import hooks # A configuration view for the distance weights. -weights = config['match']['weight'] +weights = config['match']['distance_weights'] # Parameters for string distance function. # Words that can be moved to the end of a string using a comma. 
@@ -187,62 +187,221 @@ def track_index_changed(item, track_info): """ return item.track not in (track_info.medium_index, track_info.index) +class Distance(object): + """Keeps track of multiple distance penalties. Provides a single weighted + distance for all penalties as well as a weighted distance for each + individual penalty. + """ + def __cmp__(self, other): + return cmp(self.distance, other) + + def __float__(self): + return self.distance + + def __getitem__(self, key): + """Returns the weighted distance for a named penalty. + """ + dist = sum(self._penalties[key]) * weights[key].as_number() + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + def __init__(self): + self._penalties = {} + + def __iter__(self): + return iter(self.sorted) + + def __len__(self): + return len(self.sorted) + + def __sub__(self, other): + return self.distance - other + + def __rsub__(self, other): + return other - self.distance + + def _eq(self, value1, value2): + """Returns True if `value1` is equal to `value2`. `value1` may be a + compiled regular expression, in which case it will be matched against + `value2`. + """ + if isinstance(value1, re._pattern_type): + return bool(value1.match(value2)) + return value1 == value2 + + def add(self, key, dist): + """Adds a distance penalty. `key` must correspond with a configured + weight setting. `dist` must be a float between 0.0 and 1.0, and will be + added to any existing distance penalties for the same key. + """ + if not 0.0 <= dist <= 1.0: + raise ValueError( + '`dist` must be between 0.0 and 1.0. It is: %r' % dist) + self._penalties.setdefault(key, []).append(dist) + + def add_equality(self, key, value, options): + """Adds a distance penalty of 1.0 if `value` doesn't match any of the + values in `options`. If an option is a compiled regular expression, it + will be considered equal if it matches against `value`. 
+ """ + if not isinstance(options, (list, tuple)): + options = [options] + for opt in options: + if self._eq(opt, value): + dist = 0.0 + break + else: + dist = 1.0 + self.add(key, dist) + + def add_expr(self, key, expr): + """Adds a distance penalty of 1.0 if `expr` evaluates to True, or 0.0. + """ + if expr: + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_number(self, key, number1, number2): + """Adds a distance penalty of 1.0 for each number of difference between + `number1` and `number2`, or 0.0 when there is no difference. Use this + when there is no upper limit on the difference between the two numbers. + """ + diff = abs(number1 - number2) + if diff: + for i in range(diff): + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_priority(self, key, value, options): + """Adds a distance penalty that corresponds to the position at which + `value` appears in `options`. A distance penalty of 0.0 for the first + option, or 1.0 if there is no matching option. If an option is a + compiled regular expression, it will be considered equal if it matches + against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + unit = 1.0 / (len(options) or 1) + for i, opt in enumerate(options): + if self._eq(opt, value): + dist = i * unit + break + else: + dist = 1.0 + self.add(key, dist) + + def add_ratio(self, key, number1, number2): + """Adds a distance penalty for `number1` as a ratio of `number2`. + `number1` is bound at 0 and `number2`. + """ + number = float(max(min(number1, number2), 0)) + if number2: + dist = number / number2 + else: + dist = 0.0 + self.add(key, dist) + + def add_string(self, key, str1, str2): + """Adds a distance penalty based on the edit distance between `str1` + and `str2`. + """ + dist = string_dist(str1, str2) + self.add(key, dist) + + @property + def distance(self): + """Returns a weighted and normalised distance across all penalties. 
+ """ + dist_max = self.max_distance + if dist_max: + return self.raw_distance / self.max_distance + return 0.0 + + @property + def max_distance(self): + """Returns the maximum distance penalty. + """ + dist_max = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_max += len(penalty) * weights[key].as_number() + return dist_max + + @property + def raw_distance(self): + """Returns the raw (denormalised) distance. + """ + dist_raw = 0.0 + for key, penalty in self._penalties.iteritems(): + dist_raw += sum(penalty) * weights[key].as_number() + return dist_raw + + @property + def sorted(self): + """Returns a list of (dist, key) pairs, with `dist` being the weighted + distance, sorted from highest to lowest. Does not include penalties + with a zero value. + """ + list_ = [] + for key in self._penalties: + dist = self[key] + if dist: + list_.append((dist, key)) + # Convert distance into a negative float we can sort items in ascending + # order (for keys, when the penalty is equal) and still get the items + # with the biggest distance first. + return sorted(list_, key=lambda (dist, key): (0-dist, key)) + + def update(self, dist): + """Adds all the distance penalties from `dist`. + """ + if not isinstance(dist, Distance): + raise ValueError( + '`dist` must be a Distance object. It is: %r' % dist) + for key, penalties in dist._penalties.iteritems(): + self._penalties.setdefault(key, []).extend(penalties) + def track_distance(item, track_info, incl_artist=False): """Determines the significance of a track metadata change. Returns a - float in [0.0,1.0]. `incl_artist` indicates that a distance - component should be included for the track artist (i.e., for - various-artist releases). + Distance object. `incl_artist` indicates that a distance component should + be included for the track artist (i.e., for various-artist releases). """ - # Distance and normalization accumulators. - dist, dist_max = 0.0, 0.0 + dist = Distance() - # Check track length. 
- # If there's no length to check, apply no penalty. + # Length. if track_info.length: - diff = abs(item.length - track_info.length) - diff = max(diff - weights['track_length_grace'].as_number(), 0.0) - diff = min(diff, weights['track_length_max'].as_number()) - dist += (diff / weights['track_length_max'].as_number()) * \ - weights['track_length'].as_number() - dist_max += weights['track_length'].as_number() + diff = abs(item.length - track_info.length) - \ + weights['track_length_grace'].as_number() + dist.add_ratio('track_length', diff, + weights['track_length_max'].as_number()) - # Track title. - dist += string_dist(item.title, track_info.title) * \ - weights['track_title'].as_number() - dist_max += weights['track_title'].as_number() + # Title. + dist.add_string('track_title', item.title, track_info.title) - # Track artist, if included. - # Attention: MB DB does not have artist info for all compilations, - # so only check artist distance if there is actually an artist in - # the MB track data. + # Artist. Only check if there is actually an artist in the track data. if incl_artist and track_info.artist and \ item.artist.lower() not in VA_ARTISTS: - dist += string_dist(item.artist, track_info.artist) * \ - weights['track_artist'].as_number() - dist_max += weights['track_artist'].as_number() + dist.add_string('track_artist', item.artist, track_info.artist) # Track index. if track_info.index and item.track: - if track_index_changed(item, track_info): - dist += weights['track_index'].as_number() - dist_max += weights['track_index'].as_number() + dist.add_expr('track_index', track_index_changed(item, track_info)) - # MusicBrainz track ID. + # Track ID. if item.mb_trackid: - if item.mb_trackid != track_info.track_id: - dist += weights['track_id'].as_number() - dist_max += weights['track_id'].as_number() + dist.add_expr('track_id', item.mb_trackid != track_info.track_id) - # Plugin distances. 
- plugin_d, plugin_dm = plugins.track_distance(item, track_info) - dist += plugin_d - dist_max += plugin_dm + # Plugins. + dist.update(plugins.track_distance(item, track_info)) - return dist / dist_max + return dist def distance(items, album_info, mapping): """Determines how "significant" an album metadata change would be. - Returns a float in [0.0,1.0]. `album_info` is an AlbumInfo object + Returns a Distance object. `album_info` is an AlbumInfo object reflecting the album to be compared. `items` is a sequence of all Item objects that will be matched (order is not important). `mapping` is a dictionary mapping Items to TrackInfo objects; the @@ -251,100 +410,99 @@ def distance(items, album_info, mapping): """ likelies, _ = current_metadata(items) - # These accumulate the possible distance components. The final - # distance will be dist/dist_max. - dist = 0.0 - dist_max = 0.0 + dist = Distance() - # Artist/album metadata. + # Artist, if not various. if not album_info.va: - dist += string_dist(likelies['artist'], album_info.artist) * \ - weights['artist'].as_number() - dist_max += weights['artist'].as_number() - dist += string_dist(likelies['album'], album_info.album) * \ - weights['album'].as_number() - dist_max += weights['album'].as_number() + dist.add_string('artist', likelies['artist'], album_info.artist) - # Year. No penalty for matching release or original year. - if likelies['year'] and album_info.year: - if likelies['year'] not in (album_info.year, album_info.original_year): - diff = abs(album_info.year - likelies['year']) - if diff: - dist += (1.0 - 1.0 / diff) * weights['year'].as_number() - dist_max += weights['year'].as_number() + # Album. + dist.add_string('album', likelies['album'], album_info.album) - # Actual or preferred media. 
- preferred_media = config['match']['preferred_media'].get() - if likelies['media'] and album_info.media: - dist += string_dist(likelies['media'], album_info.media) * \ - weights['media'].as_number() - dist_max += weights['media'].as_number() - elif album_info.media and preferred_media: - dist += string_dist(album_info.media, preferred_media) * \ - weights['media'].as_number() - dist_max += weights['media'].as_number() + # Current or preferred media. + if album_info.media: + # Preferred media options. + patterns = config['match']['preferred']['media'].as_str_seq() + options = [re.compile(r'(\d+x)?(%s)' % pat, re.I) for pat in patterns] + if options: + dist.add_priority('media', album_info.media, options) + # Current media. + elif likelies['media']: + dist.add_equality('media', album_info.media, likelies['media']) - # MusicBrainz album ID. - if likelies['mb_albumid']: - if likelies['mb_albumid'] != album_info.album_id: - dist += weights['album_id'].as_number() - dist_max += weights['album_id'].as_number() + # Mediums. + if likelies['disctotal'] and album_info.mediums: + dist.add_number('mediums', likelies['disctotal'], album_info.mediums) - # Apply a small penalty for differences across many minor metadata. This - # helps prioritise releases that are nearly identical. + # Prefer earliest release. + if album_info.year and config['match']['preferred']['original_year']: + # Assume 1889 (earliest first gramophone discs) if we don't know the + # original year. + original = album_info.original_year or 1889 + diff = abs(album_info.year - original) + diff_max = abs(datetime.date.today().year - original) + dist.add_ratio('year', diff, diff_max) + # Year. + elif likelies['year'] and album_info.year: + if likelies['year'] in (album_info.year, album_info.original_year): + # No penalty for matching release or original year. + dist.add('year', 0.0) + elif album_info.original_year: + # Prefer matchest closest to the release year. 
+ diff = abs(likelies['year'] - album_info.year) + diff_max = abs(datetime.date.today().year - + album_info.original_year) + dist.add_ratio('year', diff, diff_max) + else: + # Full penalty when there is no original year. + dist.add('year', 1.0) - if likelies['disctotal']: - if likelies['disctotal'] != album_info.mediums: - dist += weights['minor'].as_number() - dist_max += weights['minor'].as_number() + # Preferred countries. + patterns = config['match']['preferred']['countries'].as_str_seq() + options = [re.compile(pat, re.I) for pat in patterns] + if album_info.country and options: + dist.add_priority('country', album_info.country, options) + # Country. + elif likelies['country'] and album_info.country: + dist.add_string('country', likelies['country'], album_info.country) + # Label. if likelies['label'] and album_info.label: - dist += string_dist(likelies['label'], album_info.label) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('label', likelies['label'], album_info.label) + # Catalog number. if likelies['catalognum'] and album_info.catalognum: - dist += string_dist(likelies['catalognum'], - album_info.catalognum) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() - - if likelies['country'] and album_info.country: - dist += string_dist(likelies['country'], - album_info.country) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('catalognum', likelies['catalognum'], + album_info.catalognum) + # Disambiguation. if likelies['albumdisambig'] and album_info.albumdisambig: - dist += string_dist(likelies['albumdisambig'], - album_info.albumdisambig) * \ - weights['minor'].as_number() - dist_max += weights['minor'].as_number() + dist.add_string('albumdisambig', likelies['albumdisambig'], + album_info.albumdisambig) - # Matched track distances. + # Album ID. 
+ if likelies['mb_albumid']: + dist.add_equality('album_id', likelies['mb_albumid'], + album_info.album_id) + + # Tracks. + dist.tracks = {} for item, track in mapping.iteritems(): - dist += track_distance(item, track, album_info.va) * \ - weights['track'].as_number() - dist_max += weights['track'].as_number() + dist.tracks[track] = track_distance(item, track, album_info.va) + dist.add('tracks', dist.tracks[track].distance) - # Extra and unmatched tracks. - for track in set(album_info.tracks) - set(mapping.values()): - dist += weights['missing'].as_number() - dist_max += weights['missing'].as_number() - for item in set(items) - set(mapping.keys()): - dist += weights['unmatched'].as_number() - dist_max += weights['unmatched'].as_number() + # Missing tracks. + for i in range(len(album_info.tracks) - len(mapping)): + dist.add('missing_tracks', 1.0) - # Plugin distances. - plugin_d, plugin_dm = plugins.album_distance(items, album_info, mapping) - dist += plugin_d - dist_max += plugin_dm + # Unmatched tracks. + for i in range(len(items) - len(mapping)): + dist.add('unmatched_tracks', 1.0) - # Normalize distance, avoiding divide-by-zero. - if dist_max == 0.0: - return 0.0 - else: - return dist / dist_max + # Plugins. + dist.update(plugins.album_distance(items, album_info, mapping)) + + return dist def match_by_id(items): """If the items are tagged with a MusicBrainz album ID, returns an @@ -361,9 +519,7 @@ def match_by_id(items): if bool(reduce(lambda x,y: x if x==y else (), albumids)): albumid = albumids[0] log.debug('Searching for discovered album ID: ' + albumid) - matches = hooks._album_for_id(albumid) - if matches: - return matches[0] + return hooks.album_for_mbid(albumid) else: log.debug('No album ID consensus.') @@ -372,8 +528,8 @@ def _recommendation(results): recommendation based on the results' distances. If the recommendation is higher than the configured maximum for - certain situations, the recommendation will be downgraded to the - configured maximum. 
+ an applied penalty, the recommendation will be downgraded to the + configured maximum for that penalty. """ if not results: # No candidates: no recommendation. @@ -395,45 +551,23 @@ def _recommendation(results): # Gap between first two candidates is large. rec = recommendation.low else: - # No conclusion. - rec = recommendation.none + # No conclusion. Return immediately. Can't be downgraded any further. + return recommendation.none - # "Downgrades" in certain configured situations. + # Downgrade to the max rec if it is lower than the current rec for an + # applied penalty. + keys = set(key for _, key in min_dist) if isinstance(results[0], hooks.AlbumMatch): - # Load the configured recommendation maxima. - max_rec = {} - for trigger in 'non_mb_source', 'partial', 'tracklength', 'tracknumber': - max_rec[trigger] = \ - config['match']['max_rec'][trigger].as_choice({ - 'strong': recommendation.strong, - 'medium': recommendation.medium, - 'low': recommendation.low, - 'none': recommendation.none, - }) - - # Non-MusicBrainz source. - if rec > max_rec['non_mb_source'] and \ - results[0].info.data_source != 'MusicBrainz': - rec = max_rec['non_mb_source'] - - # Partial match. - if rec > max_rec['partial'] and \ - (results[0].extra_items or results[0].extra_tracks): - rec = max_rec['partial'] - - # Check track number and duration for each item. - for item, track_info in results[0].mapping.items(): - # Track length differs. - if rec > max_rec['tracklength'] and \ - item.length and track_info.length and \ - abs(item.length - track_info.length) > \ - weights['track_length_grace'].as_number(): - rec = max_rec['tracklength'] - - # Track number differs. 
- if rec > max_rec['tracknumber'] and \ - track_index_changed(item, track_info): - rec = max_rec['tracknumber'] + for track_dist in min_dist.tracks.values(): + keys.update(key for _, key in track_dist) + for key in keys: + max_rec = config['match']['max_rec'][key].as_choice({ + 'strong': recommendation.strong, + 'medium': recommendation.medium, + 'low': recommendation.low, + 'none': recommendation.none, + }) + rec = min(rec, max_rec) return rec @@ -455,8 +589,15 @@ def _add_candidate(items, results, info): # Get the change distance. dist = distance(items, info, mapping) - log.debug('Success. Distance: %f' % dist) + # Skip matches with ignored penalties. + penalties = [key for _, key in dist] + for penalty in config['match']['ignored'].as_str_seq(): + if penalty in penalties: + log.debug('Ignored. Penalty: %s' % penalty) + return + + log.debug('Success. Distance: %f' % dist) results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, extra_items, extra_tracks) @@ -467,7 +608,7 @@ def tag_album(items, search_artist=None, search_album=None, - The current artist. - The current album. - A list of AlbumMatch objects. The candidates are sorted by - distance (i.e., best match first). + distance (i.e., best match first). - A recommendation. If search_artist and search_album or search_id are provided, then they are used as search terms in place of the current metadata. @@ -485,7 +626,7 @@ def tag_album(items, search_artist=None, search_album=None, # Search by explicit ID. if search_id is not None: log.debug('Searching for album ID: ' + search_id) - search_cands = hooks._album_for_id(search_id) + search_cands = hooks.albums_for_id(search_id) # Use existing metadata or text search. else: @@ -516,8 +657,8 @@ def tag_album(items, search_artist=None, search_album=None, log.debug(u'Album might be VA: %s' % str(va_likely)) # Get the results from the data sources. 
- search_cands = hooks._album_candidates(items, search_artist, - search_album, va_likely) + search_cands = hooks.album_candidates(items, search_artist, + search_album, va_likely) log.debug(u'Evaluating %i candidates.' % len(search_cands)) for info in search_cands: @@ -544,7 +685,7 @@ def tag_item(item, search_artist=None, search_title=None, trackid = search_id or item.mb_trackid if trackid: log.debug('Searching for track ID: ' + trackid) - for track_info in hooks._track_for_id(trackid): + for track_info in hooks.tracks_for_id(trackid): dist = track_distance(item, track_info, incl_artist=True) candidates[track_info.track_id] = \ hooks.TrackMatch(dist, track_info) @@ -567,7 +708,7 @@ def tag_item(item, search_artist=None, search_title=None, log.debug(u'Item search terms: %s - %s' % (search_artist, search_title)) # Get and evaluate candidate metadata. - for track_info in hooks._item_candidates(item, search_artist, search_title): + for track_info in hooks.item_candidates(item, search_artist, search_title): dist = track_distance(item, track_info, incl_artist=True) candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info) diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 7bbb16a6b..44cb51051 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -68,22 +68,42 @@ match: medium_rec_thresh: 0.25 rec_gap_thresh: 0.25 max_rec: - non_mb_source: strong - partial: medium - tracklength: strong - tracknumber: strong - preferred_media: CD - weight: + source: strong + artist: strong + album: strong + media: strong + mediums: strong + year: strong + country: strong + label: strong + catalognum: strong + albumdisambig: strong + album_id: strong + tracks: strong + missing_tracks: medium + unmatched_tracks: medium + track_title: strong + track_artist: strong + track_index: strong + track_length_grace: strong + track_length_max: strong + track_length: strong + track_id: strong + distance_weights: source: 2.0 artist: 3.0 album: 3.0 - 
year: 1.0 media: 1.0 + mediums: 1.0 + year: 1.0 + country: 0.5 + label: 0.5 + catalognum: 0.5 + albumdisambig: 0.5 album_id: 5.0 - minor: 0.5 - track: 1.0 - missing: 0.9 - unmatched: 0.6 + tracks: 2.0 + missing_tracks: 0.9 + unmatched_tracks: 0.6 track_title: 3.0 track_artist: 2.0 track_index: 1.0 @@ -91,3 +111,8 @@ match: track_length_max: 30 track_length: 2.0 track_id: 5.0 + preferred: + countries: [] + media: [] + original_year: no + ignored: [] diff --git a/beets/library.py b/beets/library.py index d1b0b6d09..7696541fa 100644 --- a/beets/library.py +++ b/beets/library.py @@ -321,7 +321,7 @@ class Item(object): try: f = MediaFile(syspath(read_path)) except (OSError, IOError) as exc: - raise util.FilesystemError(exc, 'read', (self.path,), + raise util.FilesystemError(exc, 'read', (read_path,), traceback.format_exc()) for key in ITEM_KEYS_META: @@ -399,7 +399,7 @@ class Item(object): # Build the mapping for substitution in the template, # beginning with the values from the database. mapping = {} - for key in ITEM_KEYS_META: + for key in ITEM_KEYS: # Get the values from either the item or its album. if key in ALBUM_KEYS_ITEM and album is not None: # From album. @@ -411,8 +411,10 @@ class Item(object): value = format_for_path(value, key, pathmod) mapping[key] = value - # Additional fields in non-sanitized case. - if not sanitize: + # Include the path if we're not sanitizing to construct a path. 
+ if sanitize: + del mapping['path'] + else: mapping['path'] = displayable_path(self.path) # Use the album artist if the track artist is not set and diff --git a/beets/mediafile.py b/beets/mediafile.py index e6648d757..9b6234192 100644 --- a/beets/mediafile.py +++ b/beets/mediafile.py @@ -59,10 +59,6 @@ log = logging.getLogger('beets') class UnreadableFileError(Exception): pass -class FileIOError(UnreadableFileError, IOError): - def __init__(self, exc): - IOError.__init__(self, exc.errno, exc.strerror, exc.filename) - # Raised for files that don't seem to have a type MediaFile supports. class FileTypeError(UnreadableFileError): pass @@ -73,7 +69,8 @@ class FileTypeError(UnreadableFileError): # Human-readable type names. TYPES = { 'mp3': 'MP3', - 'mp4': 'AAC', + 'aac': 'AAC', + 'alac': 'ALAC', 'ogg': 'OGG', 'flac': 'FLAC', 'ape': 'APE', @@ -82,6 +79,8 @@ TYPES = { 'asf': 'Windows Media', } +MP4_TYPES = ('aac', 'alac') + # Utility. @@ -532,8 +531,10 @@ class MediaField(object): obj.mgfile[style.key] = out def _styles(self, obj): - if obj.type in ('mp3', 'mp4', 'asf'): + if obj.type in ('mp3', 'asf'): styles = self.styles[obj.type] + elif obj.type in MP4_TYPES: + styles = self.styles['mp4'] else: styles = self.styles['etc'] # Sane styles. @@ -568,7 +569,7 @@ class MediaField(object): out = out[:-len(style.suffix)] # MPEG-4 freeform frames are (should be?) encoded as UTF-8. - if obj.type == 'mp4' and style.key.startswith('----:') and \ + if obj.type in MP4_TYPES and style.key.startswith('----:') and \ isinstance(out, str): out = out.decode('utf8') @@ -636,7 +637,7 @@ class MediaField(object): # MPEG-4 "freeform" (----) frames must be encoded as UTF-8 # byte strings. 
- if obj.type == 'mp4' and style.key.startswith('----:') and \ + if obj.type in MP4_TYPES and style.key.startswith('----:') and \ isinstance(out, unicode): out = out.encode('utf8') @@ -723,7 +724,7 @@ class ImageField(object): return picframe.data - elif obj.type == 'mp4': + elif obj.type in MP4_TYPES: if 'covr' in obj.mgfile: covers = obj.mgfile['covr'] if covers: @@ -795,7 +796,7 @@ class ImageField(object): ) obj.mgfile['APIC'] = picframe - elif obj.type == 'mp4': + elif obj.type in MP4_TYPES: if val is None: if 'covr' in obj.mgfile: del obj.mgfile['covr'] @@ -856,12 +857,15 @@ class MediaFile(object): self.path = path unreadable_exc = ( - mutagen.mp3.HeaderNotFoundError, - mutagen.flac.FLACNoHeaderError, + mutagen.mp3.error, + mutagen.id3.error, + mutagen.flac.error, mutagen.monkeysaudio.MonkeysAudioHeaderError, - mutagen.mp4.MP4StreamInfoError, - mutagen.oggvorbis.OggVorbisHeaderError, - mutagen.asf.ASFHeaderError, + mutagen.mp4.error, + mutagen.oggvorbis.error, + mutagen.ogg.error, + mutagen.asf.error, + mutagen.apev2.error, ) try: self.mgfile = mutagen.File(path) @@ -869,7 +873,13 @@ class MediaFile(object): log.debug(u'header parsing failed: {0}'.format(unicode(exc))) raise UnreadableFileError('Mutagen could not read file') except IOError as exc: - raise FileIOError(exc) + if type(exc) == IOError: + # This is a base IOError, not a subclass from Mutagen or + # anywhere else. + raise + else: + log.debug(traceback.format_exc()) + raise UnreadableFileError('Mutagen raised an exception') except Exception as exc: # Hide bugs in Mutagen. log.debug(traceback.format_exc()) @@ -880,7 +890,15 @@ class MediaFile(object): raise FileTypeError('file type unsupported by Mutagen') elif type(self.mgfile).__name__ == 'M4A' or \ type(self.mgfile).__name__ == 'MP4': - self.type = 'mp4' + # This hack differentiates AAC and ALAC until we find a more + # deterministic approach. Mutagen only sets the sample rate + # for AAC files. 
See: + # https://github.com/sampsyo/beets/pull/295 + if hasattr(self.mgfile.info, 'sample_rate') and \ + self.mgfile.info.sample_rate > 0: + self.type = 'aac' + else: + self.type = 'alac' elif type(self.mgfile).__name__ == 'ID3' or \ type(self.mgfile).__name__ == 'MP3': self.type = 'mp3' diff --git a/beets/plugins.py b/beets/plugins.py index 7d49ad3aa..d0c0a9654 100755 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -64,16 +64,16 @@ class BeetsPlugin(object): return {} def track_distance(self, item, info): - """Should return a (distance, distance_max) pair to be added - to the distance value for every track comparison. + """Should return a Distance object to be added to the + distance for every track comparison. """ - return 0.0, 0.0 + return beets.autotag.match.Distance() def album_distance(self, items, album_info, mapping): - """Should return a (distance, distance_max) pair to be added - to the distance value for every album-level comparison. + """Should return a Distance object to be added to the + distance for every album-level comparison. """ - return 0.0, 0.0 + return beets.autotag.match.Distance() def candidates(self, items, artist, album, va_likely): """Should return a sequence of AlbumInfo objects that match the @@ -242,25 +242,19 @@ def queries(): def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. - Returns a (distance, distance_max) pair. + Returns a Distance object. 
""" - dist = 0.0 - dist_max = 0.0 + dist = beets.autotag.match.Distance() for plugin in find_plugins(): - d, dm = plugin.track_distance(item, info) - dist += d - dist_max += dm - return dist, dist_max + dist.update(plugin.track_distance(item, info)) + return dist def album_distance(items, album_info, mapping): """Returns the album distance calculated by plugins.""" - dist = 0.0 - dist_max = 0.0 + dist = beets.autotag.match.Distance() for plugin in find_plugins(): - d, dm = plugin.album_distance(items, album_info, mapping) - dist += d - dist_max += dm - return dist, dist_max + dist.update(plugin.album_distance(items, album_info, mapping)) + return dist def candidates(items, artist, album, va_likely): """Gets MusicBrainz candidates for an album from each plugin. diff --git a/beets/ui/__init__.py b/beets/ui/__init__.py index 6789045f1..460320a34 100644 --- a/beets/ui/__init__.py +++ b/beets/ui/__init__.py @@ -366,7 +366,7 @@ def colorize(color, text): else: return text -def _colordiff(a, b, highlight='red'): +def _colordiff(a, b, highlight='red', second_highlight='lightgray'): """Given two values, return the same pair of strings except with their differences highlighted in the specified color. Strings are highlighted intelligently to show differences; other values are @@ -402,9 +402,14 @@ def _colordiff(a, b, highlight='red'): # Left only. a_out.append(colorize(highlight, a[a_start:a_end])) elif op == 'replace': - # Right and left differ. - a_out.append(colorize(highlight, a[a_start:a_end])) - b_out.append(colorize(highlight, b[b_start:b_end])) + # Right and left differ. Colorise with second highlight if + # it's just a case change. 
+ if a[a_start:a_end].lower() != b[b_start:b_end].lower(): + color = highlight + else: + color = second_highlight + a_out.append(colorize(color, a[a_start:a_end])) + b_out.append(colorize(color, b[b_start:b_end])) else: assert(False) diff --git a/beets/ui/commands.py b/beets/ui/commands.py index 9e42751ab..dfe3585c1 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -125,14 +125,14 @@ default_commands.append(fields_cmd) VARIOUS_ARTISTS = u'Various Artists' -PARTIAL_MATCH_MESSAGE = u'(partial match!)' - # Importer utilities and support. def disambig_string(info): - """Returns label, year and media disambiguation, if available. + """Returns source, media, year, country, label and album disambiguation. """ disambig = [] + if info.data_source != 'MusicBrainz': + disambig.append(info.data_source) if info.media: if info.mediums > 1: disambig.append(u'{0}x{1}'.format( @@ -163,26 +163,34 @@ def dist_string(dist): out = ui.colorize('red', out) return out +def penalty_string(distance, limit=None): + """Returns a colorized string that indicates all the penalties applied to + a distance object. + """ + penalties = [] + for _, key in distance: + key = key.replace('album_', '') + key = key.replace('track_', '') + key = key.replace('_', ' ') + penalties.append(key) + if penalties: + if limit and len(penalties) > limit: + penalties = penalties[:limit] + ['...'] + return ui.colorize('yellow', '(%s)' % ', '.join(penalties)) + def show_change(cur_artist, cur_album, match): """Print out a representation of the changes that will be made if an album's tags are changed according to `match`, which must be an AlbumMatch object. """ - def show_album(artist, album, partial=False): + def show_album(artist, album): if artist: album_description = u' %s - %s' % (artist, album) elif album: album_description = u' %s' % album else: album_description = u' (unknown album)' - - out = album_description - - # Add a suffix if this is a partial match. 
- if partial: - out += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) - - print_(out) + print_(album_description) def format_index(track_info): """Return a string representing the track index of the given @@ -223,11 +231,7 @@ def show_change(cur_artist, cur_album, match): print_("To:") show_album(artist_r, album_r) else: - message = u"Tagging:\n %s - %s" % (match.info.artist, - match.info.album) - if match.extra_items or match.extra_tracks: - message += u' %s' % ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) - print_(message) + print_(u"Tagging:\n %s - %s" % (match.info.artist, match.info.album)) # Data URL. if match.info.data_url: @@ -235,9 +239,13 @@ def show_change(cur_artist, cur_album, match): # Info line. info = [] + # Similarity. info.append('(Similarity: %s)' % dist_string(match.distance)) - if match.info.data_source != 'MusicBrainz': - info.append(ui.colorize('turquoise', '(%s)' % match.info.data_source)) + # Penalties. + penalties = penalty_string(match.distance) + if penalties: + info.append(penalties) + # Disambiguation. disambig = disambig_string(match.info) if disambig: info.append(ui.colorize('lightgray', '(%s)' % disambig)) @@ -285,7 +293,7 @@ def show_change(cur_artist, cur_album, match): cur_track, new_track = format_index(item), format_index(track_info) if cur_track != new_track: if item.track in (track_info.index, track_info.medium_index): - color = 'yellow' + color = 'lightgray' else: color = 'red' if (cur_track + new_track).count('-') == 1: @@ -315,18 +323,10 @@ def show_change(cur_artist, cur_album, match): rhs += templ.format(rhs_length) lhs_width += len(cur_length) + 3 - # Hidden penalties. No LHS/RHS diff is displayed, but we still want to - # indicate that a penalty has been applied to explain the similarity - # score. 
- penalties = [] - if match.info.va and track_info.artist and \ - item.artist.lower() not in VA_ARTISTS: - penalties.append('artist') - if item.mb_trackid and item.mb_trackid != track_info.track_id: - penalties.append('ID') + # Penalties. + penalties = penalty_string(match.distance.tracks[track_info]) if penalties: - rhs += ' %s' % ui.colorize('red', - '(%s)' % ', '.join(penalties)) + rhs += ' %s' % penalties if lhs != rhs: lines.append((' * %s' % lhs, rhs, lhs_width)) @@ -489,20 +489,17 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, (cur_artist, cur_album)) print_('Candidates:') for i, match in enumerate(candidates): + # Artist, album and distance. line = ['%i. %s - %s (%s)' % (i + 1, match.info.artist, match.info.album, dist_string(match.distance))] - # Point out the partial matches. - if match.extra_items or match.extra_tracks: - line.append(ui.colorize('yellow', - PARTIAL_MATCH_MESSAGE)) - - # Sources other than MusicBrainz. - source = match.info.data_source - if source != 'MusicBrainz': - line.append(ui.colorize('turquoise', '(%s)' % source)) + # Penalties. + penalties = penalty_string(match.distance, 3) + if penalties: + line.append(penalties) + # Disambiguation disambig = disambig_string(match.info) if disambig: line.append(ui.colorize('lightgray', '(%s)' % disambig)) diff --git a/beets/ui/migrate.py b/beets/ui/migrate.py index fea9c13b1..784d7c827 100644 --- a/beets/ui/migrate.py +++ b/beets/ui/migrate.py @@ -251,6 +251,15 @@ def migrate_config(replace=False): config.yaml will be moved aside. Otherwise, the process is aborted when the file exists. """ + + # Load legacy configuration data, if any. + config, configpath = get_config() + if not config: + log.debug(u'no config file found at {0}'.format( + util.displayable_path(configpath) + )) + return + # Get the new configuration file path and possibly move it out of # the way. 
destfn = os.path.join(beets.config.config_dir(), confit.CONFIG_FILENAME) @@ -264,13 +273,6 @@ def migrate_config(replace=False): # File exists and we won't replace it. We're done. return - # Load legacy configuration data, if any. - config, configpath = get_config() - if not config: - log.debug(u'no config file found at {0}'.format( - util.displayable_path(configpath) - )) - return log.debug(u'migrating config file {0}'.format( util.displayable_path(configpath) )) diff --git a/beetsplug/beatport.py b/beetsplug/beatport.py new file mode 100644 index 000000000..c68901cc1 --- /dev/null +++ b/beetsplug/beatport.py @@ -0,0 +1,301 @@ +# This file is part of beets. +# Copyright 2013, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +"""Adds Beatport release and track search support to the autotagger +""" +import logging +import re +from datetime import datetime, timedelta + +import requests + +from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.autotag.match import Distance +from beets.plugins import BeetsPlugin + +log = logging.getLogger('beets') + + +class BeatportAPIError(Exception): + pass + + +class BeatportObject(object): + def __init__(self, data): + self.beatport_id = data['id'] + self.name = unicode(data['name']) + if 'releaseDate' in data: + self.release_date = datetime.strptime(data['releaseDate'], + '%Y-%m-%d') + if 'artists' in data: + self.artists = [(x['id'], unicode(x['name'])) + for x in data['artists']] + if 'genres' in data: + self.genres = [unicode(x['name']) + for x in data['genres']] + + +class BeatportAPI(object): + API_BASE = 'http://api.beatport.com/' + + @classmethod + def get(cls, endpoint, **kwargs): + try: + response = requests.get(cls.API_BASE + endpoint, params=kwargs) + except Exception as e: + raise BeatportAPIError("Error connection to Beatport API: {}" + .format(e.message)) + if not response: + raise BeatportAPIError( + "Error {0.status_code} for '{0.request.path_url}" + .format(response)) + return response.json()['results'] + + +class BeatportSearch(object): + query = None + release_type = None + + def __unicode__(self): + return u''.format( + self.release_type, self.query, len(self.results)) + + def __init__(self, query, release_type='release', details=True): + self.results = [] + self.query = query + self.release_type = release_type + response = BeatportAPI.get('catalog/3/search', query=query, + facets=['fieldType:{0}' + .format(release_type)], + perPage=5) + for item in response: + if release_type == 'release': + release = BeatportRelease(item) + if details: + release.get_tracks() + self.results.append(release) + elif release_type == 'track': + self.results.append(BeatportTrack(item)) + + +class BeatportRelease(BeatportObject): + 
API_ENDPOINT = 'catalog/3/beatport/release' + + def __unicode__(self): + if len(self.artists) < 4: + artist_str = ", ".join(x[1] for x in self.artists) + else: + artist_str = "Various Artists" + return u"".format(artist_str, + self.name, + self.catalog_number) + + def __init__(self, data): + BeatportObject.__init__(self, data) + if 'catalogNumber' in data: + self.catalog_number = data['catalogNumber'] + if 'label' in data: + self.label_name = data['label']['name'] + if 'category' in data: + self.category = data['category'] + if 'slug' in data: + self.url = "http://beatport.com/release/{0}/{1}".format( + data['slug'], data['id']) + + @classmethod + def from_id(cls, beatport_id): + response = BeatportAPI.get(cls.API_ENDPOINT, id=beatport_id) + release = BeatportRelease(response['release']) + release.tracks = [BeatportTrack(x) for x in response['tracks']] + return release + + def get_tracks(self): + response = BeatportAPI.get(self.API_ENDPOINT, id=self.beatport_id) + self.tracks = [BeatportTrack(x) for x in response['tracks']] + + +class BeatportTrack(BeatportObject): + API_ENDPOINT = 'catalog/3/beatport/track' + + def __unicode__(self): + artist_str = ", ".join(x[1] for x in self.artists) + return u"".format(artist_str, self.name, + self.mix_name) + + def __init__(self, data): + BeatportObject.__init__(self, data) + if 'title' in data: + self.title = unicode(data['title']) + if 'mixName' in data: + self.mix_name = unicode(data['mixName']) + self.length = timedelta(milliseconds=data.get('lengthMs', 0) or 0) + if not self.length: + try: + min, sec = data.get('length', '0:0').split(':') + self.length = timedelta(minutes=int(min), seconds=int(sec)) + except ValueError: + pass + if 'slug' in data: + self.url = "http://beatport.com/track/{0}/{1}".format(data['slug'], + data['id']) + + @classmethod + def from_id(cls, beatport_id): + response = BeatportAPI.get(cls.API_ENDPOINT, id=beatport_id) + return BeatportTrack(response['track']) + + +class BeatportPlugin(BeetsPlugin): 
+ def __init__(self): + super(BeatportPlugin, self).__init__() + self.config.add({ + 'source_weight': 0.5, + }) + + def album_distance(self, items, album_info, mapping): + """Returns the beatport source weight and the maximum source weight + for albums. + """ + dist = Distance() + if album_info.data_source == 'Beatport': + dist.add('source', self.config['source_weight'].as_number()) + return dist + + def track_distance(self, item, info): + """Returns the beatport source weight and the maximum source weight + for individual tracks. + """ + return Distance() # FIXME: Need source information for tracks. + + def candidates(self, items, artist, release, va_likely): + """Returns a list of AlbumInfo objects for beatport search results + matching release and artist (if not various). + """ + if va_likely: + query = release + else: + query = '%s %s' % (artist, release) + try: + return self._get_releases(query) + except BeatportAPIError as e: + log.debug('Beatport API Error: %s (query: %s)' % (e, query)) + return [] + + def item_candidates(self, item, artist, title): + """Returns a list of TrackInfo objects for beatport search results + matching title and artist. + """ + query = '%s %s' % (artist, title) + try: + return self._get_tracks(query) + except BeatportAPIError as e: + log.debug('Beatport API Error: %s (query: %s)' % (e, query)) + return [] + + def album_for_id(self, release_id): + """Fetches a release by its Beatport ID and returns an AlbumInfo object + or None if the release is not found. + """ + log.debug('Searching Beatport for release %s' % str(release_id)) + match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id) + if not match: + return None + release = BeatportRelease.from_id(match.group(2)) + album = self._get_album_info(release) + return album + + def track_for_id(self, track_id): + """Fetches a track by its Beatport ID and returns a TrackInfo object + or None if the track is not found. 
+ """ + log.debug('Searching Beatport for track %s' % str(track_id)) + match = re.search(r'(^|beatport\.com/track/.+/)(\d+)$', track_id) + if not match: + return None + bp_track = BeatportTrack.from_id(match.group(2)) + track = self._get_track_info(bp_track) + return track + + def _get_releases(self, query): + """Returns a list of AlbumInfo objects for a beatport search query. + """ + # Strip non-word characters from query. Things like "!" and "-" can + # cause a query to return no results, even if they match the artist or + # album title. Use `re.UNICODE` flag to avoid stripping non-english + # word characters. + query = re.sub(r'\W+', ' ', query, re.UNICODE) + # Strip medium information from query, Things like "CD1" and "disk 1" + # can also negate an otherwise positive result. + query = re.sub(r'\b(CD|disc)\s*\d+', '', query, re.I) + albums = [self._get_album_info(x) + for x in BeatportSearch(query).results] + return albums + + def _get_album_info(self, release): + """Returns an AlbumInfo object for a Beatport Release object. + """ + va = len(release.artists) > 3 + artist, artist_id = self._get_artist(release.artists) + if va: + artist = u"Various Artists" + tracks = [self._get_track_info(x, index=idx) + for idx, x in enumerate(release.tracks, 1)] + + return AlbumInfo(album=release.name, album_id=release.beatport_id, + artist=artist, artist_id=artist_id, tracks=tracks, + albumtype=release.category, va=va, + year=release.release_date.year, + month=release.release_date.month, + day=release.release_date.day, + label=release.label_name, + catalognum=release.catalog_number, media=u'Digital', + data_source=u'Beatport', data_url=release.url) + + def _get_track_info(self, track, index=None): + """Returns a TrackInfo object for a Beatport Track object. 
+ """ + title = track.name + if track.mix_name != u"Original Mix": + title += u" ({0})".format(track.mix_name) + artist, artist_id = self._get_artist(track.artists) + length = track.length.total_seconds() + + return TrackInfo(title=title, track_id=track.beatport_id, + artist=artist, artist_id=artist_id, + length=length, index=index) + + def _get_artist(self, artists): + """Returns an artist string (all artists) and an artist_id (the main + artist) for a list of Beatport release or track artists. + """ + artist_id = None + bits = [] + for artist in artists: + if not artist_id: + artist_id = artist[0] + name = artist[1] + # Strip disambiguation number. + name = re.sub(r' \(\d+\)$', '', name) + # Move articles to the front. + name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I) + bits.append(name) + artist = ', '.join(bits).replace(' ,', ',') or None + return artist, artist_id + + def _get_tracks(self, query): + """Returns a list of TrackInfo objects for a Beatport query. + """ + bp_tracks = BeatportSearch(query, release_type='track').results + tracks = [self._get_track_info(x) for x in bp_tracks] + return tracks diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 163d7b1ee..006f85db0 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -21,6 +21,7 @@ from beets import util from beets import config from beets.util import confit from beets.autotag import hooks +from beets.autotag.match import Distance import acoustid import logging from collections import defaultdict @@ -113,23 +114,21 @@ def _all_releases(items): class AcoustidPlugin(plugins.BeetsPlugin): def track_distance(self, item, info): + dist = Distance() if item.path not in _matches or not info.track_id: # Match failed or no track ID. 
- return 0.0, 0.0 + return dist recording_ids, _ = _matches[item.path] - if info.track_id in recording_ids: - dist = 0.0 - else: - dist = TRACK_ID_WEIGHT - return dist, TRACK_ID_WEIGHT + dist.add_expr('track_id', info.track_id not in recording_ids) + return dist def candidates(self, items, artist, album, va_likely): albums = [] for relid in _all_releases(items): - matches = hooks._album_for_id(relid) - if matches: - albums.extend(matches) + album = hooks.album_for_mbid(relid) + if album: + albums.append(album) log.debug('acoustid album candidates: %i' % len(albums)) return albums @@ -141,7 +140,7 @@ class AcoustidPlugin(plugins.BeetsPlugin): recording_ids, _ = _matches[item.path] tracks = [] for recording_id in recording_ids: - track = hooks._track_for_id(recording_id) + track = hooks.track_for_mbid(recording_id) if track: tracks.append(track) log.debug('acoustid item candidates: {0}'.format(len(tracks))) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index bb8d37146..822ed59e3 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -17,7 +17,7 @@ discogs-client library. """ from beets import config from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.autotag.match import current_metadata, VA_ARTISTS +from beets.autotag.match import current_metadata, Distance, VA_ARTISTS from beets.plugins import BeetsPlugin from discogs_client import Artist, DiscogsAPIError, Release, Search import beets @@ -44,14 +44,12 @@ class DiscogsPlugin(BeetsPlugin): }) def album_distance(self, items, album_info, mapping): - """Returns the discogs source weight and the maximum source weight. + """Returns the album distance. 
""" + dist = Distance() if album_info.data_source == 'Discogs': - return self.config['source_weight'].as_number() * \ - config['match']['weight']['source'].as_number(), \ - config['match']['weight']['source'].as_number() - else: - return 0.0, 0.0 + dist.add('source', self.config['source_weight'].as_number()) + return dist def candidates(self, items, artist, album, va_likely): """Returns a list of AlbumInfo objects for discogs search results diff --git a/beetsplug/mbsync.py b/beetsplug/mbsync.py index 103de81b5..81e802a33 100644 --- a/beetsplug/mbsync.py +++ b/beetsplug/mbsync.py @@ -72,7 +72,7 @@ def mbsync_singletons(lib, query, move, pretend, write): s.old_data = dict(s.record) # Get the MusicBrainz recording info. - track_info = hooks._track_for_id(s.mb_trackid) + track_info = hooks.track_for_mbid(s.mb_trackid) if not track_info: log.info(u'Recording ID not found: {0}'.format(s.mb_trackid)) continue @@ -97,11 +97,10 @@ def mbsync_albums(lib, query, move, pretend, write): item.old_data = dict(item.record) # Get the MusicBrainz album information. - matches = hooks._album_for_id(a.mb_albumid) - if not matches: + album_info = hooks.album_for_mbid(a.mb_albumid) + if not album_info: log.info(u'Release ID not found: {0}'.format(a.mb_albumid)) continue - album_info = matches[0] # Construct a track mapping according to MBIDs. This should work # for albums that have missing or extra tracks. 
diff --git a/beetsplug/missing.py b/beetsplug/missing.py index 0aa1427e2..8e4c4010f 100644 --- a/beetsplug/missing.py +++ b/beetsplug/missing.py @@ -39,9 +39,7 @@ def _missing(album): if len([i for i in album.items()]) < album.tracktotal: # fetch missing items # TODO: Implement caching that without breaking other stuff - matches = hooks._album_for_id(album.mb_albumid) - if matches: - album_info = matches[0] + album_info = hooks.album_for_mbid(album.mb_albumid) for track_info in getattr(album_info, 'tracks', []): if track_info.track_id not in item_mbids: item = _item(track_info, album_info, album.id) diff --git a/beetsplug/mpdupdate.py b/beetsplug/mpdupdate.py index 0360a45df..6138efca2 100644 --- a/beetsplug/mpdupdate.py +++ b/beetsplug/mpdupdate.py @@ -35,14 +35,16 @@ database_changed = False # easier. class BufferedSocket(object): """Socket abstraction that allows reading by line.""" - def __init__(self, sep='\n'): - self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + def __init__(self, host, port, sep='\n'): + if host[0] == '/': + self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + self.sock.connect(host) + else: + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock.connect((host, port)) self.buf = '' self.sep = sep - def connect(self, host, port): - self.sock.connect((host, port)) - def readline(self): while self.sep not in self.buf: data = self.sock.recv(1024) @@ -67,8 +69,7 @@ def update_mpd(host='localhost', port=6600, password=None): """ print('Updating MPD database...') - s = BufferedSocket() - s.connect(host, port) + s = BufferedSocket(host, port) resp = s.readline() if 'OK MPD' not in resp: print('MPD connection failed:', repr(resp)) diff --git a/beetsplug/zero.py b/beetsplug/zero.py index ef0b8b28d..1ea6d0e50 100644 --- a/beetsplug/zero.py +++ b/beetsplug/zero.py @@ -89,6 +89,7 @@ class ZeroPlugin(BeetsPlugin): continue self._log.debug(u'[zero] \"{0}\" ({1}) match: {2}' .format(fval, fn, ' '.join(patterns))) 
- setattr(item, fn, type(fval)()) + new_val = None if fval is None else type(fval)() + setattr(item, fn, new_val) self._log.debug(u'[zero] {0}={1}' .format(fn, getattr(item, fn))) diff --git a/docs/changelog.rst b/docs/changelog.rst index eeeb78d86..784d2eb88 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,15 +1,72 @@ Changelog ========= -1.1.1 (in development) +1.2.1 (in development) ---------------------- +This release introduces a major internal change in the way that similarity +scores are handled, thanks to the continued efforts of Tai Lee. The changes +you'll notice while using the autotagger are: + +* The top 3 distance penalties are now displayed on the release listing, + and all album and track penalties are now displayed on the track changes + list. This should make it clear exactly which metadata is contributing to a + low similarity score. +* When displaying differences, the colorization has been made more consistent + and helpful: red for an actual difference, yellow to indicate that a + distance penalty is being applied, and light gray for no penalty (e.g., case + changes) or disambiguation data. + +There are also three new (or overhauled) configuration options that let you +customize the way that matches are selected: + +* The :ref:`ignored` setting lets you instruct the importer not to show you + matches that have a certain penalty applied. +* The :ref:`preferred` collection of settings lets you specify a sorted list of + preferred countries and media types, or prefer releases closest to the + original year for an album. +* The :ref:`max_rec` settings can now be used for any distance penalty + component. The recommendation will be downgraded if a penalty is being + applied to the specified field. + +And some bug fixes: + +* Python 2.6 compatibility for :doc:`/plugins/beatport`. Thanks Wesley Bitter. +* Don't move the config file during a null migration. Thanks to Theofilos + Intzoglou. 
+* Fix an occasional crash in the :doc:`/plugins/beatport` when a length + field was missing from the API response. Thanks to Timothy Appnel. + + +1.2.0 (June 5, 2013) +-------------------- + +There's a *lot* of new stuff in this release: new data sources for the +autotagger, new plugins to look for problems in your library, tracking the +date that you acquired new music, an awesome new syntax for doing queries over +numeric fields, support for ALAC files, and major enhancements to the +importer's UI and distance calculations. A special thanks goes out to all the +contributors who helped make this release awesome. + +For the first time, beets can now tag your music using additional **data +sources** to augment the matches from MusicBrainz. When you enable either of +these plugins, the importer will start showing you new kinds of matches: + +* New :doc:`/plugins/discogs`: Get matches from the `Discogs`_ database. + Thanks to Artem Ponomarenko and Tai Lee. +* New :doc:`/plugins/beatport`: Get matches from the `Beatport`_ database. + Thanks to Johannes Baiter. + +We also have two other new plugins that can scan your library to check for +common problems, both by Pedro Silva: + * New :doc:`/plugins/duplicates`: Find tracks or albums in your - library that are **duplicated**. Thanks to Pedro Silva. + library that are **duplicated**. * New :doc:`/plugins/missing`: Find albums in your library that are **missing - tracks**. Thanks once more to Pedro Silva. -* New :doc:`/plugins/discogs`: Extends the autotagger to include matches from - the `Discogs`_ database. Thanks to Artem Ponomarenko and Tai Lee. + tracks**. + +There are also three more big features added to beets core: + * Your library now keeps track of **when music was added** to it. The new ``added`` field is a timestamp reflecting when each item and album was imported and the new ``%time{}`` template function lets you format this @@ -18,6 +75,42 @@ Changelog **numeric ranges**. 
For example, you can get a list of albums from the '90s by typing ``beet ls year:1990..1999`` or find high-bitrate music with ``bitrate:128000..``. See :ref:`numericquery`. Thanks to Michael Schuerig. +* **ALAC files** are now marked as ALAC instead of being conflated with AAC + audio. Thanks to Simon Luijk. + +In addition, the importer saw various UI enhancements, thanks to Tai Lee: + +* More consistent format and colorization of album and track metadata. +* Display data source URL for matches from the new data source plugins. This + should make it easier to migrate data from Discogs or Beatport into + MusicBrainz. +* Display album disambiguation and disc titles in the track listing, when + available. +* Track changes are highlighted in yellow when they indicate a change in + format to or from the style of :ref:`per_disc_numbering`. (As before, no + penalty is applied because the track number is still "correct", just in a + different format.) +* Sort missing and unmatched tracks by index and title and group them + together for better readability. +* Indicate MusicBrainz ID mismatches. + +The calculation of the similarity score for autotagger matches was also +improved, again thanks to Tai Lee. These changes, in general, help deal with +the new metadata sources and help disambiguate between similar releases in the +same MusicBrainz release group: + +* Strongly prefer releases with a matching MusicBrainz album ID. This helps + beets re-identify the same release when re-importing existing files. +* Prefer releases that are closest to the tagged ``year``. Tolerate files + tagged with release or original year. +* The new :ref:`preferred_media` config option lets you prefer a certain media + type when the ``media`` field is unset on an album. +* Apply minor penalties across a range of fields to differentiate between + nearly identical releases: ``disctotal``, ``label``, ``catalognum``, + ``country`` and ``albumdisambig``. 
+ +As usual, there were also lots of other great littler enhancements: + * :doc:`/plugins/random`: A new ``-e`` option gives an equal chance to each artist in your collection to avoid biasing random samples to prolific artists. Thanks to Georges Dubus. @@ -29,8 +122,6 @@ Changelog Duailibe. * The importer output now shows the number of audio files in each album. Thanks to jayme on GitHub. -* :doc:`/plugins/lyrics`: Lyrics searches should now turn up more results due - to some fixes in dealing with special characters. * Plugins can now provide fields for both Album and Item templates, thanks to Pedro Silva. Accordingly, the :doc:`/plugins/inline` can also now define album fields. For consistency, the ``pathfields`` configuration section has @@ -42,40 +133,26 @@ Changelog Johannes Baiter. * The :ref:`fields-cmd` command shows template fields provided by plugins. Thanks again to Pedro Silva. +* :doc:`/plugins/mpdupdate`: You can now communicate with MPD over a Unix + domain socket. Thanks to John Hawthorn. + +And a batch of fixes: + * Album art filenames now respect the :ref:`replace` configuration. * Friendly error messages are now printed when trying to read or write files that go missing. * The :ref:`modify-cmd` command can now change albums' album art paths (i.e., ``beet modify artpath=...`` works). Thanks to Lucas Duailibe. -* Various UI enhancements to the importer due to Tai Lee: - - * More consistent format and colorization of album and track metadata. - * Display data source URL for :doc:`/plugins/discogs` matches. This should - make it easier for people who would rather import and correct data from - Discogs into MusicBrainz. - * Display album disambiguation and disc titles in the track listing, when - available. - * Track changes highlighted in yellow indicate a change in format to or from - :ref:`per_disc_numbering`. No penalty is applied because the track number - is still "correct", just in a different format. 
- * Sort missing and unmatched tracks by index and title and group them - together for better readability. - * Indicate MusicBrainz ID mismatches. - -* Improve calculation of similarity score: - - * Strongly prefer releases with a matching MusicBrainz album ID. This helps - beets re-identify the same release when re-importing existing files. - * Prefer releases that are closest to the tagged ``year``. Tolerate files - tagged with release or original year. - * Prefer CD releases by default, when there is no ``media`` tagged in the - files being imported. This can be changed with the :ref:`preferred_media` - setting. - * Apply minor penalties across a range of fields to differentiate between - nearly identical releases: ``disctotal``, ``label``, ``catalognum``, - ``country`` and ``albumdisambig``. +* :doc:`/plugins/zero`: Fix a crash when nulling out a field that contains + None. +* Templates can now refer to non-tag item fields (e.g., ``$id`` and + ``$album_id``). +* :doc:`/plugins/lyrics`: Lyrics searches should now turn up more results due + to some fixes in dealing with special characters. .. _Discogs: http://discogs.com/ +.. _Beatport: http://www.beatport.com/ + 1.1.0 (April 29, 203) --------------------- diff --git a/docs/conf.py b/docs/conf.py index f5fd07017..81ae4e9e6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,8 +12,8 @@ master_doc = 'index' project = u'beets' copyright = u'2012, Adrian Sampson' -version = '1.1' -release = '1.1.1' +version = '1.2' +release = '1.2.1' pygments_style = 'sphinx' diff --git a/docs/guides/tagger.rst b/docs/guides/tagger.rst index 68bf3ac1f..8bf9af621 100644 --- a/docs/guides/tagger.rst +++ b/docs/guides/tagger.rst @@ -60,9 +60,9 @@ all of these limitations. plugin if you're willing to spend a little more CPU power to get tags for unidentified albums. -* Currently, MP3, AAC, FLAC, Ogg Vorbis, Monkey's Audio, WavPack, Musepack, and - Windows Media files are supported. (Do you use some other format? 
`Let me - know!`_) +* Currently, MP3, AAC, FLAC, ALAC, Ogg Vorbis, Monkey's Audio, WavPack, + Musepack, and Windows Media files are supported. (Do you use some other + format? `Let me know!`_) .. _Let me know!: mailto:adrian@radbox.org diff --git a/docs/plugins/beatport.rst b/docs/plugins/beatport.rst new file mode 100644 index 000000000..751f458be --- /dev/null +++ b/docs/plugins/beatport.rst @@ -0,0 +1,26 @@ +Beatport Plugin +=============== + +The ``beatport`` plugin adds support for querying the `Beatport`_ catalogue +during the autotagging process. This can potentially be helpful for users +whose collection includes a lot of diverse electronic music releases, for which +both MusicBrainz and (to a lesser degree) Discogs show no matches. + +.. _Beatport: http://beatport.com + +Installation +------------ + +To see matches from the ``beatport`` plugin, you first have to enable it in +your configuration (see :doc:`/plugins/index`). Then, install the `requests`_ +library (which we need for querying the Beatport API) by typing:: + + pip install requests + +And you're done. Matches from Beatport should now show up alongside matches +from MusicBrainz and other sources. + +If you have a Beatport ID or a URL for a release or track you want to tag, you +can just enter one of the two at the "enter Id" prompt in the importer. + +.. _requests: http://docs.python-requests.org/en/latest/ diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst index 5db83e8e1..d56471d89 100644 --- a/docs/plugins/index.rst +++ b/docs/plugins/index.rst @@ -66,13 +66,18 @@ disabled by default, but you can turn them on as described above. missing duplicates discogs + beatport Autotagger Extensions '''''''''''''''''''''' * :doc:`chroma`: Use acoustic fingerprinting to identify audio files with missing or incorrect metadata. -* :doc:`discogs`: Search for releases in the discogs database. +* :doc:`discogs`: Search for releases in the `Discogs`_ database. 
+* :doc:`beatport`: Search for tracks and releases in the `Beatport`_ database. + +.. _Beatport: http://www.beatport.com/ +.. _Discogs: http://www.discogs.com/ Metadata '''''''' diff --git a/docs/plugins/mpdupdate.rst b/docs/plugins/mpdupdate.rst index 492660106..dca41fd22 100644 --- a/docs/plugins/mpdupdate.rst +++ b/docs/plugins/mpdupdate.rst @@ -17,3 +17,8 @@ MPD server. You can do that using an ``mpdupdate:`` section in your password: seekrit With that all in place, you'll see beets send the "update" command to your MPD server every time you change your beets library. + +If you want to communicate with MPD over a Unix domain socket instead of over +TCP, just give the path to the socket in the filesystem for the ``host`` +setting. (Any ``host`` value starting with a slash is interpreted as a domain +socket.) diff --git a/docs/reference/config.rst b/docs/reference/config.rst index d23db6b02..d320cd655 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -394,40 +394,80 @@ max_rec As mentioned above, autotagger matches have *recommendations* that control how the UI behaves for a certain quality of match. The recommendation for a certain -match is usually based on the distance calculation. But you can also control -the recommendation for certain specific situations by defining *maximum* -recommendations when: +match is based on the overall distance calculation. But you can also control +the recommendation when a distance penalty is being applied for a specific +field by defining *maximum* recommendations for each field: -* a match came from a source other than MusicBrainz (e.g., the - :doc:`Discogs ` plugin); -* a match has missing or extra tracks; -* the length (duration) of at least one track differs; or -* at least one track number differs. - -To define maxima, use keys under ``max_rec:`` in the ``match`` section.
Here +are the defaults:: match: max_rec: - non_mb_source: strong - partial: medium - tracklength: strong - tracknumber: strong + source: strong + artist: strong + album: strong + media: strong + mediums: strong + year: strong + country: strong + label: strong + catalognum: strong + albumdisambig: strong + album_id: strong + tracks: strong + missing_tracks: medium + unmatched_tracks: medium + track_title: strong + track_artist: strong + track_index: strong + track_length_grace: strong + track_length_max: strong + track_length: strong + track_id: strong -If a recommendation is higher than the configured maximum and the condition is -met, the recommendation will be downgraded. The maximum for each condition can -be one of ``none``, ``low``, ``medium`` or ``strong``. When the maximum -recommendation is ``strong``, no "downgrading" occurs for that situation. +If a recommendation is higher than the configured maximum and a penalty is +being applied, the recommendation will be downgraded. The maximum for each +field can be one of ``none``, ``low``, ``medium`` or ``strong``. When the +maximum recommendation is ``strong``, no "downgrading" occurs. -The above example shows the default ``max_rec`` settings. +.. _preferred: -.. _preferred_media: +preferred +~~~~~~~~~ -preferred_media -~~~~~~~~~~~~~~~ +In addition to comparing the tagged metadata with the match metadata for +similarity, you can also specify an ordered list of preferred countries and +media types. -When comparing files that have no ``media`` tagged, prefer releases that more -closely resemble this media (using a string distance). When files are already -tagged with media, this setting is ignored. Default: ``CD``. +A distance penalty will be applied if the country or media type from the match +metadata doesn't match. The order is important: the first item will be most +preferred. Each item may be a regular expression, and will be matched case +insensitively.
The number of media will be stripped when matching preferred +media (e.g. "2x" in "2xCD"). + +You can also tell the autotagger to prefer matches that have a release year +closest to the original year for an album. + +Here's an example:: + + match: + preferred: + countries: ['US', 'GB|UK'] + media: ['CD', 'Digital Media|File'] + original_year: yes + +By default, none of these options are enabled. + +.. _ignored: + +ignored +~~~~~~~ + +You can completely avoid matches that have certain penalties applied by adding +the penalty name to the ``ignored`` setting:: + + match: + ignored: missing_tracks unmatched_tracks .. _path-format-config: diff --git a/setup.py b/setup.py index d9ec76e69..86fd375bb 100755 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ if 'sdist' in sys.argv: shutil.copytree(os.path.join(docdir, '_build', 'man'), mandir) setup(name='beets', - version='1.1.1', + version='1.2.1', description='music tagger and library organizer', author='Adrian Sampson', author_email='adrian@radbox.org', diff --git a/test/rsrc/full.alac.m4a b/test/rsrc/full.alac.m4a new file mode 100644 index 000000000..8ec7d377c Binary files /dev/null and b/test/rsrc/full.alac.m4a differ diff --git a/test/test_autotag.py b/test/test_autotag.py index 1a6188e7c..dc75ee0ab 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -23,6 +23,7 @@ import _common from _common import unittest from beets import autotag from beets.autotag import match +from beets.autotag.match import Distance from beets.library import Item from beets.util import plurality from beets.autotag import AlbumInfo, TrackInfo @@ -105,6 +106,153 @@ def _make_trackinfo(): TrackInfo(u'three', None, u'some artist', length=1, index=3), ] +class DistanceTest(unittest.TestCase): + def setUp(self): + self.dist = Distance() + + def test_add(self): + self.dist.add('add', 1.0) + self.assertEqual(self.dist._penalties, {'add': [1.0]}) + + def test_add_equality(self): + self.dist.add_equality('equality', 'ghi', ['abc', 'def', 
'ghi']) + self.assertEqual(self.dist._penalties['equality'], [0.0]) + + self.dist.add_equality('equality', 'xyz', ['abc', 'def', 'ghi']) + self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0]) + + self.dist.add_equality('equality', 'abc', re.compile(r'ABC', re.I)) + self.assertEqual(self.dist._penalties['equality'], [0.0, 1.0, 0.0]) + + def test_add_expr(self): + self.dist.add_expr('expr', True) + self.assertEqual(self.dist._penalties['expr'], [1.0]) + + self.dist.add_expr('expr', False) + self.assertEqual(self.dist._penalties['expr'], [1.0, 0.0]) + + def test_add_number(self): + # Add a full penalty for each number of difference between two numbers. + + self.dist.add_number('number', 1, 1) + self.assertEqual(self.dist._penalties['number'], [0.0]) + + self.dist.add_number('number', 1, 2) + self.assertEqual(self.dist._penalties['number'], [0.0, 1.0]) + + self.dist.add_number('number', 2, 1) + self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0]) + + self.dist.add_number('number', -1, 2) + self.assertEqual(self.dist._penalties['number'], [0.0, 1.0, 1.0, 1.0, + 1.0, 1.0]) + + def test_add_priority(self): + self.dist.add_priority('priority', 'abc', 'abc') + self.assertEqual(self.dist._penalties['priority'], [0.0]) + + self.dist.add_priority('priority', 'def', ['abc', 'def']) + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5]) + + self.dist.add_priority('priority', 'gh', ['ab', 'cd', 'ef', + re.compile('GH', re.I)]) + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75]) + + self.dist.add_priority('priority', 'xyz', ['abc', 'def']) + self.assertEqual(self.dist._penalties['priority'], [0.0, 0.5, 0.75, + 1.0]) + + def test_add_ratio(self): + self.dist.add_ratio('ratio', 25, 100) + self.assertEqual(self.dist._penalties['ratio'], [0.25]) + + self.dist.add_ratio('ratio', 10, 5) + self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0]) + + self.dist.add_ratio('ratio', -5, 5) + 
self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0]) + + self.dist.add_ratio('ratio', 5, 0) + self.assertEqual(self.dist._penalties['ratio'], [0.25, 1.0, 0.0, 0.0]) + + def test_add_string(self): + dist = match.string_dist(u'abc', u'bcd') + self.dist.add_string('string', u'abc', u'bcd') + self.assertEqual(self.dist._penalties['string'], [dist]) + + def test_distance(self): + config['match']['distance_weights']['album'] = 2.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('album', 0.5) + self.dist.add('media', 0.25) + self.dist.add('media', 0.75) + self.assertEqual(self.dist.distance, 0.5) + + # __getitem__() + self.assertEqual(self.dist['album'], 0.25) + self.assertEqual(self.dist['media'], 0.25) + + def test_max_distance(self): + config['match']['distance_weights']['album'] = 3.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('album', 0.5) + self.dist.add('medium', 0.0) + self.dist.add('medium', 0.0) + self.assertEqual(self.dist.max_distance, 5.0) + + def test_operators(self): + config['match']['distance_weights']['source'] = 1.0 + config['match']['distance_weights']['album'] = 2.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('source', 0.0) + self.dist.add('album', 0.5) + self.dist.add('medium', 0.25) + self.dist.add('medium', 0.75) + self.assertEqual(len(self.dist), 2) + self.assertEqual(list(self.dist), [(0.2, 'album'), (0.2, 'medium')]) + self.assertTrue(self.dist == 0.4) + self.assertTrue(self.dist < 1.0) + self.assertTrue(self.dist > 0.0) + self.assertEqual(self.dist - 0.4, 0.0) + self.assertEqual(0.4 - self.dist, 0.0) + self.assertEqual(float(self.dist), 0.4) + + def test_raw_distance(self): + config['match']['distance_weights']['album'] = 3.0 + config['match']['distance_weights']['medium'] = 1.0 + self.dist.add('album', 0.5) + self.dist.add('medium', 0.25) + self.dist.add('medium', 0.5) + self.assertEqual(self.dist.raw_distance, 2.25) + + def test_sorted(self): + 
config['match']['distance_weights']['album'] = 4.0 + config['match']['distance_weights']['medium'] = 2.0 + + self.dist.add('album', 0.1875) + self.dist.add('medium', 0.75) + self.assertEqual(self.dist.sorted, [(0.25, 'medium'), (0.125, 'album')]) + + # Sort by key if distance is equal. + dist = Distance() + dist.add('album', 0.375) + dist.add('medium', 0.75) + self.assertEqual(dist.sorted, [(0.25, 'album'), (0.25, 'medium')]) + + def test_update(self): + self.dist.add('album', 0.5) + self.dist.add('media', 1.0) + + dist = Distance() + dist.add('album', 0.75) + dist.add('album', 0.25) + self.dist.add('media', 0.05) + + self.dist.update(dist) + + self.assertEqual(self.dist._penalties, {'album': [0.5, 0.75, 0.25], + 'media': [1.0, 0.05]}) + class TrackDistanceTest(unittest.TestCase): def test_identical_tracks(self): item = _make_item(u'one', 1) diff --git a/test/test_mediafile.py b/test/test_mediafile.py index 5c237c248..71ebba3c4 100644 --- a/test/test_mediafile.py +++ b/test/test_mediafile.py @@ -151,7 +151,7 @@ class SafetyTest(unittest.TestCase): fn = os.path.join(_common.RSRC, 'brokenlink') os.symlink('does_not_exist', fn) try: - self.assertRaises(beets.mediafile.UnreadableFileError, + self.assertRaises(IOError, beets.mediafile.MediaFile, fn) finally: os.unlink(fn) diff --git a/test/test_mediafile_basic.py b/test/test_mediafile_basic.py index decdfb9a2..91f663556 100644 --- a/test/test_mediafile_basic.py +++ b/test/test_mediafile_basic.py @@ -208,6 +208,15 @@ READ_ONLY_CORRECT_DICTS = { 'bitdepth': 0, 'channels': 1, }, + + 'full.alac.m4a': { + 'length': 1.0, + 'bitrate': 55072, + 'format': 'ALAC', + 'samplerate': 0, + 'bitdepth': 0, + 'channels': 0, + }, } TEST_FILES = { @@ -267,6 +276,9 @@ class AllFilesMixin(object): def test_wma(self): self._run('full', 'wma') + def test_alac(self): + self._run('full', 'alac.m4a') + # Special test for advanced release date. 
def test_date_mp3(self): self._run('date', 'mp3') @@ -429,6 +441,9 @@ class ReadOnlyTest(unittest.TestCase): def test_wma(self): self._run('full.wma') + def test_alac(self): + self._run('full.alac.m4a') + def suite(): return unittest.TestLoader().loadTestsFromName(__name__) diff --git a/test/test_ui.py b/test/test_ui.py index b679021f7..6cb09dcf1 100644 --- a/test/test_ui.py +++ b/test/test_ui.py @@ -27,6 +27,7 @@ from beets import library from beets import ui from beets.ui import commands from beets import autotag +from beets.autotag.match import distance from beets import importer from beets.mediafile import MediaFile from beets import config @@ -594,21 +595,23 @@ class ShowChangeTest(_common.TestCase): self.items[0].track = 1 self.items[0].path = '/path/to/file.mp3' self.info = autotag.AlbumInfo( - 'the album', 'album id', 'the artist', 'artist id', [ - autotag.TrackInfo('the title', 'track id', index=1) + u'the album', u'album id', u'the artist', u'artist id', [ + autotag.TrackInfo(u'the title', u'track id', index=1) ]) def _show_change(self, items=None, info=None, - cur_artist='the artist', cur_album='the album', + cur_artist=u'the artist', cur_album=u'the album', dist=0.1): items = items or self.items info = info or self.info mapping = dict(zip(items, info.tracks)) config['color'] = False + album_dist = distance(items, info, mapping) + album_dist._penalties = {'album': [dist]} commands.show_change( cur_artist, cur_album, - autotag.AlbumMatch(0.1, info, mapping, set(), set()), + autotag.AlbumMatch(album_dist, info, mapping, set(), set()), ) return self.io.getoutput().lower() @@ -623,7 +626,7 @@ class ShowChangeTest(_common.TestCase): self.assertTrue('correcting tags from:' in msg) def test_item_data_change(self): - self.items[0].title = 'different' + self.items[0].title = u'different' msg = self._show_change() self.assertTrue('different -> the title' in msg) @@ -638,12 +641,12 @@ class ShowChangeTest(_common.TestCase): self.assertTrue('correcting tags from:' 
in msg) def test_item_data_change_title_missing(self): - self.items[0].title = '' + self.items[0].title = u'' msg = re.sub(r' +', ' ', self._show_change()) self.assertTrue('file.mp3 -> the title' in msg) def test_item_data_change_title_missing_with_unicode_filename(self): - self.items[0].title = '' + self.items[0].title = u'' self.items[0].path = u'/path/to/caf\xe9.mp3'.encode('utf8') msg = re.sub(r' +', ' ', self._show_change().decode('utf8')) self.assertTrue(u'caf\xe9.mp3 -> the title' in msg