diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py index bbf7ae087..1fcc2cebb 100644 --- a/beets/autotag/hooks.py +++ b/beets/autotag/hooks.py @@ -51,6 +51,7 @@ class AlbumInfo(object): - ``media``: delivery mechanism (Vinyl, etc.) - ``albumdisambig``: MusicBrainz release disambiguation comment - ``artist_credit``: Release-specific artist name + - ``data_source``: The original data source (MusicBrainz, Discogs, etc.) The fields up through ``tracks`` are required. The others are optional and may be None. @@ -61,7 +62,7 @@ class AlbumInfo(object): releasegroup_id=None, catalognum=None, script=None, language=None, country=None, albumstatus=None, media=None, albumdisambig=None, artist_credit=None, original_year=None, - original_month=None, original_day=None): + original_month=None, original_day=None, data_source=None): self.album = album self.album_id = album_id self.artist = artist @@ -88,6 +89,7 @@ class AlbumInfo(object): self.original_year = original_year self.original_month = original_month self.original_day = original_day + self.data_source = data_source or 'Unknown' # Work around a bug in python-musicbrainz-ngs that causes some # strings to be bytes rather than Unicode. @@ -199,7 +201,7 @@ def _album_candidates(items, artist, album, va_likely): exc.log(log) # Candidates from plugins. - out.extend(plugins.candidates(items)) + out.extend(plugins.candidates(items, artist, album, va_likely)) return out @@ -218,6 +220,6 @@ def _item_candidates(item, artist, title): exc.log(log) # Plugin candidates. 
- out.extend(plugins.item_candidates(item)) + out.extend(plugins.item_candidates(item, artist, title)) return out diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 839c2b56d..76161d905 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -79,7 +79,7 @@ recommendation = enum('none', 'low', 'medium', 'strong', name='recommendation') # album level to determine whether a given release is likely a VA # release and also on the track level to to remove the penalty for # differing artists. -VA_ARTISTS = (u'', u'various artists', u'va', u'unknown') +VA_ARTISTS = (u'', u'various artists', u'various', u'va', u'unknown') # Global logger. log = logging.getLogger('beets') diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index e67a78a09..566340e83 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -213,6 +213,7 @@ def album_info(release): mediums=len(release['medium-list']), artist_sort=artist_sort_name, artist_credit=artist_credit_name, + data_source='MusicBrainz', ) info.va = info.artist_id == VARIOUS_ARTISTS_ID info.asin = release.get('asin') diff --git a/beets/plugins.py b/beets/plugins.py index 5a5a718f7..079dd86b0 100755 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -73,13 +73,13 @@ class BeetsPlugin(object): """ return 0.0, 0.0 - def candidates(self, items): + def candidates(self, items, artist, album, va_likely): """Should return a sequence of AlbumInfo objects that match the album whose items are provided. """ return () - def item_candidates(self, item): + def item_candidates(self, item, artist, title): """Should return a sequence of TrackInfo objects that match the item provided. """ @@ -247,20 +247,20 @@ def album_distance(items, album_info, mapping): dist_max += dm return dist, dist_max -def candidates(items): +def candidates(items, artist, album, va_likely): """Gets MusicBrainz candidates for an album from each plugin. 
""" out = [] for plugin in find_plugins(): - out.extend(plugin.candidates(items)) + out.extend(plugin.candidates(items, artist, album, va_likely)) return out -def item_candidates(item): +def item_candidates(item, artist, title): """Gets MusicBrainz candidates for an item from the plugins. """ out = [] for plugin in find_plugins(): - out.extend(plugin.item_candidates(item)) + out.extend(plugin.item_candidates(item, artist, title)) return out def configure(config): diff --git a/beets/ui/commands.py b/beets/ui/commands.py index c6096750e..e4222b631 100644 --- a/beets/ui/commands.py +++ b/beets/ui/commands.py @@ -131,6 +131,13 @@ def dist_string(dist): out = ui.colorize('red', out) return out +def source_string(source): + colors = { + 'MusicBrainz': 'green', + 'Unknown': 'red', + } + return ui.colorize(colors.get(source, 'yellow'), source) + def show_change(cur_artist, cur_album, match): """Print out a representation of the changes that will be made if an album's tags are changed according to `match`, which must be an AlbumMatch @@ -188,8 +195,13 @@ def show_change(cur_artist, cur_album, match): message += u' ' + ui.colorize('yellow', PARTIAL_MATCH_MESSAGE) print_(message) + # Info line. + info = [] # Distance/similarity. - print_('(Similarity: %s)' % dist_string(match.distance)) + info.append('(Similarity: %s)' % dist_string(match.distance)) + # Source. + info.append('(Source: %s)' % source_string(match.info.data_source)) + print_(' '.join(info)) # Tracks. pairs = match.mapping.items() diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 2dac5c89f..3b2731af0 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -113,8 +113,8 @@ def _all_releases(items): class AcoustidPlugin(plugins.BeetsPlugin): def track_distance(self, item, info): - if item.path not in _matches: - # Match failed. + if item.path not in _matches or not info.track_id: + # Match failed or no track ID. 
return 0.0, 0.0 recording_ids, _ = _matches[item.path] @@ -124,7 +124,7 @@ class AcoustidPlugin(plugins.BeetsPlugin): dist = TRACK_ID_WEIGHT return dist, TRACK_ID_WEIGHT - def candidates(self, items): + def candidates(self, items, artist, album, va_likely): albums = [] for relid in _all_releases(items): album = hooks._album_for_id(relid) @@ -134,7 +134,7 @@ class AcoustidPlugin(plugins.BeetsPlugin): log.debug('acoustid album candidates: %i' % len(albums)) return albums - def item_candidates(self, item): + def item_candidates(self, item, artist, title): if item.path not in _matches: return [] diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py new file mode 100644 index 000000000..66831ca85 --- /dev/null +++ b/beetsplug/discogs.py @@ -0,0 +1,174 @@ +# This file is part of beets. +# Copyright 2013, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds Discogs album search support to the autotagger. Requires the +discogs-client library. +""" +from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.autotag.match import current_metadata, VA_ARTISTS +from beets.plugins import BeetsPlugin +from discogs_client import Artist, DiscogsAPIError, Release, Search +import beets +import discogs_client +import logging +import re +import time + +log = logging.getLogger('beets') + +# Silence spurious INFO log lines generated by urllib3. 
+urllib3_logger = logging.getLogger('requests.packages.urllib3')
+urllib3_logger.setLevel(logging.CRITICAL)
+
+# Set user-agent for discogs client.
+discogs_client.user_agent = 'beets/%s +http://beets.radbox.org/' % \
+    beets.__version__
+
+class DiscogsPlugin(BeetsPlugin):
+    def candidates(self, items, artist, album, va_likely):
+        """Returns a list of AlbumInfo objects for discogs search results
+        matching an album and artist (if not various); [] on API errors.
+        """
+        if va_likely:
+            query = album
+        else:
+            query = '%s %s' % (artist, album)
+        try:
+            return self.get_albums(query)
+        except DiscogsAPIError as e:
+            log.debug('Discogs API Error: %s (query: %s)' % (e, query))
+            return []
+
+    def get_albums(self, query):
+        """Returns a list of AlbumInfo objects for a discogs search query.
+        """
+        # Strip non-word characters from query. Things like "!" and "-" can
+        # cause a query to return no results, even if they match the artist or
+        # album title. The inline `(?u)` flag keeps non-English word characters
+        # (the 4th positional argument of `re.sub` is `count`, not `flags`).
+        query = re.sub(r'(?u)\W+', ' ', query)
+        # Strip medium information from query. Things like "CD1" and "disk 1"
+        # can also negate an otherwise positive result.
+        query = re.sub(r'(?i)\b(CD|dis[ck])\s*\d+', '', query)
+        albums = []
+        for result in Search(query).results():
+            if isinstance(result, Release):
+                albums.append(self.get_album_info(result))
+            if len(albums) >= 5:
+                break
+        return albums
+
+    def get_album_info(self, result):
+        """Returns an AlbumInfo object for a discogs Release object.
+        """
+        album = re.sub(r' +', ' ', result.title)
+        album_id = result.data['id']
+        artist, artist_id = self.get_artist(result.data['artists'])
+        # Use `.data` to access the tracklist directly instead of the convenient
+        # `.tracklist` property, which will strip out useful artist information
+        # and leave us with skeleton `Artist` objects that will each make an API
+        # call just to get the same data back.
+        tracks = self.get_tracks(result.data['tracklist'])
+        albumtype = ', '.join(
+            result.data['formats'][0].get('descriptions', [])) or None
+        va = result.data['artists'][0]['name'].lower() == 'various'
+        year = result.data['year']
+        label = result.data['labels'][0]['name']
+        mediums = len(set(t.medium for t in tracks))
+        catalogno = result.data['labels'][0]['catno']
+        if catalogno == 'none':
+            catalogno = None
+        country = result.data.get('country')
+        media = result.data['formats'][0]['name']
+        return AlbumInfo(album, album_id, artist, artist_id, tracks, asin=None,
+                         albumtype=albumtype, va=va, year=year, month=None,
+                         day=None, label=label, mediums=mediums,
+                         artist_sort=None, releasegroup_id=None,
+                         catalognum=catalogno, script=None, language=None,
+                         country=country, albumstatus=None, media=media,
+                         albumdisambig=None, artist_credit=None,
+                         original_year=None, original_month=None,
+                         original_day=None, data_source='Discogs')
+
+    def get_artist(self, artists):
+        """Returns an artist string (all artists) and an artist_id (the main
+        artist) for a list of discogs album or track artists.
+        """
+        artist_id = None
+        bits = []
+        for artist in artists:
+            if not artist_id:
+                artist_id = artist['id']
+            bits.append(artist['name'])
+            if artist.get('join'):
+                bits.append(artist['join'])
+        artist = ' '.join(bits).replace(' ,', ',') or None
+        return artist, artist_id
+
+    def get_tracks(self, tracklist):
+        """Returns a list of TrackInfo objects for a discogs tracklist.
+        """
+        tracks = []
+        index = 0
+        for track in tracklist:
+            # Only real tracks have `position`. Otherwise, it's an index track.
+            if track['position']:
+                index += 1
+                tracks.append(self.get_track_info(track, index))
+        # Fix up medium and medium_index for each track. Discogs position is
+        # unreliable, but tracks are in order.
+        medium = None
+        medium_count, index_count = 0, 0
+        for track in tracks:
+            if medium_count == 0 or medium != track.medium:
+                # Start a new medium on the first track (whose medium may be
+                # None) and whenever the medium changes.
+                medium = track.medium
+                medium_count += 1
+                index_count = 0
+            index_count += 1
+            track.medium, track.medium_index = medium_count, index_count
+        return tracks
+
+    def get_track_info(self, track, index):
+        """Returns a TrackInfo object for a discogs track.
+        """
+        title = track['title']
+        track_id = None
+        medium, medium_index = self.get_track_index(track['position'])
+        artist, artist_id = self.get_artist(track.get('artists', []))
+        length = self.get_track_length(track['duration'])
+        return TrackInfo(title, track_id, artist, artist_id, length, index,
+                         medium, medium_index, artist_sort=None, disctitle=None,
+                         artist_credit=None)
+
+    def get_track_index(self, position):
+        """Returns the medium and medium index for a discogs track position.
+        """
+        match = re.match(r'^(.*?)(\d*)$', position, re.I)
+        if match:
+            medium, index = match.groups()
+        else:
+            log.debug('Invalid discogs position: %s' % position)
+            medium = index = None
+        return medium or None, index or None
+
+    def get_track_length(self, duration):
+        """Returns the length in seconds for a discogs "M:SS" duration, or
+        None if the duration cannot be parsed."""
+        try:
+            length = time.strptime(duration, '%M:%S')
+        except ValueError:
+            return None
+        return length.tm_min * 60 + length.tm_sec
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 7a56160fa..611169e85 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -8,6 +8,8 @@ Changelog
   library that are **duplicated**. Thanks to Pedro Silva.
 * New :doc:`/plugins/missing`: Find albums in your library that are **missing
   tracks**. Thanks once more to Pedro Silva.
+* New :doc:`/plugins/discogs`: Extends the autotagger to include matches from
+  the `discogs`_ database.
 * Your library now keeps track of **when music was added** to it. The new
   ``added`` field is a timestamp reflecting when each item and album was
   imported and the new ``%time{}`` template function lets you format this
@@ -32,6 +34,8 @@ Changelog
   Thanks again to Pedro Silva.
* Album art filenames now respect the :ref:`replace` configuration. +.. _discogs: http://discogs.com/ + 1.1.0 (April 29, 203) --------------------- diff --git a/docs/plugins/discogs.rst b/docs/plugins/discogs.rst new file mode 100644 index 000000000..091907172 --- /dev/null +++ b/docs/plugins/discogs.rst @@ -0,0 +1,16 @@ +Discogs Plugin +============== + +The ``discogs`` plugin will extend the autotagger's search capabilities to +include matches from the `discogs`_ database. + +.. _discogs: http://discogs.com + +Installation +------------ + +First, enable the ``discogs`` plugin (see :doc:`/plugins/index`). + +Then you will need to install the ``discogs-client`` library:: + + pip install "discogs-client>=1.1.1" diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst index 2ad472066..5db83e8e1 100644 --- a/docs/plugins/index.rst +++ b/docs/plugins/index.rst @@ -65,12 +65,14 @@ disabled by default, but you can turn them on as described above. mbsync missing duplicates + discogs Autotagger Extensions '''''''''''''''''''''' * :doc:`chroma`: Use acoustic fingerprinting to identify audio files with missing or incorrect metadata. +* :doc:`discogs`: Search for releases in the Discogs database. Metadata '''''''' diff --git a/docs/plugins/writing.rst b/docs/plugins/writing.rst index cff9a166b..7c6b20245 100644 --- a/docs/plugins/writing.rst +++ b/docs/plugins/writing.rst @@ -166,7 +166,7 @@ A plugin can extend three parts of the autotagger's process: the track distance function, the album distance function, and the initial MusicBrainz search. The distance functions determine how "good" a match is at the track and album levels; the initial search controls which candidates are presented to the -matching algorithm. 
Plugins implement these extensions by implementing four methods on the plugin class: * ``track_distance(self, item, info)``: adds a component to the distance @@ -181,12 +181,13 @@ methods on the plugin class: object; and ``mapping`` is a dictionary that maps Items to their corresponding TrackInfo objects. -* ``candidates(self, items)``: given a list of items comprised by an album to be - matched, return a list of ``AlbumInfo`` objects for candidate albums to be - compared and matched. +* ``candidates(self, items, artist, album, va_likely)``: given a list of items + comprised by an album to be matched, return a list of ``AlbumInfo`` objects + for candidate albums to be compared and matched. -* ``item_candidates(self, item)``: given a *singleton* item, return a list of - ``TrackInfo`` objects for candidate tracks to be compared and matched. +* ``item_candidates(self, item, artist, title)``: given a *singleton* item, + return a list of ``TrackInfo`` objects for candidate tracks to be compared and + matched. When implementing these functions, it will probably be very necessary to use the functions from the ``beets.autotag`` and ``beets.autotag.mb`` modules, both of