Merge branch 'master' into distance-refactor

Conflicts: docs/changelog.rst
2026-03-06 13:12:00 +01:00 · 2013-06-06 11:09:36 +10:00 · 2013-06-06 11:09:36 +10:00 · 898254ed6a
commit 898254ed6a
parent c1ebae83bc 9542a292ed
10 changed files with 436 additions and 64 deletions
--- a/beets/init.py
+++ b/beets/init.py
@ -12,7 +12,7 @@
 # The above copyright notice and this permission notice shall be
 # included in all copies or substantial portions of the Software.

-__version__ = '1.1.1'
+__version__ = '1.2.0'
 __author__ = 'Adrian Sampson <adrian@radbox.org>'

 import beets.library
--- a/beets/library.py
+++ b/beets/library.py
@ -321,7 +321,7 @@ class Item(object):
        try:
            f = MediaFile(syspath(read_path))
        except (OSError, IOError) as exc:
-            raise util.FilesystemError(exc, 'read', (self.path,),
+            raise util.FilesystemError(exc, 'read', (read_path,),
                                       traceback.format_exc())

        for key in ITEM_KEYS_META:
@ -399,7 +399,7 @@ class Item(object):
        # Build the mapping for substitution in the template,
        # beginning with the values from the database.
        mapping = {}
-        for key in ITEM_KEYS_META:
+        for key in ITEM_KEYS:
            # Get the values from either the item or its album.
            if key in ALBUM_KEYS_ITEM and album is not None:
                # From album.
@ -411,8 +411,10 @@ class Item(object):
                value = format_for_path(value, key, pathmod)
            mapping[key] = value

-        # Additional fields in non-sanitized case.
-        if not sanitize:
+        # Include the path if we're not sanitizing to construct a path.
+        if sanitize:
+            del mapping['path']
+        else:
            mapping['path'] = displayable_path(self.path)

        # Use the album artist if the track artist is not set and
--- a/beets/mediafile.py
+++ b/beets/mediafile.py
@ -59,10 +59,6 @@ log = logging.getLogger('beets')
 class UnreadableFileError(Exception):
    pass

-class FileIOError(UnreadableFileError, IOError):
-    def __init__(self, exc):
-        IOError.__init__(self, exc.errno, exc.strerror, exc.filename)
-
 # Raised for files that don't seem to have a type MediaFile supports.
 class FileTypeError(UnreadableFileError):
    pass
@ -861,12 +857,15 @@ class MediaFile(object):
        self.path = path

        unreadable_exc = (
-            mutagen.mp3.HeaderNotFoundError,
-            mutagen.flac.FLACNoHeaderError,
+            mutagen.mp3.error,
+            mutagen.id3.error,
+            mutagen.flac.error,
            mutagen.monkeysaudio.MonkeysAudioHeaderError,
-            mutagen.mp4.MP4StreamInfoError,
-            mutagen.oggvorbis.OggVorbisHeaderError,
-            mutagen.asf.ASFHeaderError,
+            mutagen.mp4.error,
+            mutagen.oggvorbis.error,
+            mutagen.ogg.error,
+            mutagen.asf.error,
+            mutagen.apev2.error,
        )
        try:
            self.mgfile = mutagen.File(path)
@ -874,7 +873,13 @@ class MediaFile(object):
            log.debug(u'header parsing failed: {0}'.format(unicode(exc)))
            raise UnreadableFileError('Mutagen could not read file')
        except IOError as exc:
-            raise FileIOError(exc)
+            if type(exc) == IOError:
+                # This is a base IOError, not a subclass from Mutagen or
+                # anywhere else.
+                raise
+            else:
+                log.debug(traceback.format_exc())
+                raise UnreadableFileError('Mutagen raised an exception')
        except Exception as exc:
            # Hide bugs in Mutagen.
            log.debug(traceback.format_exc())
--- a/beetsplug/beatport.py
+++ b/beetsplug/beatport.py
@ -0,0 +1,299 @@
+# This file is part of beets.
+# Copyright 2013, Adrian Sampson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Adds Beatport release and track search support to the autotagger
+"""
+import logging
+import re
+from datetime import datetime, timedelta
+
+import requests
+
+from beets import config
+from beets.autotag.hooks import AlbumInfo, TrackInfo
+from beets.plugins import BeetsPlugin
+
+log = logging.getLogger('beets')
+
+
+class BeatportAPIError(Exception):
+    """Raised when a request to the Beatport API cannot be completed."""
+
+
+class BeatportObject(object):
+    """Common base for objects built from a Beatport API result dict."""
+    def __init__(self, data):
+        """Copy the shared fields out of `data`.
+
+        ``id`` and ``name`` are required. ``releaseDate``, ``artists``
+        and ``genres`` are optional: the matching attribute is only set
+        when the key is present in `data`.
+        """
+        self.beatport_id = data['id']
+        self.name = unicode(data['name'])
+        if 'releaseDate' in data:
+            raw_date = data['releaseDate']
+            self.release_date = datetime.strptime(raw_date, '%Y-%m-%d')
+        if 'artists' in data:
+            # Each artist is kept as an (id, name) pair.
+            pairs = []
+            for entry in data['artists']:
+                pairs.append((entry['id'], unicode(entry['name'])))
+            self.artists = pairs
+        if 'genres' in data:
+            genre_names = []
+            for entry in data['genres']:
+                genre_names.append(unicode(entry['name']))
+            self.genres = genre_names
+
+
+class BeatportAPI(object):
+    """Minimal client for the Beatport catalog HTTP API."""
+    API_BASE = 'http://api.beatport.com/'
+
+    @classmethod
+    def get(cls, endpoint, **kwargs):
+        """Issue a GET request against `endpoint` and return its results.
+
+        `endpoint` is appended to `API_BASE` and any keyword arguments
+        are sent as query parameters. Returns the value stored under the
+        ``results`` key of the decoded JSON response.
+
+        Raises `BeatportAPIError` if the request itself fails, if the
+        server answers with an error status, or if the response body is
+        not JSON containing a ``results`` key.
+        """
+        try:
+            response = requests.get(cls.API_BASE + endpoint, params=kwargs)
+        except Exception as e:
+            # Format the exception itself: `e.message` is deprecated since
+            # Python 2.6 and is empty unless the exception was built from
+            # exactly one argument.
+            raise BeatportAPIError("Error connection to Beatport API: {}"
+                                   .format(e))
+        # A requests Response evaluates as false for 4xx/5xx statuses.
+        if not response:
+            raise BeatportAPIError(
+                "Error {0.status_code} for '{0.request.path_url}'"
+                .format(response))
+        try:
+            return response.json()['results']
+        except (ValueError, KeyError, TypeError) as e:
+            # Callers in this module only handle BeatportAPIError, so a
+            # malformed body must not escape as a bare decoding error.
+            raise BeatportAPIError(
+                "Invalid response from Beatport API: {}".format(e))
+
+
+class BeatportSearch(object):
+    """Runs a Beatport catalog search on construction and keeps the
+    resulting release or track objects in `results`.
+    """
+    query = None
+    release_type = None
+
+    def __unicode__(self):
+        template = u"<BeatportSearch for {} \"{}\" with {} results>"
+        return template.format(self.release_type, self.query,
+                               len(self.results))
+
+    def __init__(self, query, release_type='release', details=True):
+        """Search the catalog for `query`.
+
+        `release_type` selects the kind of result: ``'release'`` yields
+        `BeatportRelease` objects and ``'track'`` yields `BeatportTrack`
+        objects; any other value leaves `results` empty. When `details`
+        is true, each release also has its track list fetched.
+        """
+        self.query = query
+        self.release_type = release_type
+        self.results = []
+        facet = 'fieldType:{}'.format(release_type)
+        found = BeatportAPI.get('catalog/3/search', query=query,
+                                facets=[facet], perPage=5)
+        for entry in found:
+            if release_type == 'track':
+                self.results.append(BeatportTrack(entry))
+            elif release_type == 'release':
+                release = BeatportRelease(entry)
+                if details:
+                    # The track list comes from a separate API request.
+                    release.get_tracks()
+                self.results.append(release)
+
+
+class BeatportRelease(BeatportObject):
+    """A release as described by the Beatport release endpoint."""
+    API_ENDPOINT = 'catalog/3/beatport/release'
+
+    def __unicode__(self):
+        # Releases with four or more artists are labelled as compilations.
+        artist_str = "Various Artists"
+        if len(self.artists) < 4:
+            artist_str = ", ".join(entry[1] for entry in self.artists)
+        template = u"<BeatportRelease: {} - {} ({})>"
+        return template.format(artist_str, self.name, self.catalog_number)
+
+    def __init__(self, data):
+        """Build a release from an API result dict.
+
+        On top of the fields handled by `BeatportObject`, the optional
+        keys ``catalogNumber``, ``label``, ``category`` and ``slug`` set
+        `catalog_number`, `label_name`, `category` and `url`; each
+        attribute is only set when its key is present.
+        """
+        BeatportObject.__init__(self, data)
+        if 'catalogNumber' in data:
+            self.catalog_number = data['catalogNumber']
+        if 'label' in data:
+            label = data['label']
+            self.label_name = label['name']
+        if 'category' in data:
+            self.category = data['category']
+        if 'slug' in data:
+            slug, ident = data['slug'], data['id']
+            self.url = "http://beatport.com/release/{}/{}".format(slug, ident)
+
+    @classmethod
+    def from_id(cls, beatport_id):
+        """Fetch the release with the given ID, including its tracks."""
+        payload = BeatportAPI.get(cls.API_ENDPOINT, id=beatport_id)
+        release = BeatportRelease(payload['release'])
+        tracks = []
+        for raw in payload['tracks']:
+            tracks.append(BeatportTrack(raw))
+        release.tracks = tracks
+        return release
+
+    def get_tracks(self):
+        """Fetch this release's tracks and store them in `tracks`."""
+        payload = BeatportAPI.get(self.API_ENDPOINT, id=self.beatport_id)
+        tracks = []
+        for raw in payload['tracks']:
+            tracks.append(BeatportTrack(raw))
+        self.tracks = tracks
+
+
+class BeatportTrack(BeatportObject):
+    """A track as described by the Beatport track endpoint."""
+    API_ENDPOINT = 'catalog/3/beatport/track'
+
+    def __unicode__(self):
+        artist_str = ", ".join(x[1] for x in self.artists)
+        return u"<BeatportTrack: {} - {} ({})>".format(artist_str, self.name,
+                                                       self.mix_name)
+
+    def __init__(self, data):
+        """Build a track from an API result dict.
+
+        On top of the fields handled by `BeatportObject`, the optional
+        keys ``title``, ``mixName``, ``lengthMs`` and ``slug`` set
+        `title`, `mix_name`, `length` (a `timedelta`) and `url`; each
+        attribute is only set when its key is present.
+        """
+        BeatportObject.__init__(self, data)
+        if 'title' in data:
+            self.title = unicode(data['title'])
+        if 'mixName' in data:
+            self.mix_name = unicode(data['mixName'])
+        # Guard on the key that is actually read. Testing for 'length'
+        # while reading 'lengthMs' raised KeyError when only the former
+        # was present and dropped the length when only the latter was.
+        if 'lengthMs' in data:
+            self.length = timedelta(milliseconds=data['lengthMs'])
+        if 'slug' in data:
+            self.url = "http://beatport.com/track/{}/{}".format(
+                data['slug'], data['id'])
+
+    @classmethod
+    def from_id(cls, beatport_id):
+        """Fetch the track with the given Beatport ID."""
+        response = BeatportAPI.get(cls.API_ENDPOINT, id=beatport_id)
+        return BeatportTrack(response['track'])
+
+
+class BeatportPlugin(BeetsPlugin):
+    """Autotagger plugin that offers Beatport releases and tracks as
+    match candidates.
+    """
+    def __init__(self):
+        super(BeatportPlugin, self).__init__()
+        self.config.add({
+            'source_weight': 0.5,
+        })
+
+    def album_distance(self, items, album_info, mapping):
+        """Returns the beatport source weight and the maximum source weight
+        for albums.
+
+        The result is a ``(penalty, maximum)`` pair; both are zero for
+        albums whose data source is not Beatport.
+        """
+        if album_info.data_source == 'Beatport':
+            return self.config['source_weight'].as_number() * \
+                config['match']['weight']['source'].as_number(), \
+                config['match']['weight']['source'].as_number()
+        else:
+            return 0.0, 0.0
+
+    def track_distance(self, item, info):
+        """Returns the beatport source weight and the maximum source weight
+        for individual tracks.
+        """
+        # NOTE(review): unlike album_distance, this does not look at the
+        # data source, so the same pair is returned for every track --
+        # confirm that this is intended.
+        return self.config['source_weight'].as_number() * \
+            config['match']['weight']['source'].as_number(), \
+            config['match']['weight']['source'].as_number()
+
+    def candidates(self, items, artist, release, va_likely):
+        """Returns a list of AlbumInfo objects for beatport search results
+        matching release and artist (if not various).
+        """
+        if va_likely:
+            query = release
+        else:
+            query = '%s %s' % (artist, release)
+        try:
+            return self._get_releases(query)
+        except BeatportAPIError as e:
+            log.debug('Beatport API Error: %s (query: %s)' % (e, query))
+            return []
+
+    def item_candidates(self, item, artist, title):
+        """Returns a list of TrackInfo objects for beatport search results
+        matching title and artist.
+        """
+        query = '%s %s' % (artist, title)
+        try:
+            return self._get_tracks(query)
+        except BeatportAPIError as e:
+            log.debug('Beatport API Error: %s (query: %s)' % (e, query))
+            return []
+
+    def album_for_id(self, release_id):
+        """Fetches a release by its Beatport ID and returns an AlbumInfo object
+        or None if the release is not found.
+
+        `release_id` may be a bare numeric ID or a beatport.com release
+        URL ending in the ID.
+        """
+        log.debug('Searching Beatport for release %s' % str(release_id))
+        match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id)
+        if not match:
+            return None
+        try:
+            release = BeatportRelease.from_id(match.group(2))
+        except BeatportAPIError as e:
+            # Honor the documented contract: a failed lookup yields None
+            # instead of propagating, as the search methods already do.
+            log.debug('Beatport API Error: %s (release: %s)'
+                      % (e, release_id))
+            return None
+        album = self._get_album_info(release)
+        return album
+
+    def track_for_id(self, track_id):
+        """Fetches a track by its Beatport ID and returns a TrackInfo object
+        or None if the track is not found.
+
+        `track_id` may be a bare numeric ID or a beatport.com track URL
+        ending in the ID.
+        """
+        log.debug('Searching Beatport for track %s' % str(track_id))
+        match = re.search(r'(^|beatport\.com/track/.+/)(\d+)$', track_id)
+        if not match:
+            return None
+        try:
+            bp_track = BeatportTrack.from_id(match.group(2))
+        except BeatportAPIError as e:
+            # Honor the documented contract: a failed lookup yields None
+            # instead of propagating, as the search methods already do.
+            log.debug('Beatport API Error: %s (track: %s)' % (e, track_id))
+            return None
+        track = self._get_track_info(bp_track)
+        return track
+
+    def _get_releases(self, query):
+        """Returns a list of AlbumInfo objects for a beatport search query.
+        """
+        # Strip non-word characters from query. Things like "!" and "-" can
+        # cause a query to return no results, even if they match the artist or
+        # album title. Use `re.UNICODE` flag to avoid stripping non-english
+        # word characters.
+        #
+        # The flags must be passed by keyword: the fourth positional
+        # argument of `re.sub` is `count`, not `flags`.
+        query = re.sub(r'\W+', ' ', query, flags=re.UNICODE)
+        # Strip medium information from query. Things like "CD1" and "disc 1"
+        # can also negate an otherwise positive result.
+        query = re.sub(r'\b(CD|disc)\s*\d+', '', query, flags=re.I)
+        albums = [self._get_album_info(x)
+                  for x in BeatportSearch(query).results]
+        return albums
+
+    def _get_album_info(self, release):
+        """Returns an AlbumInfo object for a Beatport Release object.
+        """
+        # A release with more than three artists is treated as a
+        # various-artists compilation.
+        va = len(release.artists) > 3
+        artist, artist_id = self._get_artist(release.artists)
+        if va:
+            artist = u"Various Artists"
+        # Track indices are one-based, in the order the API returned them.
+        tracks = [self._get_track_info(x, index=idx)
+                  for idx, x in enumerate(release.tracks, 1)]
+
+        return AlbumInfo(album=release.name, album_id=release.beatport_id,
+                         artist=artist, artist_id=artist_id, tracks=tracks,
+                         albumtype=release.category, va=va,
+                         year=release.release_date.year,
+                         month=release.release_date.month,
+                         day=release.release_date.day,
+                         label=release.label_name,
+                         catalognum=release.catalog_number, media=u'Digital',
+                         data_source=u'Beatport', data_url=release.url)
+
+    def _get_track_info(self, track, index=None):
+        """Returns a TrackInfo object for a Beatport Track object.
+        """
+        title = track.name
+        # The mix name is appended to the title unless it is the default.
+        if track.mix_name != u"Original Mix":
+            title += u" ({})".format(track.mix_name)
+        artist, artist_id = self._get_artist(track.artists)
+        length = track.length.total_seconds()
+
+        return TrackInfo(title=title, track_id=track.beatport_id,
+                         artist=artist, artist_id=artist_id,
+                         length=length, index=index)
+
+    def _get_artist(self, artists):
+        """Returns an artist string (all artists) and an artist_id (the main
+        artist) for a list of Beatport release or track artists.
+
+        `artists` is a list of ``(id, name)`` pairs. Both returned values
+        are None when the list is empty.
+        """
+        artist_id = None
+        bits = []
+        for artist in artists:
+            # The first artist with a truthy ID is taken as the main one.
+            if not artist_id:
+                artist_id = artist[0]
+            name = artist[1]
+            # Strip disambiguation number.
+            name = re.sub(r' \(\d+\)$', '', name)
+            # Move articles to the front.
+            name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I)
+            bits.append(name)
+        # Presumably the replace tidies names that end in a space.
+        artist = ', '.join(bits).replace(' ,', ',') or None
+        return artist, artist_id
+
+    def _get_tracks(self, query):
+        """Returns a list of TrackInfo objects for a Beatport query.
+        """
+        bp_tracks = BeatportSearch(query, release_type='track').results
+        tracks = [self._get_track_info(x) for x in bp_tracks]
+        return tracks
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -1,15 +1,35 @@
 Changelog
 =========

-1.1.1 (in development)
+1.2.0 (in development)
 ----------------------

+There's a *lot* of new stuff in this release: new data sources for the
+autotagger, new plugins to look for problems in your library, tracking the
+date that you acquired new music, an awesome new syntax for doing queries over
+numeric fields, support for ALAC files, and major enhancements to the
+importer's UI and distance calculations. A special thanks goes out to all the
+contributors who helped make this release awesome.
+
+For the first time, beets can now tag your music using additional **data
+sources** to augment the matches from MusicBrainz. When you enable either of
+these plugins, the importer will start showing you new kinds of matches:
+
+* New :doc:`/plugins/discogs`: Get matches from the `Discogs`_ database.
+  Thanks to Artem Ponomarenko and Tai Lee.
+* New :doc:`/plugins/beatport`: Get matches from the `Beatport`_ database.
+  Thanks to Johannes Baiter.
+
+We also have two other new plugins that can scan your library to check for
+common problems, both by Pedro Silva:
+
 * New :doc:`/plugins/duplicates`: Find tracks or albums in your
-  library that are **duplicated**. Thanks to Pedro Silva.
+  library that are **duplicated**.
 * New :doc:`/plugins/missing`: Find albums in your library that are **missing
-  tracks**. Thanks once more to Pedro Silva.
-* New :doc:`/plugins/discogs`: Extends the autotagger to include matches from
-  the `Discogs`_ database. Thanks to Artem Ponomarenko and Tai Lee.
+  tracks**.
+
+There are also three more big features added to beets core:
+
 * Your library now keeps track of **when music was added** to it. The new
  ``added`` field is a timestamp reflecting when each item and album was
  imported and the new ``%time{}`` template function lets you format this
@ -20,6 +40,51 @@ Changelog
  ``bitrate:128000..``. See :ref:`numericquery`. Thanks to Michael Schuerig.
 * **ALAC files** are now marked as ALAC instead of being conflated with AAC
  audio. Thanks to Simon Luijk.
+
+In addition, the importer saw various UI enhancements, thanks to Tai Lee:
+
+* Display data source URL for matches from the new data source plugins. This
+  should make it easier to migrate data from Discogs or Beatport into
+  MusicBrainz.
+* The top 3 distance penalties are now displayed on the release listing,
+  and all album and track penalties are now displayed on the track changes
+  list. This should make it clear exactly which metadata is contributing to a
+  low similarity score.
+* Display album disambiguation and disc titles in the track listing, when
+  available.
+* More consistent format and colorization of album and track metadata. Red
+  for an actual difference, yellow to indicate that a distance penalty is being
+  applied, and light gray for no-penalty or disambiguation data.
+* Track changes are highlighted in yellow when they indicate a change in
+  format to or from the style of :ref:`per_disc_numbering`. (As before, no
+  penalty is applied because the track number is still "correct", just in a
+  different format.)
+* Sort missing and unmatched tracks by index and title and group them
+  together for better readability.
+* Don't show potential matches that have specific penalties applied, as
+  configured by the :ref:`ignored` setting.
+
+The calculation of the similarity score for autotagger matches was also
+improved, again thanks to Tai Lee. These changes, in general, help deal with
+the new metadata sources and help disambiguate between similar releases in the
+same MusicBrainz release group:
+
+* Strongly prefer releases with a matching MusicBrainz album ID. This helps
+  beets re-identify the same release when re-importing existing files.
+* Prefer releases that are closest to the tagged ``year``. Tolerate files
+  tagged with release or original year.
+* Add a :ref:`preferred` collection of settings, which allow the user to
+  specify a sorted list of preferred countries and media types, or prefer
+  releases closest to the original year for an album.
+* It is now possible to configure a :ref:`max_rec` for any field that is used
+  to calculate the similarity score. The recommendation will be downgraded if
+  a penalty is being applied to the specified field.
+* Apply minor penalties across a range of fields to differentiate between
+  nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
+  ``country`` and ``albumdisambig``.
+
+As usual, there were also lots of other great littler enhancements:
+
 * :doc:`/plugins/random`: A new ``-e`` option gives an equal chance to each
  artist in your collection to avoid biasing random samples to prolific
  artists. Thanks to Georges Dubus.
@ -31,8 +96,6 @@ Changelog
  Duailibe.
 * The importer output now shows the number of audio files in each album.
  Thanks to jayme on GitHub.
-* :doc:`/plugins/lyrics`: Lyrics searches should now turn up more results due
-  to some fixes in dealing with special characters.
 * Plugins can now provide fields for both Album and Item templates, thanks
  to Pedro Silva. Accordingly, the :doc:`/plugins/inline` can also now define
  album fields. For consistency, the ``pathfields`` configuration section has
@ -44,6 +107,9 @@ Changelog
  Johannes Baiter.
 * The :ref:`fields-cmd` command shows template fields provided by plugins.
  Thanks again to Pedro Silva.
+
+And a batch of fixes:
+
 * Album art filenames now respect the :ref:`replace` configuration.
 * Friendly error messages are now printed when trying to read or write files
  that go missing.
@ -51,45 +117,14 @@ Changelog
  ``beet modify artpath=...`` works). Thanks to Lucas Duailibe.
 * :doc:`/plugins/zero`: Fix a crash when nulling out a field that contains
  None.
-* Various UI enhancements to the importer due to Tai Lee:
-
-  * Display data source URL and source name in album disambiguation for
-    non-MusicBrainz matches. This should make it easier for people who want to
-    import and correct data from other sources into MusicBrainz.
-  * The top 3 distance penalties are now displayed on the release listing,
-    and all album and track penalties are now displayed on the track changes
-    list. This should make it clear exactly which metadata is contributing to a
-    low similarity score.
-  * Display album disambiguation and disc titles in the track listing, when
-    available.
-  * More consistent format and colorization of album and track metadata. Red
-    for actual differences, yellow to indicate that a penalty is being applied,
-    and light gray for no-penalty supplementary data.
-  * Track changes highlighted in light gray indicate a change in format to or
-    from :ref:`per_disc_numbering`. No penalty is applied because the track
-    number is still "correct", just in a different format.
-  * Sort missing and unmatched tracks by index and title and group them
-    together for better readability.
-  * Don't show potential matches that have specific penalties applied, as
-    configured by the :ref:`ignored` setting.
-
-* Improve calculation of similarity score and recommendation:
-
-  * It is now possible to configure a :ref:`max_rec` for any field that is used
-    to calculate the similarity score. The recommendation will be downgraded if
-    a penalty is being applied to the specified field.
-  * Strongly prefer releases with a matching MusicBrainz album ID. This helps
-    beets re-identify the same release when re-importing existing files.
-  * Prefer releases that are closest to the tagged ``year``. Tolerate files
-    tagged with release or original year.
-  * Add a :ref:`preferred` collection of settings, which allow the user to
-    specify a sorted list of preferred countries and media types, or prefer
-    releases closest to the original year for an album.
-  * Apply minor distance penalties across a range of fields to differentiate
-    between nearly identical releases: ``mediums``, ``label``, ``catalognum``,
-    ``country`` and ``albumdisambig``.
+* Templates can now refer to non-tag item fields (e.g., ``$id`` and
+  ``$album_id``).
+* :doc:`/plugins/lyrics`: Lyrics searches should now turn up more results due
+  to some fixes in dealing with special characters.

 .. _Discogs: http://discogs.com/
+.. _Beatport: http://www.beatport.com/
+

 1.1.0 (April 29, 2013)
 ----------------------
--- a/docs/conf.py
+++ b/docs/conf.py
@ -12,8 +12,8 @@ master_doc = 'index'
 project = u'beets'
 copyright = u'2012, Adrian Sampson'

-version = '1.1'
-release = '1.1.1'
+version = '1.2'
+release = '1.2.0'

 pygments_style = 'sphinx'

--- a/docs/plugins/beatport.rst
+++ b/docs/plugins/beatport.rst
@ -0,0 +1,26 @@
+Beatport Plugin
+===============
+
+The ``beatport`` plugin adds support for querying the `Beatport`_ catalogue
+during the autotagging process. This can potentially be helpful for users
+whose collection includes a lot of diverse electronic music releases, for which
+both MusicBrainz and (to a lesser degree) Discogs show no matches.
+
+.. _Beatport: http://beatport.com
+
+Installation
+------------
+
+To see matches from the ``beatport`` plugin, you first have to enable it in
+your configuration (see :doc:`/plugins/index`). Then, install the `requests`_
+library (which we need for querying the Beatport API) by typing::
+
+    pip install requests
+
+And you're done. Matches from Beatport should now show up alongside matches
+from MusicBrainz and other sources.
+
+If you have a Beatport ID or a URL for a release or track you want to tag, you
+can just enter one of the two at the "enter Id" prompt in the importer.
+
+.. _requests: http://docs.python-requests.org/en/latest/
--- a/docs/plugins/index.rst
+++ b/docs/plugins/index.rst
@ -66,13 +66,18 @@ disabled by default, but you can turn them on as described above.
   missing
   duplicates
   discogs
+   beatport

 Autotagger Extensions
 ''''''''''''''''''''''

 * :doc:`chroma`: Use acoustic fingerprinting to identify audio files with
  missing or incorrect metadata.
-* :doc:`discogs`: Search for releases in the discogs database.
+* :doc:`discogs`: Search for releases in the `Discogs`_ database.
+* :doc:`beatport`: Search for tracks and releases in the `Beatport`_ database.
+
+.. _Beatport: http://www.beatport.com/
+.. _Discogs: http://www.discogs.com/

 Metadata
 ''''''''
--- a/setup.py
+++ b/setup.py
@ -42,7 +42,7 @@ if 'sdist' in sys.argv:
    shutil.copytree(os.path.join(docdir, '_build', 'man'), mandir)

 setup(name='beets',
-      version='1.1.1',
+      version='1.2.0',
      description='music tagger and library organizer',
      author='Adrian Sampson',
      author_email='adrian@radbox.org',
--- a/test/test_mediafile.py
+++ b/test/test_mediafile.py
@ -151,7 +151,7 @@ class SafetyTest(unittest.TestCase):
        fn = os.path.join(_common.RSRC, 'brokenlink')
        os.symlink('does_not_exist', fn)
        try:
-            self.assertRaises(beets.mediafile.UnreadableFileError,
+            self.assertRaises(IOError,
                              beets.mediafile.MediaFile, fn)
        finally:
            os.unlink(fn)