Merge branch 'master' into distance-refactor

Conflicts:
	docs/changelog.rst
This commit is contained in:
Tai Lee 2013-06-06 11:09:36 +10:00
commit 898254ed6a
10 changed files with 436 additions and 64 deletions

View file

@ -12,7 +12,7 @@
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
__version__ = '1.1.1'
__version__ = '1.2.0'
__author__ = 'Adrian Sampson <adrian@radbox.org>'
import beets.library

View file

@ -321,7 +321,7 @@ class Item(object):
try:
f = MediaFile(syspath(read_path))
except (OSError, IOError) as exc:
raise util.FilesystemError(exc, 'read', (self.path,),
raise util.FilesystemError(exc, 'read', (read_path,),
traceback.format_exc())
for key in ITEM_KEYS_META:
@ -399,7 +399,7 @@ class Item(object):
# Build the mapping for substitution in the template,
# beginning with the values from the database.
mapping = {}
for key in ITEM_KEYS_META:
for key in ITEM_KEYS:
# Get the values from either the item or its album.
if key in ALBUM_KEYS_ITEM and album is not None:
# From album.
@ -411,8 +411,10 @@ class Item(object):
value = format_for_path(value, key, pathmod)
mapping[key] = value
# Additional fields in non-sanitized case.
if not sanitize:
# Include the path if we're not sanitizing to construct a path.
if sanitize:
del mapping['path']
else:
mapping['path'] = displayable_path(self.path)
# Use the album artist if the track artist is not set and

View file

@ -59,10 +59,6 @@ log = logging.getLogger('beets')
class UnreadableFileError(Exception):
pass
class FileIOError(UnreadableFileError, IOError):
def __init__(self, exc):
IOError.__init__(self, exc.errno, exc.strerror, exc.filename)
# Raised for files that don't seem to have a type MediaFile supports.
class FileTypeError(UnreadableFileError):
pass
@ -861,12 +857,15 @@ class MediaFile(object):
self.path = path
unreadable_exc = (
mutagen.mp3.HeaderNotFoundError,
mutagen.flac.FLACNoHeaderError,
mutagen.mp3.error,
mutagen.id3.error,
mutagen.flac.error,
mutagen.monkeysaudio.MonkeysAudioHeaderError,
mutagen.mp4.MP4StreamInfoError,
mutagen.oggvorbis.OggVorbisHeaderError,
mutagen.asf.ASFHeaderError,
mutagen.mp4.error,
mutagen.oggvorbis.error,
mutagen.ogg.error,
mutagen.asf.error,
mutagen.apev2.error,
)
try:
self.mgfile = mutagen.File(path)
@ -874,7 +873,13 @@ class MediaFile(object):
log.debug(u'header parsing failed: {0}'.format(unicode(exc)))
raise UnreadableFileError('Mutagen could not read file')
except IOError as exc:
raise FileIOError(exc)
if type(exc) == IOError:
# This is a base IOError, not a subclass from Mutagen or
# anywhere else.
raise
else:
log.debug(traceback.format_exc())
raise UnreadableFileError('Mutagen raised an exception')
except Exception as exc:
# Hide bugs in Mutagen.
log.debug(traceback.format_exc())

299
beetsplug/beatport.py Normal file
View file

@ -0,0 +1,299 @@
# This file is part of beets.
# Copyright 2013, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Adds Beatport release and track search support to the autotagger
"""
import logging
import re
from datetime import datetime, timedelta
import requests
from beets import config
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.plugins import BeetsPlugin
log = logging.getLogger('beets')
class BeatportAPIError(Exception):
    """Raised when a request to the Beatport API cannot be completed or
    comes back with an error response.
    """
class BeatportObject(object):
    """Common base for objects built from a Beatport API result dict.

    ``id`` and ``name`` are required keys. ``releaseDate``, ``artists``
    and ``genres`` are optional: the matching attribute is only set when
    the key is present in ``data``.
    """

    def __init__(self, data):
        self.beatport_id = data['id']
        self.name = unicode(data['name'])

        if 'releaseDate' in data:
            # Parsed with the ``%Y-%m-%d`` format into a datetime.
            raw_date = data['releaseDate']
            self.release_date = datetime.strptime(raw_date, '%Y-%m-%d')

        if 'artists' in data:
            # Each artist is kept as an ``(id, name)`` pair.
            credited = []
            for entry in data['artists']:
                credited.append((entry['id'], unicode(entry['name'])))
            self.artists = credited

        if 'genres' in data:
            # Only the genre names are kept.
            genre_names = []
            for entry in data['genres']:
                genre_names.append(unicode(entry['name']))
            self.genres = genre_names
class BeatportAPI(object):
    """Thin wrapper around HTTP GET requests to the Beatport API."""

    API_BASE = 'http://api.beatport.com/'

    @classmethod
    def get(cls, endpoint, **kwargs):
        """Request ``endpoint`` (relative to ``API_BASE``) and return the
        ``results`` member of the decoded JSON reply.

        Keyword arguments are passed to ``requests.get`` as ``params``.
        Raises ``BeatportAPIError`` if the request itself fails or if the
        response object evaluates as false.
        """
        try:
            response = requests.get(cls.API_BASE + endpoint, params=kwargs)
        except Exception as e:
            # Format the exception itself instead of the deprecated
            # ``e.message`` attribute, which is not guaranteed to exist
            # and would otherwise mask the original failure.
            raise BeatportAPIError("Error connecting to Beatport API: {}"
                                   .format(e))
        # NOTE(review): relies on requests making a Response falsy for
        # error status codes -- confirm against the requests docs.
        if not response:
            raise BeatportAPIError(
                "Error {0.status_code} for '{0.request.path_url}'"
                .format(response))
        return response.json()['results']
class BeatportSearch(object):
    """Runs a Beatport catalog search on construction and stores the
    wrapped hits in ``results``.
    """

    query = None
    release_type = None

    def __unicode__(self):
        template = u"<BeatportSearch for {} \"{}\" with {} results>"
        return template.format(self.release_type, self.query,
                               len(self.results))

    def __init__(self, query, release_type='release', details=True):
        """Search for ``query``, restricted to ``release_type`` (either
        ``'release'`` or ``'track'``). Up to five results are requested.

        When ``details`` is true, the track list of every release hit is
        fetched as well. Hits are ignored for any other ``release_type``.
        """
        self.results = []
        self.query = query
        self.release_type = release_type

        facet = 'fieldType:{}'.format(release_type)
        found = BeatportAPI.get('catalog/3/search', query=query,
                                facets=[facet], perPage=5)

        for entry in found:
            if release_type == 'track':
                self.results.append(BeatportTrack(entry))
            elif release_type == 'release':
                hit = BeatportRelease(entry)
                if details:
                    hit.get_tracks()
                self.results.append(hit)
class BeatportRelease(BeatportObject):
    """A Beatport release built from an API result dict.

    ``catalog_number``, ``label_name``, ``category`` and ``url`` are only
    set when the corresponding key is present in the data. ``tracks`` is
    only set by ``from_id`` or ``get_tracks``.
    """

    API_ENDPOINT = 'catalog/3/beatport/release'

    def __unicode__(self):
        # Releases crediting four or more artists are shown as compilations.
        if len(self.artists) < 4:
            artist_str = ", ".join(x[1] for x in self.artists)
        else:
            artist_str = "Various Artists"
        return u"<BeatportRelease: {} - {} ({})>".format(artist_str, self.name,
                                                         self.catalog_number)

    def __init__(self, data):
        BeatportObject.__init__(self, data)
        if 'catalogNumber' in data:
            self.catalog_number = data['catalogNumber']
        if 'label' in data:
            self.label_name = data['label']['name']
        if 'category' in data:
            self.category = data['category']
        if 'slug' in data:
            self.url = "http://beatport.com/release/{}/{}".format(
                data['slug'], data['id'])

    @classmethod
    def from_id(cls, beatport_id):
        """Fetch the release with the given Beatport ID, including its
        tracks, with a single API request.
        """
        response = BeatportAPI.get(cls.API_ENDPOINT, id=beatport_id)
        # Build via ``cls`` so that subclasses get instances of themselves.
        release = cls(response['release'])
        release.tracks = [BeatportTrack(x) for x in response['tracks']]
        return release

    def get_tracks(self):
        """Fetch this release's tracks and store them in ``self.tracks``."""
        response = BeatportAPI.get(self.API_ENDPOINT, id=self.beatport_id)
        self.tracks = [BeatportTrack(x) for x in response['tracks']]
class BeatportTrack(BeatportObject):
    """A Beatport track built from an API result dict.

    ``title``, ``mix_name``, ``length`` and ``url`` are only set when the
    corresponding key is present in the data.
    """

    API_ENDPOINT = 'catalog/3/beatport/track'

    def __unicode__(self):
        artist_str = ", ".join(x[1] for x in self.artists)
        return u"<BeatportTrack: {} - {} ({})>".format(artist_str, self.name,
                                                       self.mix_name)

    def __init__(self, data):
        BeatportObject.__init__(self, data)
        if 'title' in data:
            self.title = unicode(data['title'])
        if 'mixName' in data:
            self.mix_name = unicode(data['mixName'])
        # The duration is read from ``lengthMs``, so that is the key whose
        # presence has to be checked (testing for ``length`` could raise a
        # KeyError or silently skip a usable duration).
        if 'lengthMs' in data:
            self.length = timedelta(milliseconds=data['lengthMs'])
        if 'slug' in data:
            self.url = "http://beatport.com/track/{}/{}".format(
                data['slug'], data['id'])

    @classmethod
    def from_id(cls, beatport_id):
        """Fetch the track with the given Beatport ID."""
        response = BeatportAPI.get(cls.API_ENDPOINT, id=beatport_id)
        # Build via ``cls`` so that subclasses get instances of themselves.
        return cls(response['track'])
class BeatportPlugin(BeetsPlugin):
    """Plugin that offers Beatport releases and tracks as autotagger
    candidates.
    """

    def __init__(self):
        super(BeatportPlugin, self).__init__()
        self.config.add({
            'source_weight': 0.5,
        })

    def album_distance(self, items, album_info, mapping):
        """Returns the beatport source weight and the maximum source weight
        for albums.
        """
        if album_info.data_source != 'Beatport':
            return 0.0, 0.0
        plugin_weight = self.config['source_weight'].as_number()
        max_weight = config['match']['weight']['source'].as_number()
        return plugin_weight * max_weight, max_weight

    def track_distance(self, item, info):
        """Returns the beatport source weight and the maximum source weight
        for individual tracks.
        """
        # NOTE(review): unlike album_distance, this does not check where
        # ``info`` came from -- confirm that this is intended.
        plugin_weight = self.config['source_weight'].as_number()
        max_weight = config['match']['weight']['source'].as_number()
        return plugin_weight * max_weight, max_weight

    def candidates(self, items, artist, release, va_likely):
        """Returns a list of AlbumInfo objects for beatport search results
        matching release and artist (if not various).
        """
        if va_likely:
            query = release
        else:
            query = '%s %s' % (artist, release)
        try:
            return self._get_releases(query)
        except BeatportAPIError as e:
            # API failures only cost us this source's candidates.
            log.debug('Beatport API Error: %s (query: %s)', e, query)
            return []

    def item_candidates(self, item, artist, title):
        """Returns a list of TrackInfo objects for beatport search results
        matching title and artist.
        """
        query = '%s %s' % (artist, title)
        try:
            return self._get_tracks(query)
        except BeatportAPIError as e:
            log.debug('Beatport API Error: %s (query: %s)', e, query)
            return []

    def album_for_id(self, release_id):
        """Fetches a release by its Beatport ID and returns an AlbumInfo
        object, or None if ``release_id`` is neither a bare numeric ID nor
        a Beatport release URL ending in one.
        """
        # Pass the ID as a logging argument; calling str() on it could fail
        # for non-ASCII unicode input.
        log.debug('Searching Beatport for release %s', release_id)
        match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id)
        if not match:
            return None
        release = BeatportRelease.from_id(match.group(2))
        album = self._get_album_info(release)
        return album

    def track_for_id(self, track_id):
        """Fetches a track by its Beatport ID and returns a TrackInfo
        object, or None if ``track_id`` is neither a bare numeric ID nor a
        Beatport track URL ending in one.
        """
        log.debug('Searching Beatport for track %s', track_id)
        match = re.search(r'(^|beatport\.com/track/.+/)(\d+)$', track_id)
        if not match:
            return None
        bp_track = BeatportTrack.from_id(match.group(2))
        track = self._get_track_info(bp_track)
        return track

    def _get_releases(self, query):
        """Returns a list of AlbumInfo objects for a beatport search query.
        """
        # Strip non-word characters from query. Things like "!" and "-" can
        # cause a query to return no results, even if they match the artist or
        # album title. Use `re.UNICODE` flag to avoid stripping non-english
        # word characters.
        # The flags must be given by keyword: the fourth positional
        # argument of `re.sub` is `count`, not `flags`.
        query = re.sub(r'\W+', ' ', query, flags=re.UNICODE)

        # Strip medium information from query, Things like "CD1" and "disc 1"
        # can also negate an otherwise positive result.
        query = re.sub(r'\b(CD|disc)\s*\d+', '', query, flags=re.I)
        albums = [self._get_album_info(x)
                  for x in BeatportSearch(query).results]
        return albums

    def _get_album_info(self, release):
        """Returns an AlbumInfo object for a Beatport Release object.
        """
        # More than three credited artists marks the release as a
        # various-artists compilation.
        va = len(release.artists) > 3
        artist, artist_id = self._get_artist(release.artists)
        if va:
            artist = u"Various Artists"
        # Track indices start at 1.
        tracks = [self._get_track_info(x, index=idx)
                  for idx, x in enumerate(release.tracks, 1)]

        return AlbumInfo(album=release.name, album_id=release.beatport_id,
                         artist=artist, artist_id=artist_id, tracks=tracks,
                         albumtype=release.category, va=va,
                         year=release.release_date.year,
                         month=release.release_date.month,
                         day=release.release_date.day,
                         label=release.label_name,
                         catalognum=release.catalog_number, media=u'Digital',
                         data_source=u'Beatport', data_url=release.url)

    def _get_track_info(self, track, index=None):
        """Returns a TrackInfo object for a Beatport Track object.
        """
        title = track.name
        # Any mix name other than "Original Mix" is appended to the title.
        if track.mix_name != u"Original Mix":
            title += u" ({})".format(track.mix_name)
        artist, artist_id = self._get_artist(track.artists)
        length = track.length.total_seconds()

        return TrackInfo(title=title, track_id=track.beatport_id,
                         artist=artist, artist_id=artist_id,
                         length=length, index=index)

    def _get_artist(self, artists):
        """Returns an artist string (all artists) and an artist_id (the main
        artist) for a list of Beatport release or track artists.
        """
        artist_id = None
        bits = []
        for artist in artists:
            # The first artist with a truthy ID is taken as the main artist.
            if not artist_id:
                artist_id = artist[0]
            name = artist[1]
            # Strip disambiguation number.
            name = re.sub(r' \(\d+\)$', '', name)
            # Move articles to the front.
            name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I)
            bits.append(name)
        # An empty artist list yields None rather than an empty string.
        artist = ', '.join(bits).replace(' ,', ',') or None
        return artist, artist_id

    def _get_tracks(self, query):
        """Returns a list of TrackInfo objects for a Beatport query.
        """
        bp_tracks = BeatportSearch(query, release_type='track').results
        tracks = [self._get_track_info(x) for x in bp_tracks]
        return tracks

View file

@ -1,15 +1,35 @@
Changelog
=========
1.1.1 (in development)
1.2.0 (in development)
----------------------
There's a *lot* of new stuff in this release: new data sources for the
autotagger, new plugins to look for problems in your library, tracking the
date that you acquired new music, an awesome new syntax for doing queries over
numeric fields, support for ALAC files, and major enhancements to the
importer's UI and distance calculations. A special thanks goes out to all the
contributors who helped make this release awesome.
For the first time, beets can now tag your music using additional **data
sources** to augment the matches from MusicBrainz. When you enable either of
these plugins, the importer will start showing you new kinds of matches:
* New :doc:`/plugins/discogs`: Get matches from the `Discogs`_ database.
Thanks to Artem Ponomarenko and Tai Lee.
* New :doc:`/plugins/beatport`: Get matches from the `Beatport`_ database.
Thanks to Johannes Baiter.
We also have two other new plugins that can scan your library to check for
common problems, both by Pedro Silva:
* New :doc:`/plugins/duplicates`: Find tracks or albums in your
library that are **duplicated**. Thanks to Pedro Silva.
library that are **duplicated**.
* New :doc:`/plugins/missing`: Find albums in your library that are **missing
tracks**. Thanks once more to Pedro Silva.
* New :doc:`/plugins/discogs`: Extends the autotagger to include matches from
the `Discogs`_ database. Thanks to Artem Ponomarenko and Tai Lee.
tracks**.
There are also three more big features added to beets core:
* Your library now keeps track of **when music was added** to it. The new
``added`` field is a timestamp reflecting when each item and album was
imported and the new ``%time{}`` template function lets you format this
@ -20,6 +40,51 @@ Changelog
``bitrate:128000..``. See :ref:`numericquery`. Thanks to Michael Schuerig.
* **ALAC files** are now marked as ALAC instead of being conflated with AAC
audio. Thanks to Simon Luijk.
In addition, the importer saw various UI enhancements, thanks to Tai Lee:
* Display data source URL for matches from the new data source plugins. This
should make it easier to migrate data from Discogs or Beatport into
MusicBrainz.
* The top 3 distance penalties are now displayed on the release listing,
and all album and track penalties are now displayed on the track changes
list. This should make it clear exactly which metadata is contributing to a
low similarity score.
* Display album disambiguation and disc titles in the track listing, when
available.
* More consistent format and colorization of album and track metadata. Red
for an actual difference, yellow to indicate that a distance penalty is being
applied, and light gray for no-penalty or disambiguation data.
* Track changes are highlighted in yellow when they indicate a change in
format to or from the style of :ref:`per_disc_numbering`. (As before, no
penalty is applied because the track number is still "correct", just in a
different format.)
* Sort missing and unmatched tracks by index and title and group them
together for better readability.
* Don't show potential matches that have specific penalties applied, as
configured by the :ref:`ignored` setting.
The calculation of the similarity score for autotagger matches was also
improved, again thanks to Tai Lee. These changes, in general, help deal with
the new metadata sources and help disambiguate between similar releases in the
same MusicBrainz release group:
* Strongly prefer releases with a matching MusicBrainz album ID. This helps
beets re-identify the same release when re-importing existing files.
* Prefer releases that are closest to the tagged ``year``. Tolerate files
tagged with release or original year.
* Add a :ref:`preferred` collection of settings, which allow the user to
specify a sorted list of preferred countries and media types, or prefer
releases closest to the original year for an album.
* It is now possible to configure a :ref:`max_rec` for any field that is used
to calculate the similarity score. The recommendation will be downgraded if
a penalty is being applied to the specified field.
* Apply minor penalties across a range of fields to differentiate between
nearly identical releases: ``disctotal``, ``label``, ``catalognum``,
``country`` and ``albumdisambig``.
As usual, there were also lots of other great little enhancements:
* :doc:`/plugins/random`: A new ``-e`` option gives an equal chance to each
artist in your collection to avoid biasing random samples to prolific
artists. Thanks to Georges Dubus.
@ -31,8 +96,6 @@ Changelog
Duailibe.
* The importer output now shows the number of audio files in each album.
Thanks to jayme on GitHub.
* :doc:`/plugins/lyrics`: Lyrics searches should now turn up more results due
to some fixes in dealing with special characters.
* Plugins can now provide fields for both Album and Item templates, thanks
to Pedro Silva. Accordingly, the :doc:`/plugins/inline` can also now define
album fields. For consistency, the ``pathfields`` configuration section has
@ -44,6 +107,9 @@ Changelog
Johannes Baiter.
* The :ref:`fields-cmd` command shows template fields provided by plugins.
Thanks again to Pedro Silva.
And a batch of fixes:
* Album art filenames now respect the :ref:`replace` configuration.
* Friendly error messages are now printed when trying to read or write files
that go missing.
@ -51,45 +117,14 @@ Changelog
``beet modify artpath=...`` works). Thanks to Lucas Duailibe.
* :doc:`/plugins/zero`: Fix a crash when nulling out a field that contains
None.
* Various UI enhancements to the importer due to Tai Lee:
* Display data source URL and source name in album disambiguation for
non-MusicBrainz matches. This should make it easier for people who want to
import and correct data from other sources into MusicBrainz.
* The top 3 distance penalties are now displayed on the release listing,
and all album and track penalties are now displayed on the track changes
list. This should make it clear exactly which metadata is contributing to a
low similarity score.
* Display album disambiguation and disc titles in the track listing, when
available.
* More consistent format and colorization of album and track metadata. Red
for actual differences, yellow to indicate that a penalty is being applied,
and light gray for no-penalty supplementary data.
* Track changes highlighted in light gray indicate a change in format to or
from :ref:`per_disc_numbering`. No penalty is applied because the track
number is still "correct", just in a different format.
* Sort missing and unmatched tracks by index and title and group them
together for better readability.
* Don't show potential matches that have specific penalties applied, as
configured by the :ref:`ignored` setting.
* Improve calculation of similarity score and recommendation:
* It is now possible to configure a :ref:`max_rec` for any field that is used
to calculate the similarity score. The recommendation will be downgraded if
a penalty is being applied to the specified field.
* Strongly prefer releases with a matching MusicBrainz album ID. This helps
beets re-identify the same release when re-importing existing files.
* Prefer releases that are closest to the tagged ``year``. Tolerate files
tagged with release or original year.
* Add a :ref:`preferred` collection of settings, which allow the user to
specify a sorted list of preferred countries and media types, or prefer
releases closest to the original year for an album.
* Apply minor distance penalties across a range of fields to differentiate
between nearly identical releases: ``mediums``, ``label``, ``catalognum``,
``country`` and ``albumdisambig``.
* Templates can now refer to non-tag item fields (e.g., ``$id`` and
``$album_id``).
* :doc:`/plugins/lyrics`: Lyrics searches should now turn up more results due
to some fixes in dealing with special characters.
.. _Discogs: http://discogs.com/
.. _Beatport: http://www.beatport.com/
1.1.0 (April 29, 2013)
----------------------

View file

@ -12,8 +12,8 @@ master_doc = 'index'
project = u'beets'
copyright = u'2012, Adrian Sampson'
version = '1.1'
release = '1.1.1'
version = '1.2'
release = '1.2.0'
pygments_style = 'sphinx'

26
docs/plugins/beatport.rst Normal file
View file

@ -0,0 +1,26 @@
Beatport Plugin
===============
The ``beatport`` plugin adds support for querying the `Beatport`_ catalogue
during the autotagging process. This can potentially be helpful for users
whose collection includes a lot of diverse electronic music releases, for which
both MusicBrainz and (to a lesser degree) Discogs show no matches.
.. _Beatport: http://beatport.com
Installation
------------
To see matches from the ``beatport`` plugin, you first have to enable it in
your configuration (see :doc:`/plugins/index`). Then, install the `requests`_
library (which we need for querying the Beatport API) by typing::
pip install requests
And you're done. Matches from Beatport should now show up alongside matches
from MusicBrainz and other sources.
If you have a Beatport ID or a URL for a release or track you want to tag, you
can just enter one of the two at the "enter Id" prompt in the importer.
.. _requests: http://docs.python-requests.org/en/latest/

View file

@ -66,13 +66,18 @@ disabled by default, but you can turn them on as described above.
missing
duplicates
discogs
beatport
Autotagger Extensions
''''''''''''''''''''''
* :doc:`chroma`: Use acoustic fingerprinting to identify audio files with
missing or incorrect metadata.
* :doc:`discogs`: Search for releases in the discogs database.
* :doc:`discogs`: Search for releases in the `Discogs`_ database.
* :doc:`beatport`: Search for tracks and releases in the `Beatport`_ database.
.. _Beatport: http://www.beatport.com/
.. _Discogs: http://www.discogs.com/
Metadata
''''''''

View file

@ -42,7 +42,7 @@ if 'sdist' in sys.argv:
shutil.copytree(os.path.join(docdir, '_build', 'man'), mandir)
setup(name='beets',
version='1.1.1',
version='1.2.0',
description='music tagger and library organizer',
author='Adrian Sampson',
author_email='adrian@radbox.org',

View file

@ -151,7 +151,7 @@ class SafetyTest(unittest.TestCase):
fn = os.path.join(_common.RSRC, 'brokenlink')
os.symlink('does_not_exist', fn)
try:
self.assertRaises(beets.mediafile.UnreadableFileError,
self.assertRaises(IOError,
beets.mediafile.MediaFile, fn)
finally:
os.unlink(fn)