diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py
index 07d1feffa..b8bdea479 100644
--- a/beets/autotag/__init__.py
+++ b/beets/autotag/__init__.py
@@ -22,7 +22,13 @@ from beets import logging
 from beets import config
 
 # Parts of external interface.
-from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch  # noqa
+from .hooks import (  # noqa
+    AlbumInfo,
+    TrackInfo,
+    AlbumMatch,
+    TrackMatch,
+    Distance,
+)
 from .match import tag_item, tag_album, Proposal  # noqa
 from .match import Recommendation  # noqa
 
diff --git a/beets/plugins.py b/beets/plugins.py
index 7c98225ca..b0752203f 100644
--- a/beets/plugins.py
+++ b/beets/plugins.py
@@ -20,6 +20,7 @@ from __future__ import division, absolute_import, print_function
 import traceback
 import re
 import inspect
+import abc
 from collections import defaultdict
 from functools import wraps
 
@@ -29,6 +30,7 @@ from beets import logging
 import mediafile
 import six
 
+
 PLUGIN_NAMESPACE = 'beetsplug'
 
 # Plugins using the Last.fm API can share the same API key.
@@ -576,3 +578,168 @@ def notify_info_yielded(event):
                 yield v
         return decorated
     return decorator
+
+
+def get_distance(config, data_source, info):
+    """Returns a ``Distance`` holding the ``source`` penalty for an album
+    or an individual track.
+
+    ``config['source_weight']`` is added as the ``source`` penalty only
+    when ``info.data_source`` equals ``data_source``; otherwise nothing is
+    added to the returned distance.
+    """
+    dist = beets.autotag.Distance()
+    if info.data_source == data_source:
+        dist.add('source', config['source_weight'].as_number())
+    return dist
+
+
+@six.add_metaclass(abc.ABCMeta)
+class MetadataSourcePlugin(object):
+    """Base class for plugins that provide importer matches from an album,
+    track, and search API.
+
+    Subclasses supply the ``id_regex``, ``data_source`` and URL attributes
+    plus ``_search_api``, ``album_for_id`` and ``track_for_id``; candidate
+    lookup and distance penalties are implemented here.
+    """
+    def __init__(self):
+        super(MetadataSourcePlugin, self).__init__()
+        self.config.add({'source_weight': 0.5})
+
+    @abc.abstractproperty
+    def id_regex(self):
+        raise NotImplementedError
+
+    @abc.abstractproperty
+    def data_source(self):
+        raise NotImplementedError
+
+    @abc.abstractproperty
+    def search_url(self):
+        raise NotImplementedError
+
+    @abc.abstractproperty
+    def album_url(self):
+        raise NotImplementedError
+
+    @abc.abstractproperty
+    def track_url(self):
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def _search_api(self, query_type, filters=None, keywords=''):
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def album_for_id(self, album_id):
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def track_for_id(self, track_id=None, track_data=None):
+        raise NotImplementedError
+
+    @staticmethod
+    def get_artist(artists, id_key='id', name_key='name'):
+        """Returns an artist string (all artists) and an artist_id (the main
+        artist) for a list of artist object dicts.
+
+        :param artists: Iterable of artist dicts returned by API.
+        :type artists: list[dict]
+        :param id_key: Key corresponding to ``artist_id`` value.
+        :type id_key: str
+        :param name_key: Keys corresponding to values to concatenate
+            for ``artist``.
+        :type name_key: str
+        :return: Normalized artist string (None if there are no names)
+            and the ID of the first artist that has a truthy one.
+        :rtype: tuple
+        """
+        artist_id = None
+        artist_names = []
+        for artist in artists:
+            if not artist_id:
+                artist_id = artist[id_key]
+            name = artist[name_key]
+            # Move articles to the front.
+            name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I)
+            artist_names.append(name)
+        artist = ', '.join(artist_names).replace(' ,', ',') or None
+        return artist, artist_id
+
+    def _get_id(self, url_type, id_):
+        """Parse an ID from its URL if necessary.
+
+        :param url_type: Type of URL. Either 'album' or 'track'.
+        :type url_type: str
+        :param id_: Album/track ID or URL.
+        :type id_: str
+        :return: Album/track ID, or None if no ID could be extracted.
+        :rtype: str or None
+        """
+        self._log.debug(
+            u"Searching {} for {} '{}'", self.data_source, url_type, id_
+        )
+        match = re.search(self.id_regex['pattern'].format(url_type), str(id_))
+        if match:
+            id_ = match.group(self.id_regex['match_group'])
+            if id_:
+                return id_
+        return None
+
+    def candidates(self, items, artist, album, va_likely):
+        """Returns a list of AlbumInfo objects for Search API results
+        matching an ``album`` and ``artist`` (if not various).
+
+        :param items: List of items comprised by an album to be matched.
+        :type items: list[beets.library.Item]
+        :param artist: The artist of the album to be matched.
+        :type artist: str
+        :param album: The name of the album to be matched.
+        :type album: str
+        :param va_likely: True if the album to be matched likely has
+            Various Artists.
+        :type va_likely: bool
+        :return: Candidate AlbumInfo objects.
+        :rtype: list[beets.autotag.hooks.AlbumInfo]
+        """
+        query_filters = {'album': album}
+        if not va_likely:
+            query_filters['artist'] = artist
+        results = self._search_api(query_type='album', filters=query_filters)
+        # The search may yield None rather than an empty list, and
+        # ``album_for_id`` may yield None; neither is a valid candidate.
+        albums = [self.album_for_id(album_id=r['id']) for r in results or []]
+        return [a for a in albums if a is not None]
+
+    def item_candidates(self, item, artist, title):
+        """Returns a list of TrackInfo objects for Search API results
+        matching ``title`` and ``artist``.
+
+        :param item: Singleton item to be matched.
+        :type item: beets.library.Item
+        :param artist: The artist of the track to be matched.
+        :type artist: str
+        :param title: The title of the track to be matched.
+        :type title: str
+        :return: Candidate TrackInfo objects.
+        :rtype: list[beets.autotag.hooks.TrackInfo]
+        """
+        results = self._search_api(
+            query_type='track', keywords=title, filters={'artist': artist}
+        )
+        # Same None handling as in ``candidates``.
+        tracks = [self.track_for_id(track_data=r) for r in results or []]
+        return [t for t in tracks if t is not None]
+
+    def album_distance(self, items, album_info, mapping):
+        """Returns the source-weight distance for ``album_info``."""
+        return get_distance(
+            data_source=self.data_source, info=album_info, config=self.config
+        )
+
+    def track_distance(self, item, track_info):
+        """Returns the source-weight distance for ``track_info``."""
+        return get_distance(
+            data_source=self.data_source, info=track_info, config=self.config
+        )
diff --git a/beetsplug/deezer.py b/beetsplug/deezer.py
new file mode 100644
index 000000000..a9a8e1b5b
--- /dev/null
+++ b/beetsplug/deezer.py
@@ -0,0 +1,231 @@
+# -*- coding: utf-8 -*-
+# This file is part of beets.
+# Copyright 2019, Rahul Ahuja.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Adds Deezer release and track search support to the autotagger
+"""
+from __future__ import absolute_import, print_function, division
+
+import collections
+
+import six
+import unidecode
+import requests
+
+from beets import ui
+from beets.autotag import AlbumInfo, TrackInfo
+from beets.plugins import MetadataSourcePlugin, BeetsPlugin
+
+
+class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
+    """Metadata source plugin that matches albums and tracks via Deezer."""
+
+    data_source = 'Deezer'
+
+    # Base URLs for the Deezer API
+    # Documentation: https://developers.deezer.com/api/
+    search_url = 'https://api.deezer.com/search/'
+    album_url = 'https://api.deezer.com/album/'
+    track_url = 'https://api.deezer.com/track/'
+
+    # Anchor the match and require a non-empty ID: an all-optional pattern
+    # matches the empty string at position 0 of a URL and yields no ID.
+    id_regex = {
+        'pattern': r'(^|deezer\.com/)([a-z]*/)?({}/)?([0-9]+)',
+        'match_group': 4,
+    }
+
+    def album_for_id(self, album_id):
+        """Fetch an album by its Deezer ID or URL and return an
+        AlbumInfo object or None if the album is not found.
+
+        :param album_id: Deezer ID or URL for the album.
+        :type album_id: str
+        :return: AlbumInfo object for album.
+        :rtype: beets.autotag.hooks.AlbumInfo or None
+        :raises beets.ui.UserError: If ``release_date`` is malformed.
+        """
+        deezer_id = self._get_id('album', album_id)
+        if deezer_id is None:
+            return None
+
+        album_data = requests.get(self.album_url + deezer_id).json()
+        # NOTE(review): assumes Deezer reports a failed lookup as a JSON
+        # object with an ``error`` key; confirm against the API docs.
+        if 'error' in album_data:
+            return None
+        artist, artist_id = self.get_artist(album_data['contributors'])
+
+        release_date = album_data['release_date']
+        date_parts = [int(part) for part in release_date.split('-')]
+        num_date_parts = len(date_parts)
+
+        if num_date_parts == 3:
+            year, month, day = date_parts
+        elif num_date_parts == 2:
+            year, month = date_parts
+            day = None
+        elif num_date_parts == 1:
+            year = date_parts[0]
+            month = day = None
+        else:
+            raise ui.UserError(
+                u"Invalid `release_date` returned "
+                u"by {} API: '{}'".format(self.data_source, release_date)
+            )
+
+        tracks_url = self.album_url + deezer_id + '/tracks'
+        tracks_data = requests.get(tracks_url).json()['data']
+        tracks = []
+        medium_totals = collections.defaultdict(int)
+        for i, track_data in enumerate(tracks_data, start=1):
+            track = self._get_track(track_data)
+            track.index = i
+            medium_totals[track.medium] += 1
+            tracks.append(track)
+        for track in tracks:
+            track.medium_total = medium_totals[track.medium]
+
+        return AlbumInfo(
+            album=album_data['title'],
+            album_id=deezer_id,
+            artist=artist,
+            artist_credit=self.get_artist([album_data['artist']])[0],
+            artist_id=artist_id,
+            tracks=tracks,
+            albumtype=album_data['record_type'],
+            va=len(album_data['contributors']) == 1
+            and artist.lower() == 'various artists',
+            year=year,
+            month=month,
+            day=day,
+            label=album_data['label'],
+            mediums=max(medium_totals.keys()),
+            data_source=self.data_source,
+            data_url=album_data['link'],
+        )
+
+    def _get_track(self, track_data):
+        """Convert a Deezer track object dict to a TrackInfo object.
+
+        :param track_data: Deezer Track object dict
+        :type track_data: dict
+        :return: TrackInfo object for track
+        :rtype: beets.autotag.hooks.TrackInfo
+        """
+        artist, artist_id = self.get_artist(
+            track_data.get('contributors', [track_data['artist']])
+        )
+        return TrackInfo(
+            title=track_data['title'],
+            track_id=track_data['id'],
+            artist=artist,
+            artist_id=artist_id,
+            length=track_data['duration'],
+            index=track_data['track_position'],
+            medium=track_data['disk_number'],
+            medium_index=track_data['track_position'],
+            data_source=self.data_source,
+            data_url=track_data['link'],
+        )
+
+    def track_for_id(self, track_id=None, track_data=None):
+        """Fetch a track by its Deezer ID or URL and return a
+        TrackInfo object or None if the track is not found.
+
+        :param track_id: (Optional) Deezer ID or URL for the track. Either
+            ``track_id`` or ``track_data`` must be provided.
+        :type track_id: str
+        :param track_data: (Optional) Simplified track object dict. May be
+            provided instead of ``track_id`` to avoid unnecessary API calls.
+        :type track_data: dict
+        :return: TrackInfo object for track
+        :rtype: beets.autotag.hooks.TrackInfo or None
+        """
+        if track_data is None:
+            deezer_id = self._get_id('track', track_id)
+            if deezer_id is None:
+                return None
+            track_data = requests.get(self.track_url + deezer_id).json()
+            if 'error' in track_data:  # same assumption as album_for_id
+                return None
+        track = self._get_track(track_data)
+
+        # Get album's tracks to set `track.index` (position on the entire
+        # release) and `track.medium_total` (total number of tracks on
+        # the track's disc).
+        album_tracks_data = requests.get(
+            self.album_url + str(track_data['album']['id']) + '/tracks'
+        ).json()['data']
+        medium_total = 0
+        for i, album_track in enumerate(album_tracks_data, start=1):
+            if album_track['disk_number'] == track.medium:
+                medium_total += 1
+                if album_track['id'] == track.track_id:
+                    track.index = i
+        track.medium_total = medium_total
+        return track
+
+    @staticmethod
+    def _construct_search_query(filters=None, keywords=''):
+        """Construct a query string with the specified filters and keywords to
+        be provided to the Deezer Search API
+        (https://developers.deezer.com/api/search).
+
+        :param filters: (Optional) Field filters; ``None`` means no filters.
+        :type filters: dict
+        :param keywords: (Optional) Query keywords to use.
+        :type keywords: str
+        :return: Query string to be provided to the Search API.
+        :rtype: str
+        """
+        query_components = [keywords] + [
+            '{}:"{}"'.format(k, v) for k, v in (filters or {}).items()
+        ]
+        query = ' '.join([q for q in query_components if q])
+        if not isinstance(query, six.text_type):
+            query = query.decode('utf8')
+        return unidecode.unidecode(query)
+
+    def _search_api(self, query_type, filters=None, keywords=''):
+        """Query the Deezer Search API for the specified ``keywords``, applying
+        the provided ``filters``.
+
+        :param query_type: The Deezer Search API method to use. Valid types
+            are: 'album', 'artist', 'history', 'playlist', 'podcast',
+            'radio', 'track', and 'user'.
+        :type query_type: str
+        :param filters: (Optional) Field filters to apply.
+        :type filters: dict
+        :param keywords: (Optional) Query keywords to use.
+        :type keywords: str
+        :return: Contents of the ``data`` field of the JSON response, or
+            None if no query could be built from the arguments.
+        :rtype: list or None
+        """
+        query = self._construct_search_query(
+            keywords=keywords, filters=filters
+        )
+        if not query:
+            return None
+        self._log.debug(u"Searching {} for '{}'", self.data_source, query)
+        url = self.search_url + query_type
+        response = requests.get(url, params={'q': query})
+        response.raise_for_status()
+        response_data = response.json().get('data', [])
+        self._log.debug(
+            u"Found {} result(s) from {} for '{}'",
+            len(response_data), self.data_source, query,
+        )
+        return response_data
diff --git a/docs/changelog.rst b/docs/changelog.rst
index da1948758..ac2f43c32 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -66,6 +66,9 @@ New features:
 * The 'data_source' field is now also applied as an album-level flexible
   attribute during imports, allowing for more refined album level searches.
   :bug:`3350` :bug:`1693`
+* :doc:`/plugins/deezer`: Added Deezer plugin as an import metadata provider:
+  you can now match tracks and albums using the `Deezer`_ database.
+  Thanks to :user:`rhlahuja`.
 
 Fixes:
 
@@ -127,6 +130,10 @@ For plugin developers:
   longer separate R128 backend instances. Instead the targetlevel is passed to
   ``compute_album_gain`` and ``compute_track_gain``.
   :bug:`3065`
+* The ``beets.plugins.MetadataSourcePlugin`` base class has been added to
+  simplify development of plugins which query album, track, and search
+  APIs to provide metadata matches for the importer. Refer to the Spotify and
+  Deezer plugins for examples of using this template class.
 
 For packagers:
 
@@ -146,6 +153,7 @@ For packagers:
 .. _MediaFile: https://github.com/beetbox/mediafile
 .. _Confuse: https://github.com/beetbox/confuse
 .. _works: https://musicbrainz.org/doc/Work
+.. _Deezer: https://www.deezer.com
 
 
 1.4.9 (May 30, 2019)
diff --git a/docs/plugins/deezer.rst b/docs/plugins/deezer.rst
new file mode 100644
index 000000000..cb964c612
--- /dev/null
+++ b/docs/plugins/deezer.rst
@@ -0,0 +1,32 @@
+Deezer Plugin
+==============
+
+The ``deezer`` plugin provides metadata matches for the importer using the
+`Deezer`_ `Album`_ and `Track`_ APIs.
+
+.. _Deezer: https://www.deezer.com
+.. _Album: https://developers.deezer.com/api/album
+.. _Track: https://developers.deezer.com/api/track
+
+Basic Usage
+-----------
+First, enable the ``deezer`` plugin (see :ref:`using-plugins`).
+
+You can enter the URL for an album or song on Deezer at the ``enter Id``
+prompt during import::
+
+    Enter search, enter Id, aBort, eDit, edit Candidates, plaY? i
+    Enter release ID: https://www.deezer.com/en/album/572261
+
+Configuration
+-------------
+Put these options in config.yaml under the ``deezer:`` section:
+
+- **source_weight**: Penalty applied to Deezer matches during import. Set to
+  0.0 to disable.
+  Default: ``0.5``.
+
+Here's an example::
+
+    deezer:
+        source_weight: 0.7
diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst
index 4f3e6fbff..b9f512b1f 100644
--- a/docs/plugins/index.rst
+++ b/docs/plugins/index.rst
@@ -47,6 +47,7 @@ like this::
    bucket
    chroma
    convert
+   deezer
    discogs
    duplicates
    edit
@@ -105,10 +106,14 @@ Autotagger Extensions
 * :doc:`chroma`: Use acoustic fingerprinting to identify audio files with
   missing or incorrect metadata.
 * :doc:`discogs`: Search for releases in the `Discogs`_ database.
+* :doc:`spotify`: Search for releases in the `Spotify`_ database.
+* :doc:`deezer`: Search for releases in the `Deezer`_ database.
 * :doc:`fromfilename`: Guess metadata for untagged tracks from their
   filenames.
 
 .. _Discogs: https://www.discogs.com/
+.. _Spotify: https://www.spotify.com
+.. _Deezer: https://www.deezer.com/
 
 Metadata
 --------