@staticmethod
def _get_id(url_type, id_, id_regex):
    """Return the bare album/track ID contained in ``id_``, if any.

    ``id_`` may already be an ID, or it may be a URL that embeds one.

    :param url_type: Type of URL. Either 'album' or 'track'.
    :type url_type: str
    :param id_: Album/track ID or URL.
    :type id_: str
    :param id_regex: A dictionary holding, under 'pattern', a regular
        expression that extracts the ID from a URL (or matches a bare
        ID) and, under 'match_group', the number of the group that
        captures the ID.
    :type id_regex: dict
    :return: Album/track ID, or None when nothing could be extracted.
    :rtype: str or None
    """
    log.debug("Extracting {} ID from '{}'", url_type, id_)

    # The pattern is run through str.format so that a '{}' placeholder,
    # where present, is filled with the URL type.
    pattern = id_regex['pattern'].format(url_type)
    found = re.search(pattern, str(id_))
    if not found:
        return None

    extracted = found.group(id_regex['match_group'])
    # An empty or unmatched group counts as "no ID".
    return extracted if extracted else None
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

"""Helpers around the extraction of album/track IDs from metadata sources."""

import re

# Each ``*_id_regex`` dictionary holds a regular expression under 'pattern'
# and, under 'match_group', the number of the group that captures the ID.
# The patterns are meant to be passed through ``str.format`` with the URL
# type before use, which is why literal braces are doubled.

# Spotify IDs consist of 22 alphanumeric characters
# (zero-left-padded base62 representation of randomly generated UUID4)
spotify_id_regex = {
    'pattern': r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})',
    'match_group': 2,
}

deezer_id_regex = {
    'pattern': r'(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)',
    'match_group': 4,
}

beatport_id_regex = {
    'pattern': r'(^|beatport\.com/release/.+/)(\d+)$',
    'match_group': 2,
}

# A note on Bandcamp: There is no such thing as a Bandcamp album or artist ID,
# the URL can be used as the identifier. The Bandcamp metadata source plugin
# works that way - https://github.com/unrblt/beets-bandcamp. Bandcamp album
# URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum


def extract_discogs_id_regex(album_id):
    """Return the Discogs release ID found in ``album_id``, or None.

    :param album_id: A bare release ID (optionally written as ``r123`` or
        ``[r123]``) or a Discogs release URL. ``None`` yields ``None``;
        any other non-string value is converted with ``str()`` first.
    :return: The release ID, or None if no supported format matches.
    :rtype: int or None
    """
    # Discogs-IDs are simple integers. In order to avoid confusion with
    # other metadata plugins, we only look for very specific formats of the
    # input string:
    # - plain integer, optionally wrapped in brackets and prefixed by an
    #   'r', as this is how discogs displays the release ID on its webpage.
    # - legacy url format: discogs.com/<name of release>/release/<id>
    # - legacy url short format: discogs.com/release/<id>
    # - current url format: discogs.com/release/<id>-<title of release>
    # See #291, #4080 and #4085 for the discussions leading up to these
    # patterns.
    # Regex has been tested here https://regex101.com/r/TOu7kw/1

    if album_id is None:
        # Nothing to parse; re.search would raise TypeError on None.
        return None
    candidate = str(album_id)

    # Tried in order; the first pattern that matches wins. Every pattern
    # captures the numeric ID in the named group 'id'.
    for pattern in (
        r'^\[?r?(?P<id>\d+)\]?$',
        r'discogs\.com/release/(?P<id>\d+)-?',
        r'discogs\.com/[^/]+/release/(?P<id>\d+)',
    ):
        match = re.search(pattern, candidate)
        if match:
            return int(match.group('id'))

    return None
""" self._log.debug('Searching for release {0}', release_id) - match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id) - if not match: + + release_id = self._get_id('album', release_id, self.id_regex) + if release_id is None: self._log.debug('Not a valid Beatport release ID.') return None - release = self.client.get_release(match.group(2)) + + release = self.client.get_release(release_id) if release: return self._get_album_info(release) return None diff --git a/beetsplug/deezer.py b/beetsplug/deezer.py index 221673b50..3cbfe4b9b 100644 --- a/beetsplug/deezer.py +++ b/beetsplug/deezer.py @@ -23,6 +23,7 @@ import requests from beets import ui from beets.autotag import AlbumInfo, TrackInfo from beets.plugins import MetadataSourcePlugin, BeetsPlugin +from beets.util.id_extractors import deezer_id_regex class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): @@ -34,10 +35,7 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): album_url = 'https://api.deezer.com/album/' track_url = 'https://api.deezer.com/track/' - id_regex = { - 'pattern': r'(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)', - 'match_group': 4, - } + id_regex = deezer_id_regex def __init__(self): super().__init__() @@ -51,7 +49,7 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): :return: AlbumInfo object for album. 
:rtype: beets.autotag.hooks.AlbumInfo or None """ - deezer_id = self._get_id('album', album_id) + deezer_id = self._get_id('album', album_id, self.id_regex) if deezer_id is None: return None @@ -154,7 +152,7 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): :rtype: beets.autotag.hooks.TrackInfo or None """ if track_data is None: - deezer_id = self._get_id('track', track_id) + deezer_id = self._get_id('track', track_id, self.id_regex) if deezer_id is None: return None track_data = requests.get(self.track_url + deezer_id).json() diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 103aa1107..c8798db88 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -18,6 +18,7 @@ python3-discogs-client library. import beets.ui from beets import config +from beets.util.id_extractors import extract_discogs_id_regex from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.plugins import MetadataSourcePlugin, BeetsPlugin, get_distance import confuse @@ -218,31 +219,6 @@ class DiscogsPlugin(BeetsPlugin): # first 10 results, don't overwhelm with options return candidates[:10] - @staticmethod - def extract_release_id_regex(album_id): - """Returns the Discogs_id or None.""" - # Discogs-IDs are simple integers. In order to avoid confusion with - # other metadata plugins, we only look for very specific formats of the - # input string: - # - plain integer, optionally wrapped in brackets and prefixed by an - # 'r', as this is how discogs displays the release ID on its webpage. - # - legacy url format: discogs.com//release/ - # - current url format: discogs.com/release/- - # See #291, #4080 and #4085 for the discussions leading up to these - # patterns. 
- # Regex has been tested here https://regex101.com/r/wyLdB4/2 - - for pattern in [ - r'^\[?r?(?P<id>\d+)\]?$', - r'discogs\.com/release/(?P<id>\d+)-', - r'discogs\.com/[^/]+/release/(?P<id>\d+)', - ]: - match = re.search(pattern, album_id) - if match: - return int(match.group('id')) - - return None - def album_for_id(self, album_id): """Fetches an album by its Discogs ID and returns an AlbumInfo object or None if the album is not found. @@ -252,7 +228,7 @@ class DiscogsPlugin(BeetsPlugin): self._log.debug('Searching for release {0}', album_id) - discogs_id = self.extract_release_id_regex(album_id) + discogs_id = extract_discogs_id_regex(album_id) if not discogs_id: return None @@ -365,7 +341,7 @@ class DiscogsPlugin(BeetsPlugin): else: genre = base_genre - discogs_albumid = self.extract_release_id_regex(result.data.get('uri')) + discogs_albumid = extract_discogs_id_regex(result.data.get('uri')) # Extract information for the optional AlbumInfo fields that are # contained on nested discogs fields. diff --git a/beetsplug/spotify.py b/beetsplug/spotify.py index 393e9c50a..026b9da1c 100644 --- a/beetsplug/spotify.py +++ b/beetsplug/spotify.py @@ -32,6 +32,7 @@ from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.dbcore import types from beets.library import DateType from beets.plugins import BeetsPlugin, MetadataSourcePlugin +from beets.util.id_extractors import spotify_id_regex DEFAULT_WAITING_TIME = 5 @@ -69,12 +70,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): track_url = 'https://api.spotify.com/v1/tracks/' audio_features_url = 'https://api.spotify.com/v1/audio-features/' - # Spotify IDs consist of 22 alphanumeric characters - # (zero-left-padded base62 representation of randomly generated UUID4) - id_regex = { - 'pattern': r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})', - 'match_group': 2, - } + id_regex = spotify_id_regex spotify_audio_features = { 'acousticness': 'spotify_acousticness', @@ -216,7 +212,7 @@ class SpotifyPlugin(MetadataSourcePlugin, 
BeetsPlugin): :return: AlbumInfo object for album :rtype: beets.autotag.hooks.AlbumInfo or None """ - spotify_id = self._get_id('album', album_id) + spotify_id = self._get_id('album', album_id, self.id_regex) if spotify_id is None: return None @@ -330,7 +326,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): :rtype: beets.autotag.hooks.TrackInfo or None """ if track_data is None: - spotify_id = self._get_id('track', track_id) + spotify_id = self._get_id('track', track_id, self.id_regex) if spotify_id is None: return None track_data = self._handle_response( diff --git a/test/test_discogs.py b/test/test_discogs.py index c2aa7682c..25b9962b0 100644 --- a/test/test_discogs.py +++ b/test/test_discogs.py @@ -21,6 +21,7 @@ from test._common import Bag from test.helper import capture_log from beets import config +from beets.util.id_extractors import extract_discogs_id_regex from beetsplug.discogs import DiscogsPlugin @@ -371,7 +372,7 @@ class DGAlbumInfoTest(_common.TestCase): ('005b84a0-ecd6-39f1-b2f6-6eb48756b268', ''), ] for test_pattern, expected in test_patterns: - match = DiscogsPlugin.extract_release_id_regex(test_pattern) + match = extract_discogs_id_regex(test_pattern) if not match: match = '' self.assertEqual(match, expected) diff --git a/test/test_plugins.py b/test/test_plugins.py index 2e5b24380..c9609a357 100644 --- a/test/test_plugins.py +++ b/test/test_plugins.py @@ -26,6 +26,9 @@ from beets.library import Item from beets.dbcore import types from mediafile import MediaFile from beets.util import displayable_path, bytestring_path, syspath +from beets.plugins import MetadataSourcePlugin +from beets.util.id_extractors import spotify_id_regex, deezer_id_regex, \ + beatport_id_regex from test.test_importer import ImportHelper, AutotagStub from test.test_ui_importer import TerminalImportSessionSetup @@ -558,6 +561,69 @@ class PromptChoicesTest(TerminalImportSessionSetup, unittest.TestCase, require=ANY) +class ParseSpotifyIDTest(unittest.TestCase): + def 
class ParseSpotifyIDTest(unittest.TestCase):
    """ID extraction with the Spotify pattern via ``_get_id``."""

    ALBUM_ID = "39WqpoPgZxygo6YQjehLJJ"

    def _extract(self, text):
        # All cases use the 'album' URL type and the Spotify pattern.
        return MetadataSourcePlugin._get_id("album", text, spotify_id_regex)

    def test_parse_id_correct(self):
        # A bare ID is returned unchanged.
        self.assertEqual(self._extract(self.ALBUM_ID), self.ALBUM_ID)

    def test_parse_id_non_id_returns_none(self):
        # Text that is neither an ID nor a URL yields None.
        self.assertEqual(self._extract("blah blah"), None)

    def test_parse_id_url_finds_id(self):
        # The ID is pulled out of an album URL.
        id_url = "https://open.spotify.com/album/%s" % self.ALBUM_ID
        self.assertEqual(self._extract(id_url), self.ALBUM_ID)


class ParseDeezerIDTest(unittest.TestCase):
    """ID extraction with the Deezer pattern via ``_get_id``."""

    ALBUM_ID = "176356382"

    def _extract(self, text):
        # All cases use the 'album' URL type and the Deezer pattern.
        return MetadataSourcePlugin._get_id("album", text, deezer_id_regex)

    def test_parse_id_correct(self):
        # A bare ID is returned unchanged.
        self.assertEqual(self._extract(self.ALBUM_ID), self.ALBUM_ID)

    def test_parse_id_non_id_returns_none(self):
        # Text that is neither an ID nor a URL yields None.
        self.assertEqual(self._extract("blah blah"), None)

    def test_parse_id_url_finds_id(self):
        # The ID is pulled out of an album URL.
        id_url = "https://www.deezer.com/album/%s" % self.ALBUM_ID
        self.assertEqual(self._extract(id_url), self.ALBUM_ID)


class ParseBeatportIDTest(unittest.TestCase):
    """ID extraction with the Beatport pattern via ``_get_id``."""

    RELEASE_ID = "3089651"

    def _extract(self, text):
        # All cases use the 'album' URL type and the Beatport pattern.
        return MetadataSourcePlugin._get_id("album", text, beatport_id_regex)

    def test_parse_id_correct(self):
        # A bare ID is returned unchanged.
        self.assertEqual(self._extract(self.RELEASE_ID), self.RELEASE_ID)

    def test_parse_id_non_id_returns_none(self):
        # Text that is neither an ID nor a URL yields None.
        self.assertEqual(self._extract("blah blah"), None)

    def test_parse_id_url_finds_id(self):
        # The ID is pulled out of a release URL.
        id_url = (
            "https://www.beatport.com/release/album-name/%s" % self.RELEASE_ID
        )
        self.assertEqual(self._extract(id_url), self.RELEASE_ID)


def suite():
    """Build a test suite from every test case in this module."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromName(__name__)