From b520981c9ce73e34f6fd13b790986502b8c1ef74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Thu, 8 May 2025 04:09:59 +0100 Subject: [PATCH] plugins: restructure id extraction --- beets/plugins.py | 28 ++----------- beets/util/id_extractors.py | 68 ++++++++++---------------------- beetsplug/beatport.py | 5 +-- beetsplug/deezer.py | 44 +++++++++------------ beetsplug/discogs.py | 6 +-- beetsplug/musicbrainz.py | 49 +++++------------------ beetsplug/spotify.py | 27 ++++++------- test/plugins/test_discogs.py | 32 --------------- test/plugins/test_musicbrainz.py | 18 --------- test/test_plugins.py | 64 ------------------------------ test/util/test_id_extractors.py | 34 ++++++++++++++++ 11 files changed, 103 insertions(+), 272 deletions(-) create mode 100644 test/util/test_id_extractors.py diff --git a/beets/plugins.py b/beets/plugins.py index 8751e11ad..26e70ed72 100644 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -37,6 +37,7 @@ import mediafile import beets from beets import logging +from beets.util.id_extractors import extract_release_id if sys.version_info >= (3, 10): from typing import ParamSpec @@ -56,7 +57,6 @@ if TYPE_CHECKING: from beets.importer import ImportSession, ImportTask from beets.library import Album, Item, Library from beets.ui import Subcommand - from beets.util.id_extractors import RegexDict # TYPE_CHECKING guard is needed for any derived type # which uses an import from `beets.library` and `beets.imported` @@ -778,11 +778,6 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta): super().__init__() self.config.add({"source_weight": 0.5}) - @property - @abc.abstractmethod - def id_regex(self) -> RegexDict: - raise NotImplementedError - @property @abc.abstractmethod def data_source(self) -> str: @@ -872,24 +867,9 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta): return artist_string, artist_id - @staticmethod - def _get_id(url_type: str, id_: str, id_regex: RegexDict) -> str | None: - """Parse an ID from its URL if necessary. - - :param url_type: Type of URL. Either 'album' or 'track'. - :param id_: Album/track ID or URL. - :param id_regex: A dictionary containing a regular expression - extracting an ID from an URL (if it's not an ID already) in - 'pattern' and the number of the match group in 'match_group'. - :return: Album/track ID. - """ - log.debug("Extracting {} ID from '{}'", url_type, id_) - match = re.search(id_regex["pattern"].format(url_type), str(id_)) - if match: - id_ = match.group(id_regex["match_group"]) - if id_: - return id_ - return None + def _get_id(self, id_string: str) -> str | None: + """Parse release ID from the given ID string.""" + return extract_release_id(self.data_source.lower(), id_string) def candidates( self, diff --git a/beets/util/id_extractors.py b/beets/util/id_extractors.py index 4dbab087d..bbe2c32a4 100644 --- a/beets/util/id_extractors.py +++ b/beets/util/id_extractors.py @@ -14,47 +14,15 @@ """Helpers around the extraction of album/track ID's from metadata sources.""" +from __future__ import annotations + import re -from typing import TypedDict - -class RegexDict(TypedDict): - """A dictionary containing a regex pattern and the number of the - match group. - """ - - pattern: str - match_group: int - - -# Spotify IDs consist of 22 alphanumeric characters -# (zero-left-padded base62 representation of randomly generated UUID4) -spotify_id_regex: RegexDict = { - "pattern": r"(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})", - "match_group": 2, -} - -deezer_id_regex: RegexDict = { - "pattern": r"(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)", - "match_group": 4, -} - -beatport_id_regex: RegexDict = { - "pattern": r"(^|beatport\.com/release/.+/)(\d+)$", - "match_group": 2, -} - -# A note on Bandcamp: There is no such thing as a Bandcamp album or artist ID, -# the URL can be used as the identifier. The Bandcamp metadata source plugin -# works that way - https://github.com/snejus/beetcamp. Bandcamp album -# URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum - - -def extract_discogs_id_regex(album_id): - """Returns the Discogs_id or None.""" - # Discogs-IDs are simple integers. In order to avoid confusion with - # other metadata plugins, we only look for very specific formats of the - # input string: +PATTERN_BY_SOURCE = { + "spotify": re.compile(r"(?:^|open\.spotify\.com/[^/]+/)([0-9A-Za-z]{22})"), + "deezer": re.compile(r"(?:^|deezer\.com/)(?:[a-z]*/)?(?:[^/]+/)?(\d+)"), + "beatport": re.compile(r"(?:^|beatport\.com/release/.+/)(\d+)$"), + "musicbrainz": re.compile(r"(\w{8}(?:-\w{4}){3}-\w{12})"), # - plain integer, optionally wrapped in brackets and prefixed by an # 'r', as this is how discogs displays the release ID on its webpage. # - legacy url format: discogs.com//release/ @@ -62,15 +30,19 @@ def extract_discogs_id_regex(album_id): # - current url format: discogs.com/release/- # See #291, #4080 and #4085 for the discussions leading up to these # patterns. - # Regex has been tested here https://regex101.com/r/TOu7kw/1 + "discogs": re.compile( + r"(?:^|\[?r|discogs\.com/(?:[^/]+/)?release/)(\d+)\b" + ), + # There is no such thing as a Bandcamp album or artist ID, the URL can be + # used as the identifier. The Bandcamp metadata source plugin works that way + # - https://github.com/snejus/beetcamp. Bandcamp album URLs usually look + # like: https://nameofartist.bandcamp.com/album/nameofalbum + "bandcamp": re.compile(r"(.+)"), + "tidal": re.compile(r"([^/]+)$"), +} - for pattern in [ - r"^\[?r?(?P\d+)\]?$", - r"discogs\.com/release/(?P\d+)-?", - r"discogs\.com/[^/]+/release/(?P\d+)", - ]: - match = re.search(pattern, album_id) - if match: - return int(match.group("id")) +def extract_release_id(source: str, id_: str) -> str | None: + if m := PATTERN_BY_SOURCE[source].search(str(id_)): + return m[1] return None diff --git a/beetsplug/beatport.py b/beetsplug/beatport.py index fab720c2b..d98fab722 100644 --- a/beetsplug/beatport.py +++ b/beetsplug/beatport.py @@ -30,7 +30,6 @@ import beets import beets.ui from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance -from beets.util.id_extractors import beatport_id_regex AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing) USER_AGENT = f"beets/{beets.__version__} +https://beets.io/" @@ -282,7 +281,6 @@ class BeatportTrack(BeatportObject): class BeatportPlugin(BeetsPlugin): data_source = "Beatport" - id_regex = beatport_id_regex def __init__(self): super().__init__() @@ -394,8 +392,7 @@ class BeatportPlugin(BeetsPlugin): """ self._log.debug("Searching for release {0}", release_id) - release_id = self._get_id("album", release_id, self.id_regex) - if release_id is None: + if not (release_id := self._get_id(release_id)): self._log.debug("Not a valid Beatport release ID.") return None diff --git a/beetsplug/deezer.py b/beetsplug/deezer.py index 25815e8d3..2e5d8473a 100644 --- a/beetsplug/deezer.py +++ b/beetsplug/deezer.py @@ -14,6 +14,8 @@ """Adds Deezer release and track search support to the autotagger""" +from __future__ import annotations + import collections import time @@ -25,7 +27,6 @@ from beets.autotag import AlbumInfo, TrackInfo from beets.dbcore import types from beets.library import DateType from beets.plugins import BeetsPlugin, MetadataSourcePlugin -from beets.util.id_extractors import deezer_id_regex class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): @@ -43,8 +44,6 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): album_url = "https://api.deezer.com/album/" track_url = "https://api.deezer.com/track/" - id_regex = deezer_id_regex - def __init__(self): super().__init__() @@ -75,21 +74,15 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): return None return data - def album_for_id(self, album_id): - """Fetch an album by its Deezer ID or URL and return an - AlbumInfo object or None if the album is not found. + def album_for_id(self, album_id: str) -> AlbumInfo | None: + """Fetch an album by its Deezer ID or URL.""" + if not (deezer_id := self._get_id(album_id)): + return None - :param album_id: Deezer ID or URL for the album. - :type album_id: str - :return: AlbumInfo object for album. - :rtype: beets.autotag.hooks.AlbumInfo or None - """ - deezer_id = self._get_id("album", album_id, self.id_regex) - if deezer_id is None: - return None - album_data = self.fetch_data(self.album_url + deezer_id) - if album_data is None: + album_url = f"{self.album_url}{deezer_id}" + if not (album_data := self.fetch_data(album_url)): return None + contributors = album_data.get("contributors") if contributors is not None: artist, artist_id = self.get_artist(contributors) @@ -132,7 +125,7 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): tracks_data.extend(tracks_obj["data"]) tracks = [] - medium_totals = collections.defaultdict(int) + medium_totals: dict[int | None, int] = collections.defaultdict(int) for i, track_data in enumerate(tracks_data, start=1): track = self._get_track(track_data) track.index = i @@ -150,13 +143,15 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): artist_id=artist_id, tracks=tracks, albumtype=album_data["record_type"], - va=len(album_data["contributors"]) == 1 - and artist.lower() == "various artists", + va=( + len(album_data["contributors"]) == 1 + and (artist or "").lower() == "various artists" + ), year=year, month=month, day=day, label=album_data["label"], - mediums=max(medium_totals.keys()), + mediums=max(filter(None, medium_totals.keys())), data_source=self.data_source, data_url=album_data["link"], cover_art_url=album_data.get("cover_xl"), @@ -204,12 +199,11 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): :rtype: beets.autotag.hooks.TrackInfo or None """ if track_data is None: - deezer_id = self._get_id("track", track_id, self.id_regex) - if deezer_id is None: - return None - track_data = self.fetch_data(self.track_url + deezer_id) - if track_data is None: + if not (deezer_id := self._get_id(track_id)) or not ( + track_data := self.fetch_data(f"{self.track_url}{deezer_id}") + ): return None + track = self._get_track(track_data) # Get album's tracks to set `track.index` (position on the entire diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 19521b035..a8d08c1e9 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -38,7 +38,7 @@ import beets.ui from beets import config from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance -from beets.util.id_extractors import extract_discogs_id_regex +from beets.util.id_extractors import extract_release_id USER_AGENT = f"beets/{beets.__version__} +https://beets.io/" API_KEY = "rAzVUQYRaoFjeBjyWuWZ" @@ -266,7 +266,7 @@ class DiscogsPlugin(BeetsPlugin): """ self._log.debug("Searching for release {0}", album_id) - discogs_id = extract_discogs_id_regex(album_id) + discogs_id = extract_release_id("discogs", album_id) if not discogs_id: return None @@ -401,7 +401,7 @@ class DiscogsPlugin(BeetsPlugin): else: genre = base_genre - discogs_albumid = extract_discogs_id_regex(result.data.get("uri")) + discogs_albumid = extract_release_id("discogs", result.data.get("uri")) # Extract information for the optional AlbumInfo fields that are # contained on nested discogs fields. diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index e1a640d84..34a46715d 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -16,7 +16,6 @@ from __future__ import annotations -import re import traceback from collections import Counter from itertools import product @@ -28,13 +27,8 @@ import musicbrainzngs import beets import beets.autotag.hooks from beets import config, plugins, util -from beets.plugins import BeetsPlugin, MetadataSourcePlugin -from beets.util.id_extractors import ( - beatport_id_regex, - deezer_id_regex, - extract_discogs_id_regex, - spotify_id_regex, -) +from beets.plugins import BeetsPlugin +from beets.util.id_extractors import extract_release_id if TYPE_CHECKING: from collections.abc import Iterator, Sequence @@ -302,17 +296,6 @@ def _set_date_str( setattr(info, key, date_num) -def _parse_id(s: str) -> str | None: - """Search for a MusicBrainz ID in the given string and return it. If - no ID can be found, return None. - """ - # Find the first thing that looks like a UUID/MBID. - match = re.search("[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}", s) - if match is not None: - return match.group() if match else None - return None - - def _is_translation(r): _trans_key = "transl-tracklisting" return r["type"] == _trans_key and r["direction"] == "backward" @@ -753,24 +736,10 @@ class MusicBrainzPlugin(BeetsPlugin): source.capitalize(), ) - if "discogs" in urls: - info.discogs_albumid = extract_discogs_id_regex(urls["discogs"]) - if "bandcamp" in urls: - info.bandcamp_album_id = urls["bandcamp"] - if "spotify" in urls: - info.spotify_album_id = MetadataSourcePlugin._get_id( - "album", urls["spotify"], spotify_id_regex + for source, url in urls.items(): + setattr( + info, f"{source}_album_id", extract_release_id(source, url) ) - if "deezer" in urls: - info.deezer_album_id = MetadataSourcePlugin._get_id( - "album", urls["deezer"], deezer_id_regex - ) - if "beatport" in urls: - info.beatport_album_id = MetadataSourcePlugin._get_id( - "album", urls["beatport"], beatport_id_regex - ) - if "tidal" in urls: - info.tidal_album_id = urls["tidal"].split("/")[-1] extra_albumdatas = plugins.send("mb_album_extract", data=release) for extra_albumdata in extra_albumdatas: @@ -869,10 +838,10 @@ class MusicBrainzPlugin(BeetsPlugin): MusicBrainzAPIError. """ self._log.debug("Requesting MusicBrainz release {}", album_id) - albumid = _parse_id(album_id) - if not albumid: + if not (albumid := extract_release_id("musicbrainz", album_id)): self._log.debug("Invalid MBID ({0}).", album_id) return None + try: res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES) @@ -906,10 +875,10 @@ class MusicBrainzPlugin(BeetsPlugin): """Fetches a track by its MusicBrainz ID. Returns a TrackInfo object or None if no track is found. May raise a MusicBrainzAPIError. """ - trackid = _parse_id(track_id) - if not trackid: + if not (trackid := extract_release_id("musicbrainz", track_id)): self._log.debug("Invalid MBID ({0}).", track_id) return None + try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) except musicbrainzngs.ResponseError: diff --git a/beetsplug/spotify.py b/beetsplug/spotify.py index 44a0e0ce7..c0d212971 100644 --- a/beetsplug/spotify.py +++ b/beetsplug/spotify.py @@ -17,6 +17,8 @@ Spotify playlist construction. """ +from __future__ import annotations + import base64 import collections import json @@ -33,7 +35,6 @@ from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.dbcore import types from beets.library import DateType from beets.plugins import BeetsPlugin, MetadataSourcePlugin -from beets.util.id_extractors import spotify_id_regex DEFAULT_WAITING_TIME = 5 @@ -71,8 +72,6 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): track_url = "https://api.spotify.com/v1/tracks/" audio_features_url = "https://api.spotify.com/v1/audio-features/" - id_regex = spotify_id_regex - spotify_audio_features = { "acousticness": "spotify_acousticness", "danceability": "spotify_danceability", @@ -233,7 +232,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): self._log.error(f"Request failed. Error: {e}") raise SpotifyAPIError("Request failed.") - def album_for_id(self, album_id): + def album_for_id(self, album_id: str) -> AlbumInfo | None: """Fetch an album by its Spotify ID or URL and return an AlbumInfo object or None if the album is not found. @@ -242,8 +241,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): :return: AlbumInfo object for album :rtype: beets.autotag.hooks.AlbumInfo or None """ - spotify_id = self._get_id("album", album_id, self.id_regex) - if spotify_id is None: + if not (spotify_id := self._get_id(album_id)): return None album_data = self._handle_response( @@ -285,7 +283,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): tracks_items.extend(tracks_data["items"]) tracks = [] - medium_totals = collections.defaultdict(int) + medium_totals: dict[int | None, int] = collections.defaultdict(int) for i, track_data in enumerate(tracks_items, start=1): track = self._get_track(track_data) track.index = i @@ -309,7 +307,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): month=month, day=day, label=album_data["label"], - mediums=max(medium_totals.keys()), + mediums=max(filter(None, medium_totals.keys())), data_source=self.data_source, data_url=album_data["external_urls"]["spotify"], ) @@ -359,13 +357,14 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): :return: TrackInfo object for track :rtype: beets.autotag.hooks.TrackInfo or None """ - if track_data is None: - spotify_id = self._get_id("track", track_id, self.id_regex) - if spotify_id is None: + if not track_data: + if not (spotify_id := self._get_id(track_id)) or not ( + track_data := self._handle_response( + requests.get, f"{self.track_url}{spotify_id}" + ) + ): return None - track_data = self._handle_response( - requests.get, self.track_url + spotify_id - ) + track = self._get_track(track_data) # Get album's tracks to set `track.index` (position on the entire diff --git a/test/plugins/test_discogs.py b/test/plugins/test_discogs.py index 5e327ab27..eb9a625b1 100644 --- a/test/plugins/test_discogs.py +++ b/test/plugins/test_discogs.py @@ -21,7 +21,6 @@ import pytest from beets import config from beets.test._common import Bag from beets.test.helper import BeetsTestCase, capture_log -from beets.util.id_extractors import extract_discogs_id_regex from beetsplug.discogs import DiscogsPlugin @@ -369,37 +368,6 @@ class DGAlbumInfoTest(BeetsTestCase): assert d is None assert "Release does not contain the required fields" in logs[0] - def test_album_for_id(self): - """Test parsing for a valid Discogs release_id""" - test_patterns = [ - ( - "http://www.discogs.com/G%C3%BCnther-Lause-Meru-Ep/release/4354798", - 4354798, - ), - ( - "http://www.discogs.com/release/4354798-G%C3%BCnther-Lause-Meru-Ep", - 4354798, - ), - ( - "http://www.discogs.com/G%C3%BCnther-4354798Lause-Meru-Ep/release/4354798", # NOQA E501 - 4354798, - ), - ( - "http://www.discogs.com/release/4354798-G%C3%BCnther-4354798Lause-Meru-Ep/", # NOQA E501 - 4354798, - ), - ("[r4354798]", 4354798), - ("r4354798", 4354798), - ("4354798", 4354798), - ("yet-another-metadata-provider.org/foo/12345", ""), - ("005b84a0-ecd6-39f1-b2f6-6eb48756b268", ""), - ] - for test_pattern, expected in test_patterns: - match = extract_discogs_id_regex(test_pattern) - if not match: - match = "" - assert match == expected - def test_default_genre_style_settings(self): """Test genre default settings, genres to genre, styles to style""" release = self._make_release_from_positions(["1", "2"]) diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index b8640c870..0f142a353 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -662,24 +662,6 @@ class MBAlbumInfoTest(MusicBrainzTestCase): assert t[1].trackdisambig == "SECOND TRACK" -class ParseIDTest(BeetsTestCase): - def test_parse_id_correct(self): - id_string = "28e32c71-1450-463e-92bf-e0a46446fc11" - out = musicbrainz._parse_id(id_string) - assert out == id_string - - def test_parse_id_non_id_returns_none(self): - id_string = "blah blah" - out = musicbrainz._parse_id(id_string) - assert out is None - - def test_parse_id_url_finds_id(self): - id_string = "28e32c71-1450-463e-92bf-e0a46446fc11" - id_url = "https://musicbrainz.org/entity/%s" % id_string - out = musicbrainz._parse_id(id_url) - assert out == id_string - - class ArtistFlatteningTest(BeetsTestCase): def _credit_dict(self, suffix=""): return { diff --git a/test/test_plugins.py b/test/test_plugins.py index 417debbdd..3e809e492 100644 --- a/test/test_plugins.py +++ b/test/test_plugins.py @@ -30,16 +30,10 @@ from beets.importer import ( SingletonImportTask, ) from beets.library import Item -from beets.plugins import MetadataSourcePlugin from beets.test import helper from beets.test.helper import AutotagStub, ImportHelper, TerminalImportMixin from beets.test.helper import PluginTestCase as BasePluginTestCase from beets.util import displayable_path, syspath -from beets.util.id_extractors import ( - beatport_id_regex, - deezer_id_regex, - spotify_id_regex, -) class PluginLoaderTestCase(BasePluginTestCase): @@ -547,61 +541,3 @@ class PromptChoicesTest(TerminalImportMixin, PluginImportTestCase): self.mock_input_options.assert_called_once_with( opts, default="a", require=ANY ) - - -class ParseSpotifyIDTest(unittest.TestCase): - def test_parse_id_correct(self): - id_string = "39WqpoPgZxygo6YQjehLJJ" - out = MetadataSourcePlugin._get_id("album", id_string, spotify_id_regex) - assert out == id_string - - def test_parse_id_non_id_returns_none(self): - id_string = "blah blah" - out = MetadataSourcePlugin._get_id("album", id_string, spotify_id_regex) - assert out is None - - def test_parse_id_url_finds_id(self): - id_string = "39WqpoPgZxygo6YQjehLJJ" - id_url = "https://open.spotify.com/album/%s" % id_string - out = MetadataSourcePlugin._get_id("album", id_url, spotify_id_regex) - assert out == id_string - - -class ParseDeezerIDTest(unittest.TestCase): - def test_parse_id_correct(self): - id_string = "176356382" - out = MetadataSourcePlugin._get_id("album", id_string, deezer_id_regex) - assert out == id_string - - def test_parse_id_non_id_returns_none(self): - id_string = "blah blah" - out = MetadataSourcePlugin._get_id("album", id_string, deezer_id_regex) - assert out is None - - def test_parse_id_url_finds_id(self): - id_string = "176356382" - id_url = "https://www.deezer.com/album/%s" % id_string - out = MetadataSourcePlugin._get_id("album", id_url, deezer_id_regex) - assert out == id_string - - -class ParseBeatportIDTest(unittest.TestCase): - def test_parse_id_correct(self): - id_string = "3089651" - out = MetadataSourcePlugin._get_id( - "album", id_string, beatport_id_regex - ) - assert out == id_string - - def test_parse_id_non_id_returns_none(self): - id_string = "blah blah" - out = MetadataSourcePlugin._get_id( - "album", id_string, beatport_id_regex - ) - assert out is None - - def test_parse_id_url_finds_id(self): - id_string = "3089651" - id_url = "https://www.beatport.com/release/album-name/%s" % id_string - out = MetadataSourcePlugin._get_id("album", id_url, beatport_id_regex) - assert out == id_string diff --git a/test/util/test_id_extractors.py b/test/util/test_id_extractors.py new file mode 100644 index 000000000..8d4823419 --- /dev/null +++ b/test/util/test_id_extractors.py @@ -0,0 +1,34 @@ +import pytest + +from beets.util.id_extractors import extract_release_id + + +@pytest.mark.parametrize( + "source, id_string, expected", + [ + ("spotify", "39WqpoPgZxygo6YQjehLJJ", "39WqpoPgZxygo6YQjehLJJ"), + ("spotify", "blah blah", None), + ("spotify", "https://open.spotify.com/album/39WqpoPgZxygo6YQjehLJJ", "39WqpoPgZxygo6YQjehLJJ"), # noqa: E501 + ("deezer", "176356382", "176356382"), + ("deezer", "blah blah", None), + ("deezer", "https://www.deezer.com/album/176356382", "176356382"), + ("beatport", "3089651", "3089651"), + ("beatport", "blah blah", None), + ("beatport", "https://www.beatport.com/release/album-name/3089651", "3089651"), # noqa: E501 + ("discogs", "http://www.discogs.com/G%C3%BCnther-Lause-Meru-Ep/release/4354798", "4354798"), # noqa: E501 + ("discogs", "http://www.discogs.com/release/4354798-G%C3%BCnther-Lause-Meru-Ep", "4354798"), # noqa: E501 + ("discogs", "http://www.discogs.com/G%C3%BCnther-4354798Lause-Meru-Ep/release/4354798", "4354798"), # noqa: E501 + ("discogs", "http://www.discogs.com/release/4354798-G%C3%BCnther-4354798Lause-Meru-Ep/", "4354798"), # noqa: E501 + ("discogs", "[r4354798]", "4354798"), + ("discogs", "r4354798", "4354798"), + ("discogs", "4354798", "4354798"), + ("discogs", "yet-another-metadata-provider.org/foo/12345", None), + ("discogs", "005b84a0-ecd6-39f1-b2f6-6eb48756b268", None), + ("musicbrainz", "28e32c71-1450-463e-92bf-e0a46446fc11", "28e32c71-1450-463e-92bf-e0a46446fc11"), # noqa: E501 + ("musicbrainz", "blah blah", None), + ("musicbrainz", "https://musicbrainz.org/entity/28e32c71-1450-463e-92bf-e0a46446fc11", "28e32c71-1450-463e-92bf-e0a46446fc11"), # noqa: E501 + ("bandcamp", "https://nameofartist.bandcamp.com/album/nameofalbum", "https://nameofartist.bandcamp.com/album/nameofalbum"), # noqa: E501 + ], +) # fmt: skip +def test_extract_release_id(source, id_string, expected): + assert extract_release_id(source, id_string) == expected