diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index c6e51fc98..c3c7388bd 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -20,7 +20,7 @@ from collections import Counter from contextlib import suppress from functools import cached_property from itertools import product -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, TypedDict from urllib.parse import urljoin from confuse.exceptions import NotFoundError @@ -107,6 +107,15 @@ UrlSource = Literal[ ] +class ArtistInfo(TypedDict): + artist: str + artist_sort: str + artist_credit: str + artists: list[str] + artists_sort: list[str] + artists_credit: list[str] + + def _preferred_alias( aliases: list[Alias], languages: list[str] | None = None ) -> Alias | None: @@ -137,71 +146,10 @@ def _preferred_alias( return next(matches, None) -def _multi_artist_credit( - credit: list[ArtistCredit], include_join_phrase: bool -) -> tuple[list[str], list[str], list[str]]: - """Given a list representing an ``artist-credit`` block, accumulate - data into a triple of joined artist name lists: canonical, sort, and - credit. - """ - artist_parts = [] - artist_sort_parts = [] - artist_credit_parts = [] - for el in credit: - alias = _preferred_alias(el["artist"].get("aliases", [])) - - # An artist. - if alias: - cur_artist_name = alias["name"] - else: - cur_artist_name = el["artist"]["name"] - artist_parts.append(cur_artist_name) - - # Artist sort name. - if alias: - artist_sort_parts.append(alias["sort_name"]) - elif "sort_name" in el["artist"]: - artist_sort_parts.append(el["artist"]["sort_name"]) - else: - artist_sort_parts.append(cur_artist_name) - - # Artist credit. - if "name" in el: - artist_credit_parts.append(el["name"]) - else: - artist_credit_parts.append(cur_artist_name) - - if include_join_phrase and (joinphrase := el.get("joinphrase")): - artist_parts.append(joinphrase) - artist_sort_parts.append(joinphrase) - artist_credit_parts.append(joinphrase) - - return ( - artist_parts, - artist_sort_parts, - artist_credit_parts, - ) - - def track_url(trackid: str) -> str: return urljoin(BASE_URL, f"recording/{trackid}") -def _flatten_artist_credit(credit: list[ArtistCredit]) -> tuple[str, str, str]: - """Given a list representing an ``artist-credit`` block, flatten the - data into a triple of joined artist name strings: canonical, sort, and - credit. - """ - artist_parts, artist_sort_parts, artist_credit_parts = _multi_artist_credit( - credit, include_join_phrase=True - ) - return ( - "".join(artist_parts), - "".join(artist_sort_parts), - "".join(artist_credit_parts), - ) - - def _artist_ids(credit: list[ArtistCredit]) -> list[str]: """ Given a list representing an ``artist-credit``, @@ -358,6 +306,53 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): "'musicbrainz.search_limit'", ) + @staticmethod + def _parse_artist_credits(artist_credits: list[ArtistCredit]) -> ArtistInfo: + """Normalize MusicBrainz artist-credit data into tag-friendly fields. + + MusicBrainz represents credits as a sequence of credited artists, each + with a display name and a `joinphrase` (for example `' & '`, `' feat. + '`, or `''`). This helper converts that structured representation into + both: + + - Single string values suitable for common tags (concatenated names with + joinphrases preserved). + - Parallel lists that keep the per-artist granularity for callers that + need to reason about individual credited artists. + + When available, a preferred alias is used for the canonical artist name + and sort name, while the credit name preserves the exact credited text + from the release. + """ + artist_parts: list[str] = [] + artist_sort_parts: list[str] = [] + artist_credit_parts: list[str] = [] + artists: list[str] = [] + artists_sort: list[str] = [] + artists_credit: list[str] = [] + + for el in artist_credits: + alias = _preferred_alias(el["artist"].get("aliases", [])) + artist_object = alias or el["artist"] + + joinphrase = el["joinphrase"] + for name, parts, multi in ( + (artist_object["name"], artist_parts, artists), + (artist_object["sort_name"], artist_sort_parts, artists_sort), + (el["name"], artist_credit_parts, artists_credit), + ): + parts.extend([name, joinphrase]) + multi.append(name) + + return { + "artist": "".join(artist_parts), + "artist_sort": "".join(artist_sort_parts), + "artist_credit": "".join(artist_credit_parts), + "artists": artists, + "artists_sort": artists_sort, + "artists_credit": artists_credit, + } + def track_info( self, recording: Recording, @@ -393,21 +388,7 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): isrc=( ";".join(isrcs) if (isrcs := recording.get("isrcs")) else None ), - ) - - # Get the artist names. - ( - info.artist, - info.artist_sort, - info.artist_credit, - ) = _flatten_artist_credit(recording["artist_credit"]) - - ( - info.artists, - info.artists_sort, - info.artists_credit, - ) = _multi_artist_credit( - recording["artist_credit"], include_join_phrase=False + **self._parse_artist_credits(recording["artist_credit"]), ) info.artists_ids = _artist_ids(recording["artist_credit"]) @@ -459,19 +440,6 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): """Takes a MusicBrainz release result dictionary and returns a beets AlbumInfo object containing the interesting data about that release. """ - # Get artist name using join phrases. - artist_name, artist_sort_name, artist_credit_name = ( - _flatten_artist_credit(release["artist_credit"]) - ) - - ( - artists_names, - artists_sort_names, - artists_credit_names, - ) = _multi_artist_credit( - release["artist_credit"], include_join_phrase=False - ) - ntracks = sum(len(m["tracks"]) for m in release["media"]) # The MusicBrainz API omits 'relations' @@ -539,19 +507,8 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): if track.get("title"): ti.title = track["title"] if track.get("artist_credit"): - # Get the artist names. - ( - ti.artist, - ti.artist_sort, - ti.artist_credit, - ) = _flatten_artist_credit(track["artist_credit"]) - - ( - ti.artists, - ti.artists_sort, - ti.artists_credit, - ) = _multi_artist_credit( - track["artist_credit"], include_join_phrase=False + ti.update( + **self._parse_artist_credits(track["artist_credit"]) ) ti.artists_ids = _artist_ids(track["artist_credit"]) @@ -563,18 +520,13 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): album_artist_ids = _artist_ids(release["artist_credit"]) info = beets.autotag.hooks.AlbumInfo( + **self._parse_artist_credits(release["artist_credit"]), album=release["title"], album_id=release["id"], - artist=artist_name, artist_id=album_artist_ids[0], - artists=artists_names, artists_ids=album_artist_ids, tracks=track_infos, mediums=len(release["media"]), - artist_sort=artist_sort_name, - artists_sort=artists_sort_names, - artist_credit=artist_credit_name, - artists_credit=artists_credit_names, data_source=self.data_source, data_url=album_url(release["id"]), barcode=release.get("barcode"), diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index 9f52ffe0a..d1eeb5ef2 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -28,6 +28,7 @@ from beets import config from beets.library import Item from beets.test.helper import BeetsTestCase, PluginMixin from beetsplug import musicbrainz +from beetsplug.musicbrainz import MusicBrainzPlugin from .factories import musicbrainz as factories @@ -747,45 +748,47 @@ class MBAlbumInfoTest(MusicBrainzTestCase): class ArtistTest(unittest.TestCase): - def _credit_dict(self, suffix=""): + def _credit_dict(self, suffix="", joinphrase="") -> mb.ArtistCredit: return { "artist": { "name": f"NAME{suffix}", + "id": f"ID{suffix}", "sort_name": f"SORT{suffix}", + "country": None, + "disambiguation": "", + "type": "Person", + "type_id": "b6e035f4-3ce9-331c-97df-83397230b0df", }, "name": f"CREDIT{suffix}", + "joinphrase": joinphrase, } def test_single_artist(self): credit = [self._credit_dict()] - a, s, c = musicbrainz._flatten_artist_credit(credit) - assert a == "NAME" - assert s == "SORT" - assert c == "CREDIT" - a, s, c = musicbrainz._multi_artist_credit( - credit, include_join_phrase=False - ) - assert a == ["NAME"] - assert s == ["SORT"] - assert c == ["CREDIT"] + assert MusicBrainzPlugin._parse_artist_credits(credit) == { + "artist": "NAME", + "artist_sort": "SORT", + "artist_credit": "CREDIT", + "artists": ["NAME"], + "artists_sort": ["SORT"], + "artists_credit": ["CREDIT"], + } def test_two_artists(self): credit = [ - {**self._credit_dict("a"), "joinphrase": " AND "}, + self._credit_dict("a", " AND "), self._credit_dict("b"), ] - a, s, c = musicbrainz._flatten_artist_credit(credit) - assert a == "NAMEa AND NAMEb" - assert s == "SORTa AND SORTb" - assert c == "CREDITa AND CREDITb" - a, s, c = musicbrainz._multi_artist_credit( - credit, include_join_phrase=False - ) - assert a == ["NAMEa", "NAMEb"] - assert s == ["SORTa", "SORTb"] - assert c == ["CREDITa", "CREDITb"] + assert MusicBrainzPlugin._parse_artist_credits(credit) == { + "artist": "NAMEa AND NAMEb", + "artist_sort": "SORTa AND SORTb", + "artist_credit": "CREDITa AND CREDITb", + "artists": ["NAMEa", "NAMEb"], + "artists_sort": ["SORTa", "SORTb"], + "artists_credit": ["CREDITa", "CREDITb"], + } def test_preferred_alias(self): aliases = [