Simplify multi artist credit parsing

This commit is contained in:
Šarūnas Nejus 2026-01-22 01:24:39 +00:00
parent 2754462c5a
commit c39c5021b3
No known key found for this signature in database
2 changed files with 86 additions and 131 deletions

View file

@ -20,7 +20,7 @@ from collections import Counter
from contextlib import suppress
from functools import cached_property
from itertools import product
from typing import TYPE_CHECKING, Literal
from typing import TYPE_CHECKING, Literal, TypedDict
from urllib.parse import urljoin
from confuse.exceptions import NotFoundError
@ -107,6 +107,15 @@ UrlSource = Literal[
]
class ArtistInfo(TypedDict):
artist: str
artist_sort: str
artist_credit: str
artists: list[str]
artists_sort: list[str]
artists_credit: list[str]
def _preferred_alias(
aliases: list[Alias], languages: list[str] | None = None
) -> Alias | None:
@ -137,71 +146,10 @@ def _preferred_alias(
return next(matches, None)
def _multi_artist_credit(
credit: list[ArtistCredit], include_join_phrase: bool
) -> tuple[list[str], list[str], list[str]]:
"""Given a list representing an ``artist-credit`` block, accumulate
data into a triple of joined artist name lists: canonical, sort, and
credit.
"""
artist_parts = []
artist_sort_parts = []
artist_credit_parts = []
for el in credit:
alias = _preferred_alias(el["artist"].get("aliases", []))
# An artist.
if alias:
cur_artist_name = alias["name"]
else:
cur_artist_name = el["artist"]["name"]
artist_parts.append(cur_artist_name)
# Artist sort name.
if alias:
artist_sort_parts.append(alias["sort_name"])
elif "sort_name" in el["artist"]:
artist_sort_parts.append(el["artist"]["sort_name"])
else:
artist_sort_parts.append(cur_artist_name)
# Artist credit.
if "name" in el:
artist_credit_parts.append(el["name"])
else:
artist_credit_parts.append(cur_artist_name)
if include_join_phrase and (joinphrase := el.get("joinphrase")):
artist_parts.append(joinphrase)
artist_sort_parts.append(joinphrase)
artist_credit_parts.append(joinphrase)
return (
artist_parts,
artist_sort_parts,
artist_credit_parts,
)
def track_url(trackid: str) -> str:
return urljoin(BASE_URL, f"recording/{trackid}")
def _flatten_artist_credit(credit: list[ArtistCredit]) -> tuple[str, str, str]:
"""Given a list representing an ``artist-credit`` block, flatten the
data into a triple of joined artist name strings: canonical, sort, and
credit.
"""
artist_parts, artist_sort_parts, artist_credit_parts = _multi_artist_credit(
credit, include_join_phrase=True
)
return (
"".join(artist_parts),
"".join(artist_sort_parts),
"".join(artist_credit_parts),
)
def _artist_ids(credit: list[ArtistCredit]) -> list[str]:
"""
Given a list representing an ``artist-credit``,
@ -358,6 +306,53 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
"'musicbrainz.search_limit'",
)
@staticmethod
def _parse_artist_credits(artist_credits: list[ArtistCredit]) -> ArtistInfo:
"""Normalize MusicBrainz artist-credit data into tag-friendly fields.
MusicBrainz represents credits as a sequence of credited artists, each
with a display name and a `joinphrase` (for example `' & '`, `' feat.
'`, or `''`). This helper converts that structured representation into
both:
- Single string values suitable for common tags (concatenated names with
joinphrases preserved).
- Parallel lists that keep the per-artist granularity for callers that
need to reason about individual credited artists.
When available, a preferred alias is used for the canonical artist name
and sort name, while the credit name preserves the exact credited text
from the release.
"""
artist_parts: list[str] = []
artist_sort_parts: list[str] = []
artist_credit_parts: list[str] = []
artists: list[str] = []
artists_sort: list[str] = []
artists_credit: list[str] = []
for el in artist_credits:
alias = _preferred_alias(el["artist"].get("aliases", []))
artist_object = alias or el["artist"]
joinphrase = el["joinphrase"]
for name, parts, multi in (
(artist_object["name"], artist_parts, artists),
(artist_object["sort_name"], artist_sort_parts, artists_sort),
(el["name"], artist_credit_parts, artists_credit),
):
parts.extend([name, joinphrase])
multi.append(name)
return {
"artist": "".join(artist_parts),
"artist_sort": "".join(artist_sort_parts),
"artist_credit": "".join(artist_credit_parts),
"artists": artists,
"artists_sort": artists_sort,
"artists_credit": artists_credit,
}
def track_info(
self,
recording: Recording,
@ -393,21 +388,7 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
isrc=(
";".join(isrcs) if (isrcs := recording.get("isrcs")) else None
),
)
# Get the artist names.
(
info.artist,
info.artist_sort,
info.artist_credit,
) = _flatten_artist_credit(recording["artist_credit"])
(
info.artists,
info.artists_sort,
info.artists_credit,
) = _multi_artist_credit(
recording["artist_credit"], include_join_phrase=False
**self._parse_artist_credits(recording["artist_credit"]),
)
info.artists_ids = _artist_ids(recording["artist_credit"])
@ -459,19 +440,6 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
"""Takes a MusicBrainz release result dictionary and returns a beets
AlbumInfo object containing the interesting data about that release.
"""
# Get artist name using join phrases.
artist_name, artist_sort_name, artist_credit_name = (
_flatten_artist_credit(release["artist_credit"])
)
(
artists_names,
artists_sort_names,
artists_credit_names,
) = _multi_artist_credit(
release["artist_credit"], include_join_phrase=False
)
ntracks = sum(len(m["tracks"]) for m in release["media"])
# The MusicBrainz API omits 'relations'
@ -539,19 +507,8 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
if track.get("title"):
ti.title = track["title"]
if track.get("artist_credit"):
# Get the artist names.
(
ti.artist,
ti.artist_sort,
ti.artist_credit,
) = _flatten_artist_credit(track["artist_credit"])
(
ti.artists,
ti.artists_sort,
ti.artists_credit,
) = _multi_artist_credit(
track["artist_credit"], include_join_phrase=False
ti.update(
**self._parse_artist_credits(track["artist_credit"])
)
ti.artists_ids = _artist_ids(track["artist_credit"])
@ -563,18 +520,13 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
album_artist_ids = _artist_ids(release["artist_credit"])
info = beets.autotag.hooks.AlbumInfo(
**self._parse_artist_credits(release["artist_credit"]),
album=release["title"],
album_id=release["id"],
artist=artist_name,
artist_id=album_artist_ids[0],
artists=artists_names,
artists_ids=album_artist_ids,
tracks=track_infos,
mediums=len(release["media"]),
artist_sort=artist_sort_name,
artists_sort=artists_sort_names,
artist_credit=artist_credit_name,
artists_credit=artists_credit_names,
data_source=self.data_source,
data_url=album_url(release["id"]),
barcode=release.get("barcode"),

View file

@ -28,6 +28,7 @@ from beets import config
from beets.library import Item
from beets.test.helper import BeetsTestCase, PluginMixin
from beetsplug import musicbrainz
from beetsplug.musicbrainz import MusicBrainzPlugin
from .factories import musicbrainz as factories
@ -747,45 +748,47 @@ class MBAlbumInfoTest(MusicBrainzTestCase):
class ArtistTest(unittest.TestCase):
def _credit_dict(self, suffix=""):
def _credit_dict(self, suffix="", joinphrase="") -> mb.ArtistCredit:
return {
"artist": {
"name": f"NAME{suffix}",
"id": f"ID{suffix}",
"sort_name": f"SORT{suffix}",
"country": None,
"disambiguation": "",
"type": "Person",
"type_id": "b6e035f4-3ce9-331c-97df-83397230b0df",
},
"name": f"CREDIT{suffix}",
"joinphrase": joinphrase,
}
def test_single_artist(self):
credit = [self._credit_dict()]
a, s, c = musicbrainz._flatten_artist_credit(credit)
assert a == "NAME"
assert s == "SORT"
assert c == "CREDIT"
a, s, c = musicbrainz._multi_artist_credit(
credit, include_join_phrase=False
)
assert a == ["NAME"]
assert s == ["SORT"]
assert c == ["CREDIT"]
assert MusicBrainzPlugin._parse_artist_credits(credit) == {
"artist": "NAME",
"artist_sort": "SORT",
"artist_credit": "CREDIT",
"artists": ["NAME"],
"artists_sort": ["SORT"],
"artists_credit": ["CREDIT"],
}
def test_two_artists(self):
credit = [
{**self._credit_dict("a"), "joinphrase": " AND "},
self._credit_dict("a", " AND "),
self._credit_dict("b"),
]
a, s, c = musicbrainz._flatten_artist_credit(credit)
assert a == "NAMEa AND NAMEb"
assert s == "SORTa AND SORTb"
assert c == "CREDITa AND CREDITb"
a, s, c = musicbrainz._multi_artist_credit(
credit, include_join_phrase=False
)
assert a == ["NAMEa", "NAMEb"]
assert s == ["SORTa", "SORTb"]
assert c == ["CREDITa", "CREDITb"]
assert MusicBrainzPlugin._parse_artist_credits(credit) == {
"artist": "NAMEa AND NAMEb",
"artist_sort": "SORTa AND SORTb",
"artist_credit": "CREDITa AND CREDITb",
"artists": ["NAMEa", "NAMEb"],
"artists_sort": ["SORTa", "SORTb"],
"artists_credit": ["CREDITa", "CREDITb"],
}
def test_preferred_alias(self):
aliases = [