mirror of
https://github.com/beetbox/beets.git
synced 2026-02-08 08:25:23 +01:00
Simplify multi artist credit parsing
This commit is contained in:
parent
2754462c5a
commit
c39c5021b3
2 changed files with 86 additions and 131 deletions
|
|
@ -20,7 +20,7 @@ from collections import Counter
|
|||
from contextlib import suppress
|
||||
from functools import cached_property
|
||||
from itertools import product
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
from typing import TYPE_CHECKING, Literal, TypedDict
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from confuse.exceptions import NotFoundError
|
||||
|
|
@ -107,6 +107,15 @@ UrlSource = Literal[
|
|||
]
|
||||
|
||||
|
||||
class ArtistInfo(TypedDict):
    """Artist fields derived from a MusicBrainz ``artist-credit`` list.

    The singular keys each hold one joined display string; the plural keys
    hold one item per credited artist, in credit order.
    """

    # Canonical artist names, each followed by its joinphrase, concatenated.
    artist: str
    # Sort names, each followed by its joinphrase, concatenated.
    artist_sort: str
    # Names as credited, each followed by its joinphrase, concatenated.
    artist_credit: str
    # Canonical name of every credited artist.
    artists: list[str]
    # Sort name of every credited artist.
    artists_sort: list[str]
    # Credited name of every credited artist.
    artists_credit: list[str]
|
||||
|
||||
|
||||
def _preferred_alias(
|
||||
aliases: list[Alias], languages: list[str] | None = None
|
||||
) -> Alias | None:
|
||||
|
|
@ -137,71 +146,10 @@ def _preferred_alias(
|
|||
return next(matches, None)
|
||||
|
||||
|
||||
def _multi_artist_credit(
    credit: list[ArtistCredit], include_join_phrase: bool
) -> tuple[list[str], list[str], list[str]]:
    """Collect canonical, sort, and credited names from an artist credit.

    Walks the ``artist-credit`` entries in order and returns three parallel
    lists. When ``include_join_phrase`` is true, every non-empty
    ``joinphrase`` is appended to all three lists directly after the entry
    it belongs to.
    """
    names: list[str] = []
    sort_names: list[str] = []
    credited_names: list[str] = []

    for entry in credit:
        artist = entry["artist"]
        alias = _preferred_alias(artist.get("aliases", []))

        # A preferred alias overrides both the canonical and the sort name.
        name = alias["name"] if alias else artist["name"]
        if alias:
            sort_name = alias["sort_name"]
        else:
            # Without a dedicated sort name, sort by the canonical name.
            sort_name = artist.get("sort_name", name)

        names.append(name)
        sort_names.append(sort_name)
        # The credited name falls back to the canonical name when absent.
        credited_names.append(entry.get("name", name))

        if not include_join_phrase:
            continue
        joinphrase = entry.get("joinphrase")
        if joinphrase:
            for target in (names, sort_names, credited_names):
                target.append(joinphrase)

    return names, sort_names, credited_names
|
||||
|
||||
|
||||
def track_url(trackid: str) -> str:
    """Return ``recording/<trackid>`` resolved against ``BASE_URL``."""
    recording_path = f"recording/{trackid}"
    return urljoin(BASE_URL, recording_path)
|
||||
|
||||
|
||||
def _flatten_artist_credit(credit: list[ArtistCredit]) -> tuple[str, str, str]:
    """Join an ``artist-credit`` block into three display strings.

    Returns the canonical, sort, and credited artist strings, in that
    order. Each one is the concatenation of the per-artist names and the
    joinphrases that ``_multi_artist_credit`` yields for the credit.
    """
    names, sort_names, credited_names = _multi_artist_credit(
        credit, include_join_phrase=True
    )
    return "".join(names), "".join(sort_names), "".join(credited_names)
|
||||
|
||||
|
||||
def _artist_ids(credit: list[ArtistCredit]) -> list[str]:
|
||||
"""
|
||||
Given a list representing an ``artist-credit``,
|
||||
|
|
@ -358,6 +306,53 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
|
|||
"'musicbrainz.search_limit'",
|
||||
)
|
||||
|
||||
@staticmethod
def _parse_artist_credits(artist_credits: list[ArtistCredit]) -> ArtistInfo:
    """Turn a MusicBrainz artist-credit list into flat artist fields.

    Each credit entry carries the credited artist, the name as credited,
    and the ``joinphrase`` that follows it (such as ``' & '``,
    ``' feat. '`` or ``''``). Two views of that data are returned:

    - ``artist``, ``artist_sort`` and ``artist_credit``: every name
      followed by its joinphrase, concatenated into a single string.
    - ``artists``, ``artists_sort`` and ``artists_credit``: the same names
      as parallel lists, one item per credit entry, without joinphrases.

    The canonical and sort names come from the preferred alias when
    ``_preferred_alias`` returns one, and from the artist itself
    otherwise; the credit name is always the entry's own ``name``.
    """
    names: list[str] = []
    sort_names: list[str] = []
    credited_names: list[str] = []
    joinphrases: list[str] = []

    for entry in artist_credits:
        artist = entry["artist"]
        source = _preferred_alias(artist.get("aliases", [])) or artist

        joinphrases.append(entry["joinphrase"])
        names.append(source["name"])
        sort_names.append(source["sort_name"])
        credited_names.append(entry["name"])

    def with_joinphrases(values: list[str]) -> str:
        # Interleave every value with the joinphrase that follows it.
        return "".join(
            text for pair in zip(values, joinphrases) for text in pair
        )

    return {
        "artist": with_joinphrases(names),
        "artist_sort": with_joinphrases(sort_names),
        "artist_credit": with_joinphrases(credited_names),
        "artists": names,
        "artists_sort": sort_names,
        "artists_credit": credited_names,
    }
|
||||
|
||||
def track_info(
|
||||
self,
|
||||
recording: Recording,
|
||||
|
|
@ -393,21 +388,7 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
|
|||
isrc=(
|
||||
";".join(isrcs) if (isrcs := recording.get("isrcs")) else None
|
||||
),
|
||||
)
|
||||
|
||||
# Get the artist names.
|
||||
(
|
||||
info.artist,
|
||||
info.artist_sort,
|
||||
info.artist_credit,
|
||||
) = _flatten_artist_credit(recording["artist_credit"])
|
||||
|
||||
(
|
||||
info.artists,
|
||||
info.artists_sort,
|
||||
info.artists_credit,
|
||||
) = _multi_artist_credit(
|
||||
recording["artist_credit"], include_join_phrase=False
|
||||
**self._parse_artist_credits(recording["artist_credit"]),
|
||||
)
|
||||
|
||||
info.artists_ids = _artist_ids(recording["artist_credit"])
|
||||
|
|
@ -459,19 +440,6 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
|
|||
"""Takes a MusicBrainz release result dictionary and returns a beets
|
||||
AlbumInfo object containing the interesting data about that release.
|
||||
"""
|
||||
# Get artist name using join phrases.
|
||||
artist_name, artist_sort_name, artist_credit_name = (
|
||||
_flatten_artist_credit(release["artist_credit"])
|
||||
)
|
||||
|
||||
(
|
||||
artists_names,
|
||||
artists_sort_names,
|
||||
artists_credit_names,
|
||||
) = _multi_artist_credit(
|
||||
release["artist_credit"], include_join_phrase=False
|
||||
)
|
||||
|
||||
ntracks = sum(len(m["tracks"]) for m in release["media"])
|
||||
|
||||
# The MusicBrainz API omits 'relations'
|
||||
|
|
@ -539,19 +507,8 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
|
|||
if track.get("title"):
|
||||
ti.title = track["title"]
|
||||
if track.get("artist_credit"):
|
||||
# Get the artist names.
|
||||
(
|
||||
ti.artist,
|
||||
ti.artist_sort,
|
||||
ti.artist_credit,
|
||||
) = _flatten_artist_credit(track["artist_credit"])
|
||||
|
||||
(
|
||||
ti.artists,
|
||||
ti.artists_sort,
|
||||
ti.artists_credit,
|
||||
) = _multi_artist_credit(
|
||||
track["artist_credit"], include_join_phrase=False
|
||||
ti.update(
|
||||
**self._parse_artist_credits(track["artist_credit"])
|
||||
)
|
||||
|
||||
ti.artists_ids = _artist_ids(track["artist_credit"])
|
||||
|
|
@ -563,18 +520,13 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin):
|
|||
|
||||
album_artist_ids = _artist_ids(release["artist_credit"])
|
||||
info = beets.autotag.hooks.AlbumInfo(
|
||||
**self._parse_artist_credits(release["artist_credit"]),
|
||||
album=release["title"],
|
||||
album_id=release["id"],
|
||||
artist=artist_name,
|
||||
artist_id=album_artist_ids[0],
|
||||
artists=artists_names,
|
||||
artists_ids=album_artist_ids,
|
||||
tracks=track_infos,
|
||||
mediums=len(release["media"]),
|
||||
artist_sort=artist_sort_name,
|
||||
artists_sort=artists_sort_names,
|
||||
artist_credit=artist_credit_name,
|
||||
artists_credit=artists_credit_names,
|
||||
data_source=self.data_source,
|
||||
data_url=album_url(release["id"]),
|
||||
barcode=release.get("barcode"),
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ from beets import config
|
|||
from beets.library import Item
|
||||
from beets.test.helper import BeetsTestCase, PluginMixin
|
||||
from beetsplug import musicbrainz
|
||||
from beetsplug.musicbrainz import MusicBrainzPlugin
|
||||
|
||||
from .factories import musicbrainz as factories
|
||||
|
||||
|
|
@ -747,45 +748,47 @@ class MBAlbumInfoTest(MusicBrainzTestCase):
|
|||
|
||||
|
||||
class ArtistTest(unittest.TestCase):
|
||||
def _credit_dict(self, suffix=""):
|
||||
def _credit_dict(self, suffix="", joinphrase="") -> mb.ArtistCredit:
|
||||
return {
|
||||
"artist": {
|
||||
"name": f"NAME{suffix}",
|
||||
"id": f"ID{suffix}",
|
||||
"sort_name": f"SORT{suffix}",
|
||||
"country": None,
|
||||
"disambiguation": "",
|
||||
"type": "Person",
|
||||
"type_id": "b6e035f4-3ce9-331c-97df-83397230b0df",
|
||||
},
|
||||
"name": f"CREDIT{suffix}",
|
||||
"joinphrase": joinphrase,
|
||||
}
|
||||
|
||||
def test_single_artist(self):
    """A lone credit yields plain names as strings and as 1-item lists."""
    credit_list = [self._credit_dict()]

    flattened = musicbrainz._flatten_artist_credit(credit_list)
    assert tuple(flattened) == ("NAME", "SORT", "CREDIT")

    per_artist = musicbrainz._multi_artist_credit(
        credit_list, include_join_phrase=False
    )
    assert tuple(per_artist) == (["NAME"], ["SORT"], ["CREDIT"])

    expected = {
        "artist": "NAME",
        "artist_sort": "SORT",
        "artist_credit": "CREDIT",
        "artists": ["NAME"],
        "artists_sort": ["SORT"],
        "artists_credit": ["CREDIT"],
    }
    assert MusicBrainzPlugin._parse_artist_credits(credit_list) == expected
|
||||
|
||||
def test_two_artists(self):
    """Two credits are joined by the first entry's joinphrase.

    The credit list holds exactly two entries: artist ``a`` followed by
    the joinphrase ``" AND "``, then artist ``b`` with an empty one. The
    joined strings therefore read ``"<a> AND <b>"`` while the per-artist
    lists contain only the two names.
    """
    credit = [
        self._credit_dict("a", " AND "),
        self._credit_dict("b"),
    ]

    a, s, c = musicbrainz._flatten_artist_credit(credit)
    assert a == "NAMEa AND NAMEb"
    assert s == "SORTa AND SORTb"
    assert c == "CREDITa AND CREDITb"

    a, s, c = musicbrainz._multi_artist_credit(
        credit, include_join_phrase=False
    )
    assert a == ["NAMEa", "NAMEb"]
    assert s == ["SORTa", "SORTb"]
    assert c == ["CREDITa", "CREDITb"]

    assert MusicBrainzPlugin._parse_artist_credits(credit) == {
        "artist": "NAMEa AND NAMEb",
        "artist_sort": "SORTa AND SORTb",
        "artist_credit": "CREDITa AND CREDITb",
        "artists": ["NAMEa", "NAMEb"],
        "artists_sort": ["SORTa", "SORTb"],
        "artists_credit": ["CREDITa", "CREDITb"],
    }
|
||||
|
||||
def test_preferred_alias(self):
|
||||
aliases = [
|
||||
|
|
|
|||
Loading…
Reference in a new issue