Fix musicbrainz extra tags dict (#5789)

Fixes #5788

This PR fixes a regression that occurred when the MusicBrainz autotagger
was moved from core functionality into a standalone plugin. During this
transition, the handling of user-configured `extra_tags` was broken,
causing `beets` to die with `AttributeError` when this option was set.

Key changes in `musicbrainz` plugin

- Properly process values from `extra_tags` using the same logic as
before the migration.
- Centralize common bits from `candidates` and `item_candidates`
implementations under `_search_api` to move this class closer towards
`MetadataSourcePlugin` definition.
This commit is contained in:
Šarūnas Nejus 2025-05-19 09:23:23 +01:00 committed by GitHub
commit 3a663ad52e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 192 additions and 182 deletions

View file

@ -239,12 +239,7 @@ class BeetsPlugin:
return Distance()
def candidates(
self,
items: list[Item],
artist: str,
album: str,
va_likely: bool,
extra_tags: dict[str, Any] | None = None,
self, items: list[Item], artist: str, album: str, va_likely: bool
) -> Iterator[AlbumInfo]:
"""Return :py:class:`AlbumInfo` candidates that match the given album.
@ -252,9 +247,6 @@ class BeetsPlugin:
:param artist: Album artist
:param album: Album name
:param va_likely: Whether the album is likely to be by various artists
:param extra_tags: is a an optional dictionary of extra tags to search.
Only relevant to :py:class:`MusicBrainzPlugin` autotagger and can be
ignored by other plugins
"""
yield from ()
@ -872,12 +864,7 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta):
return extract_release_id(self.data_source.lower(), id_string)
def candidates(
self,
items: list[Item],
artist: str,
album: str,
va_likely: bool,
extra_tags: dict[str, Any] | None = None,
self, items: list[Item], artist: str, album: str, va_likely: bool
) -> Iterator[AlbumInfo]:
query_filters = {"album": album}
if not va_likely:

View file

@ -806,7 +806,7 @@ class AutotagStub:
for p in self.patchers:
p.stop()
def candidates(self, items, artist, album, va_likely, extra_tags=None):
def candidates(self, items, artist, album, va_likely):
if self.matching == self.IDENT:
yield self._make_album_match(artist, album, len(items))

View file

@ -803,7 +803,7 @@ def as_string(value: Any) -> str:
return str(value)
def plurality(objs: Sequence[T]) -> tuple[T, int]:
def plurality(objs: Iterable[T]) -> tuple[T, int]:
"""Given a sequence of hashble objects, returns the object that
is most common in the set and the its number of appearance. The
sequence must contain at least one object.

View file

@ -361,7 +361,7 @@ class BeatportPlugin(BeetsPlugin):
data_source=self.data_source, info=track_info, config=self.config
)
def candidates(self, items, artist, release, va_likely, extra_tags=None):
def candidates(self, items, artist, release, va_likely):
"""Returns a list of AlbumInfo objects for beatport search results
matching release and artist (if not various).
"""

View file

@ -200,7 +200,7 @@ class AcoustidPlugin(plugins.BeetsPlugin):
dist.add_expr("track_id", info.track_id not in recording_ids)
return dist
def candidates(self, items, artist, album, va_likely, extra_tags=None):
def candidates(self, items, artist, album, va_likely):
albums = []
for relid in prefix(_all_releases(items), MAX_RELEASES):
album = self.mb.album_for_id(relid)

View file

@ -156,7 +156,7 @@ class DiscogsPlugin(BeetsPlugin):
data_source="Discogs", info=track_info, config=self.config
)
def candidates(self, items, artist, album, va_likely, extra_tags=None):
def candidates(self, items, artist, album, va_likely):
"""Returns a list of AlbumInfo objects for discogs search results
matching an album and artist (if not various).
"""

View file

@ -18,6 +18,7 @@ from __future__ import annotations
import traceback
from collections import Counter
from functools import cached_property
from itertools import product
from typing import TYPE_CHECKING, Any
from urllib.parse import urljoin
@ -32,6 +33,7 @@ from beets.util.id_extractors import extract_release_id
if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
from typing import Literal
from beets.library import Item
@ -44,10 +46,10 @@ BASE_URL = "https://musicbrainz.org/"
SKIPPED_TRACKS = ["[data track]"]
FIELDS_TO_MB_KEYS = {
"barcode": "barcode",
"catalognum": "catno",
"country": "country",
"label": "label",
"barcode": "barcode",
"media": "format",
"year": "date",
}
@ -383,7 +385,7 @@ class MusicBrainzPlugin(BeetsPlugin):
"deezer": False,
"tidal": False,
},
"extra_tags": {},
"extra_tags": [],
},
)
hostname = self.config["host"].as_str()
@ -747,6 +749,67 @@ class MusicBrainzPlugin(BeetsPlugin):
return info
@cached_property
def extra_mb_field_by_tag(self) -> dict[str, str]:
"""Map configured extra tags to their MusicBrainz API field names.
Process user configuration to determine which additional MusicBrainz
fields should be included in search queries.
"""
mb_field_by_tag = {
t: FIELDS_TO_MB_KEYS[t]
for t in self.config["extra_tags"].as_str_seq()
if t in FIELDS_TO_MB_KEYS
}
if mb_field_by_tag:
self._log.debug("Additional search terms: {}", mb_field_by_tag)
return mb_field_by_tag
def get_album_criteria(
self, items: list[Item], artist: str, album: str, va_likely: bool
) -> dict[str, str]:
criteria = {
"release": album,
"tracks": str(len(items)),
} | ({"arid": VARIOUS_ARTISTS_ID} if va_likely else {"artist": artist})
for tag, mb_field in self.extra_mb_field_by_tag.items():
most_common, _ = util.plurality(i.get(tag) for i in items)
value = str(most_common)
if tag == "catalognum":
value = value.replace(" ", "")
criteria[mb_field] = value
return criteria
def _search_api(
self,
query_type: Literal["recording", "release"],
filters: dict[str, str],
) -> list[JSONDict]:
"""Perform MusicBrainz API search and return results.
Execute a search against the MusicBrainz API for recordings or releases
using the provided criteria. Handles API errors by converting them into
MusicBrainzAPIError exceptions with contextual information.
"""
filters = {
k: _v for k, v in filters.items() if (_v := v.lower().strip())
}
self._log.debug(
"Searching for MusicBrainz {}s with: {!r}", query_type, filters
)
try:
method = getattr(musicbrainzngs, f"search_{query_type}s")
res = method(limit=self.config["searchlimit"].get(int), **filters)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(
exc, f"{query_type} search", filters, traceback.format_exc()
)
return res[f"{query_type}-list"]
def candidates(
self,
items: list[Item],
@ -755,80 +818,19 @@ class MusicBrainzPlugin(BeetsPlugin):
va_likely: bool,
extra_tags: dict[str, Any] | None = None,
) -> Iterator[beets.autotag.hooks.AlbumInfo]:
"""Searches for a single album ("release" in MusicBrainz parlance)
and returns an iterator over AlbumInfo objects. May raise a
MusicBrainzAPIError.
criteria = self.get_album_criteria(items, artist, album, va_likely)
release_ids = (r["id"] for r in self._search_api("release", criteria))
The query consists of an artist name, an album name, and,
optionally, a number of tracks on the album and any other extra tags.
"""
# Build search criteria.
criteria = {"release": album.lower().strip()}
if artist is not None:
criteria["artist"] = artist.lower().strip()
else:
# Various Artists search.
criteria["arid"] = VARIOUS_ARTISTS_ID
if track_count := len(items):
criteria["tracks"] = str(track_count)
if self.config["extra_tags"]:
tag_list = self.config["extra_tags"].get()
self._log.debug("Additional search terms: {0}", tag_list)
for tag, value in tag_list.items():
if key := FIELDS_TO_MB_KEYS.get(tag):
value = str(value).lower().strip()
if key == "catno":
value = value.replace(" ", "")
if value:
criteria[key] = value
# Abort if we have no search terms.
if not any(criteria.values()):
return
try:
self._log.debug(
"Searching for MusicBrainz releases with: {!r}", criteria
)
res = musicbrainzngs.search_releases(
limit=self.config["searchlimit"].get(int), **criteria
)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(
exc, "release search", criteria, traceback.format_exc()
)
for release in res["release-list"]:
# The search result is missing some data (namely, the tracks),
# so we just use the ID and fetch the rest of the information.
albuminfo = self.album_for_id(release["id"])
if albuminfo is not None:
yield albuminfo
yield from filter(None, map(self.album_for_id, release_ids))
def item_candidates(
self, item: Item, artist: str, title: str
) -> Iterator[beets.autotag.hooks.TrackInfo]:
"""Searches for a single track and returns an iterable of TrackInfo
objects. May raise a MusicBrainzAPIError.
"""
criteria = {
"artist": artist.lower().strip(),
"recording": title.lower().strip(),
}
criteria = {"artist": artist, "recording": title}
if not any(criteria.values()):
return
try:
res = musicbrainzngs.search_recordings(
limit=self.config["searchlimit"].get(int), **criteria
)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(
exc, "recording search", criteria, traceback.format_exc()
)
for recording in res["recording-list"]:
yield self.track_info(recording)
yield from filter(
None, map(self.track_info, self._search_api("recording", criteria))
)
def album_for_id(
self, album_id: str

View file

@ -8,13 +8,28 @@ Unreleased
New features:
* :doc:`plugins/musicbrainz`: The MusicBrainz autotagger has been moved to
a separate plugin. The default :ref:`plugins-config` includes `musicbrainz`,
but if you've customized your `plugins` list in your configuration, you'll
need to explicitly add `musicbrainz` to continue using this functionality.
Configuration option `musicbrainz.enabled` has thus been deprecated.
:bug:`2686`
:bug:`4605`
* :doc:`plugins/web`: Show notifications when a track plays. This uses the
Media Session API to customize media notifications.
Bug fixes:
* :doc:`plugins/musicbrainz`: fix regression where user configured
``extra_tags`` have been read incorrectly.
:bug:`5788`
For packagers:
* Optional ``extra_tags`` parameter has been removed from
``BeetsPlugin.candidates`` method signature since it is never passed in. If
you override this method in your plugin, feel free to remove this parameter.
Other changes:
2.3.1 (May 14, 2025)
@ -39,13 +54,6 @@ been dropped.
New features:
* :doc:`plugins/musicbrainz`: The MusicBrainz autotagger has been moved to
a separate plugin. The default :ref:`plugins-config` includes `musicbrainz`,
but if you've customized your `plugins` list in your configuration, you'll
need to explicitly add `musicbrainz` to continue using this functionality.
Configuration option `musicbrainz.enabled` has thus been deprecated.
:bug:`2686`
:bug:`4605`
* :doc:`plugins/lastgenre`: The new configuration option, ``keep_existing``,
provides more fine-grained control over how pre-populated genre tags are
handled. The ``force`` option now behaves in a more conventional manner.

View file

@ -102,7 +102,7 @@ MusicBrainz. Additional tags to be queried can be supplied with the
.. code-block:: yaml
musicbrainz:
extra_tags: [year, catalognum, country, media, label]
extra_tags: [barcode, catalognum, country, label, media, year]
This setting should improve the autotagger results if the metadata with the
given tags match the metadata returned by MusicBrainz.

View file

@ -16,8 +16,11 @@
from unittest import mock
import pytest
from beets import config
from beets.test.helper import BeetsTestCase
from beets.library import Item
from beets.test.helper import BeetsTestCase, PluginMixin
from beetsplug import musicbrainz
@ -754,89 +757,6 @@ class ArtistFlatteningTest(BeetsTestCase):
class MBLibraryTest(MusicBrainzTestCase):
def test_match_track(self):
with mock.patch("musicbrainzngs.search_recordings") as p:
p.return_value = {
"recording-list": [
{
"title": "foo",
"id": "bar",
"length": 42,
}
],
}
ti = list(self.mb.item_candidates(None, "hello", "there"))[0]
p.assert_called_with(artist="hello", recording="there", limit=5)
assert ti.title == "foo"
assert ti.track_id == "bar"
def test_candidates(self):
mbid = "d2a6f856-b553-40a0-ac54-a321e8e2da99"
with mock.patch("musicbrainzngs.search_releases") as sp:
sp.return_value = {
"release-list": [
{
"id": mbid,
}
],
}
with mock.patch("musicbrainzngs.get_release_by_id") as gp:
gp.return_value = {
"release": {
"title": "hi",
"id": mbid,
"status": "status",
"medium-list": [
{
"track-list": [
{
"id": "baz",
"recording": {
"title": "foo",
"id": "bar",
"length": 42,
},
"position": 9,
"number": "A1",
}
],
"position": 5,
}
],
"artist-credit": [
{
"artist": {
"name": "some-artist",
"id": "some-id",
},
}
],
"release-group": {
"id": "another-id",
},
}
}
ai = list(self.mb.candidates([], "hello", "there", False))[0]
sp.assert_called_with(artist="hello", release="there", limit=5)
gp.assert_called_with(mbid, mock.ANY)
assert ai.tracks[0].title == "foo"
assert ai.album == "hi"
def test_match_track_empty(self):
with mock.patch("musicbrainzngs.search_recordings") as p:
til = list(self.mb.item_candidates(None, " ", " "))
assert not p.called
assert til == []
def test_candidates_empty(self):
with mock.patch("musicbrainzngs.search_releases") as p:
ail = list(self.mb.candidates([], " ", " ", False))
assert not p.called
assert ail == []
def test_follow_pseudo_releases(self):
side_effect = [
{
@ -1063,3 +983,96 @@ class MBLibraryTest(MusicBrainzTestCase):
gp.side_effect = side_effect
album = self.mb.album_for_id("d2a6f856-b553-40a0-ac54-a321e8e2da02")
assert album.country is None
class TestMusicBrainzPlugin(PluginMixin):
plugin = "musicbrainz"
mbid = "d2a6f856-b553-40a0-ac54-a321e8e2da99"
RECORDING = {"title": "foo", "id": "bar", "length": 42}
@pytest.fixture
def plugin_config(self):
return {}
@pytest.fixture
def mb(self, plugin_config):
self.config[self.plugin].set(plugin_config)
return musicbrainz.MusicBrainzPlugin()
@pytest.mark.parametrize(
"plugin_config,va_likely,expected_additional_criteria",
[
({}, False, {"artist": "Artist "}),
({}, True, {"arid": "89ad4ac3-39f7-470e-963a-56509c546377"}),
(
{"extra_tags": ["label", "catalognum"]},
False,
{"artist": "Artist ", "label": "abc", "catno": "ABC123"},
),
],
)
def test_get_album_criteria(
self, mb, va_likely, expected_additional_criteria
):
items = [
Item(catalognum="ABC 123", label="abc"),
Item(catalognum="ABC 123", label="abc"),
Item(catalognum="ABC 123", label="def"),
]
assert mb.get_album_criteria(items, "Artist ", " Album", va_likely) == {
"release": " Album",
"tracks": str(len(items)),
**expected_additional_criteria,
}
def test_item_candidates(self, monkeypatch, mb):
monkeypatch.setattr(
"musicbrainzngs.search_recordings",
lambda *_, **__: {"recording-list": [self.RECORDING]},
)
candidates = list(mb.item_candidates(Item(), "hello", "there"))
assert len(candidates) == 1
assert candidates[0].track_id == self.RECORDING["id"]
def test_candidates(self, monkeypatch, mb):
monkeypatch.setattr(
"musicbrainzngs.search_releases",
lambda *_, **__: {"release-list": [{"id": self.mbid}]},
)
monkeypatch.setattr(
"musicbrainzngs.get_release_by_id",
lambda *_, **__: {
"release": {
"title": "hi",
"id": self.mbid,
"status": "status",
"medium-list": [
{
"track-list": [
{
"id": "baz",
"recording": self.RECORDING,
"position": 9,
"number": "A1",
}
],
"position": 5,
}
],
"artist-credit": [
{"artist": {"name": "some-artist", "id": "some-id"}}
],
"release-group": {"id": "another-id"},
}
},
)
candidates = list(mb.candidates([], "hello", "there", False))
assert len(candidates) == 1
assert candidates[0].tracks[0].track_id == self.RECORDING["id"]
assert candidates[0].album == "hi"