This commit is contained in:
Alexis Sardá 2025-12-04 21:53:36 +00:00 committed by GitHub
commit 90d788a372
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 327 additions and 52 deletions

View file

@ -119,7 +119,7 @@ def match_by_id(items: Iterable[Item]) -> AlbumInfo | None:
return None
# If all album IDs are equal, look up the album.
log.debug("Searching for discovered album ID: {}", first)
return metadata_plugins.album_for_id(first)
return metadata_plugins.album_for_id(first, items)
def _recommendation(
@ -272,7 +272,7 @@ def tag_album(
if search_ids:
for search_id in search_ids:
log.debug("Searching for album ID: {}", search_id)
if info := metadata_plugins.album_for_id(search_id):
if info := metadata_plugins.album_for_id(search_id, items):
_add_candidate(items, candidates, info)
if opt_candidate := candidates.get(info.album_id):
plugins.send("album_matched", match=opt_candidate)

View file

@ -24,7 +24,8 @@ from .plugins import BeetsPlugin, find_plugins, notify_info_yielded, send
if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
from .autotag.hooks import AlbumInfo, Item, TrackInfo
from .autotag.hooks import AlbumInfo, TrackInfo
from .library.models import Item
@cache
@ -35,10 +36,11 @@ def find_metadata_source_plugins() -> list[MetadataSourcePlugin]:
@notify_info_yielded("albuminfo_received")
def candidates(*args, **kwargs) -> Iterable[AlbumInfo]:
def candidates(items, *args, **kwargs) -> Iterable[AlbumInfo]:
"""Return matching album candidates from all metadata source plugins."""
for plugin in find_metadata_source_plugins():
yield from plugin.candidates(*args, **kwargs)
for info in plugin.candidates(items, *args, **kwargs):
yield plugin.before_album_info_emitted(items, info)
@notify_info_yielded("trackinfo_received")
@ -48,13 +50,17 @@ def item_candidates(*args, **kwargs) -> Iterable[TrackInfo]:
yield from plugin.item_candidates(*args, **kwargs)
def album_for_id(_id: str) -> AlbumInfo | None:
def album_for_id(
_id: str,
items: Iterable[Item],
) -> AlbumInfo | None:
"""Get AlbumInfo object for the given ID string.
A single ID can yield just a single album, so we return the first match.
"""
for plugin in find_metadata_source_plugins():
if info := plugin.album_for_id(album_id=_id):
info = plugin.before_album_info_emitted(items, info)
send("albuminfo_received", info=info)
return info
@ -127,6 +133,18 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
found."""
raise NotImplementedError
def before_album_info_emitted(
    self, items: Iterable[Item], album_info: AlbumInfo
) -> AlbumInfo:
    """Hook invoked once an :py:class:`AlbumInfo` object has been found for a
    set of :py:class:`Item` objects, but before the ``albuminfo_received``
    :py:type:`plugins.EventType` has been sent.

    The instance returned from this method is used as the payload of that
    event. This default implementation returns ``album_info`` unchanged.
    """
    return album_info
@abc.abstractmethod
def track_for_id(self, track_id: str) -> TrackInfo | None:
"""Return a :py:class:`TrackInfo` object or None if no matching release was

View file

@ -59,6 +59,7 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
{
"scripts": [],
"custom_tags_only": False,
"multiple_allowed": False,
"album_custom_tags": {
"album_transl": "album",
"album_artist_transl": "artist",
@ -112,6 +113,28 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
" the mbpseudo plugin"
)
@override
def before_album_info_emitted(
    self,
    items: Iterable[Item],
    album_info: AlbumInfo,
) -> AlbumInfo:
    """Prepare ``items`` for matching against a pseudo-release candidate.

    For a :py:class:`PseudoAlbumInfo`, the ``mb_albumid`` and ``mb_trackid``
    fields are deleted from every item that has them, and the release used as
    reference for distance calculations is determined and logged. Any other
    :py:class:`AlbumInfo` is returned without touching ``items``.

    :param items: Items the album info was found for; modified in place.
    :param album_info: Candidate that is about to be emitted.
    :return: The same ``album_info`` instance.
    """
    if not isinstance(album_info, PseudoAlbumInfo):
        return album_info

    # ``items`` is only promised to be an iterable: materialize it once so
    # that a one-shot iterator is not already exhausted by the clean-up loop
    # when it is handed to ``determine_best_ref`` afterwards.
    item_list = list(items)

    for item in item_list:
        # particularly relevant for reimport, but could also happen during
        # a regular import
        if "mb_albumid" in item:
            del item["mb_albumid"]
        if "mb_trackid" in item:
            del item["mb_trackid"]

    self._log.debug(
        "Using {0} release for distance calculations for album {1}",
        album_info.determine_best_ref(item_list),
        album_info.album_id,
    )

    return album_info
@override
def candidates(
self,
@ -126,50 +149,79 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
for album_info in super().candidates(
items, artist, album, va_likely
):
if isinstance(album_info, PseudoAlbumInfo):
self._log.debug(
"Using {0} release for distance calculations for album {1}",
album_info.determine_best_ref(items),
album_info.album_id,
)
yield album_info # first yield pseudo to give it priority
# always yield pseudo first to give it priority
if isinstance(album_info, MultiPseudoAlbumInfo):
yield from album_info.unwrap()
yield album_info
elif isinstance(album_info, PseudoAlbumInfo):
yield album_info
yield album_info.get_official_release()
else:
yield album_info
def _get_raw_pseudo_release(self, pseudo_album_id: str) -> JSONDict:
    """Fetch the raw data of a single pseudo-release.

    :param pseudo_album_id: ID of the pseudo-release to look up.
    :return: The value stored under the ``"release"`` key of the response
        produced by the release getter.
    :raises MusicBrainzAPIError: If the lookup raises a
        ``musicbrainzngs.MusicBrainzError``.
    """
    try:
        response = self._release_getter(pseudo_album_id, RELEASE_INCLUDES)
        return response["release"]
    except musicbrainzngs.MusicBrainzError as exc:
        # Chain explicitly so the library failure is recorded as the direct
        # cause of the error raised here.
        raise MusicBrainzAPIError(
            exc,
            "get pseudo-release by ID",
            pseudo_album_id,
            traceback.format_exc(),
        ) from exc
@override
def album_info(self, release: JSONDict) -> AlbumInfo:
official_release = super().album_info(release)
if release.get("status") == _STATUS_PSEUDO:
return official_release
elif pseudo_release_ids := self._intercept_mb_release(release):
album_id = self._extract_id(pseudo_release_ids[0])
try:
raw_pseudo_release = self._release_getter(
album_id, RELEASE_INCLUDES
)["release"]
pseudo_release = super().album_info(raw_pseudo_release)
if self.config["custom_tags_only"].get(bool):
self._replace_artist_with_alias(
raw_pseudo_release, pseudo_release
)
self._add_custom_tags(official_release, pseudo_release)
return official_release
else:
# already pseudo-release, but wrap in our class for the other checks
return PseudoAlbumInfo(
pseudo_release=_merge_pseudo_and_actual_album(
pseudo_release, official_release
),
pseudo_release=official_release,
official_release=official_release,
)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(
exc,
"get pseudo-release by ID",
album_id,
traceback.format_exc(),
elif pseudo_release_ids := self._intercept_mb_release(release):
custom_tags_only = self.config["custom_tags_only"].get(bool)
languages = list(config["import"]["languages"].as_str_seq())
if len(pseudo_release_ids) == 1 or len(languages) == 0:
album_info = self._get_raw_pseudo_release(pseudo_release_ids[0])
return self._resolve_pseudo_album_info(
official_release, custom_tags_only, languages, album_info
)
pseudo_releases = [
self._get_raw_pseudo_release(i) for i in pseudo_release_ids
]
# sort according to the desired languages specified in the config
def sort_fun(rel: JSONDict) -> int:
lang = rel.get("text-representation", {}).get("language", "")
# noinspection PyBroadException
try:
return languages.index(lang[0:2])
except Exception:
return len(languages)
pseudo_releases.sort(key=sort_fun)
multiple_allowed = self.config["multiple_allowed"].get(bool)
if custom_tags_only or not multiple_allowed:
return self._resolve_pseudo_album_info(
official_release,
custom_tags_only,
languages,
pseudo_releases[0],
)
pseudo_album_infos = [
self._resolve_pseudo_album_info(
official_release, custom_tags_only, languages, i
)
for i in pseudo_releases
]
return MultiPseudoAlbumInfo(
*pseudo_album_infos, official_release=official_release
)
else:
return official_release
@ -179,13 +231,15 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
if self._has_desired_script(data) or not isinstance(album_id, str):
return []
return [
pr_id
ans = [
self._extract_id(pr_id)
for rel in data.get("release-relation-list", [])
if (pr_id := self._wanted_pseudo_release_id(album_id, rel))
is not None
]
return list(filter(None, ans))
def _has_desired_script(self, release: JSONDict) -> bool:
if len(self._scripts) == 0:
return False
@ -218,15 +272,38 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
else:
return None
def _resolve_pseudo_album_info(
    self,
    official_release: AlbumInfo,
    custom_tags_only: bool,
    languages: list[str],
    raw_pseudo_release: JSONDict,
) -> AlbumInfo:
    """Build the album info to emit for one raw pseudo-release.

    The raw pseudo-release is first parsed with the parent class'
    ``album_info``. When ``custom_tags_only`` is set, ``official_release``
    is passed through ``_replace_artist_with_alias`` / ``_add_custom_tags``
    together with the parsed pseudo-release and is itself returned.
    Otherwise a :py:class:`PseudoAlbumInfo` wrapping the merged data and the
    official release is returned.
    """
    parsed_pseudo = super().album_info(raw_pseudo_release)

    if not custom_tags_only:
        merged = _merge_pseudo_and_actual_album(
            parsed_pseudo, official_release
        )
        return PseudoAlbumInfo(
            pseudo_release=merged,
            official_release=official_release,
        )

    self._replace_artist_with_alias(
        languages, raw_pseudo_release, parsed_pseudo
    )
    self._add_custom_tags(official_release, parsed_pseudo)
    return official_release
def _replace_artist_with_alias(
self,
languages: list[str],
raw_pseudo_release: JSONDict,
pseudo_release: AlbumInfo,
):
"""Use the pseudo-release's language to search for artist
alias if the user hasn't configured import languages."""
if len(config["import"]["languages"].as_str_seq()) > 0:
if languages:
return
lang = raw_pseudo_release.get("text-representation", {}).get("language")
@ -314,7 +391,7 @@ class PseudoAlbumInfo(AlbumInfo):
**kwargs,
):
super().__init__(pseudo_release.tracks, **kwargs)
self.__dict__["_pseudo_source"] = True
self.__dict__["_pseudo_source"] = False
self.__dict__["_official_release"] = official_release
for k, v in pseudo_release.items():
if k not in kwargs:
@ -364,3 +441,35 @@ class PseudoAlbumInfo(AlbumInfo):
result[k] = deepcopy(v, memo)
return result
class MultiPseudoAlbumInfo(AlbumInfo):
    """Album info for a release that has multiple pseudo-releases.

    The instance is populated with the items of the official release; the
    :py:class:`PseudoAlbumInfo` objects passed positionally are kept aside
    and can be retrieved with :py:meth:`unwrap`.
    """

    def __init__(
        self,
        *args,
        official_release: AlbumInfo,
        **kwargs,
    ):
        super().__init__(official_release.tracks, **kwargs)

        # Positional arguments that are not PseudoAlbumInfo are dropped. The
        # list is stored directly in the instance ``__dict__``.
        wrapped = [
            candidate
            for candidate in args
            if isinstance(candidate, PseudoAlbumInfo)
        ]
        self.__dict__["_pseudo_album_infos"] = wrapped

        # Keys present in ``kwargs`` are not copied from the official release.
        for field, value in official_release.items():
            if field in kwargs:
                continue
            self[field] = value

    def unwrap(self) -> list[PseudoAlbumInfo]:
        """Return the stored :py:class:`PseudoAlbumInfo` objects."""
        wrapped = self.__dict__["_pseudo_album_infos"]
        return wrapped

    def __deepcopy__(self, memo):
        """Copy the instance ``__dict__`` as-is and deep-copy every item."""
        clone = self.__class__.__new__(self.__class__)
        memo[id(self)] = clone
        clone.__dict__.update(self.__dict__)
        for field, value in self.items():
            clone[field] = deepcopy(value, memo)
        return clone

View file

@ -102,7 +102,9 @@ class MBSyncPlugin(BeetsPlugin):
continue
if not (
album_info := metadata_plugins.album_for_id(album.mb_albumid)
album_info := metadata_plugins.album_for_id(
album.mb_albumid, album.items()
)
):
self._log.info(
"Release ID {0.mb_albumid} not found for album {0}", album

View file

@ -222,7 +222,9 @@ class MissingPlugin(BeetsPlugin):
item_mbids = {x.mb_trackid for x in album.items()}
# fetch missing items
# TODO: Implement caching that without breaking other stuff
if album_info := metadata_plugins.album_for_id(album.mb_albumid):
if album_info := metadata_plugins.album_for_id(
album.mb_albumid, album.items()
):
for track_info in album_info.tracks:
if track_info.track_id not in item_mbids:
self._log.debug(

View file

@ -61,6 +61,9 @@ Bug fixes:
For plugin developers:
- Metadata plugins can now implement a ``before_album_info_emitted`` method to
modify ``AlbumInfo`` objects before they are emitted as part of the
``albuminfo_received`` event.
- A new plugin event, ``album_matched``, is sent when an album that is being
imported has been matched to its metadata and the corresponding distance has
been calculated.

View file

@ -39,6 +39,23 @@ Therefore, the minimum configuration for this plugin looks like this:
scripts:
- Latn
A release may have multiple pseudo-releases, for example when there is both a
transliteration and a translation available. By default, only one
pseudo-release per official release is emitted as a candidate, using the
languages from the configuration to decide which one has the highest priority.
If you're importing in timid mode and you would like to receive all valid
pseudo-releases as additional candidates, you can add the following to the
configuration:
.. code-block:: yaml
mbpseudo:
multiple_allowed: yes
.. note::
Reimporting in particular might not give you a pseudo-release proposal if
multiple candidates exist and are allowed.
Note that the `search_limit` configuration applies to the initial search for
official releases, and that the `data_source` in the database will be
"MusicBrainz". Nevertheless, `data_source_mismatch_penalty` must also be
@ -61,6 +78,9 @@ sources may look like this:
deezer:
data_source_mismatch_penalty: 0.2
Custom Tags Only
----------------
By default, the data from the pseudo-release will be used to create a proposal
that is independent from the official release and sets all properties in its
metadata. It's possible to change the configuration so that some information

View file

@ -1,5 +1,6 @@
import json
import pathlib
from copy import deepcopy
import pytest
@ -12,6 +13,7 @@ from beets.test.helper import PluginMixin
from beetsplug._typing import JSONDict
from beetsplug.mbpseudo import (
_STATUS_PSEUDO,
MultiPseudoAlbumInfo,
MusicBrainzPseudoReleasePlugin,
PseudoAlbumInfo,
)
@ -47,6 +49,7 @@ class TestPseudoAlbumInfo:
self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
):
info = PseudoAlbumInfo(pseudo_release_info, official_release_info)
info.use_pseudo_as_ref()
assert info.album == "In Bloom"
def test_get_attr_from_official(
@ -124,13 +127,44 @@ class TestMBPseudoPlugin(PluginMixin):
else:
assert mbpseudo_plugin._extract_id(album_id) == album_id
def test_reimport_logic(
    self,
    mbpseudo_plugin: MusicBrainzPseudoReleasePlugin,
    official_release_info: AlbumInfo,
    pseudo_release_info: AlbumInfo,
):
    """Exercise ``before_album_info_emitted`` with and without ``mb_*``
    fields set on the item."""
    info = PseudoAlbumInfo(pseudo_release_info, official_release_info)
    mb_fields = {"mb_albumid": "mb_aid", "mb_trackid": "mb_tid"}

    track = Item()
    track["title"] = "百花繚乱"

    # if items don't have mb_*, they are not modified
    mbpseudo_plugin.before_album_info_emitted([track], info)
    assert info.album == track.title

    info.use_pseudo_as_ref()
    assert info.album == "In Bloom"

    for field, value in mb_fields.items():
        track[field] = value
    for field, value in mb_fields.items():
        assert track.get(field) == value

    # if items have mb_*, they are deleted
    mbpseudo_plugin.before_album_info_emitted([track], info)
    assert info.album == track.title
    for field in mb_fields:
        assert track.get(field) == ""
def test_album_info_for_pseudo_release(
self,
mbpseudo_plugin: MusicBrainzPseudoReleasePlugin,
pseudo_release: JSONDict,
):
album_info = mbpseudo_plugin.album_info(pseudo_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo)
assert isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainzPseudoRelease"
assert album_info.albumstatus == _STATUS_PSEUDO
@ -148,7 +182,8 @@ class TestMBPseudoPlugin(PluginMixin):
official_release: JSONDict,
json_key: str,
):
del official_release["release"]["release-relation-list"][0][json_key]
for r in official_release["release"]["release-relation-list"]:
del r[json_key]
album_info = mbpseudo_plugin.album_info(official_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo)
@ -159,9 +194,8 @@ class TestMBPseudoPlugin(PluginMixin):
mbpseudo_plugin: MusicBrainzPseudoReleasePlugin,
official_release: JSONDict,
):
official_release["release"]["release-relation-list"][0]["release"][
"text-representation"
]["script"] = "Null"
for r in official_release["release"]["release-relation-list"]:
r["release"]["text-representation"]["script"] = "Null"
album_info = mbpseudo_plugin.album_info(official_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo)
@ -226,6 +260,64 @@ class TestMBPseudoPlugin(PluginMixin):
assert match.info.album == "In Bloom"
class TestMBPseudoPluginMultipleAllowed(PluginMixin):
    """Tests for the plugin configured with ``multiple_allowed`` enabled."""

    plugin = "mbpseudo"

    @pytest.fixture(scope="class")
    def plugin_config(self):
        return {"scripts": ["Latn", "Dummy"], "multiple_allowed": True}

    @pytest.fixture(scope="class")
    def mbpseudo_plugin(self, plugin_config) -> MusicBrainzPseudoReleasePlugin:
        self.config[self.plugin].set(plugin_config)
        config["import"]["languages"] = ["jp", "en"]
        return MusicBrainzPseudoReleasePlugin()

    @pytest.fixture(scope="class")
    def official_release(self, rsrc_dir: pathlib.Path) -> JSONDict:
        path = rsrc_dir / "official_release.json"
        return json.loads(path.read_text(encoding="utf-8"))

    @pytest.fixture(scope="class")
    def pseudo_release(self, rsrc_dir: pathlib.Path) -> JSONDict:
        path = rsrc_dir / "pseudo_release.json"
        return json.loads(path.read_text(encoding="utf-8"))

    def test_multiple_releases(
        self,
        mbpseudo_plugin: MusicBrainzPseudoReleasePlugin,
        official_release: JSONDict,
        pseudo_release: JSONDict,
    ):
        real_id = "dc3ee2df-0bc1-49eb-b8c4-34473d279a43"
        mock_id = "dc3ee2df-mock-49eb-b8c4-34473d279a43"

        def mock_release_getter(album_id: str, _) -> JSONDict:
            # The real fixture is served for its own ID; any other ID gets a
            # copy of it with the requested ID and the language set to "jpn".
            if album_id == real_id:
                return pseudo_release

            clone = deepcopy(pseudo_release)
            clone["release"]["id"] = album_id
            clone["release"]["text-representation"]["language"] = "jpn"
            return clone

        mbpseudo_plugin._release_getter = mock_release_getter

        album_info = mbpseudo_plugin.album_info(official_release["release"])
        assert isinstance(album_info, MultiPseudoAlbumInfo)
        assert album_info.data_source == "MusicBrainzPseudoRelease"

        pseudo_infos = album_info.unwrap()
        assert len(pseudo_infos) == 2
        assert pseudo_infos[0].album_id == mock_id
        assert pseudo_infos[1].album_id == real_id
class TestMBPseudoPluginCustomTagsOnly(PluginMixin):
plugin = "mbpseudo"

View file

@ -754,6 +754,35 @@
"medium-count": 0,
"artist-credit-phrase": "Lilas Ikuta"
}
},
{
"type": "transl-tracklisting",
"type-id": "fc399d47-23a7-4c28-bfcf-0607a562b644",
"target": "dc3ee2df-mock-49eb-b8c4-34473d279a43",
"direction": "forward",
"release": {
"id": "dc3ee2df-mock-49eb-b8c4-34473d279a43",
"title": "Mock Title",
"quality": "normal",
"text-representation": {
"language": "jpn",
"script": "Latn"
},
"artist-credit": [
{
"name": "Lilas Ikuta",
"artist": {
"id": "55e42264-ef27-49d8-93fd-29f930dc96e4",
"name": "幾田りら",
"sort-name": "Ikuta, Lilas",
"country": "JP"
}
}
],
"medium-list": [],
"medium-count": 0,
"artist-credit-phrase": "Lilas Ikuta"
}
}
],
"url-relation-list": [

View file

@ -1517,7 +1517,7 @@ class ImportPretendTest(IOMixin, AutotagImportTestCase):
assert self.__run(importer) == [f"No files imported from {empty_path}"]
def mocked_get_album_by_id(id_):
def mocked_get_album_by_id(id_, _):
"""Return album candidate for the given id.
The two albums differ only in the release title and artist name, so that