diff --git a/beets/autotag/match.py b/beets/autotag/match.py index d0f3fd134..192f32696 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -119,7 +119,7 @@ def match_by_id(items: Iterable[Item]) -> AlbumInfo | None: return None # If all album IDs are equal, look up the album. log.debug("Searching for discovered album ID: {}", first) - return metadata_plugins.album_for_id(first) + return metadata_plugins.album_for_id(first, items) def _recommendation( @@ -272,7 +272,7 @@ def tag_album( if search_ids: for search_id in search_ids: log.debug("Searching for album ID: {}", search_id) - if info := metadata_plugins.album_for_id(search_id): + if info := metadata_plugins.album_for_id(search_id, items): _add_candidate(items, candidates, info) if opt_candidate := candidates.get(info.album_id): plugins.send("album_matched", match=opt_candidate) diff --git a/beets/metadata_plugins.py b/beets/metadata_plugins.py index f42e8f690..1871ed14f 100644 --- a/beets/metadata_plugins.py +++ b/beets/metadata_plugins.py @@ -24,7 +24,8 @@ from .plugins import BeetsPlugin, find_plugins, notify_info_yielded, send if TYPE_CHECKING: from collections.abc import Iterable, Sequence - from .autotag.hooks import AlbumInfo, Item, TrackInfo + from .autotag.hooks import AlbumInfo, TrackInfo + from .library.models import Item @cache @@ -35,10 +36,11 @@ def find_metadata_source_plugins() -> list[MetadataSourcePlugin]: @notify_info_yielded("albuminfo_received") -def candidates(*args, **kwargs) -> Iterable[AlbumInfo]: +def candidates(items, *args, **kwargs) -> Iterable[AlbumInfo]: """Return matching album candidates from all metadata source plugins.""" for plugin in find_metadata_source_plugins(): - yield from plugin.candidates(*args, **kwargs) + for info in plugin.candidates(items, *args, **kwargs): + yield plugin.before_album_info_emitted(items, info) @notify_info_yielded("trackinfo_received") @@ -48,13 +50,17 @@ def item_candidates(*args, **kwargs) -> Iterable[TrackInfo]: yield from 
plugin.item_candidates(*args, **kwargs) -def album_for_id(_id: str) -> AlbumInfo | None: +def album_for_id( + _id: str, + items: Iterable[Item], +) -> AlbumInfo | None: """Get AlbumInfo object for the given ID string. A single ID can yield just a single album, so we return the first match. """ for plugin in find_metadata_source_plugins(): if info := plugin.album_for_id(album_id=_id): + info = plugin.before_album_info_emitted(items, info) send("albuminfo_received", info=info) return info @@ -127,6 +133,18 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta): found.""" raise NotImplementedError + def before_album_info_emitted( + self, + items: Iterable[Item], + album_info: AlbumInfo, + ) -> AlbumInfo: + """Called after an :py:class:`AlbumInfo` object has been found for a set + of :py:class:`Item` objects but before the ``albuminfo_received`` + :py:type:`plugins.EventType` has been sent. The returned instance will + be the payload of the event. + """ + return album_info + @abc.abstractmethod def track_for_id(self, track_id: str) -> TrackInfo | None: """Return a :py:class:`TrackInfo` object or None if no matching release was diff --git a/beetsplug/mbpseudo.py b/beetsplug/mbpseudo.py index 9cfa99969..b5b4f4f8d 100644 --- a/beetsplug/mbpseudo.py +++ b/beetsplug/mbpseudo.py @@ -59,6 +59,7 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin): { "scripts": [], "custom_tags_only": False, + "multiple_allowed": False, "album_custom_tags": { "album_transl": "album", "album_artist_transl": "artist", @@ -112,6 +113,28 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin): " the mbpseudo plugin" ) + @override + def before_album_info_emitted( + self, + items: Iterable[Item], + album_info: AlbumInfo, + ) -> AlbumInfo: + if isinstance(album_info, PseudoAlbumInfo): + for item in items: + # particularly relevant for reimport but could also happen during import + if "mb_albumid" in item: + del item["mb_albumid"] + if "mb_trackid" in item: + del item["mb_trackid"] 
+ + self._log.debug( + "Using {0} release for distance calculations for album {1}", + album_info.determine_best_ref(list(items)), + album_info.album_id, + ) + + return album_info + @override def candidates( self, @@ -126,51 +149,80 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin): for album_info in super().candidates( items, artist, album, va_likely ): - if isinstance(album_info, PseudoAlbumInfo): - self._log.debug( - "Using {0} release for distance calculations for album {1}", - album_info.determine_best_ref(items), - album_info.album_id, - ) - yield album_info # first yield pseudo to give it priority + # always yield pseudo first to give it priority + if isinstance(album_info, MultiPseudoAlbumInfo): + yield from album_info.unwrap() + yield album_info + elif isinstance(album_info, PseudoAlbumInfo): + yield album_info yield album_info.get_official_release() else: yield album_info + def _get_raw_pseudo_release(self, pseudo_album_id: str) -> JSONDict: + try: + return self._release_getter(pseudo_album_id, RELEASE_INCLUDES)[ + "release" + ] + except musicbrainzngs.MusicBrainzError as exc: + raise MusicBrainzAPIError( + exc, + "get pseudo-release by ID", + pseudo_album_id, + traceback.format_exc(), + ) + @override def album_info(self, release: JSONDict) -> AlbumInfo: official_release = super().album_info(release) if release.get("status") == _STATUS_PSEUDO: - return official_release + # already pseudo-release, but wrap in our class for the other checks + return PseudoAlbumInfo( + pseudo_release=official_release, + official_release=official_release, + ) elif pseudo_release_ids := self._intercept_mb_release(release): - album_id = self._extract_id(pseudo_release_ids[0]) - try: - raw_pseudo_release = self._release_getter( - album_id, RELEASE_INCLUDES - )["release"] - pseudo_release = super().album_info(raw_pseudo_release) - - if self.config["custom_tags_only"].get(bool): - self._replace_artist_with_alias( - raw_pseudo_release, pseudo_release - ) - 
self._add_custom_tags(official_release, pseudo_release) - return official_release - else: - return PseudoAlbumInfo( - pseudo_release=_merge_pseudo_and_actual_album( - pseudo_release, official_release - ), - official_release=official_release, - ) - except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError( - exc, - "get pseudo-release by ID", - album_id, - traceback.format_exc(), + custom_tags_only = self.config["custom_tags_only"].get(bool) + languages = list(config["import"]["languages"].as_str_seq()) + if len(pseudo_release_ids) == 1 or len(languages) == 0: + album_info = self._get_raw_pseudo_release(pseudo_release_ids[0]) + return self._resolve_pseudo_album_info( + official_release, custom_tags_only, languages, album_info ) + + pseudo_releases = [ + self._get_raw_pseudo_release(i) for i in pseudo_release_ids + ] + + # sort according to the desired languages specified in the config + def sort_fun(rel: JSONDict) -> int: + lang = rel.get("text-representation", {}).get("language", "") + # noinspection PyBroadException + try: + return languages.index(lang[0:2]) + except Exception: + return len(languages) + + pseudo_releases.sort(key=sort_fun) + multiple_allowed = self.config["multiple_allowed"].get(bool) + if custom_tags_only or not multiple_allowed: + return self._resolve_pseudo_album_info( + official_release, + custom_tags_only, + languages, + pseudo_releases[0], + ) + + pseudo_album_infos = [ + self._resolve_pseudo_album_info( + official_release, custom_tags_only, languages, i + ) + for i in pseudo_releases + ] + return MultiPseudoAlbumInfo( + *pseudo_album_infos, official_release=official_release + ) else: return official_release @@ -179,13 +231,15 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin): if self._has_desired_script(data) or not isinstance(album_id, str): return [] - return [ - pr_id + ans = [ + self._extract_id(pr_id) for rel in data.get("release-relation-list", []) if (pr_id := self._wanted_pseudo_release_id(album_id, rel)) 
is not None ] + return list(filter(None, ans)) + def _has_desired_script(self, release: JSONDict) -> bool: if len(self._scripts) == 0: return False @@ -218,15 +272,38 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin): else: return None + def _resolve_pseudo_album_info( + self, + official_release: AlbumInfo, + custom_tags_only: bool, + languages: list[str], + raw_pseudo_release: JSONDict, + ) -> AlbumInfo: + pseudo_release = super().album_info(raw_pseudo_release) + if custom_tags_only: + self._replace_artist_with_alias( + languages, raw_pseudo_release, pseudo_release + ) + self._add_custom_tags(official_release, pseudo_release) + return official_release + else: + return PseudoAlbumInfo( + pseudo_release=_merge_pseudo_and_actual_album( + pseudo_release, official_release + ), + official_release=official_release, + ) + def _replace_artist_with_alias( self, + languages: list[str], raw_pseudo_release: JSONDict, pseudo_release: AlbumInfo, ): """Use the pseudo-release's language to search for artist alias if the user hasn't configured import languages.""" - if len(config["import"]["languages"].as_str_seq()) > 0: + if languages: return lang = raw_pseudo_release.get("text-representation", {}).get("language") @@ -314,7 +391,7 @@ class PseudoAlbumInfo(AlbumInfo): **kwargs, ): super().__init__(pseudo_release.tracks, **kwargs) - self.__dict__["_pseudo_source"] = True + self.__dict__["_pseudo_source"] = False self.__dict__["_official_release"] = official_release for k, v in pseudo_release.items(): if k not in kwargs: @@ -364,3 +441,35 @@ class PseudoAlbumInfo(AlbumInfo): result[k] = deepcopy(v, memo) return result + + +class MultiPseudoAlbumInfo(AlbumInfo): + """For releases that have multiple pseudo-releases""" + + def __init__( + self, + *args, + official_release: AlbumInfo, + **kwargs, + ): + super().__init__(official_release.tracks, **kwargs) + self.__dict__["_pseudo_album_infos"] = [ + arg for arg in args if isinstance(arg, PseudoAlbumInfo) + ] + for k, v in 
official_release.items(): + if k not in kwargs: + self[k] = v + + def unwrap(self) -> list[PseudoAlbumInfo]: + return self.__dict__["_pseudo_album_infos"] + + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + + memo[id(self)] = result + result.__dict__.update(self.__dict__) + for k, v in self.items(): + result[k] = deepcopy(v, memo) + + return result diff --git a/beetsplug/mbsync.py b/beetsplug/mbsync.py index 3f7daec6c..93d05cc0b 100644 --- a/beetsplug/mbsync.py +++ b/beetsplug/mbsync.py @@ -102,7 +102,9 @@ class MBSyncPlugin(BeetsPlugin): continue if not ( - album_info := metadata_plugins.album_for_id(album.mb_albumid) + album_info := metadata_plugins.album_for_id( + album.mb_albumid, album.items() + ) ): self._log.info( "Release ID {0.mb_albumid} not found for album {0}", album diff --git a/beetsplug/missing.py b/beetsplug/missing.py index cbdda4599..f22824679 100644 --- a/beetsplug/missing.py +++ b/beetsplug/missing.py @@ -222,7 +222,9 @@ class MissingPlugin(BeetsPlugin): item_mbids = {x.mb_trackid for x in album.items()} # fetch missing items # TODO: Implement caching that without breaking other stuff - if album_info := metadata_plugins.album_for_id(album.mb_albumid): + if album_info := metadata_plugins.album_for_id( + album.mb_albumid, album.items() + ): for track_info in album_info.tracks: if track_info.track_id not in item_mbids: self._log.debug( diff --git a/docs/changelog.rst b/docs/changelog.rst index b9a5c1f3f..6971c3e1d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -61,6 +61,9 @@ Bug fixes: For plugin developers: +- Metadata plugins can now implement a ``before_album_info_emitted`` method to + modify ``AlbumInfo`` objects before they are emitted as part of the + ``albuminfo_received`` event. - A new plugin event, ``album_matched``, is sent when an album that is being imported has been matched to its metadata and the corresponding distance has been calculated. 
diff --git a/docs/plugins/mbpseudo.rst b/docs/plugins/mbpseudo.rst index 56658db26..2ea6f2ba9 100644 --- a/docs/plugins/mbpseudo.rst +++ b/docs/plugins/mbpseudo.rst @@ -39,6 +39,23 @@ Therefore, the minimum configuration for this plugin looks like this: scripts: - Latn +A release may have multiple pseudo-releases, for example when there is both a +transliteration and a translation available. By default, only one pseudo-release +per official release is emitted as a candidate, using the languages from the +configuration to decide which one has the highest priority. If you're importing in +timid mode and you would like to receive all valid pseudo-releases as additional +candidates, you can add the following to the configuration: + +.. code-block:: yaml + + mbpseudo: + multiple_allowed: yes + +.. note:: + + Reimporting in particular might not give you a pseudo-release proposal if + multiple candidates exist and are allowed. + Note that the `search_limit` configuration applies to the initial search for official releases, and that the `data_source` in the database will be "MusicBrainz". Nevertheless, `data_source_mismatch_penalty` must also be @@ -61,6 +78,9 @@ sources may look like this: deezer: data_source_mismatch_penalty: 0.2 +Custom Tags Only +---------------- + By default, the data from the pseudo-release will be used to create a proposal that is independent from the official release and sets all properties in its metadata. 
It's possible to change the configuration so that some information diff --git a/test/plugins/test_mbpseudo.py b/test/plugins/test_mbpseudo.py index 621e08950..882e1cd6b 100644 --- a/test/plugins/test_mbpseudo.py +++ b/test/plugins/test_mbpseudo.py @@ -1,5 +1,6 @@ import json import pathlib +from copy import deepcopy import pytest @@ -12,6 +13,7 @@ from beets.test.helper import PluginMixin from beetsplug._typing import JSONDict from beetsplug.mbpseudo import ( _STATUS_PSEUDO, + MultiPseudoAlbumInfo, MusicBrainzPseudoReleasePlugin, PseudoAlbumInfo, ) @@ -47,6 +49,7 @@ class TestPseudoAlbumInfo: self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo ): info = PseudoAlbumInfo(pseudo_release_info, official_release_info) + info.use_pseudo_as_ref() assert info.album == "In Bloom" def test_get_attr_from_official( @@ -124,13 +127,44 @@ class TestMBPseudoPlugin(PluginMixin): else: assert mbpseudo_plugin._extract_id(album_id) == album_id + def test_reimport_logic( + self, + mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + official_release_info: AlbumInfo, + pseudo_release_info: AlbumInfo, + ): + pseudo_info = PseudoAlbumInfo( + pseudo_release_info, official_release_info + ) + + item = Item() + item["title"] = "百花繚乱" + + # if items don't have mb_*, they are not modified + mbpseudo_plugin.before_album_info_emitted([item], pseudo_info) + assert pseudo_info.album == item.title + + pseudo_info.use_pseudo_as_ref() + assert pseudo_info.album == "In Bloom" + + item["mb_albumid"] = "mb_aid" + item["mb_trackid"] = "mb_tid" + assert item.get("mb_albumid") == "mb_aid" + assert item.get("mb_trackid") == "mb_tid" + + # if items have mb_*, they are deleted + mbpseudo_plugin.before_album_info_emitted([item], pseudo_info) + assert pseudo_info.album == item.title + assert item.get("mb_albumid") == "" + assert item.get("mb_trackid") == "" + def test_album_info_for_pseudo_release( self, mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, pseudo_release: JSONDict, ): album_info = 
mbpseudo_plugin.album_info(pseudo_release["release"]) - assert not isinstance(album_info, PseudoAlbumInfo) + assert isinstance(album_info, PseudoAlbumInfo) assert album_info.data_source == "MusicBrainzPseudoRelease" assert album_info.albumstatus == _STATUS_PSEUDO @@ -148,7 +182,8 @@ class TestMBPseudoPlugin(PluginMixin): official_release: JSONDict, json_key: str, ): - del official_release["release"]["release-relation-list"][0][json_key] + for r in official_release["release"]["release-relation-list"]: + del r[json_key] album_info = mbpseudo_plugin.album_info(official_release["release"]) assert not isinstance(album_info, PseudoAlbumInfo) @@ -159,9 +194,8 @@ class TestMBPseudoPlugin(PluginMixin): mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, official_release: JSONDict, ): - official_release["release"]["release-relation-list"][0]["release"][ - "text-representation" - ]["script"] = "Null" + for r in official_release["release"]["release-relation-list"]: + r["release"]["text-representation"]["script"] = "Null" album_info = mbpseudo_plugin.album_info(official_release["release"]) assert not isinstance(album_info, PseudoAlbumInfo) @@ -226,6 +260,64 @@ class TestMBPseudoPlugin(PluginMixin): assert match.info.album == "In Bloom" +class TestMBPseudoPluginMultipleAllowed(PluginMixin): + plugin = "mbpseudo" + + @pytest.fixture(scope="class") + def plugin_config(self): + return {"scripts": ["Latn", "Dummy"], "multiple_allowed": True} + + @pytest.fixture(scope="class") + def mbpseudo_plugin(self, plugin_config) -> MusicBrainzPseudoReleasePlugin: + self.config[self.plugin].set(plugin_config) + config["import"]["languages"] = ["jp", "en"] + return MusicBrainzPseudoReleasePlugin() + + @pytest.fixture(scope="class") + def official_release(self, rsrc_dir: pathlib.Path) -> JSONDict: + info_json = (rsrc_dir / "official_release.json").read_text( + encoding="utf-8" + ) + return json.loads(info_json) + + @pytest.fixture(scope="class") + def pseudo_release(self, rsrc_dir: pathlib.Path) -> 
JSONDict: + info_json = (rsrc_dir / "pseudo_release.json").read_text( + encoding="utf-8" + ) + return json.loads(info_json) + + def test_multiple_releases( + self, + mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + official_release: JSONDict, + pseudo_release: JSONDict, + ): + def mock_release_getter(album_id: str, _) -> JSONDict: + if album_id == "dc3ee2df-0bc1-49eb-b8c4-34473d279a43": + return pseudo_release + else: + clone = deepcopy(pseudo_release) + clone["release"]["id"] = album_id + clone["release"]["text-representation"]["language"] = "jpn" + return clone + + mbpseudo_plugin._release_getter = mock_release_getter + + album_info = mbpseudo_plugin.album_info(official_release["release"]) + assert isinstance(album_info, MultiPseudoAlbumInfo) + assert album_info.data_source == "MusicBrainzPseudoRelease" + assert len(album_info.unwrap()) == 2 + assert ( + album_info.unwrap()[0].album_id + == "dc3ee2df-mock-49eb-b8c4-34473d279a43" + ) + assert ( + album_info.unwrap()[1].album_id + == "dc3ee2df-0bc1-49eb-b8c4-34473d279a43" + ) + + class TestMBPseudoPluginCustomTagsOnly(PluginMixin): plugin = "mbpseudo" diff --git a/test/rsrc/mbpseudo/official_release.json b/test/rsrc/mbpseudo/official_release.json index 63f1d60dd..308bd7304 100644 --- a/test/rsrc/mbpseudo/official_release.json +++ b/test/rsrc/mbpseudo/official_release.json @@ -754,6 +754,35 @@ "medium-count": 0, "artist-credit-phrase": "Lilas Ikuta" } + }, + { + "type": "transl-tracklisting", + "type-id": "fc399d47-23a7-4c28-bfcf-0607a562b644", + "target": "dc3ee2df-mock-49eb-b8c4-34473d279a43", + "direction": "forward", + "release": { + "id": "dc3ee2df-mock-49eb-b8c4-34473d279a43", + "title": "Mock Title", + "quality": "normal", + "text-representation": { + "language": "jpn", + "script": "Latn" + }, + "artist-credit": [ + { + "name": "Lilas Ikuta", + "artist": { + "id": "55e42264-ef27-49d8-93fd-29f930dc96e4", + "name": "幾田りら", + "sort-name": "Ikuta, Lilas", + "country": "JP" + } + } + ], + "medium-list": [], + 
"medium-count": 0, + "artist-credit-phrase": "Lilas Ikuta" + } } ], "url-relation-list": [ diff --git a/test/test_importer.py b/test/test_importer.py index c1768df3e..6498ebc82 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -1517,7 +1517,7 @@ class ImportPretendTest(IOMixin, AutotagImportTestCase): assert self.__run(importer) == [f"No files imported from {empty_path}"] -def mocked_get_album_by_id(id_): +def mocked_get_album_by_id(id_, _): """Return album candidate for the given id. The two albums differ only in the release title and artist name, so that