diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index fe4ce3378..14dfdfdcb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,4 +4,3 @@ # Specific ownerships: /beets/metadata_plugins.py @semohr /beetsplug/titlecase.py @henry-oberholtzer -/beetsplug/mbpseudo.py @asardaes diff --git a/beetsplug/mbpseudo.py b/beetsplug/mbpseudo.py deleted file mode 100644 index d084d1531..000000000 --- a/beetsplug/mbpseudo.py +++ /dev/null @@ -1,350 +0,0 @@ -# This file is part of beets. -# Copyright 2025, Alexis Sarda-Espinosa. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. 
- -"""Adds pseudo-releases from MusicBrainz as candidates during import.""" - -from __future__ import annotations - -import itertools -from copy import deepcopy -from typing import TYPE_CHECKING, Any - -import mediafile -from typing_extensions import override - -from beets import config -from beets.autotag.distance import distance -from beets.autotag.hooks import AlbumInfo -from beets.autotag.match import assign_items -from beets.plugins import find_plugins -from beets.util.id_extractors import extract_release_id -from beetsplug.musicbrainz import ( - MusicBrainzPlugin, - _merge_pseudo_and_actual_album, - _preferred_alias, -) - -if TYPE_CHECKING: - from collections.abc import Iterable, Sequence - - from beets.autotag import AlbumMatch - from beets.autotag.distance import Distance - from beets.library import Item - from beetsplug._typing import JSONDict - -_STATUS_PSEUDO = "Pseudo-Release" - - -class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin): - def __init__(self) -> None: - super().__init__() - - self.config.add( - { - "scripts": [], - "custom_tags_only": False, - "album_custom_tags": { - "album_transl": "album", - "album_artist_transl": "artist", - }, - "track_custom_tags": { - "title_transl": "title", - "artist_transl": "artist", - }, - } - ) - - self._scripts = self.config["scripts"].as_str_seq() - self._log.debug("Desired scripts: {0}", self._scripts) - - album_custom_tags = self.config["album_custom_tags"].get().keys() - track_custom_tags = self.config["track_custom_tags"].get().keys() - self._log.debug( - "Custom tags for albums and tracks: {0} + {1}", - album_custom_tags, - track_custom_tags, - ) - for custom_tag in album_custom_tags | track_custom_tags: - if not isinstance(custom_tag, str): - continue - - media_field = mediafile.MediaField( - mediafile.MP3DescStorageStyle(custom_tag), - mediafile.MP4StorageStyle( - f"----:com.apple.iTunes:{custom_tag}" - ), - mediafile.StorageStyle(custom_tag), - mediafile.ASFStorageStyle(custom_tag), - ) - try: - 
self.add_media_field(custom_tag, media_field) - except ValueError: - # ignore errors due to duplicates - pass - - self.register_listener("pluginload", self._on_plugins_loaded) - self.register_listener("album_matched", self._adjust_final_album_match) - - # noinspection PyMethodMayBeStatic - def _on_plugins_loaded(self): - for plugin in find_plugins(): - if isinstance(plugin, MusicBrainzPlugin) and not isinstance( - plugin, MusicBrainzPseudoReleasePlugin - ): - raise RuntimeError( - "The musicbrainz plugin should not be enabled together with" - " the mbpseudo plugin" - ) - - @override - def candidates( - self, - items: Sequence[Item], - artist: str, - album: str, - va_likely: bool, - ) -> Iterable[AlbumInfo]: - if len(self._scripts) == 0: - yield from super().candidates(items, artist, album, va_likely) - else: - for album_info in super().candidates( - items, artist, album, va_likely - ): - if isinstance(album_info, PseudoAlbumInfo): - self._log.debug( - "Using {0} release for distance calculations for album {1}", - album_info.determine_best_ref(items), - album_info.album_id, - ) - yield album_info # first yield pseudo to give it priority - yield album_info.get_official_release() - else: - yield album_info - - @override - def album_info(self, release: JSONDict) -> AlbumInfo: - official_release = super().album_info(release) - - if release.get("status") == _STATUS_PSEUDO: - return official_release - - if (ids := self._intercept_mb_release(release)) and ( - album_id := self._extract_id(ids[0]) - ): - raw_pseudo_release = self.mb_api.get_release(album_id) - pseudo_release = super().album_info(raw_pseudo_release) - - if self.config["custom_tags_only"].get(bool): - self._replace_artist_with_alias( - raw_pseudo_release, pseudo_release - ) - self._add_custom_tags(official_release, pseudo_release) - return official_release - else: - return PseudoAlbumInfo( - pseudo_release=_merge_pseudo_and_actual_album( - pseudo_release, official_release - ), - 
official_release=official_release, - ) - else: - return official_release - - def _intercept_mb_release(self, data: JSONDict) -> list[str]: - album_id = data["id"] if "id" in data else None - if self._has_desired_script(data) or not isinstance(album_id, str): - return [] - - return [ - pr_id - for rel in data.get("release-relations", []) - if (pr_id := self._wanted_pseudo_release_id(album_id, rel)) - is not None - ] - - def _has_desired_script(self, release: JSONDict) -> bool: - if len(self._scripts) == 0: - return False - elif script := release.get("text-representation", {}).get("script"): - return script in self._scripts - else: - return False - - def _wanted_pseudo_release_id( - self, - album_id: str, - relation: JSONDict, - ) -> str | None: - if ( - len(self._scripts) == 0 - or relation.get("type", "") != "transl-tracklisting" - or relation.get("direction", "") != "forward" - or "release" not in relation - ): - return None - - release = relation["release"] - if "id" in release and self._has_desired_script(release): - self._log.debug( - "Adding pseudo-release {0} for main release {1}", - release["id"], - album_id, - ) - return release["id"] - else: - return None - - def _replace_artist_with_alias( - self, - raw_pseudo_release: JSONDict, - pseudo_release: AlbumInfo, - ): - """Use the pseudo-release's language to search for artist - alias if the user hasn't configured import languages.""" - - if len(config["import"]["languages"].as_str_seq()) > 0: - return - - lang = raw_pseudo_release.get("text-representation", {}).get("language") - artist_credits = raw_pseudo_release.get("release-group", {}).get( - "artist-credit", [] - ) - aliases = [ - artist_credit.get("artist", {}).get("aliases", []) - for artist_credit in artist_credits - ] - - if lang and len(lang) >= 2 and len(aliases) > 0: - locale = lang[0:2] - aliases_flattened = list(itertools.chain.from_iterable(aliases)) - self._log.debug( - "Using locale '{0}' to search aliases {1}", - locale, - aliases_flattened, - 
) - if alias_dict := _preferred_alias(aliases_flattened, [locale]): - if alias := alias_dict.get("name"): - self._log.debug("Got alias '{0}'", alias) - pseudo_release.artist = alias - for track in pseudo_release.tracks: - track.artist = alias - - def _add_custom_tags( - self, - official_release: AlbumInfo, - pseudo_release: AlbumInfo, - ): - for tag_key, pseudo_key in ( - self.config["album_custom_tags"].get().items() - ): - official_release[tag_key] = pseudo_release[pseudo_key] - - track_custom_tags = self.config["track_custom_tags"].get().items() - for track, pseudo_track in zip( - official_release.tracks, pseudo_release.tracks - ): - for tag_key, pseudo_key in track_custom_tags: - track[tag_key] = pseudo_track[pseudo_key] - - def _adjust_final_album_match(self, match: AlbumMatch): - album_info = match.info - if isinstance(album_info, PseudoAlbumInfo): - self._log.debug( - "Switching {0} to pseudo-release source for final proposal", - album_info.album_id, - ) - album_info.use_pseudo_as_ref() - new_pairs, *_ = assign_items(match.items, album_info.tracks) - album_info.mapping = dict(new_pairs) - - if album_info.data_source == self.data_source: - album_info.data_source = "MusicBrainz" - - @override - def _extract_id(self, url: str) -> str | None: - return extract_release_id("MusicBrainz", url) - - -class PseudoAlbumInfo(AlbumInfo): - """This is a not-so-ugly hack. - - We want the pseudo-release to result in a distance that is lower or equal to that of - the official release, otherwise it won't qualify as a good candidate. However, if - the input is in a script that's different from the pseudo-release (and we want to - translate/transliterate it in the library), it will receive unwanted penalties. 
- - This class is essentially a view of the ``AlbumInfo`` of both official and - pseudo-releases, where it's possible to change the details that are exposed to other - parts of the auto-tagger, enabling a "fair" distance calculation based on the - current input's script but still preferring the translation/transliteration in the - final proposal. - """ - - def __init__( - self, - pseudo_release: AlbumInfo, - official_release: AlbumInfo, - **kwargs, - ): - super().__init__(pseudo_release.tracks, **kwargs) - self.__dict__["_pseudo_source"] = True - self.__dict__["_official_release"] = official_release - for k, v in pseudo_release.items(): - if k not in kwargs: - self[k] = v - - def get_official_release(self) -> AlbumInfo: - return self.__dict__["_official_release"] - - def determine_best_ref(self, items: Sequence[Item]) -> str: - self.use_pseudo_as_ref() - pseudo_dist = self._compute_distance(items) - - self.use_official_as_ref() - official_dist = self._compute_distance(items) - - if official_dist < pseudo_dist: - self.use_official_as_ref() - return "official" - else: - self.use_pseudo_as_ref() - return "pseudo" - - def _compute_distance(self, items: Sequence[Item]) -> Distance: - mapping, _, _ = assign_items(items, self.tracks) - return distance(items, self, mapping) - - def use_pseudo_as_ref(self): - self.__dict__["_pseudo_source"] = True - - def use_official_as_ref(self): - self.__dict__["_pseudo_source"] = False - - def __getattr__(self, attr: str) -> Any: - # ensure we don't duplicate an official release's id, always return pseudo's - if self.__dict__["_pseudo_source"] or attr == "album_id": - return super().__getattr__(attr) - else: - return self.__dict__["_official_release"].__getattr__(attr) - - def __deepcopy__(self, memo): - cls = self.__class__ - result = cls.__new__(cls) - - memo[id(self)] = result - result.__dict__.update(self.__dict__) - for k, v in self.items(): - result[k] = deepcopy(v, memo) - - return result diff --git a/beetsplug/musicbrainz.py 
b/beetsplug/musicbrainz.py index aac20e9ac..29357c371 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -18,16 +18,21 @@ from __future__ import annotations from collections import Counter from contextlib import suppress +from copy import deepcopy from functools import cached_property -from itertools import product +from itertools import chain, product from typing import TYPE_CHECKING, Any from urllib.parse import urljoin +import mediafile from confuse.exceptions import NotFoundError import beets import beets.autotag.hooks from beets import config, plugins, util +from beets.autotag.distance import distance +from beets.autotag.hooks import AlbumInfo +from beets.autotag.match import assign_items from beets.metadata_plugins import MetadataSourcePlugin from beets.util.deprecation import deprecate_for_user from beets.util.id_extractors import extract_release_id @@ -39,6 +44,8 @@ if TYPE_CHECKING: from collections.abc import Iterable, Sequence from typing import Literal + from beets.autotag import AlbumMatch + from beets.autotag.distance import Distance from beets.library import Item from ._typing import JSONDict @@ -96,6 +103,8 @@ BROWSE_INCLUDES = [ BROWSE_CHUNKSIZE = 100 BROWSE_MAXTRACKS = 500 +_STATUS_PSEUDO = "Pseudo-Release" + def _preferred_alias( aliases: list[JSONDict], languages: list[str] | None = None @@ -257,7 +266,7 @@ def _preferred_release_event( def _set_date_str( - info: beets.autotag.hooks.AlbumInfo, + info: AlbumInfo, date_str: str, original: bool = False, ): @@ -281,8 +290,8 @@ def _set_date_str( def _merge_pseudo_and_actual_album( - pseudo: beets.autotag.hooks.AlbumInfo, actual: beets.autotag.hooks.AlbumInfo -) -> beets.autotag.hooks.AlbumInfo: + pseudo: AlbumInfo, actual: AlbumInfo +) -> AlbumInfo: """ Merges a pseudo release with its actual release. 
@@ -342,8 +351,21 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): "tidal": False, }, "extra_tags": [], + "pseudo_releases": { + "scripts": [], + "custom_tags_only": False, + "album_custom_tags": { + "album_transl": "album", + "album_artist_transl": "artist", + }, + "track_custom_tags": { + "title_transl": "title", + "artist_transl": "artist", + }, + }, }, ) + self._apply_pseudo_release_config() # TODO: Remove in 3.0.0 with suppress(NotFoundError): self.config["search_limit"] = self.config["match"][ @@ -355,6 +377,41 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): "'musicbrainz.search_limit'", ) + def _apply_pseudo_release_config(self): + self._scripts = self.config["pseudo_releases"]["scripts"].as_str_seq() + self._log.debug("Desired pseudo-release scripts: {0}", self._scripts) + + album_custom_tags = ( + self.config["pseudo_releases"]["album_custom_tags"].get().keys() + ) + track_custom_tags = ( + self.config["pseudo_releases"]["track_custom_tags"].get().keys() + ) + self._log.debug( + "Custom tags for albums and tracks: {0} + {1}", + album_custom_tags, + track_custom_tags, + ) + for custom_tag in album_custom_tags | track_custom_tags: + if not isinstance(custom_tag, str): + continue + + media_field = mediafile.MediaField( + mediafile.MP3DescStorageStyle(custom_tag), + mediafile.MP4StorageStyle( + f"----:com.apple.iTunes:{custom_tag}" + ), + mediafile.StorageStyle(custom_tag), + mediafile.ASFStorageStyle(custom_tag), + ) + try: + self.add_media_field(custom_tag, media_field) + except ValueError: + # ignore errors due to duplicates + pass + + self.register_listener("album_matched", self._adjust_final_album_match) + def track_info( self, recording: JSONDict, @@ -458,7 +515,7 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): return info - def album_info(self, release: JSONDict) -> beets.autotag.hooks.AlbumInfo: + def album_info(self, release: JSONDict) -> AlbumInfo: """Takes a MusicBrainz release 
result dictionary and returns a beets AlbumInfo object containing the interesting data about that release. """ @@ -572,7 +629,7 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): track_infos.append(ti) album_artist_ids = _artist_ids(release["artist-credit"]) - info = beets.autotag.hooks.AlbumInfo( + info = AlbumInfo( album=release["title"], album_id=release["id"], artist=artist_name, @@ -758,13 +815,23 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): artist: str, album: str, va_likely: bool, - ) -> Iterable[beets.autotag.hooks.AlbumInfo]: + ) -> Iterable[AlbumInfo]: criteria = self.get_album_criteria(items, artist, album, va_likely) - release_ids = (r["id"] for r in self._search_api("release", criteria)) + release_ids = [r["id"] for r in self._search_api("release", criteria)] for id_ in release_ids: with suppress(HTTPNotFoundError): - if album_info := self.album_for_id(id_): + album_info = self.album_for_id(id_) + # always yield pseudo first to give it priority + if isinstance(album_info, PseudoAlbumInfo): + self._log.debug( + "Using {0} release for distance calculations for album {1}", + album_info.determine_best_ref(list(items)), + album_info.album_id, + ) + yield album_info + yield album_info.get_official_release() + elif isinstance(album_info, AlbumInfo): yield album_info def item_candidates( @@ -776,12 +843,9 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): None, map(self.track_info, self._search_api("recording", criteria)) ) - def album_for_id( - self, album_id: str - ) -> beets.autotag.hooks.AlbumInfo | None: + def album_for_id(self, album_id: str) -> AlbumInfo | None: """Fetches an album by its MusicBrainz ID and returns an AlbumInfo - object or None if the album is not found. May raise a - MusicBrainzAPIError. + object or None if the album is not found. 
""" self._log.debug("Requesting MusicBrainz release {}", album_id) if not (albumid := self._extract_id(album_id)): @@ -796,31 +860,90 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): self._log.debug("Release {} not found on MusicBrainz.", albumid) return None - # resolve linked release relations - actual_res = None - - if res.get("status") == "Pseudo-Release" and ( - relations := res.get("release-relations") - ): - for rel in relations: - if ( - rel["type"] == "transl-tracklisting" - and rel["direction"] == "backward" - ): - actual_res = self.mb_api.get_release( - rel["release"]["id"], includes=RELEASE_INCLUDES - ) - - # release is potentially a pseudo release release = self.album_info(res) - # should be None unless we're dealing with a pseudo release - if actual_res is not None: - actual_release = self.album_info(actual_res) - return _merge_pseudo_and_actual_album(release, actual_release) + if res.get("status") == _STATUS_PSEUDO: + return self._handle_main_pseudo_release(res, release) + elif pseudo_release_ids := self._intercept_mb_release(res): + return self._handle_intercepted_pseudo_releases( + release, pseudo_release_ids + ) else: return release + def _handle_main_pseudo_release( + self, + pseudo_release: dict[str, Any], + pseudo_album_info: AlbumInfo, + ) -> AlbumInfo: + actual_res = None + for rel in pseudo_release.get("release-relations", []): + if ( + rel["type"] == "transl-tracklisting" + and rel["direction"] == "backward" + ): + actual_res = self.mb_api.get_release( + rel["release"]["id"], includes=RELEASE_INCLUDES + ) + if actual_res: + break + + if actual_res is None: + return pseudo_album_info + + actual_release = self.album_info(actual_res) + merged_release = _merge_pseudo_and_actual_album( + pseudo_album_info, actual_release + ) + + if self._has_desired_script(pseudo_release): + return PseudoAlbumInfo( + pseudo_release=merged_release, + official_release=actual_release, + ) + else: + return merged_release + + def 
_handle_intercepted_pseudo_releases( + self, + release: AlbumInfo, + pseudo_release_ids: list[str], + ) -> AlbumInfo: + languages = list(config["import"]["languages"].as_str_seq()) + pseudo_config = self.config["pseudo_releases"] + custom_tags_only = pseudo_config["custom_tags_only"].get(bool) + + if len(pseudo_release_ids) == 1 or len(languages) == 0: + # only 1 pseudo-release or no language preference specified + album_info = self.mb_api.get_release( + pseudo_release_ids[0], includes=RELEASE_INCLUDES + ) + return self._resolve_pseudo_album_info( + release, custom_tags_only, languages, album_info + ) + + pseudo_releases = [ + self.mb_api.get_release(i, includes=RELEASE_INCLUDES) + for i in pseudo_release_ids + ] + + # sort according to the desired languages specified in the config + def sort_fun(rel: JSONDict) -> int: + lang = rel.get("text-representation", {}).get("language", "") + # noinspection PyBroadException + try: + return languages.index(lang[0:2]) + except Exception: + return len(languages) + + pseudo_releases.sort(key=sort_fun) + return self._resolve_pseudo_album_info( + release, + custom_tags_only, + languages, + pseudo_releases[0], + ) + def track_for_id( self, track_id: str ) -> beets.autotag.hooks.TrackInfo | None: @@ -837,3 +960,214 @@ class MusicBrainzPlugin(MusicBrainzAPIMixin, MetadataSourcePlugin): ) return None + + def _intercept_mb_release(self, data: JSONDict) -> list[str]: + album_id = data["id"] if "id" in data else None + if self._has_desired_script(data) or not isinstance(album_id, str): + return [] + + ans = [ + self._extract_id(pr_id) + for rel in data.get("release-relations", []) + if (pr_id := self._wanted_pseudo_release_id(album_id, rel)) + is not None + ] + + return list(filter(None, ans)) + + def _has_desired_script(self, release: JSONDict) -> bool: + if len(self._scripts) == 0: + return False + elif script := release.get("text-representation", {}).get("script"): + return script in self._scripts + else: + return False + + def 
_wanted_pseudo_release_id( + self, + album_id: str, + relation: JSONDict, + ) -> str | None: + if ( + len(self._scripts) == 0 + or relation.get("type", "") != "transl-tracklisting" + or relation.get("direction", "") != "forward" + or "release" not in relation + ): + return None + + release = relation["release"] + if "id" in release and self._has_desired_script(release): + self._log.debug( + "Adding pseudo-release {0} for main release {1}", + release["id"], + album_id, + ) + return release["id"] + else: + return None + + def _resolve_pseudo_album_info( + self, + official_release: AlbumInfo, + custom_tags_only: bool, + languages: list[str], + raw_pseudo_release: JSONDict, + ) -> AlbumInfo: + pseudo_release = self.album_info(raw_pseudo_release) + if custom_tags_only: + self._replace_artist_with_alias( + languages, raw_pseudo_release, pseudo_release + ) + self._add_custom_tags(official_release, pseudo_release) + return official_release + else: + return PseudoAlbumInfo( + pseudo_release=_merge_pseudo_and_actual_album( + pseudo_release, official_release + ), + official_release=official_release, + ) + + def _replace_artist_with_alias( + self, + languages: list[str], + raw_pseudo_release: JSONDict, + pseudo_release: AlbumInfo, + ): + """Use the pseudo-release's language to search for artist + alias if the user hasn't configured import languages.""" + + if languages: + return + + lang = raw_pseudo_release.get("text-representation", {}).get("language") + artist_credits = raw_pseudo_release.get("release-group", {}).get( + "artist-credit", [] + ) + aliases = [ + artist_credit.get("artist", {}).get("aliases", []) + for artist_credit in artist_credits + ] + + if lang and len(lang) >= 2 and len(aliases) > 0: + locale = lang[0:2] + aliases_flattened = list(chain.from_iterable(aliases)) + self._log.debug( + "Using locale '{0}' to search aliases {1}", + locale, + aliases_flattened, + ) + if alias_dict := _preferred_alias(aliases_flattened, [locale]): + if alias := 
alias_dict.get("name"): + self._log.debug("Got alias '{0}'", alias) + pseudo_release.artist = alias + for track in pseudo_release.tracks: + track.artist = alias + + def _add_custom_tags( + self, + official_release: AlbumInfo, + pseudo_release: AlbumInfo, + ): + for tag_key, pseudo_key in ( + self.config["pseudo_releases"]["album_custom_tags"].get().items() + ): + official_release[tag_key] = pseudo_release[pseudo_key] + + track_custom_tags = ( + self.config["pseudo_releases"]["track_custom_tags"].get().items() + ) + for track, pseudo_track in zip( + official_release.tracks, pseudo_release.tracks + ): + for tag_key, pseudo_key in track_custom_tags: + track[tag_key] = pseudo_track[pseudo_key] + + def _adjust_final_album_match(self, match: AlbumMatch): + album_info = match.info + if isinstance(album_info, PseudoAlbumInfo): + self._log.debug( + "Switching {0} to pseudo-release source for final proposal", + album_info.album_id, + ) + album_info.use_pseudo_as_ref() + mapping = match.mapping + new_mappings, _, _ = assign_items( + list(mapping.keys()), album_info.tracks + ) + mapping.update(new_mappings) + + +class PseudoAlbumInfo(AlbumInfo): + """This is a not-so-ugly hack. + + We want the pseudo-release to result in a distance that is lower or equal to that of + the official release, otherwise it won't qualify as a good candidate. However, if + the input is in a script that's different from the pseudo-release (and we want to + translate/transliterate it in the library), it will receive unwanted penalties. + + This class is essentially a view of the ``AlbumInfo`` of both official and + pseudo-releases, where it's possible to change the details that are exposed to other + parts of the auto-tagger, enabling a "fair" distance calculation based on the + current input's script but still preferring the translation/transliteration in the + final proposal. 
+ """ + + def __init__( + self, + pseudo_release: AlbumInfo, + official_release: AlbumInfo, + **kwargs, + ): + super().__init__(pseudo_release.tracks, **kwargs) + self.__dict__["_pseudo_source"] = False + self.__dict__["_official_release"] = official_release + for k, v in pseudo_release.items(): + if k not in kwargs: + self[k] = v + + def get_official_release(self) -> AlbumInfo: + return self.__dict__["_official_release"] + + def determine_best_ref(self, items: Sequence[Item]) -> str: + self.use_pseudo_as_ref() + pseudo_dist = self._compute_distance(items) + + self.use_official_as_ref() + official_dist = self._compute_distance(items) + + if official_dist < pseudo_dist: + self.use_official_as_ref() + return "official" + else: + self.use_pseudo_as_ref() + return "pseudo" + + def _compute_distance(self, items: Sequence[Item]) -> Distance: + mapping, _, _ = assign_items(items, self.tracks) + return distance(items, self, mapping) + + def use_pseudo_as_ref(self): + self.__dict__["_pseudo_source"] = True + + def use_official_as_ref(self): + self.__dict__["_pseudo_source"] = False + + def __getattr__(self, attr: str) -> Any: + # ensure we don't duplicate an official release's id, always return pseudo's + if self.__dict__["_pseudo_source"] or attr == "album_id": + return super().__getattr__(attr) + else: + return self.__dict__["_official_release"].__getattr__(attr) + + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + + memo[id(self)] = result + result.__dict__.update(self.__dict__) + for k, v in self.items(): + result[k] = deepcopy(v, memo) + + return result diff --git a/docs/changelog.rst b/docs/changelog.rst index cd2c2b4c3..9618a9413 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,8 @@ Unreleased New features: +- :doc:`plugins/musicbrainz`: Additional functionality for pseudo-releases. 
+ Bug fixes: For packagers: @@ -39,8 +41,6 @@ New features: - :doc:`plugins/importsource`: Added new plugin that tracks original import paths and optionally suggests removing source files when items are removed from the library. -- :doc:`plugins/mbpseudo`: Add a new `mbpseudo` plugin to proactively receive - MusicBrainz pseudo-releases as recommendations during import. - Added support for Python 3.13. - :doc:`/plugins/convert`: ``force`` can be passed to override checks like no_convert, never_convert_lossy_files, same format, and max_bitrate @@ -141,7 +141,6 @@ For plugin developers: - :doc:`plugins/listenbrainz` - :doc:`plugins/mbcollection` - - :doc:`plugins/mbpseudo` - :doc:`plugins/missing` - :doc:`plugins/musicbrainz` - :doc:`plugins/parentwork` diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst index 1583ac5ab..488f16e3e 100644 --- a/docs/plugins/index.rst +++ b/docs/plugins/index.rst @@ -101,7 +101,6 @@ databases. They share the following configuration options: loadext lyrics mbcollection - mbpseudo mbsubmit mbsync metasync @@ -154,9 +153,6 @@ Autotagger Extensions :doc:`musicbrainz <musicbrainz>` Search for releases in the MusicBrainz_ database. -:doc:`mbpseudo <mbpseudo>` - Search for releases and pseudo-releases in the MusicBrainz_ database. - :doc:`spotify <spotify>` Search for releases in the Spotify_ database. diff --git a/docs/plugins/mbpseudo.rst b/docs/plugins/mbpseudo.rst deleted file mode 100644 index 56658db26..000000000 --- a/docs/plugins/mbpseudo.rst +++ /dev/null @@ -1,103 +0,0 @@ -MusicBrainz Pseudo-Release Plugin -================================= - -The `mbpseudo` plugin can be used *instead of* the `musicbrainz` plugin to -search for MusicBrainz pseudo-releases_ during the import process, which are -added to the normal candidates from the MusicBrainz search. - -.. 
_pseudo-releases: https://musicbrainz.org/doc/Style/Specific_types_of_releases/Pseudo-Releases - -This is useful for releases whose title and track titles are written with a -script_ that can be translated or transliterated into a different one. - -.. _script: https://en.wikipedia.org/wiki/ISO_15924 - -Pseudo-releases will only be included if the initial search in MusicBrainz -returns releases whose script is *not* desired and whose relationships include -pseudo-releases with desired scripts. - -Configuration -------------- - -Since this plugin first searches for official releases from MusicBrainz, all -options from the `musicbrainz` plugin's :ref:`musicbrainz-config` are supported, -but they must be specified under `mbpseudo` in the configuration file. -Additionally, the configuration expects an array of scripts that are desired for -the pseudo-releases. For ``artist`` in particular, keep in mind that even -pseudo-releases might specify it with the original script, so you should also -configure import :ref:`languages` to give artist aliases more priority. -Therefore, the minimum configuration for this plugin looks like this: - -.. code-block:: yaml - - plugins: mbpseudo # remove musicbrainz - - import: - languages: en - - mbpseudo: - scripts: - - Latn - -Note that the `search_limit` configuration applies to the initial search for -official releases, and that the `data_source` in the database will be -"MusicBrainz". Nevertheless, `data_source_mismatch_penalty` must also be -specified under `mbpseudo` if desired (see also -:ref:`metadata-source-plugin-configuration`). An example with multiple data -sources may look like this: - -.. 
code-block:: yaml - - plugins: mbpseudo deezer - - import: - languages: en - - mbpseudo: - data_source_mismatch_penalty: 0 - scripts: - - Latn - - deezer: - data_source_mismatch_penalty: 0.2 - -By default, the data from the pseudo-release will be used to create a proposal -that is independent from the official release and sets all properties in its -metadata. It's possible to change the configuration so that some information -from the pseudo-release is instead added as custom tags, keeping the metadata -from the official release: - -.. code-block:: yaml - - mbpseudo: - # other config not shown - custom_tags_only: yes - -The default custom tags with this configuration are specified as mappings where -the keys define the tag names and the values define the pseudo-release property -that will be used to set the tag's value: - -.. code-block:: yaml - - mbpseudo: - album_custom_tags: - album_transl: album - album_artist_transl: artist - track_custom_tags: - title_transl: title - artist_transl: artist - -Note that the information for each set of custom tags corresponds to different -metadata levels (album or track level), which is why ``artist`` appears twice -even though it effectively references album artist and track artist -respectively. - -If you want to modify any mapping under ``album_custom_tags`` or -``track_custom_tags``, you must specify *everything* for that set of tags in -your configuration file because any customization replaces the whole dictionary -of mappings for that level. - -.. note:: - - These custom tags are also added to the music files, not only to the - database. 
diff --git a/docs/plugins/musicbrainz.rst b/docs/plugins/musicbrainz.rst index 60c3bc4a2..f30c514b5 100644 --- a/docs/plugins/musicbrainz.rst +++ b/docs/plugins/musicbrainz.rst @@ -42,6 +42,15 @@ Default tidal: no data_source_mismatch_penalty: 0.5 search_limit: 5 + pseudo_releases: + scripts: [] + custom_tags_only: no + album_custom_tags: + album_transl: album + album_artist_transl: artist + track_custom_tags: + title_transl: title + artist_transl: artist .. conf:: host :default: musicbrainz.org @@ -149,3 +158,82 @@ Default .. _limited: https://musicbrainz.org/doc/XML_Web_Service/Rate_Limiting .. _main server: https://musicbrainz.org/ + +Pseudo-Releases +--------------- + +This plugin can also search for MusicBrainz pseudo-releases_ during the import +process, which are added to the normal candidates from the MusicBrainz search. + +.. _pseudo-releases: https://musicbrainz.org/doc/Style/Specific_types_of_releases/Pseudo-Releases + +This is useful for releases whose title and track titles are written with a +script_ that can be translated or transliterated into a different one. + +.. _script: https://en.wikipedia.org/wiki/ISO_15924 + +The configuration expects an array of scripts that are desired for the +pseudo-releases. For ``artist`` in particular, keep in mind that even +pseudo-releases might specify it with the original script, so you should also +configure import :ref:`languages` to give artist aliases more priority. +Therefore, the minimum configuration to enable this functionality looks like +this: + +.. code-block:: yaml + + import: + languages: en + + musicbrainz: + # other config not shown + pseudo_releases: + scripts: + - Latn + +Pseudo-releases will only be included if the initial search in MusicBrainz +returns releases whose script is *not* desired and whose relationships include +pseudo-releases with desired scripts. 
+ +By default, the data from the pseudo-release will be used to create a proposal +that is independent from the original release and sets all properties in its +metadata. It's possible to change the configuration so that some information +from the pseudo-release is instead added as custom tags, keeping the metadata +from the original release: + +.. code-block:: yaml + + musicbrainz: + pseudo_releases: + # other config not shown + custom_tags_only: yes + +The default custom tags with this configuration are specified as mappings where +the keys define the tag names and the values define the pseudo-release property +that will be used to set the tag's value: + +.. code-block:: yaml + + musicbrainz: + pseudo_releases: + # other config not shown + album_custom_tags: + album_transl: album + album_artist_transl: artist + track_custom_tags: + title_transl: title + artist_transl: artist + +Note that the information for each set of custom tags corresponds to different +metadata levels (album or track level), which is why ``artist`` appears twice +even though it effectively references album artist and track artist +respectively. + +If you want to modify any mapping under ``album_custom_tags`` or +``track_custom_tags``, you must specify *everything* for that set of tags in +your configuration file because any customization replaces the whole dictionary +of mappings for that level. + +.. note:: + + These custom tags are also added to the music files, not only to the + database. 
diff --git a/test/plugins/test_mbpseudo.py b/test/plugins/test_musicbrainz_pseudo.py similarity index 62% rename from test/plugins/test_mbpseudo.py rename to test/plugins/test_musicbrainz_pseudo.py index 2fb6321b3..52e1891b7 100644 --- a/test/plugins/test_mbpseudo.py +++ b/test/plugins/test_musicbrainz_pseudo.py @@ -11,11 +11,7 @@ from beets.autotag.distance import Distance from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.library import Item from beets.test.helper import PluginMixin -from beetsplug.mbpseudo import ( - _STATUS_PSEUDO, - MusicBrainzPseudoReleasePlugin, - PseudoAlbumInfo, -) +from beetsplug.musicbrainz import MusicBrainzPlugin, PseudoAlbumInfo if TYPE_CHECKING: import pathlib @@ -25,7 +21,7 @@ if TYPE_CHECKING: @pytest.fixture(scope="module") def rsrc_dir(pytestconfig: pytest.Config): - return pytestconfig.rootpath / "test" / "rsrc" / "mbpseudo" + return pytestconfig.rootpath / "test" / "rsrc" / "musicbrainz" @pytest.fixture @@ -71,6 +67,7 @@ class TestPseudoAlbumInfo: self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo ): info = PseudoAlbumInfo(pseudo_release_info, official_release_info) + info.use_pseudo_as_ref() assert info.album == "In Bloom" def test_get_attr_from_official( @@ -95,59 +92,59 @@ class TestPseudoAlbumInfo: class TestMBPseudoMixin(PluginMixin): - plugin = "mbpseudo" + plugin = "musicbrainz" @pytest.fixture(autouse=True) - def patch_get_release(self, monkeypatch, pseudo_release: JSONDict): + def patch_get_release( + self, + monkeypatch, + official_release: JSONDict, + pseudo_release: JSONDict, + ): + def mock_get_release(_, album_id: str, **kwargs): + if album_id == official_release["id"]: + return deepcopy(official_release) + else: + return deepcopy(pseudo_release) + monkeypatch.setattr( "beetsplug._utils.musicbrainz.MusicBrainzAPI.get_release", - lambda _, album_id: deepcopy( - {pseudo_release["id"]: pseudo_release}[album_id] - ), + mock_get_release, ) @pytest.fixture(scope="class") def 
plugin_config(self): - return {"scripts": ["Latn", "Dummy"]} + return {"pseudo_releases": {"scripts": ["Latn", "Dummy"]}} @pytest.fixture - def mbpseudo_plugin(self, plugin_config) -> MusicBrainzPseudoReleasePlugin: + def musicbrainz_plugin(self, plugin_config) -> MusicBrainzPlugin: self.config[self.plugin].set(plugin_config) - return MusicBrainzPseudoReleasePlugin() + return MusicBrainzPlugin() - -class TestMBPseudoPlugin(TestMBPseudoMixin): - def test_scripts_init( - self, mbpseudo_plugin: MusicBrainzPseudoReleasePlugin - ): - assert mbpseudo_plugin._scripts == ["Latn", "Dummy"] - - @pytest.mark.parametrize( - "album_id", - [ - "a5ce1d11-2e32-45a4-b37f-c1589d46b103", - "-5ce1d11-2e32-45a4-b37f-c1589d46b103", - ], - ) - def test_extract_id_uses_music_brainz_pattern( - self, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, - album_id: str, - ): - if album_id.startswith("-"): - assert mbpseudo_plugin._extract_id(album_id) is None + @staticmethod + def get_album_info( + musicbrainz_plugin: MusicBrainzPlugin, + raw: JSONDict, + ) -> AlbumInfo: + if info := musicbrainz_plugin.album_for_id(raw["id"]): + return info else: - assert mbpseudo_plugin._extract_id(album_id) == album_id + raise AssertionError("AlbumInfo is None") + + +class TestMBPseudoReleases(TestMBPseudoMixin): + def test_scripts_init(self, musicbrainz_plugin: MusicBrainzPlugin): + assert musicbrainz_plugin._scripts == ["Latn", "Dummy"] def test_album_info_for_pseudo_release( self, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + musicbrainz_plugin: MusicBrainzPlugin, pseudo_release: JSONDict, ): - album_info = mbpseudo_plugin.album_info(pseudo_release) - assert not isinstance(album_info, PseudoAlbumInfo) - assert album_info.data_source == "MusicBrainzPseudoRelease" - assert album_info.albumstatus == _STATUS_PSEUDO + album_info = self.get_album_info(musicbrainz_plugin, pseudo_release) + assert isinstance(album_info, PseudoAlbumInfo) + assert album_info.data_source == "MusicBrainz" + assert 
album_info.albumstatus == "Official" @pytest.mark.parametrize( "json_key", @@ -159,41 +156,51 @@ class TestMBPseudoPlugin(TestMBPseudoMixin): ) def test_interception_skip_when_rel_values_dont_match( self, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + musicbrainz_plugin: MusicBrainzPlugin, official_release: JSONDict, json_key: str, ): - del official_release["release-relations"][0][json_key] + for r in official_release["release-relations"]: + del r[json_key] - album_info = mbpseudo_plugin.album_info(official_release) + album_info = self.get_album_info(musicbrainz_plugin, official_release) assert not isinstance(album_info, PseudoAlbumInfo) - assert album_info.data_source == "MusicBrainzPseudoRelease" + assert album_info.data_source == "MusicBrainz" def test_interception_skip_when_script_doesnt_match( self, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + musicbrainz_plugin: MusicBrainzPlugin, official_release: JSONDict, ): - official_release["release-relations"][0]["release"][ - "text-representation" - ]["script"] = "Null" + for r in official_release["release-relations"]: + r["release"]["text-representation"]["script"] = "Null" - album_info = mbpseudo_plugin.album_info(official_release) + album_info = self.get_album_info(musicbrainz_plugin, official_release) assert not isinstance(album_info, PseudoAlbumInfo) - assert album_info.data_source == "MusicBrainzPseudoRelease" + assert album_info.data_source == "MusicBrainz" + + def test_interception_skip_when_relations_missing( + self, + musicbrainz_plugin: MusicBrainzPlugin, + official_release: JSONDict, + ): + del official_release["release-relations"] + album_info = self.get_album_info(musicbrainz_plugin, official_release) + assert not isinstance(album_info, PseudoAlbumInfo) + assert album_info.data_source == "MusicBrainz" def test_interception( self, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + musicbrainz_plugin: MusicBrainzPlugin, official_release: JSONDict, ): - album_info = 
mbpseudo_plugin.album_info(official_release) + album_info = self.get_album_info(musicbrainz_plugin, official_release) assert isinstance(album_info, PseudoAlbumInfo) - assert album_info.data_source == "MusicBrainzPseudoRelease" + assert album_info.data_source == "MusicBrainz" def test_final_adjustment_skip( self, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + musicbrainz_plugin: MusicBrainzPlugin, ): match = AlbumMatch( distance=Distance(), @@ -202,20 +209,18 @@ class TestMBPseudoPlugin(TestMBPseudoMixin): extra_items=[], extra_tracks=[], ) - - mbpseudo_plugin._adjust_final_album_match(match) - assert match.info.data_source == "mb" + musicbrainz_plugin._adjust_final_album_match(match) def test_final_adjustment( self, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + musicbrainz_plugin: MusicBrainzPlugin, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo, ): pseudo_album_info = PseudoAlbumInfo( pseudo_release=pseudo_release_info, official_release=official_release_info, - data_source=mbpseudo_plugin.data_source, + data_source=musicbrainz_plugin.data_source, ) pseudo_album_info.use_official_as_ref() @@ -230,28 +235,33 @@ class TestMBPseudoPlugin(TestMBPseudoMixin): extra_tracks=[], ) - mbpseudo_plugin._adjust_final_album_match(match) + musicbrainz_plugin._adjust_final_album_match(match) assert match.info.data_source == "MusicBrainz" assert match.info.album_id == "pseudo" assert match.info.album == "In Bloom" -class TestMBPseudoPluginCustomTagsOnly(TestMBPseudoMixin): +class TestMBPseudoReleasesCustomTagsOnly(TestMBPseudoMixin): @pytest.fixture(scope="class") def plugin_config(self): - return {"scripts": ["Latn", "Dummy"], "custom_tags_only": True} + return { + "pseudo_releases": { + "scripts": ["Latn", "Dummy"], + "custom_tags_only": True, + } + } def test_custom_tags( self, config, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + musicbrainz_plugin: MusicBrainzPlugin, official_release: JSONDict, ): - config["import"]["languages"] = ["en", 
"jp"] - album_info = mbpseudo_plugin.album_info(official_release) + config["import"]["languages"] = [] + album_info = self.get_album_info(musicbrainz_plugin, official_release) assert not isinstance(album_info, PseudoAlbumInfo) - assert album_info.data_source == "MusicBrainzPseudoRelease" + assert album_info.data_source == "MusicBrainz" assert album_info["album_transl"] == "In Bloom" assert album_info["album_artist_transl"] == "Lilas Ikuta" assert album_info.tracks[0]["title_transl"] == "In Bloom" @@ -260,13 +270,14 @@ class TestMBPseudoPluginCustomTagsOnly(TestMBPseudoMixin): def test_custom_tags_with_import_languages( self, config, - mbpseudo_plugin: MusicBrainzPseudoReleasePlugin, + musicbrainz_plugin: MusicBrainzPlugin, official_release: JSONDict, ): config["import"]["languages"] = [] - album_info = mbpseudo_plugin.album_info(official_release) + config["import"]["languages"] = ["en", "jp"] + album_info = self.get_album_info(musicbrainz_plugin, official_release) assert not isinstance(album_info, PseudoAlbumInfo) - assert album_info.data_source == "MusicBrainzPseudoRelease" + assert album_info.data_source == "MusicBrainz" assert album_info["album_transl"] == "In Bloom" assert album_info["album_artist_transl"] == "Lilas Ikuta" assert album_info.tracks[0]["title_transl"] == "In Bloom" diff --git a/test/rsrc/mbpseudo/official_release.json b/test/rsrc/musicbrainz/official_release.json similarity index 100% rename from test/rsrc/mbpseudo/official_release.json rename to test/rsrc/musicbrainz/official_release.json diff --git a/test/rsrc/mbpseudo/pseudo_release.json b/test/rsrc/musicbrainz/pseudo_release.json similarity index 100% rename from test/rsrc/mbpseudo/pseudo_release.json rename to test/rsrc/musicbrainz/pseudo_release.json