Fix data source penalty for mbpseudo

This commit is contained in:
asardaes 2025-10-14 12:09:42 -06:00
parent defc602310
commit cb758988ed
3 changed files with 86 additions and 43 deletions

View file

@ -95,7 +95,6 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
@override
def album_info(self, release: JSONDict) -> AlbumInfo:
official_release = super().album_info(release)
official_release.data_source = "MusicBrainz"
if release.get("status") == _STATUS_PSEUDO:
return official_release
@ -113,7 +112,6 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
pseudo_release, official_release
),
official_release=official_release,
data_source="MusicBrainz",
)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(
@ -172,17 +170,20 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
def _adjust_final_album_match(self, match: AlbumMatch):
# Adjusts `match` in place; nothing is returned. Two things can happen:
#   1. when match.info is a PseudoAlbumInfo, it is switched to its
#      pseudo-release reference and the item/track mapping is refreshed;
#   2. when the info carries this plugin's own data_source label, the label
#      is rewritten to "MusicBrainz".
# NOTE(review): this text comes from a flattened diff view, so indentation is
# lost and `mapping = match.mapping` shows up twice (old and new position of
# the same line). Whether the data_source check below sits inside or outside
# the isinstance branch cannot be determined from here -- confirm against the
# real file.
album_info = match.info
if isinstance(album_info, PseudoAlbumInfo):
mapping = match.mapping
self._log.debug(
"Switching {0} to pseudo-release source for final proposal",
album_info.album_id,
)
# From here on album_info is asked to use the pseudo-release as reference,
# so album_info.tracks read below is taken after that switch.
album_info.use_pseudo_as_ref()
mapping = match.mapping
# Re-run assignment of the already-mapped keys (presumably library Items --
# verify against callers) against album_info.tracks; only the first element
# of the returned triple is used.
new_mappings, _, _ = assign_items(
list(mapping.keys()), album_info.tracks
)
# The existing mapping object is updated in place rather than replaced.
mapping.update(new_mappings)
# Only infos labelled with this plugin's data_source are relabelled; any other
# data_source value is left untouched.
if album_info.data_source == self.data_source:
album_info.data_source = "MusicBrainz"
@override
def _extract_id(self, url: str) -> str | None:
    """Hand ``url`` to ``extract_release_id`` using the "MusicBrainz" label.

    The helper's result is returned unchanged; per the annotation it is
    either a string or ``None``.
    """
    release_id = extract_release_id("MusicBrainz", url)
    return release_id
@ -220,17 +221,12 @@ class PseudoAlbumInfo(AlbumInfo):
return self.__dict__["_official_release"]
def determine_best_ref(self, items: Sequence[Item]) -> str:
ds = self.data_source
self.data_source = None
self.use_pseudo_as_ref()
pseudo_dist = self._compute_distance(items)
self.use_official_as_ref()
official_dist = self._compute_distance(items)
self.data_source = ds
if official_dist < pseudo_dist:
self.use_official_as_ref()
return "official"

View file

@ -19,7 +19,7 @@ pseudo-releases with desired scripts.
Configuration
-------------
Since this plugin first searches for official releases from MusicBrainz, most
Since this plugin first searches for official releases from MusicBrainz, all
options from the `musicbrainz` plugin's :ref:`musicbrainz-config` are supported,
but they must be specified under `mbpseudo` in the configuration file.
Additionally, the configuration expects an array of scripts that are desired for
@ -36,8 +36,8 @@ like this:
Note that the `search_limit` configuration applies to the initial search for
official releases, and that the `data_source` in the database will be
"MusicBrainz". Because of this, the only configuration that must remain under
`musicbrainz` is `data_source_mismatch_penalty` (see also
"MusicBrainz". Nevertheless, `data_source_mismatch_penalty` must also be
specified under `mbpseudo` (see also
:ref:`metadata-source-plugin-configuration`). An example with multiple data
sources may look like this:
@ -46,11 +46,9 @@ sources may look like this:
plugins: mbpseudo deezer
mbpseudo:
data_source_mismatch_penalty: 0
scripts:
- Latn
musicbrainz:
data_source_mismatch_penalty: 0
deezer:
data_source_mismatch_penalty: 0.5
data_source_mismatch_penalty: 0.2

View file

@ -3,6 +3,8 @@ import pathlib
import pytest
from beets.autotag import AlbumMatch
from beets.autotag.distance import Distance
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.library import Item
from beets.test.helper import PluginMixin
@ -14,48 +16,50 @@ from beetsplug.mbpseudo import (
)
@pytest.fixture(scope="module")
def official_release_info() -> AlbumInfo:
    """Module-scoped fixture: single-track album with id ``"official"``.

    Album and track both carry the original-script title.
    """
    only_track = TrackInfo(title="百花繚乱")
    info = AlbumInfo(
        tracks=[only_track],
        album_id="official",
        album="百花繚乱",
    )
    return info
@pytest.fixture(scope="module")
def pseudo_release_info() -> AlbumInfo:
    """Module-scoped fixture: single-track album with id ``"pseudo"``.

    Album and track both carry the Latin-script title "In Bloom".
    """
    only_track = TrackInfo(title="In Bloom")
    info = AlbumInfo(
        tracks=[only_track],
        album_id="pseudo",
        album="In Bloom",
    )
    return info
class TestPseudoAlbumInfo:
@pytest.fixture
def official_release(self) -> AlbumInfo:
return AlbumInfo(
tracks=[TrackInfo(title="百花繚乱")],
album_id="official",
album="百花繚乱",
)
@pytest.fixture
def pseudo_release(self) -> AlbumInfo:
return AlbumInfo(
tracks=[TrackInfo(title="In Bloom")],
album_id="pseudo",
album="In Bloom",
)
def test_album_id_always_from_pseudo(
self, official_release: AlbumInfo, pseudo_release: AlbumInfo
self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
):
info = PseudoAlbumInfo(pseudo_release, official_release)
info = PseudoAlbumInfo(pseudo_release_info, official_release_info)
info.use_official_as_ref()
assert info.album_id == "pseudo"
def test_get_attr_from_pseudo(
self, official_release: AlbumInfo, pseudo_release: AlbumInfo
self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
):
info = PseudoAlbumInfo(pseudo_release, official_release)
info = PseudoAlbumInfo(pseudo_release_info, official_release_info)
assert info.album == "In Bloom"
def test_get_attr_from_official(
self, official_release: AlbumInfo, pseudo_release: AlbumInfo
self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
):
info = PseudoAlbumInfo(pseudo_release, official_release)
info = PseudoAlbumInfo(pseudo_release_info, official_release_info)
info.use_official_as_ref()
assert info.album == info.get_official_release().album
def test_determine_best_ref(
self, official_release: AlbumInfo, pseudo_release: AlbumInfo
self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
):
info = PseudoAlbumInfo(
pseudo_release, official_release, data_source="test"
pseudo_release_info, official_release_info, data_source="test"
)
item = Item()
item["title"] = "百花繚乱"
@ -126,7 +130,7 @@ class TestMBPseudoPlugin(PluginMixin):
):
album_info = mbpseudo_plugin.album_info(pseudo_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainz"
assert album_info.data_source == "MusicBrainzPseudoRelease"
assert album_info.albumstatus == _STATUS_PSEUDO
@pytest.mark.parametrize(
@ -147,7 +151,7 @@ class TestMBPseudoPlugin(PluginMixin):
album_info = mbpseudo_plugin.album_info(official_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainz"
assert album_info.data_source == "MusicBrainzPseudoRelease"
def test_interception_skip_when_script_doesnt_match(
self,
@ -160,7 +164,7 @@ class TestMBPseudoPlugin(PluginMixin):
album_info = mbpseudo_plugin.album_info(official_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainz"
assert album_info.data_source == "MusicBrainzPseudoRelease"
def test_interception(
self,
@ -173,4 +177,49 @@ class TestMBPseudoPlugin(PluginMixin):
)
album_info = mbpseudo_plugin.album_info(official_release["release"])
assert isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainz"
assert album_info.data_source == "MusicBrainzPseudoRelease"
def test_final_adjustment_skip(
    self,
    mbpseudo_plugin: MusicBrainzPseudoReleasePlugin,
):
    """A plain ``AlbumInfo`` labelled "mb" keeps its ``data_source``."""
    # Not a PseudoAlbumInfo and not labelled with the plugin's own source.
    plain_info = AlbumInfo(tracks=[], data_source="mb")
    match = AlbumMatch(
        distance=Distance(),
        info=plain_info,
        mapping={},
        extra_items=[],
        extra_tracks=[],
    )

    mbpseudo_plugin._adjust_final_album_match(match)

    assert match.info.data_source == "mb"
def test_final_adjustment(
    self,
    mbpseudo_plugin: MusicBrainzPseudoReleasePlugin,
    official_release_info: AlbumInfo,
    pseudo_release_info: AlbumInfo,
):
    """Final adjustment relabels the source and exposes pseudo-release data.

    The info starts out using the official release as reference and carrying
    the plugin's own ``data_source``; after the adjustment it must report
    "MusicBrainz" together with the pseudo-release id and album title.
    """
    wrapped_info = PseudoAlbumInfo(
        pseudo_release=pseudo_release_info,
        official_release=official_release_info,
        data_source=mbpseudo_plugin.data_source,
    )
    wrapped_info.use_official_as_ref()

    item = Item()
    item["title"] = "百花繚乱"

    # Map the item to the first track as exposed under the official reference.
    initial_mapping = {item: wrapped_info.tracks[0]}
    match = AlbumMatch(
        distance=Distance(),
        info=wrapped_info,
        mapping=initial_mapping,
        extra_items=[],
        extra_tracks=[],
    )

    mbpseudo_plugin._adjust_final_album_match(match)

    adjusted = match.info
    assert adjusted.data_source == "MusicBrainz"
    assert adjusted.album_id == "pseudo"
    assert adjusted.album == "In Bloom"