Fix data source penalty for mbpseudo

This commit is contained in:
asardaes 2025-10-14 12:09:42 -06:00
parent defc602310
commit cb758988ed
3 changed files with 86 additions and 43 deletions

View file

@@ -95,7 +95,6 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
@override @override
def album_info(self, release: JSONDict) -> AlbumInfo: def album_info(self, release: JSONDict) -> AlbumInfo:
official_release = super().album_info(release) official_release = super().album_info(release)
official_release.data_source = "MusicBrainz"
if release.get("status") == _STATUS_PSEUDO: if release.get("status") == _STATUS_PSEUDO:
return official_release return official_release
@@ -113,7 +112,6 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
pseudo_release, official_release pseudo_release, official_release
), ),
official_release=official_release, official_release=official_release,
data_source="MusicBrainz",
) )
except musicbrainzngs.MusicBrainzError as exc: except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError( raise MusicBrainzAPIError(
@@ -172,17 +170,20 @@ class MusicBrainzPseudoReleasePlugin(MusicBrainzPlugin):
def _adjust_final_album_match(self, match: AlbumMatch): def _adjust_final_album_match(self, match: AlbumMatch):
album_info = match.info album_info = match.info
if isinstance(album_info, PseudoAlbumInfo): if isinstance(album_info, PseudoAlbumInfo):
mapping = match.mapping
self._log.debug( self._log.debug(
"Switching {0} to pseudo-release source for final proposal", "Switching {0} to pseudo-release source for final proposal",
album_info.album_id, album_info.album_id,
) )
album_info.use_pseudo_as_ref() album_info.use_pseudo_as_ref()
mapping = match.mapping
new_mappings, _, _ = assign_items( new_mappings, _, _ = assign_items(
list(mapping.keys()), album_info.tracks list(mapping.keys()), album_info.tracks
) )
mapping.update(new_mappings) mapping.update(new_mappings)
if album_info.data_source == self.data_source:
album_info.data_source = "MusicBrainz"
@override @override
def _extract_id(self, url: str) -> str | None: def _extract_id(self, url: str) -> str | None:
return extract_release_id("MusicBrainz", url) return extract_release_id("MusicBrainz", url)
@@ -220,17 +221,12 @@ class PseudoAlbumInfo(AlbumInfo):
return self.__dict__["_official_release"] return self.__dict__["_official_release"]
def determine_best_ref(self, items: Sequence[Item]) -> str: def determine_best_ref(self, items: Sequence[Item]) -> str:
ds = self.data_source
self.data_source = None
self.use_pseudo_as_ref() self.use_pseudo_as_ref()
pseudo_dist = self._compute_distance(items) pseudo_dist = self._compute_distance(items)
self.use_official_as_ref() self.use_official_as_ref()
official_dist = self._compute_distance(items) official_dist = self._compute_distance(items)
self.data_source = ds
if official_dist < pseudo_dist: if official_dist < pseudo_dist:
self.use_official_as_ref() self.use_official_as_ref()
return "official" return "official"

View file

@@ -19,7 +19,7 @@ pseudo-releases with desired scripts.
Configuration Configuration
------------- -------------
Since this plugin first searches for official releases from MusicBrainz, most Since this plugin first searches for official releases from MusicBrainz, all
options from the `musicbrainz` plugin's :ref:`musicbrainz-config` are supported, options from the `musicbrainz` plugin's :ref:`musicbrainz-config` are supported,
but they must be specified under `mbpseudo` in the configuration file. but they must be specified under `mbpseudo` in the configuration file.
Additionally, the configuration expects an array of scripts that are desired for Additionally, the configuration expects an array of scripts that are desired for
@@ -36,8 +36,8 @@ like this:
Note that the `search_limit` configuration applies to the initial search for Note that the `search_limit` configuration applies to the initial search for
official releases, and that the `data_source` in the database will be official releases, and that the `data_source` in the database will be
"MusicBrainz". Because of this, the only configuration that must remain under "MusicBrainz". Nevertheless, `data_source_mismatch_penalty` must also be
`musicbrainz` is `data_source_mismatch_penalty` (see also specified under `mbpseudo` (see also
:ref:`metadata-source-plugin-configuration`). An example with multiple data :ref:`metadata-source-plugin-configuration`). An example with multiple data
sources may look like this: sources may look like this:
@@ -46,11 +46,9 @@ sources may look like this:
plugins: mbpseudo deezer plugins: mbpseudo deezer
mbpseudo: mbpseudo:
data_source_mismatch_penalty: 0
scripts: scripts:
- Latn - Latn
musicbrainz:
data_source_mismatch_penalty: 0
deezer: deezer:
data_source_mismatch_penalty: 0.5 data_source_mismatch_penalty: 0.2

View file

@@ -3,6 +3,8 @@ import pathlib
import pytest import pytest
from beets.autotag import AlbumMatch
from beets.autotag.distance import Distance
from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.library import Item from beets.library import Item
from beets.test.helper import PluginMixin from beets.test.helper import PluginMixin
@@ -14,48 +16,50 @@ from beetsplug.mbpseudo import (
) )
class TestPseudoAlbumInfo: @pytest.fixture(scope="module")
@pytest.fixture def official_release_info() -> AlbumInfo:
def official_release(self) -> AlbumInfo:
return AlbumInfo( return AlbumInfo(
tracks=[TrackInfo(title="百花繚乱")], tracks=[TrackInfo(title="百花繚乱")],
album_id="official", album_id="official",
album="百花繚乱", album="百花繚乱",
) )
@pytest.fixture
def pseudo_release(self) -> AlbumInfo: @pytest.fixture(scope="module")
def pseudo_release_info() -> AlbumInfo:
return AlbumInfo( return AlbumInfo(
tracks=[TrackInfo(title="In Bloom")], tracks=[TrackInfo(title="In Bloom")],
album_id="pseudo", album_id="pseudo",
album="In Bloom", album="In Bloom",
) )
class TestPseudoAlbumInfo:
def test_album_id_always_from_pseudo( def test_album_id_always_from_pseudo(
self, official_release: AlbumInfo, pseudo_release: AlbumInfo self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
): ):
info = PseudoAlbumInfo(pseudo_release, official_release) info = PseudoAlbumInfo(pseudo_release_info, official_release_info)
info.use_official_as_ref() info.use_official_as_ref()
assert info.album_id == "pseudo" assert info.album_id == "pseudo"
def test_get_attr_from_pseudo( def test_get_attr_from_pseudo(
self, official_release: AlbumInfo, pseudo_release: AlbumInfo self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
): ):
info = PseudoAlbumInfo(pseudo_release, official_release) info = PseudoAlbumInfo(pseudo_release_info, official_release_info)
assert info.album == "In Bloom" assert info.album == "In Bloom"
def test_get_attr_from_official( def test_get_attr_from_official(
self, official_release: AlbumInfo, pseudo_release: AlbumInfo self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
): ):
info = PseudoAlbumInfo(pseudo_release, official_release) info = PseudoAlbumInfo(pseudo_release_info, official_release_info)
info.use_official_as_ref() info.use_official_as_ref()
assert info.album == info.get_official_release().album assert info.album == info.get_official_release().album
def test_determine_best_ref( def test_determine_best_ref(
self, official_release: AlbumInfo, pseudo_release: AlbumInfo self, official_release_info: AlbumInfo, pseudo_release_info: AlbumInfo
): ):
info = PseudoAlbumInfo( info = PseudoAlbumInfo(
pseudo_release, official_release, data_source="test" pseudo_release_info, official_release_info, data_source="test"
) )
item = Item() item = Item()
item["title"] = "百花繚乱" item["title"] = "百花繚乱"
@@ -126,7 +130,7 @@ class TestMBPseudoPlugin(PluginMixin):
): ):
album_info = mbpseudo_plugin.album_info(pseudo_release["release"]) album_info = mbpseudo_plugin.album_info(pseudo_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo) assert not isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainz" assert album_info.data_source == "MusicBrainzPseudoRelease"
assert album_info.albumstatus == _STATUS_PSEUDO assert album_info.albumstatus == _STATUS_PSEUDO
@pytest.mark.parametrize( @pytest.mark.parametrize(
@@ -147,7 +151,7 @@ class TestMBPseudoPlugin(PluginMixin):
album_info = mbpseudo_plugin.album_info(official_release["release"]) album_info = mbpseudo_plugin.album_info(official_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo) assert not isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainz" assert album_info.data_source == "MusicBrainzPseudoRelease"
def test_interception_skip_when_script_doesnt_match( def test_interception_skip_when_script_doesnt_match(
self, self,
@@ -160,7 +164,7 @@ class TestMBPseudoPlugin(PluginMixin):
album_info = mbpseudo_plugin.album_info(official_release["release"]) album_info = mbpseudo_plugin.album_info(official_release["release"])
assert not isinstance(album_info, PseudoAlbumInfo) assert not isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainz" assert album_info.data_source == "MusicBrainzPseudoRelease"
def test_interception( def test_interception(
self, self,
@@ -173,4 +177,49 @@ class TestMBPseudoPlugin(PluginMixin):
) )
album_info = mbpseudo_plugin.album_info(official_release["release"]) album_info = mbpseudo_plugin.album_info(official_release["release"])
assert isinstance(album_info, PseudoAlbumInfo) assert isinstance(album_info, PseudoAlbumInfo)
assert album_info.data_source == "MusicBrainz" assert album_info.data_source == "MusicBrainzPseudoRelease"
def test_final_adjustment_skip(
self,
mbpseudo_plugin: MusicBrainzPseudoReleasePlugin,
):
match = AlbumMatch(
distance=Distance(),
info=AlbumInfo(tracks=[], data_source="mb"),
mapping={},
extra_items=[],
extra_tracks=[],
)
mbpseudo_plugin._adjust_final_album_match(match)
assert match.info.data_source == "mb"
def test_final_adjustment(
self,
mbpseudo_plugin: MusicBrainzPseudoReleasePlugin,
official_release_info: AlbumInfo,
pseudo_release_info: AlbumInfo,
):
pseudo_album_info = PseudoAlbumInfo(
pseudo_release=pseudo_release_info,
official_release=official_release_info,
data_source=mbpseudo_plugin.data_source,
)
pseudo_album_info.use_official_as_ref()
item = Item()
item["title"] = "百花繚乱"
match = AlbumMatch(
distance=Distance(),
info=pseudo_album_info,
mapping={item: pseudo_album_info.tracks[0]},
extra_items=[],
extra_tracks=[],
)
mbpseudo_plugin._adjust_final_album_match(match)
assert match.info.data_source == "MusicBrainz"
assert match.info.album_id == "pseudo"
assert match.info.album == "In Bloom"