match.current_metadata -> util.get_most_common_tags

This commit is contained in:
Šarūnas Nejus 2025-05-25 10:52:39 +01:00
parent 509cbdcbe4
commit 1c9aebd36c
No known key found for this signature in database
GPG key ID: DD28F6704DBE3435
6 changed files with 127 additions and 130 deletions

View file

@ -24,13 +24,7 @@ from beets.library import Album, Item, LibModel
from beets.util import unique_list
from .hooks import AlbumInfo, AlbumMatch, Distance, TrackInfo, TrackMatch
from .match import (
Proposal,
Recommendation,
current_metadata,
tag_album,
tag_item,
)
from .match import Proposal, Recommendation, tag_album, tag_item
__all__ = [
"AlbumInfo",
@ -43,7 +37,6 @@ __all__ = [
"apply_album_metadata",
"apply_item_metadata",
"apply_metadata",
"current_metadata",
"tag_album",
"tag_item",
]

View file

@ -36,7 +36,7 @@ from beets.autotag import (
TrackMatch,
hooks,
)
from beets.util import plurality
from beets.util import get_most_common_tags
if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
@ -80,44 +80,6 @@ class Proposal(NamedTuple):
# Primary matching functionality.
def current_metadata(
    items: Iterable[Item],
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Summarise the tags shared by a group of items.

    For a fixed set of album-level fields, determine which value occurs
    most often across ``items`` and whether every inspected item agrees
    on it.

    Return a ``(likelies, consensus)`` pair of dictionaries keyed by
    field name:
     - ``likelies`` maps each field to its most common value.
     - ``consensus`` maps each field to a boolean that is true only when
       that value was unanimous.

    ``items`` must not be empty.
    """
    assert items  # Must be nonempty.

    tag_names = (
        "artist",
        "album",
        "albumartist",
        "year",
        "disctotal",
        "mb_albumid",
        "label",
        "barcode",
        "catalognum",
        "country",
        "media",
        "albumdisambig",
    )

    likelies = {}
    consensus = {}
    for tag in tag_names:
        # Falsy entries are left out of the tally.
        observed = [entry[tag] for entry in items if entry]
        top_value, top_count = plurality(observed)
        likelies[tag] = top_value
        # Unanimous means the winning value accounts for every tallied entry.
        consensus[tag] = top_count == len(observed)

    # A unanimous, non-empty album artist replaces the most common
    # track artist.
    if likelies["albumartist"] and consensus["albumartist"]:
        likelies["artist"] = likelies["albumartist"]

    return likelies, consensus
def assign_items(
items: Sequence[Item],
tracks: Sequence[TrackInfo],
@ -231,7 +193,7 @@ def distance(
keys are a subset of `items` and the values are a subset of
`album_info.tracks`.
"""
likelies, _ = current_metadata(items)
likelies, _ = get_most_common_tags(items)
dist = hooks.Distance()
@ -499,7 +461,7 @@ def tag_album(
candidates.
"""
# Get current metadata.
likelies, consensus = current_metadata(items)
likelies, consensus = get_most_common_tags(items)
cur_artist: str = likelies["artist"]
cur_album: str = likelies["album"]
log.debug("Tagging {0} - {1}", cur_artist, cur_album)

View file

@ -228,7 +228,7 @@ class ImportTask(BaseImportTask):
or APPLY (in which case the data comes from the choice).
"""
if self.choice_flag in (Action.ASIS, Action.RETAG):
likelies, consensus = autotag.current_metadata(self.items)
likelies, consensus = util.get_most_common_tags(self.items)
return likelies
elif self.choice_flag is Action.APPLY and self.match:
return self.match.info.copy()

View file

@ -56,6 +56,8 @@ if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
from logging import Logger
from beets.library import Item
if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
@ -814,6 +816,44 @@ def plurality(objs: Iterable[T]) -> tuple[T, int]:
return c.most_common(1)[0]
def get_most_common_tags(
    items: Sequence[Item],
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Summarise the tags shared by a group of items.

    For a fixed set of album-level fields, determine which value occurs
    most often across ``items`` and whether every inspected item agrees
    on it.

    Return a ``(likelies, consensus)`` pair of dictionaries keyed by
    field name:
     - ``likelies`` maps each field to its most common value.
     - ``consensus`` maps each field to a boolean that is true only when
       that value was unanimous.

    ``items`` must not be empty.
    """
    assert items  # Must be nonempty.

    tag_names = (
        "artist",
        "album",
        "albumartist",
        "year",
        "disctotal",
        "mb_albumid",
        "label",
        "barcode",
        "catalognum",
        "country",
        "media",
        "albumdisambig",
    )

    likelies = {}
    consensus = {}
    for tag in tag_names:
        # Falsy entries are left out of the tally.
        observed = [entry[tag] for entry in items if entry]
        top_value, top_count = plurality(observed)
        likelies[tag] = top_value
        # Unanimous means the winning value accounts for every tallied entry.
        consensus[tag] = top_count == len(observed)

    # A unanimous, non-empty album artist replaces the most common
    # track artist.
    if likelies["albumartist"] and consensus["albumartist"]:
        likelies["artist"] = likelies["albumartist"]

    return likelies, consensus
# stdout and stderr as bytes
class CommandOutput(NamedTuple):
stdout: bytes

View file

@ -24,86 +24,6 @@ from beets.autotag import AlbumInfo, TrackInfo, correct_list_fields, match
from beets.autotag.hooks import Distance, string_dist
from beets.library import Item
from beets.test.helper import BeetsTestCase, ConfigMixin
from beets.util import plurality
class PluralityTest(BeetsTestCase):
    """Checks for ``plurality`` and for ``match.current_metadata``."""

    def test_plurality_consensus(self):
        """Identical values yield that value with the full count."""
        winner, count = plurality([1, 1, 1, 1])
        assert winner == 1
        assert count == 4

    def test_plurality_near_consensus(self):
        """A single dissenting value does not change the winner."""
        winner, count = plurality([1, 1, 2, 1])
        assert winner == 1
        assert count == 3

    def test_plurality_conflict(self):
        """With a tie, either tied value is acceptable as the winner."""
        winner, count = plurality([1, 1, 2, 2, 3])
        assert winner in (1, 2)
        assert count == 2

    def test_plurality_empty_sequence_raises_error(self):
        """An empty input is rejected with a ValueError."""
        with pytest.raises(ValueError, match="must be non-empty"):
            plurality([])

    def test_current_metadata_finds_pluralities(self):
        """The majority spelling wins for both artist and album."""
        tracks = [
            Item(artist="The Beetles", album="The White Album"),
            Item(artist="The Beatles", album="The White Album"),
            Item(artist="The Beatles", album="Teh White Album"),
        ]

        likelies, consensus = match.current_metadata(tracks)

        assert likelies["artist"] == "The Beatles"
        assert likelies["album"] == "The White Album"
        # One track disagrees on the artist, so there is no consensus.
        assert not consensus["artist"]

    def test_current_metadata_artist_consensus(self):
        """A shared artist is reported as a consensus."""
        tracks = [
            Item(artist="The Beatles", album="The White Album"),
            Item(artist="The Beatles", album="The White Album"),
            Item(artist="The Beatles", album="Teh White Album"),
        ]

        likelies, consensus = match.current_metadata(tracks)

        assert likelies["artist"] == "The Beatles"
        assert likelies["album"] == "The White Album"
        assert consensus["artist"]

    def test_albumartist_consensus(self):
        """A unanimous album artist overrides differing track artists."""
        tracks = [
            Item(artist="tartist1", album="album", albumartist="aartist"),
            Item(artist="tartist2", album="album", albumartist="aartist"),
            Item(artist="tartist3", album="album", albumartist="aartist"),
        ]

        likelies, consensus = match.current_metadata(tracks)

        assert likelies["artist"] == "aartist"
        # The track artists themselves still disagree.
        assert not consensus["artist"]

    def test_current_metadata_likelies(self):
        """Every summarised field reports its most frequent value."""
        fields = [
            "artist",
            "album",
            "albumartist",
            "year",
            "disctotal",
            "mb_albumid",
            "label",
            "barcode",
            "catalognum",
            "country",
            "media",
            "albumdisambig",
        ]

        # Indices 0 and 1 both map to suffix 1, so "<field>_1" appears
        # twice while every other suffix appears once.
        tracks = []
        for index in range(5):
            suffix = index or 1
            tracks.append(
                Item(**{name: f"{name}_{suffix}" for name in fields})
            )

        likelies, _ = match.current_metadata(tracks)

        for name in fields:
            found = likelies[name]
            # NOTE(review): presumably integer-typed fields cannot keep
            # the string value and come back as 0 -- confirm against the
            # Item field types.
            if isinstance(found, int):
                assert found == 0
            else:
                assert found == f"{name}_1"
def _make_item(title, track, artist="some artist"):

View file

@ -24,7 +24,10 @@ from unittest.mock import Mock, patch
import pytest
from beets import util
from beets.library import Item
from beets.test import _common
from beets.test.helper import BeetsTestCase
from beets.util import plurality
class UtilTest(unittest.TestCase):
@ -217,3 +220,82 @@ class TestPathLegalization:
expected_path,
expected_truncated,
)
class PluralityTest(BeetsTestCase):
    """Checks for ``plurality`` and for ``util.get_most_common_tags``."""

    def test_plurality_consensus(self):
        """Identical values yield that value with the full count."""
        winner, count = plurality([1, 1, 1, 1])
        assert winner == 1
        assert count == 4

    def test_plurality_near_consensus(self):
        """A single dissenting value does not change the winner."""
        winner, count = plurality([1, 1, 2, 1])
        assert winner == 1
        assert count == 3

    def test_plurality_conflict(self):
        """With a tie, either tied value is acceptable as the winner."""
        winner, count = plurality([1, 1, 2, 2, 3])
        assert winner in (1, 2)
        assert count == 2

    def test_plurality_empty_sequence_raises_error(self):
        """An empty input is rejected with a ValueError."""
        with pytest.raises(ValueError, match="must be non-empty"):
            plurality([])

    def test_current_metadata_finds_pluralities(self):
        """The majority spelling wins for both artist and album."""
        tracks = [
            Item(artist="The Beetles", album="The White Album"),
            Item(artist="The Beatles", album="The White Album"),
            Item(artist="The Beatles", album="Teh White Album"),
        ]

        likelies, consensus = util.get_most_common_tags(tracks)

        assert likelies["artist"] == "The Beatles"
        assert likelies["album"] == "The White Album"
        # One track disagrees on the artist, so there is no consensus.
        assert not consensus["artist"]

    def test_current_metadata_artist_consensus(self):
        """A shared artist is reported as a consensus."""
        tracks = [
            Item(artist="The Beatles", album="The White Album"),
            Item(artist="The Beatles", album="The White Album"),
            Item(artist="The Beatles", album="Teh White Album"),
        ]

        likelies, consensus = util.get_most_common_tags(tracks)

        assert likelies["artist"] == "The Beatles"
        assert likelies["album"] == "The White Album"
        assert consensus["artist"]

    def test_albumartist_consensus(self):
        """A unanimous album artist overrides differing track artists."""
        tracks = [
            Item(artist="tartist1", album="album", albumartist="aartist"),
            Item(artist="tartist2", album="album", albumartist="aartist"),
            Item(artist="tartist3", album="album", albumartist="aartist"),
        ]

        likelies, consensus = util.get_most_common_tags(tracks)

        assert likelies["artist"] == "aartist"
        # The track artists themselves still disagree.
        assert not consensus["artist"]

    def test_current_metadata_likelies(self):
        """Every summarised field reports its most frequent value."""
        fields = [
            "artist",
            "album",
            "albumartist",
            "year",
            "disctotal",
            "mb_albumid",
            "label",
            "barcode",
            "catalognum",
            "country",
            "media",
            "albumdisambig",
        ]

        # Indices 0 and 1 both map to suffix 1, so "<field>_1" appears
        # twice while every other suffix appears once.
        tracks = []
        for index in range(5):
            suffix = index or 1
            tracks.append(
                Item(**{name: f"{name}_{suffix}" for name in fields})
            )

        likelies, _ = util.get_most_common_tags(tracks)

        for name in fields:
            found = likelies[name]
            # NOTE(review): presumably integer-typed fields cannot keep
            # the string value and come back as 0 -- confirm against the
            # Item field types.
            if isinstance(found, int):
                assert found == 0
            else:
                assert found == f"{name}_1"