Isolated autotag-related plugin functions into a new metadata_plugins

file.
This commit is contained in:
Sebastian Mohr 2025-07-07 13:27:37 +02:00
parent 24dd40eed2
commit 06b6f4f27b
3 changed files with 376 additions and 303 deletions

View file

@ -24,7 +24,7 @@ from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar
import lap
import numpy as np
from beets import config, logging, plugins
from beets import config, logging, metadata_plugins
from beets.autotag import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch, hooks
from beets.util import get_most_common_tags
@ -119,7 +119,7 @@ def match_by_id(items: Iterable[Item]) -> AlbumInfo | None:
return None
# If all album IDs are equal, look up the album.
log.debug("Searching for discovered album ID: {0}", first)
return plugins.album_for_id(first)
return metadata_plugins.album_for_id(first)
def _recommendation(
@ -274,7 +274,7 @@ def tag_album(
if search_ids:
for search_id in search_ids:
log.debug("Searching for album ID: {0}", search_id)
if info := plugins.album_for_id(search_id):
if info := metadata_plugins.album_for_id(search_id):
_add_candidate(items, candidates, info)
# Use existing metadata or text search.
@ -311,7 +311,7 @@ def tag_album(
log.debug("Album might be VA: {0}", va_likely)
# Get the results from the data sources.
for matched_candidate in plugins.candidates(
for matched_candidate in metadata_plugins.candidates(
items, search_artist, search_album, va_likely
):
_add_candidate(items, candidates, matched_candidate)
@ -346,7 +346,7 @@ def tag_item(
if trackids:
for trackid in trackids:
log.debug("Searching for track ID: {0}", trackid)
if info := plugins.track_for_id(trackid):
if info := metadata_plugins.track_for_id(trackid):
dist = track_distance(item, info, incl_artist=True)
candidates[info.track_id] = hooks.TrackMatch(dist, info)
# If this is a good match, then don't keep searching.
@ -372,7 +372,7 @@ def tag_item(
log.debug("Item search terms: {0} - {1}", search_artist, search_title)
# Get and evaluate candidate metadata.
for track_info in plugins.item_candidates(
for track_info in metadata_plugins.item_candidates(
item, search_artist, search_title
):
dist = track_distance(item, track_info, incl_artist=True)

367
beets/metadata_plugins.py Normal file
View file

@ -0,0 +1,367 @@
"""Metadata source plugin interface.
This allows beets to lookup metadata from various sources. We define
a common interface for all metadata sources which need to be
implemented as plugins.
"""
from __future__ import annotations
import abc
import re
from typing import (
TYPE_CHECKING,
Any,
Generic,
Iterator,
Literal,
Sequence,
TypedDict,
TypeVar,
)
from typing_extensions import NotRequired
from .plugins import BeetsPlugin, find_plugins, notify_info_yielded, send
if TYPE_CHECKING:
from confuse import ConfigView
from .autotag import Distance
from .autotag.hooks import AlbumInfo, Item, TrackInfo
def find_metadata_source_plugins() -> list[MetadataSourcePluginNext]:
    """Collect the loaded plugins that act as metadata sources.

    Walks everything returned by ``find_plugins()`` and keeps the
    instances of ``MetadataSourcePluginNext`` (including subclasses),
    preserving the order in which ``find_plugins()`` produced them.
    """
    sources: list[MetadataSourcePluginNext] = []
    for loaded in find_plugins():
        if isinstance(loaded, MetadataSourcePluginNext):
            sources.append(loaded)
    return sources
@notify_info_yielded("albuminfo_received")
def candidates(*args, **kwargs) -> Iterator[AlbumInfo]:
    """Yield album candidates gathered from every metadata source plugin.

    All positional and keyword arguments are forwarded unchanged to each
    plugin's ``candidates`` method; plugins are queried one after another.
    """
    for source in find_metadata_source_plugins():
        for album_info in source.candidates(*args, **kwargs):
            yield album_info
@notify_info_yielded("trackinfo_received")
def item_candidates(*args, **kwargs) -> Iterator[TrackInfo]:
    """Yield track candidates gathered from every metadata source plugin.

    All positional and keyword arguments are forwarded unchanged to each
    plugin's ``item_candidates`` method; plugins are queried one after
    another.
    """
    for source in find_metadata_source_plugins():
        for track_info in source.item_candidates(*args, **kwargs):
            yield track_info
def album_for_id(_id: str) -> AlbumInfo | None:
    """Look up a single album by its ID across the metadata source plugins.

    Plugins are asked in turn; the first truthy result wins, is announced
    through the ``albuminfo_received`` event and is returned. ``None`` is
    returned when no plugin produces a result.
    """
    for source in find_metadata_source_plugins():
        found = source.album_for_id(album_id=_id)
        if found:
            send("albuminfo_received", info=found)
            return found
    return None
def track_for_id(_id: str) -> TrackInfo | None:
    """Look up a single track by its ID across the metadata source plugins.

    Plugins are asked in turn; the first truthy result wins, is announced
    through the ``trackinfo_received`` event and is returned. ``None`` is
    returned when no plugin produces a result.
    """
    for source in find_metadata_source_plugins():
        found = source.track_for_id(_id)
        if found:
            send("trackinfo_received", info=found)
            return found
    return None
def track_distance(item: Item, info: TrackInfo) -> Distance:
    """Combine the track distances reported by all metadata source plugins.

    :param item: The library item being compared.
    :param info: The candidate track metadata.
    :return: A ``Distance`` updated with the result of every plugin's
        ``track_distance(item, info)``.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably deferred to avoid an import cycle with
    # ``beets.autotag`` -- confirm. ``beets.autotag.distance`` is the
    # module the plugin dispatch code imports ``Distance`` from.
    from beets.autotag.distance import Distance

    dist = Distance()
    for plugin in find_metadata_source_plugins():
        dist.update(plugin.track_distance(item, info))
    return dist
def album_distance(
    items: Sequence[Item],
    album_info: AlbumInfo,
    mapping: dict[Item, TrackInfo],
) -> Distance:
    """Combine the album distances reported by all metadata source plugins.

    :param items: The library items that make up the album.
    :param album_info: The candidate album metadata.
    :param mapping: Mapping from items to candidate tracks.
    :return: A ``Distance`` updated with the result of every plugin's
        ``album_distance(items, album_info, mapping)``.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably deferred to avoid an import cycle with
    # ``beets.autotag`` -- confirm. ``beets.autotag.distance`` is the
    # module the plugin dispatch code imports ``Distance`` from.
    from beets.autotag.distance import Distance

    dist = Distance()
    for plugin in find_metadata_source_plugins():
        dist.update(plugin.album_distance(items, album_info, mapping))
    return dist
def _get_distance(
    config: ConfigView, data_source: str, info: AlbumInfo | TrackInfo
) -> Distance:
    """Build the source-weight distance for an album or track candidate.

    :param config: Plugin configuration view; ``config["source_weight"]``
        is read as a number.
    :param data_source: Name of the data source the calling plugin
        represents.
    :param info: The candidate whose ``data_source`` attribute is compared
        against ``data_source``.
    :return: A new ``Distance``. When ``info.data_source`` equals
        ``data_source`` the configured ``source_weight`` is added under
        the ``"source"`` key; otherwise the distance is left empty.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably deferred to avoid an import cycle with
    # ``beets.autotag`` -- confirm. ``beets.autotag.distance`` is the
    # module the plugin dispatch code imports ``Distance`` from.
    from beets.autotag.distance import Distance

    dist = Distance()
    if info.data_source == data_source:
        dist.add("source", config["source_weight"].as_number())
    return dist
class MetadataSourcePluginNext(BeetsPlugin, metaclass=abc.ABCMeta):
    """A plugin that provides metadata from a specific source.

    This base class implements a contract for plugins that provide metadata
    from a specific source. Subclasses must implement the methods to search
    for albums and tracks (``candidates``, ``item_candidates``) and to
    retrieve album and track information by ID (``album_for_id``,
    ``track_for_id``).

    TODO: Rename once all plugins are migrated to this interface.
    """

    # Name of the metadata source; compared against ``info.data_source``
    # when computing distances.
    data_source: str

    def __init__(self, data_source: str, *args, **kwargs) -> None:
        """Initialise the plugin and register its default configuration.

        :param data_source: Name of the metadata source. When empty, the
            class name is used instead.
        :param args: Forwarded to ``BeetsPlugin.__init__``.
        :param kwargs: Forwarded to ``BeetsPlugin.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.data_source = data_source or self.__class__.__name__
        self.config.add({"source_weight": 0.5})

    # --------------------------------- id lookup -------------------------------- #

    def albums_for_ids(self, ids: Sequence[str]) -> Iterator[AlbumInfo | None]:
        """Batch lookup of album metadata for a list of album IDs.

        Given a list of album identifiers, yields corresponding AlbumInfo
        objects. Missing albums result in None values in the output
        iterator. Plugins may implement this for optimized batched lookups
        instead of single calls to album_for_id.

        :param ids: Album identifiers to look up.
        :return: Lazy iterator with one entry per identifier, in order.
        """
        return (self.album_for_id(album_id) for album_id in ids)

    @abc.abstractmethod
    def album_for_id(self, album_id: str) -> AlbumInfo | None:
        """Return an :py:class:`AlbumInfo` object or None if no matching
        release was found.

        :param album_id: Identifier of the album to look up.
        """
        raise NotImplementedError

    def tracks_for_ids(self, ids: Sequence[str]) -> Iterator[TrackInfo | None]:
        """Batch lookup of track metadata for a list of track IDs.

        Given a list of track identifiers, yields corresponding TrackInfo
        objects. Missing tracks result in None values in the output
        iterator. Plugins may implement this for optimized batched lookups
        instead of single calls to track_for_id.

        :param ids: Track identifiers to look up.
        :return: Lazy iterator with one entry per identifier, in order.
        """
        return (self.track_for_id(track_id) for track_id in ids)

    @abc.abstractmethod
    def track_for_id(self, track_id: str) -> TrackInfo | None:
        """Return a :py:class:`TrackInfo` object or None if no matching
        track was found.

        :param track_id: Identifier of the track to look up.
        """
        raise NotImplementedError

    # ---------------------------------- search ---------------------------------- #

    @abc.abstractmethod
    def candidates(
        self,
        items: Sequence[Item],
        artist: str,
        album: str,
        va_likely: bool,
        extra_tags: dict[str, Any] | None = None,
    ) -> Iterator[AlbumInfo]:
        """Return :py:class:`AlbumInfo` candidates that match the given album.

        Used in the autotag functionality to search for albums.

        :param items: List of items in the album
        :param artist: Album artist
        :param album: Album name
        :param va_likely: Whether the album is likely to be by various artists
        :param extra_tags: An optional dictionary of extra tags to search.
            TODO: remove:
            Currently relevant to :py:class:`MusicBrainzPlugin` autotagger
            and can be ignored by other plugins
        """
        raise NotImplementedError

    @abc.abstractmethod
    def item_candidates(
        self, item: Item, artist: str, title: str
    ) -> Iterator[TrackInfo]:
        """Return :py:class:`TrackInfo` candidates that match the given track.

        Used in the autotag functionality to search for tracks.

        :param item: Track item
        :param artist: Track artist
        :param title: Track title
        """
        raise NotImplementedError

    # --------------------------------- distances -------------------------------- #

    def album_distance(
        self,
        items: Sequence[Item],
        album_info: AlbumInfo,
        mapping: dict[Item, TrackInfo],
    ) -> Distance:
        """Return the source-weight distance for an album candidate.

        ``items`` and ``mapping`` are accepted for interface compatibility
        and are not used by this default implementation.
        """
        return _get_distance(
            data_source=self.data_source, info=album_info, config=self.config
        )

    def track_distance(
        self,
        item: Item,
        info: TrackInfo,
    ) -> Distance:
        """Return the source-weight distance for a track candidate.

        ``item`` is accepted for interface compatibility and is not used by
        this default implementation.
        """
        return _get_distance(
            data_source=self.data_source, info=info, config=self.config
        )
class IDResponse(TypedDict):
    """Response from the API containing an ID."""

    # Identifier of the returned entity.
    id: str


class SearchFilter(TypedDict):
    """Filters that may be passed to an API search; every key is optional."""

    artist: NotRequired[str]
    album: NotRequired[str]


# Type variable for API response dicts; bound to IDResponse so that every
# response type carries an ``id`` key.
R = TypeVar("R", bound=IDResponse)
class SearchApiMetadataSourcePluginNext(
    Generic[R], MetadataSourcePluginNext, metaclass=abc.ABCMeta
):
    """Helper base class for metadata source plugins backed by a search API.

    Subclasses provide ``_search_api`` plus the ID lookups
    (``album_for_id`` and ``track_for_id``). In return, this class supplies
    ``candidates`` and ``item_candidates``: both run a search, take the
    ``id`` of every result and resolve those IDs through the batch lookup
    methods.
    """

    @abc.abstractmethod
    def _search_api(
        self,
        query_type: Literal["album", "track"],
        filters: SearchFilter | None = None,
        keywords: str = "",
    ) -> Sequence[R] | None:
        """Run a search against the API.

        :param query_type: Kind of entity to search for.
        :param filters: Optional filters to apply to the search.
        :param keywords: Additional keywords to include in the search.
        :return: Results for the requested type, each carrying an ``id``
            key. A falsy value (``None`` or an empty sequence) is treated
            by the callers as "nothing found".
        """
        raise NotImplementedError

    def candidates(
        self,
        items: Sequence[Item],
        artist: str,
        album: str,
        va_likely: bool,
        extra_tags: dict[str, Any] | None = None,
    ) -> Iterator[AlbumInfo]:
        """Search the API for albums and yield the resolved candidates.

        The search is always filtered by ``album``; the ``artist`` filter
        is only added when the album is not likely a various-artists
        release. ``items`` and ``extra_tags`` are not used here.
        """
        filters: SearchFilter = {"album": album}
        if va_likely is False or not va_likely:
            filters["artist"] = artist

        hits = self._search_api("album", filters)
        if hits:
            found_ids = [hit["id"] for hit in hits]
            # Drop falsy entries, i.e. IDs that could not be resolved.
            for album_info in self.albums_for_ids(found_ids):
                if album_info:
                    yield album_info

    def item_candidates(
        self, item: Item, artist: str, title: str
    ) -> Iterator[TrackInfo]:
        """Search the API for tracks and yield the resolved candidates.

        The search is filtered by ``artist`` and uses ``title`` as the
        keywords. ``item`` is not used here.
        """
        hits = self._search_api("track", {"artist": artist}, keywords=title)
        if hits:
            found_ids = [hit["id"] for hit in hits]
            # Drop falsy entries, i.e. IDs that could not be resolved.
            for track_info in self.tracks_for_ids(found_ids):
                if track_info:
                    yield track_info
def artists_to_artist_str(
    artists,
    id_key: str | int = "id",
    name_key: str | int = "name",
    join_key: str | int | None = None,
) -> tuple[str, str | None]:
    """Returns an artist string (all artists) and an artist_id (the main
    artist) for a list of artist object dicts.

    For each artist, this function moves articles (such as 'a', 'an',
    and 'the') to the front and strips trailing disambiguation numbers. It
    returns a tuple containing the comma-separated string of all
    normalized artists and the ``id`` of the main/first artist.
    Alternatively a keyword can be used to combine artists together into a
    single string by passing the join_key argument.

    :param artists: Iterable of artist dicts or lists returned by API.
    :type artists: list[dict] or list[list]
    :param id_key: Key or index corresponding to the value of ``id`` for
        the main/first artist. Defaults to 'id'.
    :param name_key: Key or index corresponding to values of names
        to concatenate for the artist string (containing all artists).
        Defaults to 'name'.
    :param join_key: Key or index corresponding to a field containing a
        keyword to use for combining artists into a single string, for
        example "Feat.", "Vs.", "And" or similar. The default is None
        which keeps the default behaviour (comma-separated). An artist
        entry that lacks this field, or has an empty value for it, is
        followed by a comma instead.
    :return: Tuple of the normalized artist string and the ``id`` of the
        first artist that has a truthy ``id`` (``None`` if there is none).
    """
    artists = list(artists)  # In case a generator was passed.
    last_idx = len(artists) - 1

    artist_id = None
    parts: list[str] = []
    for idx, artist in enumerate(artists):
        if not artist_id:
            artist_id = artist[id_key]

        name = artist[name_key]
        # Strip disambiguation number.
        name = re.sub(r" \(\d+\)$", "", name)
        # Move articles to the front.
        name = re.sub(r"^(.*?), (a|an|the)$", r"\2 \1", name, flags=re.I)
        parts.append(name)

        if idx == last_idx:  # Skip joining on last.
            continue

        # Use a join keyword if requested and available. Subscripting is
        # used instead of ``.get`` so that list entries (integer keys,
        # including index 0) work the same way as dict entries.
        joiner = None
        if join_key is not None:
            try:
                joiner = artist[join_key]
            except (KeyError, IndexError, TypeError):
                joiner = None
        parts.append(f" {joiner} " if joiner else ", ")

    return "".join(parts), artist_id

View file

@ -23,22 +23,12 @@ import sys
import traceback
from collections import defaultdict
from functools import wraps
from typing import (
TYPE_CHECKING,
Any,
Callable,
Generic,
Literal,
Sequence,
TypedDict,
TypeVar,
)
from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar
import mediafile
import beets
from beets import logging
from beets.util.id_extractors import extract_release_id
if TYPE_CHECKING:
from beets.event_types import EventType
@ -115,7 +105,7 @@ class PluginLogFilter(logging.Filter):
# Managing the plugins themselves.
class BeetsPlugin:
class BeetsPlugin(metaclass=abc.ABCMeta):
"""The base class for all beets plugins. Plugins provide
functionality by defining a subclass of BeetsPlugin and overriding
the abstract methods defined here.
@ -218,66 +208,6 @@ class BeetsPlugin:
"""Return a dict mapping prefixes to Query subclasses."""
return {}
def track_distance(
    self,
    item: Item,
    info: TrackInfo,
) -> Distance:
    """Should return a Distance object to be added to the
    distance for every track comparison.

    The base implementation ignores both arguments and returns a new,
    unmodified ``Distance``.
    """
    # Imported at call time rather than at module level.
    from beets.autotag.distance import Distance

    return Distance()
def album_distance(
    self,
    items: Sequence[Item],
    album_info: AlbumInfo,
    mapping: dict[Item, TrackInfo],
) -> Distance:
    """Should return a Distance object to be added to the
    distance for every album-level comparison.

    The base implementation ignores its arguments and returns a new,
    unmodified ``Distance``.
    """
    # Imported at call time rather than at module level.
    from beets.autotag.distance import Distance

    return Distance()
def candidates(
    self, items: list[Item], artist: str, album: str, va_likely: bool
) -> Iterable[AlbumInfo]:
    """Return :py:class:`AlbumInfo` candidates that match the given album.

    The base implementation yields no candidates.

    :param items: List of items in the album
    :param artist: Album artist
    :param album: Album name
    :param va_likely: Whether the album is likely to be by various artists
    """
    # Delegating to an empty tuple makes this a generator with no items.
    yield from ()
def item_candidates(
    self, item: Item, artist: str, title: str
) -> Iterable[TrackInfo]:
    """Return :py:class:`TrackInfo` candidates that match the given track.

    The base implementation yields no candidates.

    :param item: Track item
    :param artist: Track artist
    :param title: Track title
    """
    # Delegating to an empty tuple makes this a generator with no items.
    yield from ()
def album_for_id(self, album_id: str) -> AlbumInfo | None:
    """Return an AlbumInfo object or None if no matching release was
    found.

    The base implementation always returns None.

    :param album_id: Identifier of the album to look up.
    """
    return None
def track_for_id(self, track_id: str) -> TrackInfo | None:
    """Return a TrackInfo object or None if no matching track was
    found.

    The base implementation always returns None.

    :param track_id: Identifier of the track to look up.
    """
    return None
def add_media_field(
self, name: str, descriptor: mediafile.MediaField
) -> None:
@ -372,7 +302,7 @@ def load_plugins(names: Sequence[str] = ()) -> None:
isinstance(obj, type)
and issubclass(obj, BeetsPlugin)
and obj != BeetsPlugin
and obj != MetadataSourcePlugin
and not inspect.isabstract(obj)
and obj not in _classes
):
_classes.add(obj)
@ -456,32 +386,6 @@ def named_queries(model_cls: type[AnyModel]) -> dict[str, FieldQueryType]:
return queries
def track_distance(item: Item, info: TrackInfo) -> Distance:
    """Combine the track distances reported by all loaded plugins.

    Every plugin returned by ``find_plugins()`` is asked for
    ``track_distance(item, info)`` and the results are merged into a
    single ``Distance``, which is returned.
    """
    from beets.autotag.distance import Distance

    combined = Distance()
    for loaded in find_plugins():
        contribution = loaded.track_distance(item, info)
        combined.update(contribution)
    return combined
def album_distance(
    items: Sequence[Item],
    album_info: AlbumInfo,
    mapping: dict[Item, TrackInfo],
) -> Distance:
    """Combine the album distances reported by all loaded plugins.

    Every plugin returned by ``find_plugins()`` is asked for
    ``album_distance(items, album_info, mapping)`` and the results are
    merged into a single ``Distance``, which is returned.
    """
    from beets.autotag.distance import Distance

    combined = Distance()
    for loaded in find_plugins():
        contribution = loaded.album_distance(items, album_info, mapping)
        combined.update(contribution)
    return combined
def notify_info_yielded(event: str) -> Callable[[IterF[P, Ret]], IterF[P, Ret]]:
"""Makes a generator send the event 'event' every time it yields.
This decorator is supposed to decorate a generator, but any function
@ -502,46 +406,6 @@ def notify_info_yielded(event: str) -> Callable[[IterF[P, Ret]], IterF[P, Ret]]:
return decorator
@notify_info_yielded("albuminfo_received")
def candidates(*args, **kwargs) -> Iterable[AlbumInfo]:
    """Yield matching album candidates from all plugins.

    Arguments are forwarded unchanged to each plugin's ``candidates``.
    """
    for loaded in find_plugins():
        for album_info in loaded.candidates(*args, **kwargs):
            yield album_info
@notify_info_yielded("trackinfo_received")
def item_candidates(*args, **kwargs) -> Iterable[TrackInfo]:
    """Yield matching track candidates from all plugins.

    Arguments are forwarded unchanged to each plugin's
    ``item_candidates``.
    """
    for loaded in find_plugins():
        for track_info in loaded.item_candidates(*args, **kwargs):
            yield track_info
def album_for_id(_id: str) -> AlbumInfo | None:
    """Look up a single album by its ID across all plugins.

    Plugins are asked in turn; the first truthy result is announced via
    the ``albuminfo_received`` event and returned. Returns ``None`` when
    no plugin produces a result.
    """
    for loaded in find_plugins():
        found = loaded.album_for_id(_id)
        if found:
            send("albuminfo_received", info=found)
            return found
    return None
def track_for_id(_id: str) -> TrackInfo | None:
    """Look up a single track by its ID across all plugins.

    Plugins are asked in turn; the first truthy result is announced via
    the ``trackinfo_received`` event and returned. Returns ``None`` when
    no plugin produces a result.
    """
    for loaded in find_plugins():
        found = loaded.track_for_id(_id)
        if found:
            send("trackinfo_received", info=found)
            return found
    return None
def template_funcs() -> TFuncMap[str]:
"""Get all the template functions declared by plugins as a
dictionary.
@ -656,18 +520,6 @@ def feat_tokens(for_artist: bool = True) -> str:
)
def get_distance(
    config: ConfigView, data_source: str, info: AlbumInfo | TrackInfo
) -> Distance:
    """Build the source-weight distance for an album or track candidate.

    Returns a new ``Distance``. When ``info.data_source`` equals
    ``data_source``, the number stored in ``config["source_weight"]`` is
    added under the ``"source"`` key; otherwise the distance is returned
    untouched.
    """
    from beets.autotag.distance import Distance

    result = Distance()
    if info.data_source != data_source:
        return result
    weight = config["source_weight"].as_number()
    result.add("source", weight)
    return result
def apply_item_changes(
@ -695,149 +547,3 @@ def apply_item_changes(
item.try_write()
item.store()
class Response(TypedDict):
    """A dictionary with the response of a plugin API call.

    May be extended by plugins to include additional information, but `id`
    is required.
    """

    # Identifier of the returned entity.
    id: str


# Type variable for API response dicts; bound to Response so that every
# response type carries an ``id`` key.
R = TypeVar("R", bound=Response)
class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta):
    """Abstract base class for plugins backed by a searchable metadata API.

    Subclasses provide the ``data_source`` name, the API URLs, the
    ``_search_api`` method and the ID lookups. In return this class
    implements ``candidates``/``item_candidates`` on top of the search and
    the distance methods on top of the ``source_weight`` configuration.
    """

    def __init__(self):
        """Initialise the plugin and register the default source weight."""
        super().__init__()
        self.config.add({"source_weight": 0.5})

    @property
    @abc.abstractmethod
    def data_source(self) -> str:
        """Name of the metadata source."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def search_url(self) -> str:
        """URL of the API's search endpoint."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def album_url(self) -> str:
        """URL of the API's album endpoint."""
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def track_url(self) -> str:
        """URL of the API's track endpoint."""
        raise NotImplementedError

    @abc.abstractmethod
    def _search_api(
        self,
        query_type: Literal["album", "track"],
        filters: dict[str, str],
        keywords: str = "",
    ) -> Sequence[R]:
        """Run a search against the API.

        :param query_type: Kind of entity to search for.
        :param filters: Filters to apply to the search.
        :param keywords: Additional keywords to include in the search.
        :return: Results for the requested type, each carrying an ``id``
            key.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def album_for_id(self, album_id: str) -> AlbumInfo | None:
        """Return the album for ``album_id`` or None if there is none."""
        raise NotImplementedError

    @abc.abstractmethod
    def track_for_id(self, track_id: str) -> TrackInfo | None:
        """Return the track for ``track_id`` or None if there is none."""
        raise NotImplementedError

    @staticmethod
    def get_artist(
        artists,
        id_key: str | int = "id",
        name_key: str | int = "name",
        join_key: str | int | None = None,
    ) -> tuple[str, str | None]:
        """Returns an artist string (all artists) and an artist_id (the main
        artist) for a list of artist object dicts.

        For each artist, this function moves articles (such as 'a', 'an',
        and 'the') to the front and strips trailing disambiguation numbers.
        It returns a tuple containing the comma-separated string of all
        normalized artists and the ``id`` of the main/first artist.
        Alternatively a keyword can be used to combine artists together
        into a single string by passing the join_key argument.

        :param artists: Iterable of artist dicts or lists returned by API.
        :type artists: list[dict] or list[list]
        :param id_key: Key or index corresponding to the value of ``id`` for
            the main/first artist. Defaults to 'id'.
        :param name_key: Key or index corresponding to values of names
            to concatenate for the artist string (containing all artists).
            Defaults to 'name'.
        :param join_key: Key or index corresponding to a field containing a
            keyword to use for combining artists into a single string, for
            example "Feat.", "Vs.", "And" or similar. The default is None
            which keeps the default behaviour (comma-separated). An artist
            entry that lacks this field, or has an empty value for it, is
            followed by a comma instead.
        :return: Tuple of the normalized artist string and the ``id`` of
            the first artist that has a truthy ``id`` (``None`` if there is
            none).
        """
        artists = list(artists)  # In case a generator was passed.
        last_idx = len(artists) - 1

        artist_id = None
        parts: list[str] = []
        for idx, artist in enumerate(artists):
            if not artist_id:
                artist_id = artist[id_key]

            name = artist[name_key]
            # Strip disambiguation number.
            name = re.sub(r" \(\d+\)$", "", name)
            # Move articles to the front.
            name = re.sub(r"^(.*?), (a|an|the)$", r"\2 \1", name, flags=re.I)
            parts.append(name)

            if idx == last_idx:  # Skip joining on last.
                continue

            # Use a join keyword if requested and available. Subscripting
            # is used instead of ``.get`` so that list entries (integer
            # keys, including index 0) work the same way as dict entries.
            joiner = None
            if join_key is not None:
                try:
                    joiner = artist[join_key]
                except (KeyError, IndexError, TypeError):
                    joiner = None
            parts.append(f" {joiner} " if joiner else ", ")

        return "".join(parts), artist_id

    def _get_id(self, id_string: str) -> str | None:
        """Parse release ID from the given ID string.

        Delegates to ``extract_release_id`` using the lower-cased
        ``data_source`` name.
        """
        return extract_release_id(self.data_source.lower(), id_string)

    def candidates(
        self, items: list[Item], artist: str, album: str, va_likely: bool
    ) -> Iterable[AlbumInfo]:
        """Search the API for albums and yield the resolved candidates.

        The search is always filtered by ``album``; the ``artist`` filter
        is only added when ``va_likely`` is false. Each result's ``id`` is
        resolved via ``album_for_id`` and falsy lookups are skipped.
        ``items`` is not used here.
        """
        query_filters = {"album": album}
        if not va_likely:
            query_filters["artist"] = artist
        for result in self._search_api("album", query_filters):
            if info := self.album_for_id(result["id"]):
                yield info

    def item_candidates(
        self, item: Item, artist: str, title: str
    ) -> Iterable[TrackInfo]:
        """Search the API for tracks and yield the resolved candidates.

        The search is filtered by ``artist`` and uses ``title`` as the
        keywords. Each result's ``id`` is resolved via ``track_for_id`` and
        falsy lookups are skipped. ``item`` is not used here.
        """
        for result in self._search_api(
            "track", {"artist": artist}, keywords=title
        ):
            if info := self.track_for_id(result["id"]):
                yield info

    def album_distance(
        self,
        items: Sequence[Item],
        album_info: AlbumInfo,
        mapping: dict[Item, TrackInfo],
    ) -> Distance:
        """Return the source-weight distance for an album candidate.

        ``items`` and ``mapping`` are not used here.
        """
        return get_distance(
            data_source=self.data_source, info=album_info, config=self.config
        )

    def track_distance(self, item: Item, info: TrackInfo) -> Distance:
        """Return the source-weight distance for a track candidate.

        ``item`` is not used here.
        """
        return get_distance(
            data_source=self.data_source, info=info, config=self.config
        )