From be74936134d136572ebfc6e0eb0e2b47275859d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 18 May 2025 09:25:05 +0100
Subject: [PATCH 01/49] Handle extra_tags the way they used to be handled

---
 beets/util/__init__.py           |  2 +-
 beetsplug/musicbrainz.py         | 65 +++++++++++++++++++++-----------
 docs/changelog.rst               | 18 +++++----
 test/plugins/test_musicbrainz.py | 44 ++++++++++++++++++++-
 4 files changed, 98 insertions(+), 31 deletions(-)

diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index a1ce55caa..6bc4d14ee 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -803,7 +803,7 @@ def as_string(value: Any) -> str:
         return str(value)
 
 
-def plurality(objs: Sequence[T]) -> tuple[T, int]:
+def plurality(objs: Iterable[T]) -> tuple[T, int]:
     """Given a sequence of hashble objects, returns the object that
     is most common in the set and the its number of appearance. The
     sequence must contain at least one object.
diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py
index 34a46715d..bdfeb0968 100644
--- a/beetsplug/musicbrainz.py
+++ b/beetsplug/musicbrainz.py
@@ -18,6 +18,7 @@ from __future__ import annotations
 
 import traceback
 from collections import Counter
+from functools import cached_property
 from itertools import product
 from typing import TYPE_CHECKING, Any
 from urllib.parse import urljoin
@@ -383,7 +384,7 @@ class MusicBrainzPlugin(BeetsPlugin):
                     "deezer": False,
                     "tidal": False,
                 },
-                "extra_tags": {},
+                "extra_tags": [],
             },
         )
         hostname = self.config["host"].as_str()
@@ -747,6 +748,46 @@ class MusicBrainzPlugin(BeetsPlugin):
 
         return info
 
+    @cached_property
+    def extra_mb_field_by_tag(self) -> dict[str, str]:
+        """Map configured extra tags to their MusicBrainz API field names.
+
+        Process user configuration to determine which additional MusicBrainz
+        fields should be included in search queries.
+        """
+        mb_field_by_tag = {
+            t: FIELDS_TO_MB_KEYS[t]
+            for t in self.config["extra_tags"].as_str_seq()
+            if t in FIELDS_TO_MB_KEYS
+        }
+        if mb_field_by_tag:
+            self._log.debug("Additional search terms: {}", mb_field_by_tag)
+
+        return mb_field_by_tag
+
+    def get_album_criteria(
+        self, items: list[Item], artist: str, album: str, va_likely: bool
+    ) -> dict[str, str]:
+        # Build search criteria.
+        criteria = {"release": album.lower().strip()}
+        if artist is not None:
+            criteria["artist"] = artist.lower().strip()
+        else:
+            # Various Artists search.
+            criteria["arid"] = VARIOUS_ARTISTS_ID
+        if track_count := len(items):
+            criteria["tracks"] = str(track_count)
+
+        for tag, mb_field in self.extra_mb_field_by_tag.items():
+            most_common, _ = util.plurality(i.get(tag) for i in items)
+            value = str(most_common).lower().strip()
+            if tag == "catalognum":
+                value = value.replace(" ", "")
+            if value:
+                criteria[mb_field] = value
+
+        return criteria
+
     def candidates(
         self,
         items: list[Item],
@@ -762,27 +803,7 @@ class MusicBrainzPlugin(BeetsPlugin):
         The query consists of an artist name, an album name, and,
         optionally, a number of tracks on the album and any other extra tags.
         """
-        # Build search criteria.
-        criteria = {"release": album.lower().strip()}
-        if artist is not None:
-            criteria["artist"] = artist.lower().strip()
-        else:
-            # Various Artists search.
-            criteria["arid"] = VARIOUS_ARTISTS_ID
-        if track_count := len(items):
-            criteria["tracks"] = str(track_count)
-
-        if self.config["extra_tags"]:
-            tag_list = self.config["extra_tags"].get()
-            self._log.debug("Additional search terms: {0}", tag_list)
-            for tag, value in tag_list.items():
-                if key := FIELDS_TO_MB_KEYS.get(tag):
-                    value = str(value).lower().strip()
-                    if key == "catno":
-                        value = value.replace(" ", "")
-                    if value:
-                        criteria[key] = value
-
+        criteria = self.get_album_criteria(items, artist, album, va_likely)
         # Abort if we have no search terms.
         if not any(criteria.values()):
             return
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 31da975e2..a01ee8c97 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -8,11 +8,22 @@ Unreleased
 
 New features:
 
+* :doc:`plugins/musicbrainz`: The MusicBrainz autotagger has been moved to
+  a separate plugin. The default :ref:`plugins-config` includes `musicbrainz`,
+  but if you've customized your `plugins` list in your configuration, you'll
+  need to explicitly add `musicbrainz` to continue using this functionality.
+  Configuration option `musicbrainz.enabled` has thus been deprecated.
+  :bug:`2686`
+  :bug:`4605`
 * :doc:`plugins/web`: Show notifications when a track plays. This uses the
   Media Session API to customize media notifications.
 
 Bug fixes:
 
+* :doc:`plugins/musicbrainz`: fix regression where user configured
+  ``extra_tags`` have been read incorrectly.
+  :bug:`5788`
+
 For packagers:
 
 Other changes:
@@ -39,13 +50,6 @@ been dropped.
 
 New features:
 
-* :doc:`plugins/musicbrainz`: The MusicBrainz autotagger has been moved to
-  a separate plugin. The default :ref:`plugins-config` includes `musicbrainz`,
-  but if you've customized your `plugins` list in your configuration, you'll
-  need to explicitly add `musicbrainz` to continue using this functionality.
-  Configuration option `musicbrainz.enabled` has thus been deprecated.
-  :bug:`2686`
-  :bug:`4605`
 * :doc:`plugins/lastgenre`: The new configuration option, ``keep_existing``,
   provides more fine-grained control over how pre-populated genre tags are
   handled. The ``force`` option now behaves in a more conventional manner.
diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py
index 0f142a353..d4104a3ba 100644
--- a/test/plugins/test_musicbrainz.py
+++ b/test/plugins/test_musicbrainz.py
@@ -16,8 +16,11 @@
 
 from unittest import mock
 
+import pytest
+
 from beets import config
-from beets.test.helper import BeetsTestCase
+from beets.library import Item
+from beets.test.helper import BeetsTestCase, PluginMixin
 from beetsplug import musicbrainz
 
 
@@ -1063,3 +1066,42 @@ class MBLibraryTest(MusicBrainzTestCase):
             gp.side_effect = side_effect
             album = self.mb.album_for_id("d2a6f856-b553-40a0-ac54-a321e8e2da02")
             assert album.country is None
+
+
+class TestMusicBrainzPlugin(PluginMixin):
+    plugin = "musicbrainz"
+
+    @pytest.fixture
+    def mb_plugin(self, plugin_config):
+        self.config[self.plugin].set(plugin_config)
+
+        return musicbrainz.MusicBrainzPlugin()
+
+    @pytest.mark.parametrize(
+        "plugin_config,va_likely,expected_additional_criteria",
+        [
+            ({}, False, {"artist": "artist"}),
+            ({}, True, {"artist": "artist"}),
+            (
+                {"extra_tags": ["label", "catalognum"]},
+                False,
+                {"artist": "artist", "label": "abc", "catno": "abc123"},
+            ),
+        ],
+    )
+    def test_get_album_criteria(
+        self, mb_plugin, va_likely, expected_additional_criteria
+    ):
+        items = [
+            Item(catalognum="ABC 123", label="abc"),
+            Item(catalognum="ABC 123", label="abc"),
+            Item(catalognum="ABC 123", label="def"),
+        ]
+
+        assert mb_plugin.get_album_criteria(
+            items, "Artist ", " Album", va_likely
+        ) == {
+            "release": "album",
+            "tracks": str(len(items)),
+            **expected_additional_criteria,
+        }

From 64878933153c9074ef6f4154184617ec7988a78e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 18 May 2025 09:46:40 +0100
Subject: [PATCH 02/49] Synchronise docs with the actual supported extra_tags

---
 beetsplug/musicbrainz.py         | 2 +-
 docs/plugins/musicbrainz.rst     | 2 +-
 test/plugins/test_musicbrainz.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py
index bdfeb0968..cc0b7adee 100644
--- a/beetsplug/musicbrainz.py
+++ b/beetsplug/musicbrainz.py
@@ -45,10 +45,10 @@ BASE_URL = "https://musicbrainz.org/"
 SKIPPED_TRACKS = ["[data track]"]
 
 FIELDS_TO_MB_KEYS = {
+    "barcode": "barcode",
     "catalognum": "catno",
     "country": "country",
     "label": "label",
-    "barcode": "barcode",
     "media": "format",
     "year": "date",
 }
diff --git a/docs/plugins/musicbrainz.rst b/docs/plugins/musicbrainz.rst
index ef10be66d..9068ec45d 100644
--- a/docs/plugins/musicbrainz.rst
+++ b/docs/plugins/musicbrainz.rst
@@ -102,7 +102,7 @@ MusicBrainz. Additional tags to be queried can be supplied with the
 .. code-block:: yaml
 
     musicbrainz:
-        extra_tags: [year, catalognum, country, media, label]
+        extra_tags: [barcode, catalognum, country, label, media, year]
 
 This setting should improve the autotagger results if the metadata with the
 given tags match the metadata returned by MusicBrainz.
diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py
index d4104a3ba..d74e3ff40 100644
--- a/test/plugins/test_musicbrainz.py
+++ b/test/plugins/test_musicbrainz.py
@@ -1081,7 +1081,7 @@ class TestMusicBrainzPlugin(PluginMixin):
         "plugin_config,va_likely,expected_additional_criteria",
         [
             ({}, False, {"artist": "artist"}),
-            ({}, True, {"artist": "artist"}),
+            ({}, True, {"arid": "89ad4ac3-39f7-470e-963a-56509c546377"}),
             (
                 {"extra_tags": ["label", "catalognum"]},
                 False,

From 0102f3ce7d206372eae5573a1e0ecdd9d19a4511 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 18 May 2025 12:15:45 +0100
Subject: [PATCH 03/49] Take into account `va_likely` param and remove
 redundant checks

- Instead of checking for empty `artist` query, use `va_likely`
  parameter to determine whether we should query for Various Artists or
  not.
- `album` / `title` is always a truthy string - no need to handle empty
  criteria case
- `tracks` list always has at least one track - no need to check for
  `len(items)`
---
 beetsplug/musicbrainz.py         | 23 ++++++++---------------
 test/plugins/test_musicbrainz.py | 16 +++-------------
 2 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py
index cc0b7adee..d6960300e 100644
--- a/beetsplug/musicbrainz.py
+++ b/beetsplug/musicbrainz.py
@@ -768,15 +768,14 @@ class MusicBrainzPlugin(BeetsPlugin):
     def get_album_criteria(
         self, items: list[Item], artist: str, album: str, va_likely: bool
     ) -> dict[str, str]:
-        # Build search criteria.
-        criteria = {"release": album.lower().strip()}
-        if artist is not None:
-            criteria["artist"] = artist.lower().strip()
-        else:
-            # Various Artists search.
-            criteria["arid"] = VARIOUS_ARTISTS_ID
-        if track_count := len(items):
-            criteria["tracks"] = str(track_count)
+        criteria = {
+            "release": album.lower().strip(),
+            "tracks": str(len(items)),
+        } | (
+            {"arid": VARIOUS_ARTISTS_ID}
+            if va_likely
+            else {"artist": artist.lower().strip()}
+        )
 
         for tag, mb_field in self.extra_mb_field_by_tag.items():
             most_common, _ = util.plurality(i.get(tag) for i in items)
@@ -804,9 +803,6 @@ class MusicBrainzPlugin(BeetsPlugin):
         optionally, a number of tracks on the album and any other extra tags.
         """
         criteria = self.get_album_criteria(items, artist, album, va_likely)
-        # Abort if we have no search terms.
-        if not any(criteria.values()):
-            return
 
         try:
             self._log.debug(
@@ -837,9 +833,6 @@ class MusicBrainzPlugin(BeetsPlugin):
             "recording": title.lower().strip(),
         }
 
-        if not any(criteria.values()):
-            return
-
         try:
             res = musicbrainzngs.search_recordings(
                 limit=self.config["searchlimit"].get(int), **criteria
diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py
index d74e3ff40..90befff9b 100644
--- a/test/plugins/test_musicbrainz.py
+++ b/test/plugins/test_musicbrainz.py
@@ -823,23 +823,13 @@ class MBLibraryTest(MusicBrainzTestCase):
 
                 ai = list(self.mb.candidates([], "hello", "there", False))[0]
 
-                sp.assert_called_with(artist="hello", release="there", limit=5)
+                sp.assert_called_with(
+                    artist="hello", release="there", tracks="0", limit=5
+                )
                 gp.assert_called_with(mbid, mock.ANY)
                 assert ai.tracks[0].title == "foo"
                 assert ai.album == "hi"
 
-    def test_match_track_empty(self):
-        with mock.patch("musicbrainzngs.search_recordings") as p:
-            til = list(self.mb.item_candidates(None, " ", " "))
-            assert not p.called
-            assert til == []
-
-    def test_candidates_empty(self):
-        with mock.patch("musicbrainzngs.search_releases") as p:
-            ail = list(self.mb.candidates([], " ", " ", False))
-            assert not p.called
-            assert ail == []
-
     def test_follow_pseudo_releases(self):
         side_effect = [
             {

From 2ec65ed8ca5cf098d17ca0702dfae60dab0afeb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 18 May 2025 14:22:19 +0100
Subject: [PATCH 04/49] Deduplicate candidate methods using _search_api method

---
 beetsplug/musicbrainz.py         |  88 ++++++++-----------
 test/plugins/test_musicbrainz.py | 143 ++++++++++++++-----------------
 2 files changed, 100 insertions(+), 131 deletions(-)

diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py
index d6960300e..ceb931179 100644
--- a/beetsplug/musicbrainz.py
+++ b/beetsplug/musicbrainz.py
@@ -33,6 +33,7 @@ from beets.util.id_extractors import extract_release_id
 
 if TYPE_CHECKING:
     from collections.abc import Iterator, Sequence
+    from typing import Literal
 
     from beets.library import Item
 
@@ -769,24 +770,46 @@ class MusicBrainzPlugin(BeetsPlugin):
         self, items: list[Item], artist: str, album: str, va_likely: bool
     ) -> dict[str, str]:
         criteria = {
-            "release": album.lower().strip(),
+            "release": album,
             "tracks": str(len(items)),
-        } | (
-            {"arid": VARIOUS_ARTISTS_ID}
-            if va_likely
-            else {"artist": artist.lower().strip()}
-        )
+        } | ({"arid": VARIOUS_ARTISTS_ID} if va_likely else {"artist": artist})
 
         for tag, mb_field in self.extra_mb_field_by_tag.items():
             most_common, _ = util.plurality(i.get(tag) for i in items)
-            value = str(most_common).lower().strip()
+            value = str(most_common)
             if tag == "catalognum":
                 value = value.replace(" ", "")
-            if value:
-                criteria[mb_field] = value
+
+            criteria[mb_field] = value
 
         return criteria
 
+    def _search_api(
+        self,
+        query_type: Literal["recording", "release"],
+        filters: dict[str, str],
+    ) -> list[JSONDict]:
+        """Perform MusicBrainz API search and return results.
+
+        Execute a search against the MusicBrainz API for recordings or releases
+        using the provided criteria. Handles API errors by converting them into
+        MusicBrainzAPIError exceptions with contextual information.
+        """
+        filters = {
+            k: _v for k, v in filters.items() if (_v := v.lower().strip())
+        }
+        self._log.debug(
+            "Searching for MusicBrainz {}s with: {!r}", query_type, filters
+        )
+        try:
+            method = getattr(musicbrainzngs, f"search_{query_type}s")
+            res = method(limit=self.config["searchlimit"].get(int), **filters)
+        except musicbrainzngs.MusicBrainzError as exc:
+            raise MusicBrainzAPIError(
+                exc, f"{query_type} search", filters, traceback.format_exc()
+            )
+        return res[f"{query_type}-list"]
+
     def candidates(
         self,
         items: list[Item],
@@ -795,54 +818,19 @@ class MusicBrainzPlugin(BeetsPlugin):
         va_likely: bool,
         extra_tags: dict[str, Any] | None = None,
     ) -> Iterator[beets.autotag.hooks.AlbumInfo]:
-        """Searches for a single album ("release" in MusicBrainz parlance)
-        and returns an iterator over AlbumInfo objects. May raise a
-        MusicBrainzAPIError.
-
-        The query consists of an artist name, an album name, and,
-        optionally, a number of tracks on the album and any other extra tags.
-        """
         criteria = self.get_album_criteria(items, artist, album, va_likely)
+        release_ids = (r["id"] for r in self._search_api("release", criteria))
 
-        try:
-            self._log.debug(
-                "Searching for MusicBrainz releases with: {!r}", criteria
-            )
-            res = musicbrainzngs.search_releases(
-                limit=self.config["searchlimit"].get(int), **criteria
-            )
-        except musicbrainzngs.MusicBrainzError as exc:
-            raise MusicBrainzAPIError(
-                exc, "release search", criteria, traceback.format_exc()
-            )
-        for release in res["release-list"]:
-            # The search result is missing some data (namely, the tracks),
-            # so we just use the ID and fetch the rest of the information.
-            albuminfo = self.album_for_id(release["id"])
-            if albuminfo is not None:
-                yield albuminfo
+        yield from filter(None, map(self.album_for_id, release_ids))
 
     def item_candidates(
         self, item: Item, artist: str, title: str
     ) -> Iterator[beets.autotag.hooks.TrackInfo]:
-        """Searches for a single track and returns an iterable of TrackInfo
-        objects. May raise a MusicBrainzAPIError.
-        """
-        criteria = {
-            "artist": artist.lower().strip(),
-            "recording": title.lower().strip(),
-        }
+        criteria = {"artist": artist, "recording": title}
 
-        try:
-            res = musicbrainzngs.search_recordings(
-                limit=self.config["searchlimit"].get(int), **criteria
-            )
-        except musicbrainzngs.MusicBrainzError as exc:
-            raise MusicBrainzAPIError(
-                exc, "recording search", criteria, traceback.format_exc()
-            )
-        for recording in res["recording-list"]:
-            yield self.track_info(recording)
+        yield from filter(
+            None, map(self.track_info, self._search_api("recording", criteria))
+        )
 
     def album_for_id(
         self, album_id: str
diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py
index 90befff9b..d9a06d00a 100644
--- a/test/plugins/test_musicbrainz.py
+++ b/test/plugins/test_musicbrainz.py
@@ -757,79 +757,6 @@ class ArtistFlatteningTest(BeetsTestCase):
 
 
 class MBLibraryTest(MusicBrainzTestCase):
-    def test_match_track(self):
-        with mock.patch("musicbrainzngs.search_recordings") as p:
-            p.return_value = {
-                "recording-list": [
-                    {
-                        "title": "foo",
-                        "id": "bar",
-                        "length": 42,
-                    }
-                ],
-            }
-            ti = list(self.mb.item_candidates(None, "hello", "there"))[0]
-
-            p.assert_called_with(artist="hello", recording="there", limit=5)
-            assert ti.title == "foo"
-            assert ti.track_id == "bar"
-
-    def test_candidates(self):
-        mbid = "d2a6f856-b553-40a0-ac54-a321e8e2da99"
-        with mock.patch("musicbrainzngs.search_releases") as sp:
-            sp.return_value = {
-                "release-list": [
-                    {
-                        "id": mbid,
-                    }
-                ],
-            }
-            with mock.patch("musicbrainzngs.get_release_by_id") as gp:
-                gp.return_value = {
-                    "release": {
-                        "title": "hi",
-                        "id": mbid,
-                        "status": "status",
-                        "medium-list": [
-                            {
-                                "track-list": [
-                                    {
-                                        "id": "baz",
-                                        "recording": {
-                                            "title": "foo",
-                                            "id": "bar",
-                                            "length": 42,
-                                        },
-                                        "position": 9,
-                                        "number": "A1",
-                                    }
-                                ],
-                                "position": 5,
-                            }
-                        ],
-                        "artist-credit": [
-                            {
-                                "artist": {
-                                    "name": "some-artist",
-                                    "id": "some-id",
-                                },
-                            }
-                        ],
-                        "release-group": {
-                            "id": "another-id",
-                        },
-                    }
-                }
-
-                ai = list(self.mb.candidates([], "hello", "there", False))[0]
-
-                sp.assert_called_with(
-                    artist="hello", release="there", tracks="0", limit=5
-                )
-                gp.assert_called_with(mbid, mock.ANY)
-                assert ai.tracks[0].title == "foo"
-                assert ai.album == "hi"
-
     def test_follow_pseudo_releases(self):
         side_effect = [
             {
@@ -1061,8 +988,15 @@ class MBLibraryTest(MusicBrainzTestCase):
 class TestMusicBrainzPlugin(PluginMixin):
     plugin = "musicbrainz"
 
+    mbid = "d2a6f856-b553-40a0-ac54-a321e8e2da99"
+    RECORDING = {"title": "foo", "id": "bar", "length": 42}
+
     @pytest.fixture
-    def mb_plugin(self, plugin_config):
+    def plugin_config(self):
+        return {}
+
+    @pytest.fixture
+    def mb(self, plugin_config):
         self.config[self.plugin].set(plugin_config)
 
         return musicbrainz.MusicBrainzPlugin()
@@ -1070,17 +1004,17 @@ class TestMusicBrainzPlugin(PluginMixin):
     @pytest.mark.parametrize(
         "plugin_config,va_likely,expected_additional_criteria",
         [
-            ({}, False, {"artist": "artist"}),
+            ({}, False, {"artist": "Artist "}),
             ({}, True, {"arid": "89ad4ac3-39f7-470e-963a-56509c546377"}),
             (
                 {"extra_tags": ["label", "catalognum"]},
                 False,
-                {"artist": "artist", "label": "abc", "catno": "abc123"},
+                {"artist": "Artist ", "label": "abc", "catno": "ABC123"},
             ),
         ],
     )
     def test_get_album_criteria(
-        self, mb_plugin, va_likely, expected_additional_criteria
+        self, mb, va_likely, expected_additional_criteria
     ):
         items = [
             Item(catalognum="ABC 123", label="abc"),
@@ -1088,10 +1022,57 @@ class TestMusicBrainzPlugin(PluginMixin):
             Item(catalognum="ABC 123", label="def"),
         ]
 
-        assert mb_plugin.get_album_criteria(
-            items, "Artist ", " Album", va_likely
-        ) == {
-            "release": "album",
+        assert mb.get_album_criteria(items, "Artist ", " Album", va_likely) == {
+            "release": " Album",
             "tracks": str(len(items)),
             **expected_additional_criteria,
         }
+
+    def test_item_candidates(self, monkeypatch, mb):
+        monkeypatch.setattr(
+            "musicbrainzngs.search_recordings",
+            lambda *_, **__: {"recording-list": [self.RECORDING]},
+        )
+
+        candidates = list(mb.item_candidates(Item(), "hello", "there"))
+
+        assert len(candidates) == 1
+        assert candidates[0].track_id == self.RECORDING["id"]
+
+    def test_candidates(self, monkeypatch, mb):
+        monkeypatch.setattr(
+            "musicbrainzngs.search_releases",
+            lambda *_, **__: {"release-list": [{"id": self.mbid}]},
+        )
+        monkeypatch.setattr(
+            "musicbrainzngs.get_release_by_id",
+            lambda *_, **__: {
+                "release": {
+                    "title": "hi",
+                    "id": self.mbid,
+                    "status": "status",
+                    "medium-list": [
+                        {
+                            "track-list": [
+                                {
+                                    "id": "baz",
+                                    "recording": self.RECORDING,
+                                    "position": 9,
+                                    "number": "A1",
+                                }
+                            ],
+                            "position": 5,
+                        }
+                    ],
+                    "artist-credit": [
+                        {"artist": {"name": "some-artist", "id": "some-id"}}
+                    ],
+                    "release-group": {"id": "another-id"},
+                }
+            },
+        )
+        candidates = list(mb.candidates([], "hello", "there", False))
+
+        assert len(candidates) == 1
+        assert candidates[0].tracks[0].track_id == self.RECORDING["id"]
+        assert candidates[0].album == "hi"

From e8e9369bc7d2a4d408dfe6c61fdb3d5ef52eff2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Mon, 19 May 2025 09:18:06 +0100
Subject: [PATCH 05/49] Remove unused extra_tags parameter

---
 beets/plugins.py      | 17 ++---------------
 beets/test/helper.py  |  2 +-
 beetsplug/beatport.py |  2 +-
 beetsplug/chroma.py   |  2 +-
 beetsplug/discogs.py  |  2 +-
 docs/changelog.rst    |  4 ++++
 6 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/beets/plugins.py b/beets/plugins.py
index 26e70ed72..25452560a 100644
--- a/beets/plugins.py
+++ b/beets/plugins.py
@@ -239,12 +239,7 @@ class BeetsPlugin:
         return Distance()
 
     def candidates(
-        self,
-        items: list[Item],
-        artist: str,
-        album: str,
-        va_likely: bool,
-        extra_tags: dict[str, Any] | None = None,
+        self, items: list[Item], artist: str, album: str, va_likely: bool
     ) -> Iterator[AlbumInfo]:
         """Return :py:class:`AlbumInfo` candidates that match the given album.
 
@@ -252,9 +247,6 @@ class BeetsPlugin:
         :param artist: Album artist
         :param album: Album name
         :param va_likely: Whether the album is likely to be by various artists
-        :param extra_tags: is a an optional dictionary of extra tags to search.
-            Only relevant to :py:class:`MusicBrainzPlugin` autotagger and can be
-            ignored by other plugins
         """
         yield from ()
 
@@ -872,12 +864,7 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta):
         return extract_release_id(self.data_source.lower(), id_string)
 
     def candidates(
-        self,
-        items: list[Item],
-        artist: str,
-        album: str,
-        va_likely: bool,
-        extra_tags: dict[str, Any] | None = None,
+        self, items: list[Item], artist: str, album: str, va_likely: bool
     ) -> Iterator[AlbumInfo]:
         query_filters = {"album": album}
         if not va_likely:
diff --git a/beets/test/helper.py b/beets/test/helper.py
index 66b4ddb71..a24836e84 100644
--- a/beets/test/helper.py
+++ b/beets/test/helper.py
@@ -806,7 +806,7 @@ class AutotagStub:
         for p in self.patchers:
             p.stop()
 
-    def candidates(self, items, artist, album, va_likely, extra_tags=None):
+    def candidates(self, items, artist, album, va_likely):
         if self.matching == self.IDENT:
             yield self._make_album_match(artist, album, len(items))
 
diff --git a/beetsplug/beatport.py b/beetsplug/beatport.py
index d98fab722..20147b5cc 100644
--- a/beetsplug/beatport.py
+++ b/beetsplug/beatport.py
@@ -361,7 +361,7 @@ class BeatportPlugin(BeetsPlugin):
             data_source=self.data_source, info=track_info, config=self.config
         )
 
-    def candidates(self, items, artist, release, va_likely, extra_tags=None):
+    def candidates(self, items, artist, release, va_likely):
         """Returns a list of AlbumInfo objects for beatport search results
         matching release and artist (if not various).
         """
diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py
index 08fb97f59..518a41776 100644
--- a/beetsplug/chroma.py
+++ b/beetsplug/chroma.py
@@ -200,7 +200,7 @@ class AcoustidPlugin(plugins.BeetsPlugin):
         dist.add_expr("track_id", info.track_id not in recording_ids)
         return dist
 
-    def candidates(self, items, artist, album, va_likely, extra_tags=None):
+    def candidates(self, items, artist, album, va_likely):
         albums = []
         for relid in prefix(_all_releases(items), MAX_RELEASES):
             album = self.mb.album_for_id(relid)
diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index a8d08c1e9..1852f300f 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -156,7 +156,7 @@ class DiscogsPlugin(BeetsPlugin):
             data_source="Discogs", info=track_info, config=self.config
         )
 
-    def candidates(self, items, artist, album, va_likely, extra_tags=None):
+    def candidates(self, items, artist, album, va_likely):
         """Returns a list of AlbumInfo objects for discogs search results
         matching an album and artist (if not various).
         """
diff --git a/docs/changelog.rst b/docs/changelog.rst
index a01ee8c97..3370f5396 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -26,6 +26,10 @@ Bug fixes:
 
 For packagers:
 
+* Optional ``extra_tags`` parameter has been removed from
+  ``BeetsPlugin.candidates`` method signature since it is never passed in. If
+  you override this method in your plugin, feel free to remove this parameter.
+
 Other changes:
 
 2.3.1 (May 14, 2025)

From 9242db04a556a587ac6097b00a62498331a2bc8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Fri, 9 May 2025 12:51:05 +0100
Subject: [PATCH 06/49] discogs: add configurable search_limit

---
 beetsplug/discogs.py     | 14 +++++++-------
 docs/changelog.rst       |  2 ++
 docs/plugins/discogs.rst | 15 ++++++++++++---
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index 1852f300f..187a30e4e 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -73,6 +73,7 @@ class DiscogsPlugin(BeetsPlugin):
                 "separator": ", ",
                 "index_tracks": False,
                 "append_style_genre": False,
+                "search_limit": 5,
             }
         )
         self.config["apikey"].redact = True
@@ -257,8 +258,8 @@ class DiscogsPlugin(BeetsPlugin):
             )
             if track_result:
                 candidates.append(track_result)
-        # first 10 results, don't overwhelm with options
-        return candidates[:10]
+
+        return candidates
 
     def album_for_id(self, album_id):
         """Fetches an album by its Discogs ID and returns an AlbumInfo object
@@ -303,8 +304,9 @@ class DiscogsPlugin(BeetsPlugin):
         query = re.sub(r"(?i)\b(CD|disc|vinyl)\s*\d+", "", query)
 
         try:
-            releases = self.discogs_client.search(query, type="release").page(1)
-
+            results = self.discogs_client.search(query, type="release")
+            results.per_page = self.config["search_limit"].as_number()
+            releases = results.page(1)
         except CONNECTION_ERRORS:
             self._log.debug(
                 "Communication error while searching for {0!r}",
@@ -312,9 +314,7 @@ class DiscogsPlugin(BeetsPlugin):
                 exc_info=True,
             )
             return []
-        return [
-            album for album in map(self.get_album_info, releases[:5]) if album
-        ]
+        return map(self.get_album_info, releases)
 
     def get_master_year(self, master_id):
         """Fetches a master release given its Discogs ID and returns its year
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 3370f5396..ebb9880a9 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -17,6 +17,8 @@ New features:
   :bug:`4605`
 * :doc:`plugins/web`: Show notifications when a track plays. This uses the
   Media Session API to customize media notifications.
+* :doc:`plugins/discogs`: Add configurable ``search_limit`` option to
+  limit the number of results returned by the Discogs metadata search queries.
 
 Bug fixes:
 
diff --git a/docs/plugins/discogs.rst b/docs/plugins/discogs.rst
index ac67f2d0a..c8df12a41 100644
--- a/docs/plugins/discogs.rst
+++ b/docs/plugins/discogs.rst
@@ -101,11 +101,20 @@ This option is useful when importing classical music.
 
 Other configurations available under ``discogs:`` are:
 
-- **append_style_genre**: Appends the Discogs style (if found) to the genre tag. This can be useful if you want more granular genres to categorize your music.
-  For example, a release in Discogs might have a genre of "Electronic" and a style of "Techno": enabling this setting would set the genre to be "Electronic, Techno" (assuming default separator of ``", "``) instead of just "Electronic".
+- **append_style_genre**: Appends the Discogs style (if found) to the genre
+  tag. This can be useful if you want more granular genres to categorize your
+  music. For example, a release in Discogs might have a genre of "Electronic"
+  and a style of "Techno": enabling this setting would set the genre to be
+  "Electronic, Techno" (assuming default separator of ``", "``) instead of just
+  "Electronic".
   Default: ``False``
-- **separator**: How to join multiple genre and style values from Discogs into a string.
+- **separator**: How to join multiple genre and style values from Discogs into
+  a string.
   Default: ``", "``
+- **search_limit**: The maximum number of results to return from Discogs. This is
+  useful if you want to limit the number of results returned to speed up
+  searches.
+  Default: ``5``
 
 
 Troubleshooting

From 09862aeaea523a6de688543500f1ddfb664fa3ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Fri, 9 May 2025 13:00:06 +0100
Subject: [PATCH 07/49] discogs: Add types to public methods

---
 beetsplug/discogs.py | 44 ++++++++++++++------------------------------
 1 file changed, 14 insertions(+), 30 deletions(-)

diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index 187a30e4e..cd6e0cfd1 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -26,6 +26,7 @@ import socket
 import time
 import traceback
 from string import ascii_lowercase
+from typing import TYPE_CHECKING
 
 import confuse
 from discogs_client import Client, Master, Release
@@ -40,6 +41,11 @@ from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist
 from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
 from beets.util.id_extractors import extract_release_id
 
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    from beets.library import Item
+
 USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
 API_KEY = "rAzVUQYRaoFjeBjyWuWZ"
 API_SECRET = "plxtUTqoCzwxZpqdPysCwGuBSmZNdZVy"
@@ -157,16 +163,9 @@ class DiscogsPlugin(BeetsPlugin):
             data_source="Discogs", info=track_info, config=self.config
         )
 
-    def candidates(self, items, artist, album, va_likely):
-        """Returns a list of AlbumInfo objects for discogs search results
-        matching an album and artist (if not various).
-        """
-        if not album and not artist:
-            self._log.debug(
-                "Skipping Discogs query. Files missing album and artist tags."
-            )
-            return []
-
+    def candidates(
+        self, items: list[Item], artist: str, album: str, va_likely: bool
+    ) -> Iterable[AlbumInfo]:
         if va_likely:
             query = album
         else:
@@ -220,24 +219,9 @@ class DiscogsPlugin(BeetsPlugin):
 
         return None
 
-    def item_candidates(self, item, artist, title):
-        """Returns a list of TrackInfo objects for Search API results
-        matching ``title`` and ``artist``.
-        :param item: Singleton item to be matched.
-        :type item: beets.library.Item
-        :param artist: The artist of the track to be matched.
-        :type artist: str
-        :param title: The title of the track to be matched.
-        :type title: str
-        :return: Candidate TrackInfo objects.
-        :rtype: list[beets.autotag.hooks.TrackInfo]
-        """
-        if not artist and not title:
-            self._log.debug(
-                "Skipping Discogs query. File missing artist and title tags."
-            )
-            return []
-
+    def item_candidates(
+        self, item: Item, artist: str, title: str
+    ) -> Iterable[TrackInfo]:
         query = f"{artist} {title}"
         try:
             albums = self.get_albums(query)
@@ -261,7 +245,7 @@ class DiscogsPlugin(BeetsPlugin):
 
         return candidates
 
-    def album_for_id(self, album_id):
+    def album_for_id(self, album_id: str) -> AlbumInfo | None:
         """Fetches an album by its Discogs ID and returns an AlbumInfo object
         or None if the album is not found.
         """
@@ -292,7 +276,7 @@ class DiscogsPlugin(BeetsPlugin):
             return None
         return self.get_album_info(result)
 
-    def get_albums(self, query):
+    def get_albums(self, query: str) -> Iterable[AlbumInfo]:
         """Returns a list of AlbumInfo objects for a discogs search query."""
         # Strip non-word characters from query. Things like "!" and "-" can
         # cause a query to return no results, even if they match the artist or

From 12149b3e6d525075f0521da178bd29fd180882ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Fri, 9 May 2025 13:44:54 +0100
Subject: [PATCH 08/49] discogs: remove duplicate error handling

---
 beetsplug/discogs.py | 31 +++----------------------------
 1 file changed, 3 insertions(+), 28 deletions(-)

diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index cd6e0cfd1..93791ad5e 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -166,22 +166,7 @@ class DiscogsPlugin(BeetsPlugin):
     def candidates(
         self, items: list[Item], artist: str, album: str, va_likely: bool
     ) -> Iterable[AlbumInfo]:
-        if va_likely:
-            query = album
-        else:
-            query = f"{artist} {album}"
-        try:
-            return self.get_albums(query)
-        except DiscogsAPIError as e:
-            self._log.debug("API Error: {0} (query: {1})", e, query)
-            if e.status_code == 401:
-                self.reset_auth()
-                return self.candidates(items, artist, album, va_likely)
-            else:
-                return []
-        except CONNECTION_ERRORS:
-            self._log.debug("Connection error in album search", exc_info=True)
-            return []
+        return self.get_albums(f"{artist} {album}" if va_likely else album)
 
     def get_track_from_album_by_title(
         self, album_info, title, dist_threshold=0.3
@@ -222,18 +207,8 @@ class DiscogsPlugin(BeetsPlugin):
     def item_candidates(
         self, item: Item, artist: str, title: str
     ) -> Iterable[TrackInfo]:
-        query = f"{artist} {title}"
-        try:
-            albums = self.get_albums(query)
-        except DiscogsAPIError as e:
-            self._log.debug("API Error: {0} (query: {1})", e, query)
-            if e.status_code == 401:
-                self.reset_auth()
-                return self.item_candidates(item, artist, title)
-            else:
-                return []
-        except CONNECTION_ERRORS:
-            self._log.debug("Connection error in track search", exc_info=True)
+        albums = self.candidates([item], artist, title, False)
+
         candidates = []
         for album_cur in albums:
             self._log.debug("searching within album {0}", album_cur.album)

From 8e5858254b056fd555c8afce12128f0a7c40d7e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Fri, 9 May 2025 14:27:41 +0100
Subject: [PATCH 09/49] discogs: cache master release lookups

---
 beetsplug/discogs.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index 93791ad5e..ad1310712 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -25,6 +25,7 @@ import re
 import socket
 import time
 import traceback
+from functools import cache
 from string import ascii_lowercase
 from typing import TYPE_CHECKING
 
@@ -275,16 +276,16 @@ class DiscogsPlugin(BeetsPlugin):
             return []
         return map(self.get_album_info, releases)
 
-    def get_master_year(self, master_id):
+    @cache
+    def get_master_year(self, master_id: str) -> int | None:
         """Fetches a master release given its Discogs ID and returns its year
         or None if the master release is not found.
         """
-        self._log.debug("Searching for master release {0}", master_id)
+        self._log.debug("Getting master release {0}", master_id)
         result = Master(self.discogs_client, {"id": master_id})
 
         try:
-            year = result.fetch("year")
-            return year
+            return result.fetch("year")
         except DiscogsAPIError as e:
             if e.status_code != 404:
                 self._log.debug(

From 9cc7ecaceb39456e0f739be87c4eef096f9634c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Fri, 9 May 2025 14:53:05 +0100
Subject: [PATCH 10/49] discogs: cache TRACK_INDEX_RE

---
 beetsplug/discogs.py         | 42 ++++++++++++++++++++----------------
 test/plugins/test_discogs.py | 40 ++++++++++++++++------------------
 2 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index ad1310712..8fdd515bd 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -61,6 +61,22 @@ CONNECTION_ERRORS = (
 )
 
 
+TRACK_INDEX_RE = re.compile(
+    r"""
+    (.*?)   # medium: everything before medium_index.
+    (\d*?)  # medium_index: a number at the end of
+            # `position`, except if followed by a subtrack index.
+            # subtrack_index: can only be matched if medium
+            # or medium_index have been matched, and can be
+    (
+        (?<=\w)\.[\w]+  # a dot followed by a string (A.1, 2.A)
+      | (?<=\d)[A-Z]+   # a string that follows a number (1A, B2a)
+    )?
+    """,
+    re.VERBOSE,
+)
+
+
 class ReleaseFormat(TypedDict):
     name: str
     qty: int
@@ -655,33 +671,21 @@ class DiscogsPlugin(BeetsPlugin):
             medium_index=medium_index,
         )
 
-    def get_track_index(self, position):
+    @staticmethod
+    def get_track_index(
+        position: str,
+    ) -> tuple[str | None, str | None, str | None]:
         """Returns the medium, medium index and subtrack index for a discogs
         track position."""
         # Match the standard Discogs positions (12.2.9), which can have several
         # forms (1, 1-1, A1, A1.1, A1a, ...).
-        match = re.match(
-            r"^(.*?)"  # medium: everything before medium_index.
-            r"(\d*?)"  # medium_index: a number at the end of
-            # `position`, except if followed by a subtrack
-            # index.
-            # subtrack_index: can only be matched if medium
-            # or medium_index have been matched, and can be
-            r"((?<=\w)\.[\w]+"  # - a dot followed by a string (A.1, 2.A)
-            r"|(?<=\d)[A-Z]+"  # - a string that follows a number (1A, B2a)
-            r")?"
-            r"$",
-            position.upper(),
-        )
-
-        if match:
+        medium = index = subindex = None
+        if match := TRACK_INDEX_RE.fullmatch(position.upper()):
             medium, index, subindex = match.groups()
 
             if subindex and subindex.startswith("."):
                 subindex = subindex[1:]
-        else:
-            self._log.debug("Invalid position: {0}", position)
-            medium = index = subindex = None
+
         return medium or None, index or None, subindex or None
 
     def get_track_length(self, duration):
diff --git a/test/plugins/test_discogs.py b/test/plugins/test_discogs.py
index eb9a625b1..c31ac7511 100644
--- a/test/plugins/test_discogs.py
+++ b/test/plugins/test_discogs.py
@@ -171,27 +171,6 @@ class DGAlbumInfoTest(BeetsTestCase):
         assert t[3].index == 4
         assert t[3].medium_total == 1
 
-    def test_parse_position(self):
-        """Test the conversion of discogs `position` to medium, medium_index
-        and subtrack_index."""
-        # List of tuples (discogs_position, (medium, medium_index, subindex)
-        positions = [
-            ("1", (None, "1", None)),
-            ("A12", ("A", "12", None)),
-            ("12-34", ("12-", "34", None)),
-            ("CD1-1", ("CD1-", "1", None)),
-            ("1.12", (None, "1", "12")),
-            ("12.a", (None, "12", "A")),
-            ("12.34", (None, "12", "34")),
-            ("1ab", (None, "1", "AB")),
-            # Non-standard
-            ("IV", ("IV", None, None)),
-        ]
-
-        d = DiscogsPlugin()
-        for position, expected in positions:
-            assert d.get_track_index(position) == expected
-
     def test_parse_tracklist_without_sides(self):
         """Test standard Discogs position 12.2.9#1: "without sides"."""
         release = self._make_release_from_positions(["1", "2", "3"])
@@ -417,3 +396,22 @@ def test_get_media_and_albumtype(formats, expected_media, expected_albumtype):
     result = DiscogsPlugin.get_media_and_albumtype(formats)
 
     assert result == (expected_media, expected_albumtype)
+
+
+@pytest.mark.parametrize(
+    "position, medium, index, subindex",
+    [
+        ("1", None, "1", None),
+        ("A12", "A", "12", None),
+        ("12-34", "12-", "34", None),
+        ("CD1-1", "CD1-", "1", None),
+        ("1.12", None, "1", "12"),
+        ("12.a", None, "12", "A"),
+        ("12.34", None, "12", "34"),
+        ("1ab", None, "1", "AB"),
+        # Non-standard
+        ("IV", "IV", None, None),
+    ],
+)
+def test_get_track_index(position, medium, index, subindex):
+    assert DiscogsPlugin.get_track_index(position) == (medium, index, subindex)

From d9b67acff5478fc96bce61e80c7ff97f2f6d6545 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Fri, 9 May 2025 14:55:25 +0100
Subject: [PATCH 11/49] discogs: simplify getting track from album

---
 beetsplug/discogs.py | 59 ++++++++++++--------------------------------
 1 file changed, 16 insertions(+), 43 deletions(-)

diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index 8fdd515bd..34d8f21d1 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -43,7 +43,7 @@ from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
 from beets.util.id_extractors import extract_release_id
 
 if TYPE_CHECKING:
-    from collections.abc import Iterable
+    from collections.abc import Callable, Iterable
 
     from beets.library import Item
 
@@ -185,57 +185,30 @@ class DiscogsPlugin(BeetsPlugin):
     ) -> Iterable[AlbumInfo]:
         return self.get_albums(f"{artist} {album}" if va_likely else album)
 
-    def get_track_from_album_by_title(
-        self, album_info, title, dist_threshold=0.3
-    ):
-        def compare_func(track_info):
-            track_title = getattr(track_info, "title", None)
-            dist = string_dist(track_title, title)
-            return track_title and dist < dist_threshold
-
-        return self.get_track_from_album(album_info, compare_func)
-
-    def get_track_from_album(self, album_info, compare_func):
-        """Return the first track of the release where `compare_func` returns
-        true.
-
-        :return: TrackInfo object.
-        :rtype: beets.autotag.hooks.TrackInfo
-        """
-        if not album_info:
+    def get_track_from_album(
+        self, album_info: AlbumInfo, compare: Callable[[TrackInfo], float]
+    ) -> TrackInfo | None:
+        """Return the best matching track of the release."""
+        scores_and_tracks = [(compare(t), t) for t in album_info.tracks]
+        score, track_info = min(scores_and_tracks, key=lambda x: x[0])
+        if score > 0.3:
             return None
 
-        for track_info in album_info.tracks:
-            # check for matching position
-            if not compare_func(track_info):
-                continue
-
-            # attach artist info if not provided
-            if not track_info["artist"]:
-                track_info["artist"] = album_info.artist
-                track_info["artist_id"] = album_info.artist_id
-            # attach album info
-            track_info["album"] = album_info.album
-
-            return track_info
-
-        return None
+        track_info["artist"] = album_info.artist
+        track_info["artist_id"] = album_info.artist_id
+        track_info["album"] = album_info.album
+        return track_info
 
     def item_candidates(
         self, item: Item, artist: str, title: str
     ) -> Iterable[TrackInfo]:
         albums = self.candidates([item], artist, title, False)
 
-        candidates = []
-        for album_cur in albums:
-            self._log.debug("searching within album {0}", album_cur.album)
-            track_result = self.get_track_from_album_by_title(
-                album_cur, item["title"]
-            )
-            if track_result:
-                candidates.append(track_result)
+        def compare_func(track_info: TrackInfo) -> float:
+            return string_dist(track_info.title, title)
 
-        return candidates
+        tracks = (self.get_track_from_album(a, compare_func) for a in albums)
+        return list(filter(None, tracks))
 
     def album_for_id(self, album_id: str) -> AlbumInfo | None:
         """Fetches an album by its Discogs ID and returns an AlbumInfo object

From d3ef627494ed6678b3189a633c3b82f8bf30011e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 18 May 2025 04:17:19 +0100
Subject: [PATCH 12/49] Expect plugins to return Iterables instead of Iterators

---
 beets/plugins.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/beets/plugins.py b/beets/plugins.py
index 25452560a..63e5d3bde 100644
--- a/beets/plugins.py
+++ b/beets/plugins.py
@@ -46,7 +46,7 @@ else:
 
 
 if TYPE_CHECKING:
-    from collections.abc import Iterator
+    from collections.abc import Iterable
 
     from confuse import ConfigView
 
@@ -70,7 +70,7 @@ if TYPE_CHECKING:
     P = ParamSpec("P")
     Ret = TypeVar("Ret", bound=Any)
     Listener = Callable[..., None]
-    IterF = Callable[P, Iterator[Ret]]
+    IterF = Callable[P, Iterable[Ret]]
 
 
 PLUGIN_NAMESPACE = "beetsplug"
@@ -240,7 +240,7 @@ class BeetsPlugin:
 
     def candidates(
         self, items: list[Item], artist: str, album: str, va_likely: bool
-    ) -> Iterator[AlbumInfo]:
+    ) -> Iterable[AlbumInfo]:
         """Return :py:class:`AlbumInfo` candidates that match the given album.
 
         :param items: List of items in the album
@@ -252,7 +252,7 @@ class BeetsPlugin:
 
     def item_candidates(
         self, item: Item, artist: str, title: str
-    ) -> Iterator[TrackInfo]:
+    ) -> Iterable[TrackInfo]:
         """Return :py:class:`TrackInfo` candidates that match the given track.
 
         :param item: Track item
@@ -487,7 +487,7 @@ def notify_info_yielded(event: str) -> Callable[[IterF[P, Ret]], IterF[P, Ret]]:
 
     def decorator(func: IterF[P, Ret]) -> IterF[P, Ret]:
         @wraps(func)
-        def wrapper(*args: P.args, **kwargs: P.kwargs) -> Iterator[Ret]:
+        def wrapper(*args: P.args, **kwargs: P.kwargs) -> Iterable[Ret]:
             for v in func(*args, **kwargs):
                 send(event, info=v)
                 yield v
@@ -498,14 +498,14 @@ def notify_info_yielded(event: str) -> Callable[[IterF[P, Ret]], IterF[P, Ret]]:
 
 
 @notify_info_yielded("albuminfo_received")
-def candidates(*args, **kwargs) -> Iterator[AlbumInfo]:
+def candidates(*args, **kwargs) -> Iterable[AlbumInfo]:
     """Return matching album candidates from all plugins."""
     for plugin in find_plugins():
         yield from plugin.candidates(*args, **kwargs)
 
 
 @notify_info_yielded("trackinfo_received")
-def item_candidates(*args, **kwargs) -> Iterator[TrackInfo]:
+def item_candidates(*args, **kwargs) -> Iterable[TrackInfo]:
     """Return matching track candidates from all plugins."""
     for plugin in find_plugins():
         yield from plugin.item_candidates(*args, **kwargs)
@@ -865,7 +865,7 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta):
 
     def candidates(
         self, items: list[Item], artist: str, album: str, va_likely: bool
-    ) -> Iterator[AlbumInfo]:
+    ) -> Iterable[AlbumInfo]:
         query_filters = {"album": album}
         if not va_likely:
             query_filters["artist"] = artist
@@ -875,7 +875,7 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta):
 
     def item_candidates(
         self, item: Item, artist: str, title: str
-    ) -> Iterator[TrackInfo]:
+    ) -> Iterable[TrackInfo]:
         for result in self._search_api(
             "track", {"artist": artist}, keywords=title
         ):

From e151b4b49bb85cdf8074201b56fadfbc6079cab1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Mon, 19 May 2025 08:55:58 +0100
Subject: [PATCH 13/49] Implement track_for_id to allow fetching singletons by
 discogs id

---
 beetsplug/discogs.py | 8 ++++++++
 docs/changelog.rst   | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index 34d8f21d1..696f1d1ac 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -241,6 +241,14 @@ class DiscogsPlugin(BeetsPlugin):
             return None
         return self.get_album_info(result)
 
+    def track_for_id(self, track_id: str) -> TrackInfo | None:
+        if album := self.album_for_id(track_id):
+            for track in album.tracks:
+                if track.track_id == track_id:
+                    return track
+
+        return None
+
     def get_albums(self, query: str) -> Iterable[AlbumInfo]:
         """Returns a list of AlbumInfo objects for a discogs search query."""
         # Strip non-word characters from query. Things like "!" and "-" can
diff --git a/docs/changelog.rst b/docs/changelog.rst
index ebb9880a9..825e287f6 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -19,6 +19,9 @@ New features:
   Media Session API to customize media notifications.
 * :doc:`plugins/discogs`: Add configurable ``search_limit`` option to
   limit the number of results returned by the Discogs metadata search queries.
+* :doc:`plugins/discogs`: Implement ``track_for_id`` method to allow retrieving
+  singletons by their Discogs ID.
+  :bug:`4661`
 
 Bug fixes:
 

From 395aec96a36a440ab0ca9afc87f76471676bac98 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Wed, 9 Apr 2025 09:42:36 +0200
Subject: [PATCH 14/49] fetchart: fixup #5244

by restoring config validation and making things more Pythonic
---
 beetsplug/fetchart.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py
index 5451b4dbb..85bed6233 100644
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -1295,7 +1295,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
 
     @staticmethod
     def _is_source_file_removal_enabled():
-        return config["import"]["delete"] or config["import"]["move"]
+        return config["import"]["delete"].get(bool) or config["import"]["move"].get(bool)
 
     # Asynchronous; after music is added to the library.
     def fetch_art(self, session, task):
@@ -1339,7 +1339,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
         """Place the discovered art in the filesystem."""
         if task in self.art_candidates:
             candidate = self.art_candidates.pop(task)
-            removal_enabled = FetchArtPlugin._is_source_file_removal_enabled()
+            removal_enabled = self._is_source_file_removal_enabled()
 
             self._set_art(task.album, candidate, not removal_enabled)
 

From a6f2389aed35474123e6f97bbdd22aa6b27f3236 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Wed, 9 Apr 2025 00:49:42 +0200
Subject: [PATCH 15/49] typing: fetchart + tests

---
 beets/test/helper.py     |  31 ++-
 beetsplug/fetchart.py    | 484 +++++++++++++++++++++++++--------------
 test/plugins/test_art.py |  47 +++-
 3 files changed, 381 insertions(+), 181 deletions(-)

diff --git a/beets/test/helper.py b/beets/test/helper.py
index a24836e84..b86db5b23 100644
--- a/beets/test/helper.py
+++ b/beets/test/helper.py
@@ -886,20 +886,43 @@ class FetchImageHelper:
     def run(self, *args, **kwargs):
         super().run(*args, **kwargs)
 
-    IMAGEHEADER = {
+    IMAGEHEADER: dict[str, bytes] = {
         "image/jpeg": b"\xff\xd8\xff" + b"\x00" * 3 + b"JFIF",
         "image/png": b"\211PNG\r\n\032\n",
+        "image/gif": b"GIF89a",
+        # dummy type that is definitely not a valid image content type
+        "image/watercolour": b"watercolour",
+        "text/html": (
+            b"<!DOCTYPE html>\n<html>\n<head>\n</head>\n"
+            b"<body>\n</body>\n</html>"
+        ),
     }
 
-    def mock_response(self, url, content_type="image/jpeg", file_type=None):
+    def mock_response(
+        self,
+        url: str,
+        content_type: str = "image/jpeg",
+        file_type: None | str = None,
+    ) -> None:
+        # Potentially return a file of a type that differs from the
+        # server-advertised content type to mimic misbehaving servers.
         if file_type is None:
             file_type = content_type
+
+        try:
+            # imghdr reads 32 bytes
+            header = self.IMAGEHEADER[file_type].ljust(32, b"\x00")
+        except KeyError:
+            # If we can't return a file that looks like real file of the requested
+            # type, better fail the test than returning something else, which might
+            # violate assumption made when writing a test.
+            raise AssertionError(f"Mocking {file_type} responses not supported")
+
         responses.add(
             responses.GET,
             url,
             content_type=content_type,
-            # imghdr reads 32 bytes
-            body=self.IMAGEHEADER.get(file_type, b"").ljust(32, b"\x00"),
+            body=header,
         )
 
 
diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py
index 85bed6233..b04347f2c 100644
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -14,10 +14,15 @@
 
 """Fetches album art."""
 
+from __future__ import annotations
+
 import os
 import re
+from abc import ABC, abstractmethod
 from collections import OrderedDict
 from contextlib import closing
+from enum import Enum
+from typing import TYPE_CHECKING, AnyStr, ClassVar, Literal, Tuple, Type
 
 import confuse
 import requests
@@ -27,8 +32,15 @@ from beets import config, importer, plugins, ui, util
 from beets.util import bytestring_path, get_temp_filename, sorted_walk, syspath
 from beets.util.artresizer import ArtResizer
 
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Iterator, Sequence
+    from logging import Logger
+
+    from beets.importer import ImportSession, ImportTask
+    from beets.library import Album, Library
+
 try:
-    from bs4 import BeautifulSoup
+    from bs4 import BeautifulSoup, Tag
 
     HAS_BEAUTIFUL_SOUP = True
 except ImportError:
@@ -39,33 +51,54 @@ CONTENT_TYPES = {"image/jpeg": [b"jpg", b"jpeg"], "image/png": [b"png"]}
 IMAGE_EXTENSIONS = [ext for exts in CONTENT_TYPES.values() for ext in exts]
 
 
+class ImageAction(Enum):
+    """Indicates whether an image is useable or requires post-processing."""
+
+    BAD = 0
+    EXACT = 1
+    DOWNSCALE = 2
+    DOWNSIZE = 3
+    DEINTERLACE = 4
+    REFORMAT = 5
+
+
+class MetadataMatch(Enum):
+    """Indicates whether a `Candidate` matches the search criteria exactly."""
+
+    EXACT = 0
+    FALLBACK = 1
+
+
+SourceLocation = Literal["local", "remote"]
+
+
 class Candidate:
     """Holds information about a matching artwork, deals with validation of
     dimension restrictions and resizing.
     """
 
-    CANDIDATE_BAD = 0
-    CANDIDATE_EXACT = 1
-    CANDIDATE_DOWNSCALE = 2
-    CANDIDATE_DOWNSIZE = 3
-    CANDIDATE_DEINTERLACE = 4
-    CANDIDATE_REFORMAT = 5
-
-    MATCH_EXACT = 0
-    MATCH_FALLBACK = 1
-
     def __init__(
-        self, log, path=None, url=None, source="", match=None, size=None
+        self,
+        log: Logger,
+        source_name: str,
+        path: None | bytes = None,
+        url: None | str = None,
+        match: None | MetadataMatch = None,
+        size: None | Tuple[int, int] = None,
     ):
         self._log = log
         self.path = path
         self.url = url
-        self.source = source
-        self.check = None
+        self.source_name = source_name
+        self._check: None | ImageAction = None
         self.match = match
         self.size = size
 
-    def _validate(self, plugin, skip_check_for=None):
+    def _validate(
+        self,
+        plugin: FetchArtPlugin,
+        skip_check_for: None | list[ImageAction] = None,
+    ) -> ImageAction:
         """Determine whether the candidate artwork is valid based on
         its dimensions (width and ratio).
 
@@ -74,21 +107,16 @@ class Candidate:
         validated for a particular operation without changing
         plugin configuration.
 
-        Return `CANDIDATE_BAD` if the file is unusable.
-        Return `CANDIDATE_EXACT` if the file is usable as-is.
-        Return `CANDIDATE_DOWNSCALE` if the file must be rescaled.
-        Return `CANDIDATE_DOWNSIZE` if the file must be resized, and possibly
+        Return `ImageAction.BAD` if the file is unusable.
+        Return `ImageAction.EXACT` if the file is usable as-is.
+        Return `ImageAction.DOWNSCALE` if the file must be rescaled.
+        Return `ImageAction.DOWNSIZE` if the file must be resized, and possibly
             also rescaled.
-        Return `CANDIDATE_DEINTERLACE` if the file must be deinterlaced.
-        Return `CANDIDATE_REFORMAT` if the file has to be converted.
+        Return `ImageAction.DEINTERLACE` if the file must be deinterlaced.
+        Return `ImageAction.REFORMAT` if the file has to be converted.
         """
         if not self.path:
-            return self.CANDIDATE_BAD
-
-        if skip_check_for is None:
-            skip_check_for = []
-        if isinstance(skip_check_for, int):
-            skip_check_for = [skip_check_for]
+            return ImageAction.BAD
 
         if not (
             plugin.enforce_ratio
@@ -98,7 +126,7 @@ class Candidate:
             or plugin.deinterlace
             or plugin.cover_format
         ):
-            return self.CANDIDATE_EXACT
+            return ImageAction.EXACT
 
         # get_size returns None if no local imaging backend is available
         if not self.size:
@@ -113,7 +141,7 @@ class Candidate:
                 "`enforce_ratio` and `max_filesize` "
                 "may be violated."
             )
-            return self.CANDIDATE_EXACT
+            return ImageAction.EXACT
 
         short_edge = min(self.size)
         long_edge = max(self.size)
@@ -123,7 +151,7 @@ class Candidate:
             self._log.debug(
                 "image too small ({} < {})", self.size[0], plugin.minwidth
             )
-            return self.CANDIDATE_BAD
+            return ImageAction.BAD
 
         # Check aspect ratio.
         edge_diff = long_edge - short_edge
@@ -137,7 +165,7 @@ class Candidate:
                         short_edge,
                         plugin.margin_px,
                     )
-                    return self.CANDIDATE_BAD
+                    return ImageAction.BAD
             elif plugin.margin_percent:
                 margin_px = plugin.margin_percent * long_edge
                 if edge_diff > margin_px:
@@ -148,13 +176,13 @@ class Candidate:
                         short_edge,
                         margin_px,
                     )
-                    return self.CANDIDATE_BAD
+                    return ImageAction.BAD
             elif edge_diff:
                 # also reached for margin_px == 0 and margin_percent == 0.0
                 self._log.debug(
                     "image is not square ({} != {})", self.size[0], self.size[1]
                 )
-                return self.CANDIDATE_BAD
+                return ImageAction.BAD
 
         # Check maximum dimension.
         downscale = False
@@ -188,23 +216,29 @@ class Candidate:
                     plugin.cover_format,
                 )
 
-        if downscale and (self.CANDIDATE_DOWNSCALE not in skip_check_for):
-            return self.CANDIDATE_DOWNSCALE
-        if reformat and (self.CANDIDATE_REFORMAT not in skip_check_for):
-            return self.CANDIDATE_REFORMAT
+        skip_check_for = skip_check_for or []
+
+        if downscale and (ImageAction.DOWNSCALE not in skip_check_for):
+            return ImageAction.DOWNSCALE
+        if reformat and (ImageAction.REFORMAT not in skip_check_for):
+            return ImageAction.REFORMAT
         if plugin.deinterlace and (
-            self.CANDIDATE_DEINTERLACE not in skip_check_for
+            ImageAction.DEINTERLACE not in skip_check_for
         ):
-            return self.CANDIDATE_DEINTERLACE
-        if downsize and (self.CANDIDATE_DOWNSIZE not in skip_check_for):
-            return self.CANDIDATE_DOWNSIZE
-        return self.CANDIDATE_EXACT
+            return ImageAction.DEINTERLACE
+        if downsize and (ImageAction.DOWNSIZE not in skip_check_for):
+            return ImageAction.DOWNSIZE
+        return ImageAction.EXACT
 
-    def validate(self, plugin, skip_check_for=None):
-        self.check = self._validate(plugin, skip_check_for)
-        return self.check
+    def validate(
+        self,
+        plugin: FetchArtPlugin,
+        skip_check_for: None | list[ImageAction] = None,
+    ) -> ImageAction:
+        self._check = self._validate(plugin, skip_check_for)
+        return self._check
 
-    def resize(self, plugin):
+    def resize(self, plugin: FetchArtPlugin) -> None:
         """Resize the candidate artwork according to the plugin's
         configuration until it is valid or no further resizing is
         possible.
@@ -214,25 +248,30 @@ class Candidate:
         checks_performed = []
 
         # we don't want to resize the image if it's valid or bad
-        while current_check not in [self.CANDIDATE_BAD, self.CANDIDATE_EXACT]:
+        while current_check not in [ImageAction.BAD, ImageAction.EXACT]:
             self._resize(plugin, current_check)
             checks_performed.append(current_check)
             current_check = self.validate(
                 plugin, skip_check_for=checks_performed
             )
 
-    def _resize(self, plugin, check=None):
+    def _resize(
+        self, plugin: FetchArtPlugin, check: None | ImageAction = None
+    ) -> None:
         """Resize the candidate artwork according to the plugin's
         configuration and the specified check.
         """
-        if check == self.CANDIDATE_DOWNSCALE:
+        if check == ImageAction.DOWNSCALE:
             self.path = ArtResizer.shared.resize(
                 plugin.maxwidth,
                 self.path,
                 quality=plugin.quality,
                 max_filesize=plugin.max_filesize,
             )
-        elif check == self.CANDIDATE_DOWNSIZE:
+        elif check == ImageAction.DOWNSIZE:
+            # This must only be called when _validate returned something other
+            # than ImageAction.Bad or ImageAction.EXACT; then the size is known.
+            assert self.size is not None
             # dimensions are correct, so maxwidth is set to maximum dimension
             self.path = ArtResizer.shared.resize(
                 max(self.size),
@@ -240,9 +279,9 @@ class Candidate:
                 quality=plugin.quality,
                 max_filesize=plugin.max_filesize,
             )
-        elif check == self.CANDIDATE_DEINTERLACE:
+        elif check == ImageAction.DEINTERLACE:
             self.path = ArtResizer.shared.deinterlace(self.path)
-        elif check == self.CANDIDATE_REFORMAT:
+        elif check == ImageAction.REFORMAT:
             self.path = ArtResizer.shared.reformat(
                 self.path,
                 plugin.cover_format,
@@ -250,7 +289,7 @@ class Candidate:
             )
 
 
-def _logged_get(log, *args, **kwargs):
+def _logged_get(log: Logger, *args, **kwargs) -> requests.Response:
     """Like `requests.get`, but logs the effective URL to the specified
     `log` at the `DEBUG` level.
 
@@ -295,7 +334,9 @@ class RequestMixin:
     must be named `self._log`.
     """
 
-    def request(self, *args, **kwargs):
+    _log: Logger
+
+    def request(self, *args, **kwargs) -> requests.Response:
         """Like `requests.get`, but uses the logger `self._log`.
 
         See also `_logged_get`.
@@ -306,55 +347,83 @@ class RequestMixin:
 # ART SOURCES ################################################################
 
 
-class ArtSource(RequestMixin):
-    VALID_MATCHING_CRITERIA = ["default"]
+class ArtSource(RequestMixin, ABC):
+    # Specify whether this source fetches local or remote images
+    LOC: ClassVar[SourceLocation]
+    # A list of methods to match metadata, sorted by descending accuracy
+    VALID_MATCHING_CRITERIA: list[str] = ["default"]
+    # A human-readable name for the art source
+    NAME: ClassVar[str]
+    # The key to select the art source in the config. This value will also be
+    # stored in the database.
+    ID: ClassVar[str]
 
-    def __init__(self, log, config, match_by=None):
+    def __init__(
+        self,
+        log: Logger,
+        config: confuse.ConfigView,
+        match_by: None | list[str] = None,
+    ) -> None:
         self._log = log
         self._config = config
         self.match_by = match_by or self.VALID_MATCHING_CRITERIA
 
+    def describe(self) -> str:
+        return f"{self.ID}[{', '.join(self.match_by)}]"
+
     @staticmethod
-    def add_default_config(config):
+    def add_default_config(config: confuse.ConfigView) -> None:
         pass
 
     @classmethod
-    def available(cls, log, config):
+    def available(cls, log: Logger, config: confuse.ConfigView) -> bool:
         """Return whether or not all dependencies are met and the art source is
         in fact usable.
         """
         return True
 
-    def get(self, album, plugin, paths):
-        raise NotImplementedError()
+    @abstractmethod
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
+        pass
 
-    def _candidate(self, **kwargs):
-        return Candidate(source=self, log=self._log, **kwargs)
+    def _candidate(self, **kwargs) -> Candidate:
+        return Candidate(source_name=self.ID, log=self._log, **kwargs)
 
-    def fetch_image(self, candidate, plugin):
-        raise NotImplementedError()
+    @abstractmethod
+    def fetch_image(self, candidate: Candidate, plugin: FetchArtPlugin) -> None:
+        """Fetch the image to a temporary file if it is not already available
+        as a local file.
 
-    def cleanup(self, candidate):
+        After calling this, `Candidate.path` is set to the image path if
+        successful, or to `None` otherwise.
+        """
+        pass
+
+    def cleanup(self, candidate: Candidate) -> None:
         pass
 
 
 class LocalArtSource(ArtSource):
-    IS_LOCAL = True
-    LOC_STR = "local"
+    LOC = "local"
 
-    def fetch_image(self, candidate, plugin):
+    def fetch_image(self, candidate: Candidate, plugin: FetchArtPlugin) -> None:
         pass
 
 
 class RemoteArtSource(ArtSource):
-    IS_LOCAL = False
-    LOC_STR = "remote"
+    LOC = "remote"
 
-    def fetch_image(self, candidate, plugin):
+    def fetch_image(self, candidate: Candidate, plugin: FetchArtPlugin) -> None:
         """Downloads an image from a URL and checks whether it seems to
-        actually be an image. If so, returns a path to the downloaded image.
-        Otherwise, returns None.
+        actually be an image.
         """
+        assert candidate.path is None
+
         if plugin.maxwidth:
             candidate.url = ArtResizer.shared.proxy_url(
                 plugin.maxwidth, candidate.url
@@ -429,7 +498,7 @@ class RemoteArtSource(ArtSource):
             self._log.debug("error fetching art: {}", exc)
             return
 
-    def cleanup(self, candidate):
+    def cleanup(self, candidate: Candidate) -> None:
         if candidate.path:
             try:
                 util.remove(path=candidate.path)
@@ -439,19 +508,28 @@ class RemoteArtSource(ArtSource):
 
 class CoverArtArchive(RemoteArtSource):
     NAME = "Cover Art Archive"
+    ID = "coverart"
     VALID_MATCHING_CRITERIA = ["release", "releasegroup"]
     VALID_THUMBNAIL_SIZES = [250, 500, 1200]
 
     URL = "https://coverartarchive.org/release/{mbid}"
     GROUP_URL = "https://coverartarchive.org/release-group/{mbid}"
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         """Return the Cover Art Archive and Cover Art Archive release
         group URLs using album MusicBrainz release ID and release group
         ID.
         """
 
-        def get_image_urls(url, preferred_width=None):
+        def get_image_urls(
+            url: str,
+            preferred_width: None | str = None,
+        ) -> Iterator[str]:
             try:
                 response = self.request(url)
             except requests.RequestException:
@@ -500,34 +578,46 @@ class CoverArtArchive(RemoteArtSource):
 
         if "release" in self.match_by and album.mb_albumid:
             for url in get_image_urls(release_url, preferred_width):
-                yield self._candidate(url=url, match=Candidate.MATCH_EXACT)
+                yield self._candidate(url=url, match=MetadataMatch.EXACT)
 
         if "releasegroup" in self.match_by and album.mb_releasegroupid:
             for url in get_image_urls(release_group_url, preferred_width):
-                yield self._candidate(url=url, match=Candidate.MATCH_FALLBACK)
+                yield self._candidate(url=url, match=MetadataMatch.FALLBACK)
 
 
 class Amazon(RemoteArtSource):
     NAME = "Amazon"
+    ID = "amazon"
     URL = "https://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg"
     INDICES = (1, 2)
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         """Generate URLs using Amazon ID (ASIN) string."""
         if album.asin:
             for index in self.INDICES:
                 yield self._candidate(
                     url=self.URL % (album.asin, index),
-                    match=Candidate.MATCH_EXACT,
+                    match=MetadataMatch.EXACT,
                 )
 
 
 class AlbumArtOrg(RemoteArtSource):
     NAME = "AlbumArt.org scraper"
+    ID = "albumart"
     URL = "https://www.albumart.org/index_detail.php"
     PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"'
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ):
         """Return art URL from AlbumArt.org using album ASIN."""
         if not album.asin:
             return
@@ -543,13 +633,14 @@ class AlbumArtOrg(RemoteArtSource):
         m = re.search(self.PAT, resp.text)
         if m:
             image_url = m.group(1)
-            yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT)
+            yield self._candidate(url=image_url, match=MetadataMatch.EXACT)
         else:
             self._log.debug("no image found on page")
 
 
 class GoogleImages(RemoteArtSource):
     NAME = "Google Images"
+    ID = "google"
     URL = "https://www.googleapis.com/customsearch/v1"
 
     def __init__(self, *args, **kwargs):
@@ -558,7 +649,7 @@ class GoogleImages(RemoteArtSource):
         self.cx = (self._config["google_engine"].get(),)
 
     @staticmethod
-    def add_default_config(config):
+    def add_default_config(config: confuse.ConfigView):
         config.add(
             {
                 "google_key": None,
@@ -569,13 +660,18 @@ class GoogleImages(RemoteArtSource):
         config["google_engine"].redact = True
 
     @classmethod
-    def available(cls, log, config):
+    def available(cls, log: Logger, config: confuse.ConfigView) -> bool:
         has_key = bool(config["google_key"].get())
         if not has_key:
             log.debug("google: Disabling art source due to missing key")
         return has_key
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         """Return art URL from google custom search engine
         given an album title and interpreter.
         """
@@ -614,7 +710,7 @@ class GoogleImages(RemoteArtSource):
         if "items" in data.keys():
             for item in data["items"]:
                 yield self._candidate(
-                    url=item["link"], match=Candidate.MATCH_EXACT
+                    url=item["link"], match=MetadataMatch.EXACT
                 )
 
 
@@ -622,6 +718,7 @@ class FanartTV(RemoteArtSource):
     """Art from fanart.tv requested using their API"""
 
     NAME = "fanart.tv"
+    ID = "fanarttv"
     API_URL = "https://webservice.fanart.tv/v3/"
     API_ALBUMS = API_URL + "music/albums/"
     PROJECT_KEY = "61a7d0ab4e67162b7a0c7c35915cd48e"
@@ -631,7 +728,7 @@ class FanartTV(RemoteArtSource):
         self.client_key = self._config["fanarttv_key"].get()
 
     @staticmethod
-    def add_default_config(config):
+    def add_default_config(config: confuse.ConfigView):
         config.add(
             {
                 "fanarttv_key": None,
@@ -639,7 +736,12 @@ class FanartTV(RemoteArtSource):
         )
         config["fanarttv_key"].redact = True
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         if not album.mb_releasegroupid:
             return
 
@@ -695,15 +797,21 @@ class FanartTV(RemoteArtSource):
             # fanart.tv has a strict size requirement for album art to be
             # uploaded
             yield self._candidate(
-                url=item["url"], match=Candidate.MATCH_EXACT, size=(1000, 1000)
+                url=item["url"], match=MetadataMatch.EXACT, size=(1000, 1000)
             )
 
 
 class ITunesStore(RemoteArtSource):
     NAME = "iTunes Store"
+    ID = "itunes"
     API_URL = "https://itunes.apple.com/search"
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         """Return art URL from iTunes Store given an album title."""
         if not (album.albumartist and album.album):
             return
@@ -752,7 +860,7 @@ class ITunesStore(RemoteArtSource):
                     art_url = c["artworkUrl100"]
                     art_url = art_url.replace("100x100bb", image_suffix)
                     yield self._candidate(
-                        url=art_url, match=Candidate.MATCH_EXACT
+                        url=art_url, match=MetadataMatch.EXACT
                     )
             except KeyError as e:
                 self._log.debug(
@@ -767,7 +875,7 @@ class ITunesStore(RemoteArtSource):
                 "100x100bb", image_suffix
             )
             yield self._candidate(
-                url=fallback_art_url, match=Candidate.MATCH_FALLBACK
+                url=fallback_art_url, match=MetadataMatch.FALLBACK
             )
         except KeyError as e:
             self._log.debug(
@@ -779,6 +887,7 @@ class ITunesStore(RemoteArtSource):
 
 class Wikipedia(RemoteArtSource):
     NAME = "Wikipedia (queried through DBpedia)"
+    ID = "wikipedia"
     DBPEDIA_URL = "https://dbpedia.org/sparql"
     WIKIPEDIA_URL = "https://en.wikipedia.org/w/api.php"
     SPARQL_QUERY = """PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -803,7 +912,12 @@ class Wikipedia(RemoteArtSource):
                   }}
                  Limit 1"""
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         if not (album.albumartist and album.album):
             return
 
@@ -913,9 +1027,7 @@ class Wikipedia(RemoteArtSource):
             results = data["query"]["pages"]
             for _, result in results.items():
                 image_url = result["imageinfo"][0]["url"]
-                yield self._candidate(
-                    url=image_url, match=Candidate.MATCH_EXACT
-                )
+                yield self._candidate(url=image_url, match=MetadataMatch.EXACT)
         except (ValueError, KeyError, IndexError):
             self._log.debug("wikipedia: error scraping imageinfo")
             return
@@ -923,9 +1035,12 @@ class Wikipedia(RemoteArtSource):
 
 class FileSystem(LocalArtSource):
     NAME = "Filesystem"
+    ID = "filesystem"
 
     @staticmethod
-    def filename_priority(filename, cover_names):
+    def filename_priority(
+        filename: AnyStr, cover_names: Sequence[AnyStr]
+    ) -> list[int]:
         """Sort order for image names.
 
         Return indexes of cover names found in the image filename. This
@@ -934,7 +1049,12 @@ class FileSystem(LocalArtSource):
         """
         return [idx for (idx, x) in enumerate(cover_names) if x in filename]
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         """Look for album art files in the specified directories."""
         if not paths:
             return
@@ -973,7 +1093,7 @@ class FileSystem(LocalArtSource):
                         util.displayable_path(fn),
                     )
                     yield self._candidate(
-                        path=os.path.join(path, fn), match=Candidate.MATCH_EXACT
+                        path=os.path.join(path, fn), match=MetadataMatch.EXACT
                     )
                 else:
                     remaining.append(fn)
@@ -986,12 +1106,13 @@ class FileSystem(LocalArtSource):
                 )
                 yield self._candidate(
                     path=os.path.join(path, remaining[0]),
-                    match=Candidate.MATCH_FALLBACK,
+                    match=MetadataMatch.FALLBACK,
                 )
 
 
 class LastFM(RemoteArtSource):
     NAME = "Last.fm"
+    ID = "lastfm"
 
     # Sizes in priority order.
     SIZES = OrderedDict(
@@ -1006,12 +1127,12 @@ class LastFM(RemoteArtSource):
 
     API_URL = "https://ws.audioscrobbler.com/2.0"
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
         self.key = (self._config["lastfm_key"].get(),)
 
     @staticmethod
-    def add_default_config(config):
+    def add_default_config(config: confuse.ConfigView) -> None:
         config.add(
             {
                 "lastfm_key": None,
@@ -1020,13 +1141,18 @@ class LastFM(RemoteArtSource):
         config["lastfm_key"].redact = True
 
     @classmethod
-    def available(cls, log, config):
+    def available(cls, log: Logger, config: confuse.ConfigView) -> bool:
         has_key = bool(config["lastfm_key"].get())
         if not has_key:
             log.debug("lastfm: Disabling art source due to missing key")
         return has_key
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         if not album.mb_albumid:
             return
 
@@ -1079,11 +1205,12 @@ class LastFM(RemoteArtSource):
 
 class Spotify(RemoteArtSource):
     NAME = "Spotify"
+    ID = "spotify"
 
     SPOTIFY_ALBUM_URL = "https://open.spotify.com/album/"
 
     @classmethod
-    def available(cls, log, config):
+    def available(cls, log: Logger, config: confuse.ConfigView) -> bool:
         if not HAS_BEAUTIFUL_SOUP:
             log.debug(
                 "To use Spotify as an album art source, "
@@ -1092,31 +1219,42 @@ class Spotify(RemoteArtSource):
             )
         return HAS_BEAUTIFUL_SOUP
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         try:
             url = self.SPOTIFY_ALBUM_URL + album.items().get().spotify_album_id
         except AttributeError:
             self._log.debug("Fetchart: no Spotify album ID found")
             return
+
         try:
             response = requests.get(url, timeout=10)
             response.raise_for_status()
         except requests.RequestException as e:
             self._log.debug("Error: " + str(e))
             return
+
         try:
             html = response.text
             soup = BeautifulSoup(html, "html.parser")
-            image_url = soup.find("meta", attrs={"property": "og:image"})[
-                "content"
-            ]
-            yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT)
         except ValueError:
+            self._log.debug(f"Spotify: error loading response: {response.text}")
+            return
+
+        tag = soup.find("meta", attrs={"property": "og:image"})
+        if tag is None or not isinstance(tag, Tag):
             self._log.debug(
-                "Spotify: error loading response: {}".format(response.text)
+                "Spotify: Unexpected response, og:image tag missing"
             )
             return
 
+        image_url = tag["content"]
+        yield self._candidate(url=image_url, match=MetadataMatch.EXACT)
+
 
 class CoverArtUrl(RemoteArtSource):
     # This source is intended to be used with a plugin that sets the
@@ -1125,8 +1263,14 @@ class CoverArtUrl(RemoteArtSource):
     # use that URL to fetch the image.
 
     NAME = "Cover Art URL"
+    ID = "cover_art_url"
 
-    def get(self, album, plugin, paths):
+    def get(
+        self,
+        album: Album,
+        plugin: FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[Candidate]:
         image_url = None
         try:
             # look for cover_art_url on album or first track
@@ -1139,44 +1283,27 @@ class CoverArtUrl(RemoteArtSource):
             self._log.debug(f"Cover art URL not found for {album}")
             return
         if image_url:
-            yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT)
+            yield self._candidate(url=image_url, match=MetadataMatch.EXACT)
         else:
             self._log.debug(f"Cover art URL not found for {album}")
             return
 
 
-# Try each source in turn.
-
-# Note that SOURCES_ALL is redundant (and presently unused). However, we keep
-# it around nn order not break plugins that "register" (a.k.a. monkey-patch)
-# their own fetchart sources.
-SOURCES_ALL = [
-    "filesystem",
-    "coverart",
-    "itunes",
-    "amazon",
-    "albumart",
-    "wikipedia",
-    "google",
-    "fanarttv",
-    "lastfm",
-    "spotify",
-]
-
-ART_SOURCES = {
-    "filesystem": FileSystem,
-    "coverart": CoverArtArchive,
-    "itunes": ITunesStore,
-    "albumart": AlbumArtOrg,
-    "amazon": Amazon,
-    "wikipedia": Wikipedia,
-    "google": GoogleImages,
-    "fanarttv": FanartTV,
-    "lastfm": LastFM,
-    "spotify": Spotify,
-    "cover_art_url": CoverArtUrl,
+# All art sources. The order they will be tried in is specified by the config.
+ART_SOURCES: set[Type[ArtSource]] = {
+    FileSystem,
+    CoverArtArchive,
+    ITunesStore,
+    AlbumArtOrg,
+    Amazon,
+    Wikipedia,
+    GoogleImages,
+    FanartTV,
+    LastFM,
+    Spotify,
+    CoverArtUrl,
 }
-SOURCE_NAMES = {v: k for k, v in ART_SOURCES.items()}
+
 
 # PLUGIN LOGIC ###############################################################
 
@@ -1185,12 +1312,12 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
     PAT_PX = r"(0|[1-9][0-9]*)px"
     PAT_PERCENT = r"(100(\.00?)?|[1-9]?[0-9](\.[0-9]{1,2})?)%"
 
-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__()
 
         # Holds candidates corresponding to downloaded images between
         # fetching them and placing them in the filesystem.
-        self.art_candidates = {}
+        self.art_candidates: dict[ImportTask, Candidate] = {}
 
         self.config.add(
             {
@@ -1216,7 +1343,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
                 "cover_format": None,
             }
         )
-        for source in ART_SOURCES.values():
+        for source in ART_SOURCES:
             source.add_default_config(self.config)
 
         self.minwidth = self.config["minwidth"].get(int)
@@ -1237,7 +1364,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
         self.margin_px = None
         self.margin_percent = None
         self.deinterlace = self.config["deinterlace"].get(bool)
-        if type(self.enforce_ratio) is str:
+        if isinstance(self.enforce_ratio, str):
             if self.enforce_ratio[-1] == "%":
                 self.margin_percent = float(self.enforce_ratio[:-1]) / 100
             elif self.enforce_ratio[-2:] == "px":
@@ -1262,8 +1389,8 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
             self.register_listener("import_task_files", self.assign_art)
 
         available_sources = [
-            (s_name, c)
-            for (s_name, s_cls) in ART_SOURCES.items()
+            (s_cls.ID, c)
+            for s_cls in ART_SOURCES
             if s_cls.available(self._log, self.config)
             for c in s_cls.VALID_MATCHING_CRITERIA
         ]
@@ -1288,17 +1415,21 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
                         others.append((s, c))
                 sources = others + fs
 
+        sources_by_name = {s_cls.ID: s_cls for s_cls in ART_SOURCES}
+
         self.sources = [
-            ART_SOURCES[s](self._log, self.config, match_by=[c])
+            sources_by_name[s](self._log, self.config, match_by=[c])
             for s, c in sources
         ]
 
     @staticmethod
-    def _is_source_file_removal_enabled():
-        return config["import"]["delete"].get(bool) or config["import"]["move"].get(bool)
+    def _is_source_file_removal_enabled() -> bool:
+        return config["import"]["delete"].get(bool) or config["import"][
+            "move"
+        ].get(bool)
 
     # Asynchronous; after music is added to the library.
-    def fetch_art(self, session, task):
+    def fetch_art(self, session: ImportSession, task: ImportTask) -> None:
         """Find art for the album being imported."""
         if task.is_album:  # Only fetch art for full albums.
             if task.album.artpath and os.path.isfile(
@@ -1324,18 +1455,20 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
             if candidate:
                 self.art_candidates[task] = candidate
 
-    def _set_art(self, album, candidate, delete=False):
+    def _set_art(
+        self, album: Album, candidate: Candidate, delete: bool = False
+    ) -> None:
         album.set_art(candidate.path, delete)
         if self.store_source:
             # store the source of the chosen artwork in a flexible field
             self._log.debug(
                 "Storing art_source for {0.albumartist} - {0.album}", album
             )
-            album.art_source = SOURCE_NAMES[type(candidate.source)]
+            album.art_source = candidate.source_name
         album.store()
 
     # Synchronous; after music files are put in place.
-    def assign_art(self, session, task):
+    def assign_art(self, session: ImportSession, task: ImportTask):
         """Place the discovered art in the filesystem."""
         if task in self.art_candidates:
             candidate = self.art_candidates.pop(task)
@@ -1347,7 +1480,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
                 task.prune(candidate.path)
 
     # Manual album art fetching.
-    def commands(self):
+    def commands(self) -> list[ui.Subcommand]:
         cmd = ui.Subcommand("fetchart", help="download album art")
         cmd.parser.add_option(
             "-f",
@@ -1366,7 +1499,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
             help="quiet mode: do not output albums that already have artwork",
         )
 
-        def func(lib, opts, args):
+        def func(lib: Library, opts, args) -> None:
             self.batch_fetch_art(
                 lib, lib.albums(ui.decargs(args)), opts.force, opts.quiet
             )
@@ -1376,7 +1509,12 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
 
     # Utilities converted from functions to methods on logging overhaul
 
-    def art_for_album(self, album, paths, local_only=False):
+    def art_for_album(
+        self,
+        album: Album,
+        paths: None | Sequence[bytes],
+        local_only: bool = False,
+    ) -> None | Candidate:
         """Given an Album object, returns a path to downloaded art for the
         album (or None if no art is found). If `maxwidth`, then images are
         resized to this maximum pixel size. If `quality` then resized images
@@ -1387,22 +1525,22 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
         out = None
 
         for source in self.sources:
-            if source.IS_LOCAL or not local_only:
+            if source.LOC == "local" or not local_only:
                 self._log.debug(
-                    "trying source {0} for album {1.albumartist} - {1.album}",
-                    SOURCE_NAMES[type(source)],
-                    album,
+                    f"trying source {source.describe()}"
+                    f" for album {album.albumartist} - {album.album}"
                 )
                 # URLs might be invalid at this point, or the image may not
                 # fulfill the requirements
                 for candidate in source.get(album, self, paths):
                     source.fetch_image(candidate, self)
-                    if candidate.validate(self):
+                    if candidate.validate(self) != ImageAction.BAD:
                         out = candidate
+                        assert out.path is not None  # help mypy
                         self._log.debug(
-                            "using {0.LOC_STR} image {1}".format(
-                                source, util.displayable_path(out.path)
-                            )
+                            "using {0.LOC} image {1}",
+                            source,
+                            util.displayable_path(out.path),
                         )
                         break
                     # Remove temporary files for invalid candidates.
@@ -1415,7 +1553,13 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
 
         return out
 
-    def batch_fetch_art(self, lib, albums, force, quiet):
+    def batch_fetch_art(
+        self,
+        lib: Library,
+        albums: Iterable[Album],
+        force: bool,
+        quiet: bool,
+    ) -> None:
         """Fetch album art for each of the albums. This implements the manual
         fetchart CLI command.
         """
diff --git a/test/plugins/test_art.py b/test/plugins/test_art.py
index acb712354..b8a9a3bda 100644
--- a/test/plugins/test_art.py
+++ b/test/plugins/test_art.py
@@ -14,8 +14,11 @@
 
 """Tests for the album art fetchers."""
 
+from __future__ import annotations
+
 import os
 import shutil
+from typing import TYPE_CHECKING
 from unittest.mock import patch
 
 import confuse
@@ -37,6 +40,11 @@ from beetsplug import fetchart
 
 logger = logging.getLogger("beets.test_art")
 
+if TYPE_CHECKING:
+    from collections.abc import Iterator, Sequence
+
+    from beets.library import Album
+
 
 class Settings:
     """Used to pass settings to the ArtSources when the plugin isn't fully
@@ -48,6 +56,19 @@ class Settings:
             setattr(self, k, v)
 
 
+class DummyRemoteArtSource(fetchart.RemoteArtSource):
+    NAME = "Dummy Art Source"
+    ID = "dummy"
+
+    def get(
+        self,
+        album: Album,
+        plugin: fetchart.FetchArtPlugin,
+        paths: None | Sequence[bytes],
+    ) -> Iterator[fetchart.Candidate]:
+        return iter(())
+
+
 class UseThePlugin(CleanupModulesMixin, BeetsTestCase):
     modules = (fetchart.__name__, ArtResizer.__module__)
 
@@ -202,9 +223,11 @@ class FetchImageTest(FetchImageTestCase):
     def setUp(self):
         super().setUp()
         self.dpath = os.path.join(self.temp_dir, b"arttest")
-        self.source = fetchart.RemoteArtSource(logger, self.plugin.config)
+        self.source = DummyRemoteArtSource(logger, self.plugin.config)
         self.settings = Settings(maxwidth=0)
-        self.candidate = fetchart.Candidate(logger, url=self.URL)
+        self.candidate = fetchart.Candidate(
+            logger, self.source.ID, url=self.URL
+        )
 
     def test_invalid_type_returns_none(self):
         self.mock_response(self.URL, "image/watercolour")
@@ -432,7 +455,7 @@ class ITunesStoreTest(UseThePlugin):
         self.mock_response(fetchart.ITunesStore.API_URL, json)
         candidate = next(self.source.get(self.album, self.settings, []))
         assert candidate.url == "url_to_the_image"
-        assert candidate.match == fetchart.Candidate.MATCH_EXACT
+        assert candidate.match == fetchart.MetadataMatch.EXACT
 
     def test_itunesstore_no_result(self):
         json = '{"results": []}'
@@ -471,7 +494,7 @@ class ITunesStoreTest(UseThePlugin):
         self.mock_response(fetchart.ITunesStore.API_URL, json)
         candidate = next(self.source.get(self.album, self.settings, []))
         assert candidate.url == "url_to_the_image"
-        assert candidate.match == fetchart.Candidate.MATCH_FALLBACK
+        assert candidate.match == fetchart.MetadataMatch.FALLBACK
 
     def test_itunesstore_returns_result_without_artwork(self):
         json = """{
@@ -727,7 +750,11 @@ class ArtImporterTest(UseThePlugin):
         self.art_file = os.path.join(self.temp_dir, b"tmpcover.jpg")
         _common.touch(self.art_file)
         self.old_afa = self.plugin.art_for_album
-        self.afa_response = fetchart.Candidate(logger, path=self.art_file)
+        self.afa_response = fetchart.Candidate(
+            logger,
+            source_name="test",
+            path=self.art_file,
+        )
 
         def art_for_album(i, p, local_only=False):
             return self.afa_response
@@ -814,7 +841,11 @@ class ArtImporterTest(UseThePlugin):
     def test_do_not_delete_original_if_already_in_place(self):
         artdest = os.path.join(os.path.dirname(self.i.path), b"cover.jpg")
         shutil.copyfile(syspath(self.art_file), syspath(artdest))
-        self.afa_response = fetchart.Candidate(logger, path=artdest)
+        self.afa_response = fetchart.Candidate(
+            logger,
+            source_name="test",
+            path=artdest,
+        )
         self._fetch_art(True)
 
     def test_fetch_art_if_imported_file_deleted(self):
@@ -855,7 +886,9 @@ class ArtForAlbumTest(UseThePlugin):
 
         def fs_source_get(_self, album, settings, paths):
             if paths:
-                yield fetchart.Candidate(logger, path=self.image_file)
+                yield fetchart.Candidate(
+                    logger, source_name=_self.ID, path=self.image_file
+                )
 
         fetchart.FileSystem.get = fs_source_get
 

From 4fdfb393db0263c6001902d6b33ec34d2d552170 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Wed, 9 Apr 2025 12:43:15 +0200
Subject: [PATCH 16/49] update changelog

---
 docs/changelog.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 825e287f6..44f2d305d 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -35,6 +35,12 @@ For packagers:
   ``BeetsPlugin.candidates`` method signature since it is never passed in. If
   you override this method in your plugin, feel free to remove this parameter.
 
+For plugin developers:
+
+* The `fetchart` plugins has seen a few changes to function signatures and
+  source registration in the process of introducing typings to the code.
+  Custom art sources might need to be adapted.
+
 Other changes:
 
 2.3.1 (May 14, 2025)

From ff22da0691ffe87049235f1e1e81cd9d3613c1a8 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Wed, 9 Apr 2025 17:38:56 +0200
Subject: [PATCH 17/49] install imagemagick in CI environment

---
 .github/workflows/ci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 748cf24d1..2bad34d8b 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -33,7 +33,7 @@ jobs:
         if: matrix.platform == 'ubuntu-latest'
         run: |
           sudo apt update
-          sudo apt install ffmpeg gobject-introspection libcairo2-dev libgirepository-2.0-dev pandoc
+          sudo apt install ffmpeg gobject-introspection libcairo2-dev libgirepository-2.0-dev pandoc imagemagick
 
       - name: Get changed lyrics files
         id: lyrics-update

From abbabcf92eecf382b7806b086af24c0202d3d820 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Mon, 14 Apr 2025 12:04:26 +0200
Subject: [PATCH 18/49] tests: skip tests that require ArtResizer.compare if it
 is broken

This is just a quick workaround to allow CI to pass. This didn't show up
in CI before because we didn't install ImageMagick.
---
 test/plugins/test_embedart.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/test/plugins/test_embedart.py b/test/plugins/test_embedart.py
index cb4d1a421..f2f02137b 100644
--- a/test/plugins/test_embedart.py
+++ b/test/plugins/test_embedart.py
@@ -34,8 +34,35 @@ def require_artresizer_compare(test):
     def wrapper(*args, **kwargs):
         if not ArtResizer.shared.can_compare:
             raise unittest.SkipTest("compare not available")
-        else:
-            return test(*args, **kwargs)
+
+        # PHASH computation in ImageMagick changed at some point in an
+        # undocumented way. Check at a low level that comparisons of our
+        # fixtures give the expected results. Only then, plugin logic tests
+        # below are meaningful.
+        # cf. https://github.com/ImageMagick/ImageMagick/discussions/5191
+        # It would be better to investigate what exactly change in IM and
+        # handle that in ArtResizer.IMBackend.{can_compare,compare}.
+        # Skipping the tests as below is a quick fix to CI, but users may
+        # still see unexpected behaviour.
+        abbey_artpath = os.path.join(_common.RSRC, b"abbey.jpg")
+        abbey_similarpath = os.path.join(_common.RSRC, b"abbey-similar.jpg")
+        abbey_differentpath = os.path.join(_common.RSRC, b"abbey-different.jpg")
+        compare_threshold = 20
+
+        similar_compares_ok = ArtResizer.shared.compare(
+            abbey_artpath,
+            abbey_similarpath,
+            compare_threshold,
+        )
+        different_compares_ok = ArtResizer.shared.compare(
+            abbey_artpath,
+            abbey_differentpath,
+            compare_threshold,
+        )
+        if not similar_compares_ok or different_compares_ok:
+            raise unittest.SkipTest("IM version with broken compare")
+
+        return test(*args, **kwargs)
 
     wrapper.__name__ = test.__name__
     return wrapper

From fd383ead514ec4102e373258a9343aab750373ac Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Thu, 17 Apr 2025 21:49:42 +0200
Subject: [PATCH 19/49] ci: run with fetchart dependencies (Pillow)

at least in the with-coverage tests
---
 .github/workflows/ci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 2bad34d8b..333706dc7 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -60,7 +60,7 @@ jobs:
         env:
           LYRICS_UPDATED: ${{ steps.lyrics-update.outputs.any_changed }}
         run: |
-          poetry install --extras=autobpm --extras=lyrics --extras=docs --extras=replaygain --extras=reflink
+          poetry install --extras=autobpm --extras=lyrics --extras=docs --extras=replaygain --extras=reflink --extras=fetchart
           poe docs
           poe test-with-coverage
 

From 10d1c51a1d5051c2409c6bd4f25ef13a4f654f19 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 20 May 2025 09:19:10 +0200
Subject: [PATCH 20/49] fetchart: consistently use lazy string formatting for
 debug logging

---
 beetsplug/fetchart.py | 52 +++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py
index b04347f2c..25e649e68 100644
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -22,6 +22,7 @@ from abc import ABC, abstractmethod
 from collections import OrderedDict
 from contextlib import closing
 from enum import Enum
+from functools import cached_property
 from typing import TYPE_CHECKING, AnyStr, ClassVar, Literal, Tuple, Type
 
 import confuse
@@ -368,7 +369,8 @@ class ArtSource(RequestMixin, ABC):
         self._config = config
         self.match_by = match_by or self.VALID_MATCHING_CRITERIA
 
-    def describe(self) -> str:
+    @cached_property
+    def description(self) -> str:
         return f"{self.ID}[{', '.join(self.match_by)}]"
 
     @staticmethod
@@ -487,7 +489,7 @@ class RemoteArtSource(ArtSource):
                     for chunk in data:
                         fh.write(chunk)
                 self._log.debug(
-                    "downloaded art to: {0}", util.displayable_path(filename)
+                    "downloaded art to: {}", util.displayable_path(filename)
                 )
                 candidate.path = util.bytestring_path(filename)
                 return
@@ -533,18 +535,14 @@ class CoverArtArchive(RemoteArtSource):
             try:
                 response = self.request(url)
             except requests.RequestException:
-                self._log.debug(
-                    "{}: error receiving response".format(self.NAME)
-                )
+                self._log.debug("{}: error receiving response", self.NAME)
                 return
 
             try:
                 data = response.json()
             except ValueError:
                 self._log.debug(
-                    "{}: error loading response: {}".format(
-                        self.NAME, response.text
-                    )
+                    "{}: error loading response: {}", self.NAME, response.text
                 )
                 return
 
@@ -624,7 +622,7 @@ class AlbumArtOrg(RemoteArtSource):
         # Get the page from albumart.org.
         try:
             resp = self.request(self.URL, params={"asin": album.asin})
-            self._log.debug("scraped art URL: {0}", resp.url)
+            self._log.debug("scraped art URL: {}", resp.url)
         except requests.RequestException:
             self._log.debug("error scraping art page")
             return
@@ -697,14 +695,12 @@ class GoogleImages(RemoteArtSource):
         try:
             data = response.json()
         except ValueError:
-            self._log.debug(
-                "google: error loading response: {}".format(response.text)
-            )
+            self._log.debug("google: error loading response: {}", response.text)
             return
 
         if "error" in data:
             reason = data["error"]["errors"][0]["reason"]
-            self._log.debug("google fetchart error: {0}", reason)
+            self._log.debug("google fetchart error: {}", reason)
             return
 
         if "items" in data.keys():
@@ -826,13 +822,13 @@ class ITunesStore(RemoteArtSource):
             r = self.request(self.API_URL, params=payload)
             r.raise_for_status()
         except requests.RequestException as e:
-            self._log.debug("iTunes search failed: {0}", e)
+            self._log.debug("iTunes search failed: {}", e)
             return
 
         try:
             candidates = r.json()["results"]
         except ValueError as e:
-            self._log.debug("Could not decode json response: {0}", e)
+            self._log.debug("Could not decode json response: {}", e)
             return
         except KeyError as e:
             self._log.debug(
@@ -1089,7 +1085,7 @@ class FileSystem(LocalArtSource):
             for fn in images:
                 if re.search(cover_pat, os.path.splitext(fn)[0], re.I):
                     self._log.debug(
-                        "using well-named art file {0}",
+                        "using well-named art file {}",
                         util.displayable_path(fn),
                     )
                     yield self._candidate(
@@ -1101,7 +1097,7 @@ class FileSystem(LocalArtSource):
             # Fall back to any image in the folder.
             if remaining and not plugin.cautious:
                 self._log.debug(
-                    "using fallback art file {0}",
+                    "using fallback art file {}",
                     util.displayable_path(remaining[0]),
                 )
                 yield self._candidate(
@@ -1197,9 +1193,7 @@ class LastFM(RemoteArtSource):
                             url=images[size], size=self.SIZES[size]
                         )
         except ValueError:
-            self._log.debug(
-                "lastfm: error loading response: {}".format(response.text)
-            )
+            self._log.debug("lastfm: error loading response: {}", response.text)
             return
 
 
@@ -1235,14 +1229,16 @@ class Spotify(RemoteArtSource):
             response = requests.get(url, timeout=10)
             response.raise_for_status()
         except requests.RequestException as e:
-            self._log.debug("Error: " + str(e))
+            self._log.debug("Error: {!s}", e)
             return
 
         try:
             html = response.text
             soup = BeautifulSoup(html, "html.parser")
         except ValueError:
-            self._log.debug(f"Spotify: error loading response: {response.text}")
+            self._log.debug(
+                "Spotify: error loading response: {}", response.text
+            )
             return
 
         tag = soup.find("meta", attrs={"property": "og:image"})
@@ -1278,14 +1274,14 @@ class CoverArtUrl(RemoteArtSource):
                 image_url = album.cover_art_url
             else:
                 image_url = album.items().get().cover_art_url
-            self._log.debug(f"Cover art URL {image_url} found for {album}")
+            self._log.debug("Cover art URL {} found for {}", image_url, album)
         except (AttributeError, TypeError):
-            self._log.debug(f"Cover art URL not found for {album}")
+            self._log.debug("Cover art URL not found for {}", album)
             return
         if image_url:
             yield self._candidate(url=image_url, match=MetadataMatch.EXACT)
         else:
-            self._log.debug(f"Cover art URL not found for {album}")
+            self._log.debug("Cover art URL not found for {}", album)
             return
 
 
@@ -1527,8 +1523,10 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
         for source in self.sources:
             if source.LOC == "local" or not local_only:
                 self._log.debug(
-                    f"trying source {source.describe()}"
-                    f" for album {album.albumartist} - {album.album}"
+                    "trying source {0.description}"
+                    " for album {1.albumartist} - {1.album}",
+                    source,
+                    album,
                 )
                 # URLs might be invalid at this point, or the image may not
                 # fulfill the requirements

From 728076e97df3b803c26b6d149d69a7ae3b3fa072 Mon Sep 17 00:00:00 2001
From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com>
Date: Tue, 20 May 2025 10:04:07 +0200
Subject: [PATCH 21/49] fetchart: assert some invariants to satisfy mypy

Eventually, it would be nice to avoid this by more expressive typings.
For now, avoid such larger refactoring.
---
 beetsplug/fetchart.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py
index 25e649e68..3473fe08b 100644
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -262,6 +262,11 @@ class Candidate:
         """Resize the candidate artwork according to the plugin's
         configuration and the specified check.
         """
+        # This must only be called when _validate returned something other than
+        # ImageAction.Bad or ImageAction.EXACT; then path and size are known.
+        assert self.path is not None
+        assert self.size is not None
+
         if check == ImageAction.DOWNSCALE:
             self.path = ArtResizer.shared.resize(
                 plugin.maxwidth,
@@ -270,9 +275,6 @@ class Candidate:
                 max_filesize=plugin.max_filesize,
             )
         elif check == ImageAction.DOWNSIZE:
-            # This must only be called when _validate returned something other
-            # than ImageAction.Bad or ImageAction.EXACT; then the size is known.
-            assert self.size is not None
             # dimensions are correct, so maxwidth is set to maximum dimension
             self.path = ArtResizer.shared.resize(
                 max(self.size),
@@ -424,7 +426,11 @@ class RemoteArtSource(ArtSource):
         """Downloads an image from a URL and checks whether it seems to
         actually be an image.
         """
+        # This must only be called for candidates that were returned by
+        # self.get, which are expected to have a url and no path (because they
+        # haven't been downloaded yet).
         assert candidate.path is None
+        assert candidate.url is not None
 
         if plugin.maxwidth:
             candidate.url = ArtResizer.shared.proxy_url(

From 8e84268e06c6f5eb297614f294abc678c569c719 Mon Sep 17 00:00:00 2001
From: Manu <git@manuelgrabowski.de>
Date: Sun, 18 May 2025 21:37:01 +0200
Subject: [PATCH 22/49] Replace outdated bitesized label link

---
 CONTRIBUTING.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 5fccb8e80..2a6006b36 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -154,7 +154,7 @@ Code Contribution Ideas
 ^^^^^^^^^^^^^^^^^^^^^^^
 
 -  We maintain a set of `issues marked as
-   “bite-sized” <https://github.com/beetbox/beets/labels/bitesize>`__.
+   “good first issue” <https://github.com/beetbox/beets/labels/good%20first%20issue>`__.
    These are issues that would serve as a good introduction to the
    codebase. Claim one and start exploring!
 -  Like testing? Our `test

From 9584216209c3a6bd80ebe46c2113c41c8352ee37 Mon Sep 17 00:00:00 2001
From: Sebastian Mohr <39738318+semohr@users.noreply.github.com>
Date: Thu, 22 May 2025 11:35:40 +0200
Subject: [PATCH 23/49] Streamlined auto api referencing for documentation
 (#5795)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Description

The current developer documentation feels somewhat cluttered due to
inline auto-generated API references for certain classes. To improve
readability and maintainability, this PR introduces a more streamlined
approach that aligns better with best practices observed in other PyData
ecosystem documentation.

Specifically, this PR:
- Adds a dedicated `api/` folder to the documentation structure.
- Moves all auto-generated references (classes, methods, etc.) to this
folder.
- Enables clean, concise linking to API elements from the narrative
documentation—without interrupting human-written content with large
autogenerated blocks.

This separation makes the documentation easier to navigate and maintain,
while still providing full API reference coverage where needed.

- [x] Documentation
- [x] Changelog
---
 docs/.gitignore                        |   2 +
 docs/Makefile                          |   3 +-
 docs/_templates/autosummary/base.rst   |   3 +
 docs/_templates/autosummary/class.rst  |  28 +++++
 docs/_templates/autosummary/module.rst |  11 ++
 docs/api/database.rst                  |  47 ++++++++
 docs/api/plugins.rst                   |  11 ++
 docs/changelog.rst                     |   5 +
 docs/conf.py                           |  46 ++++++--
 docs/dev/index.rst                     |  10 ++
 docs/dev/library.rst                   | 156 ++++++-------------------
 docs/dev/plugins.rst                   |  11 +-
 12 files changed, 204 insertions(+), 129 deletions(-)
 create mode 100644 docs/.gitignore
 create mode 100644 docs/_templates/autosummary/base.rst
 create mode 100644 docs/_templates/autosummary/class.rst
 create mode 100644 docs/_templates/autosummary/module.rst
 create mode 100644 docs/api/database.rst
 create mode 100644 docs/api/plugins.rst

diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 000000000..1f041cc9d
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1,2 @@
+_build
+generated/
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
index f940dd931..d642530f1 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -6,6 +6,7 @@ SPHINXOPTS    =
 SPHINXBUILD   = sphinx-build
 PAPER         =
 BUILDDIR      = _build
+SOURCEDIR	  = .
 
 # When both are available, use Sphinx 2.x for autodoc compatibility.
 ifeq ($(shell which sphinx-build2 >/dev/null 2>&1 ; echo $$?),0)
@@ -39,7 +40,7 @@ help:
 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 
 clean:
-	-rm -rf $(BUILDDIR)/*
+	-rm -rf $(BUILDDIR)/* $(SOURCEDIR)/api/generated/*
 
 html:
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
diff --git a/docs/_templates/autosummary/base.rst b/docs/_templates/autosummary/base.rst
new file mode 100644
index 000000000..822f55dc2
--- /dev/null
+++ b/docs/_templates/autosummary/base.rst
@@ -0,0 +1,3 @@
+{{ fullname | escape | underline}}
+.. currentmodule:: {{ module }}
+.. auto{{ objtype }}:: {{ objname }}
\ No newline at end of file
diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst
new file mode 100644
index 000000000..6927f8360
--- /dev/null
+++ b/docs/_templates/autosummary/class.rst
@@ -0,0 +1,28 @@
+{{ fullname | escape | underline}}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+   :members:                                    <-- add at least this line
+   :private-members:
+   :show-inheritance:                           <-- plus I want to show inheritance...
+   :inherited-members:                          <-- ...and inherited members too
+
+   {% block methods %}
+   .. automethod:: __init__
+
+   {% if methods %}
+   .. rubric:: {{ _('Public methods summary') }}
+
+   .. autosummary::
+   {% for item in methods %}
+      ~{{ name }}.{{ item }}
+   {%- endfor %}
+   {% for item in _methods %}
+      ~{{ name }}.{{ item }}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
+
+   .. rubric:: {{ _('Methods definition') }}
+
diff --git a/docs/_templates/autosummary/module.rst b/docs/_templates/autosummary/module.rst
new file mode 100644
index 000000000..9383a2307
--- /dev/null
+++ b/docs/_templates/autosummary/module.rst
@@ -0,0 +1,11 @@
+{{ fullname | escape | underline}}
+{% block modules %}
+{% if modules %}
+.. rubric:: Modules
+
+{% for item in modules %}
+{{ item }}
+
+{%- endfor %}
+{% endif %}
+{% endblock %}
\ No newline at end of file
diff --git a/docs/api/database.rst b/docs/api/database.rst
new file mode 100644
index 000000000..627b5dc39
--- /dev/null
+++ b/docs/api/database.rst
@@ -0,0 +1,47 @@
+Database
+--------
+
+.. currentmodule:: beets.library
+
+
+Library
+'''''''
+
+.. autosummary::
+    :toctree: generated/
+
+    Library
+
+
+Models
+''''''
+
+.. autosummary::
+    :toctree: generated/
+
+    LibModel
+    Album
+    Item
+
+
+Transactions
+''''''''''''
+
+.. currentmodule:: beets.dbcore.db
+
+.. autosummary::
+    :toctree: generated/
+
+    Transaction
+
+Queries
+'''''''
+
+.. currentmodule:: beets.dbcore.query
+
+.. autosummary::
+    :toctree: generated/
+
+    Query
+    FieldQuery
+    AndQuery
\ No newline at end of file
diff --git a/docs/api/plugins.rst b/docs/api/plugins.rst
new file mode 100644
index 000000000..0d6c13718
--- /dev/null
+++ b/docs/api/plugins.rst
@@ -0,0 +1,11 @@
+Plugins
+-------
+
+.. currentmodule:: beets.plugins
+
+
+
+.. autosummary::
+    :toctree: generated/
+
+    BeetsPlugin
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 44f2d305d..09259e1fa 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -43,16 +43,21 @@ For plugin developers:
 
 Other changes:
 
+* Documentation structure for auto generated API references changed slightly.
+  Autogenerated API references are now located in the `docs/api` subdirectory.
+
 2.3.1 (May 14, 2025)
 --------------------
 
 Bug fixes:
+
 * :doc:`/reference/pathformat`: Fixed a regression where path legalization
   incorrectly removed parts of user-configured path formats that followed a dot
   (**.**).
   :bug:`5771`
 
 For packagers:
+
 * Force ``poetry`` version below 2 to avoid it mangling file modification times
   in ``sdist`` package.
   :bug:`5770`
diff --git a/docs/conf.py b/docs/conf.py
index 497c5e71e..d0f8cdffe 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,19 +1,35 @@
-AUTHOR = "Adrian Sampson"
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
 
-# General configuration
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.extlinks"]
-
-exclude_patterns = ["_build"]
-source_suffix = {".rst": "restructuredtext"}
-master_doc = "index"
 
 project = "beets"
+AUTHOR = "Adrian Sampson"
 copyright = "2016, Adrian Sampson"
 
+master_doc = "index"
+language = "en"
 version = "2.3"
 release = "2.3.1"
 
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.extlinks",
+]
+autosummary_generate = True
+exclude_patterns = ["_build"]
+templates_path = ["_templates"]
+source_suffix = {".rst": "restructuredtext", ".md": "markdown"}
+
+
 pygments_style = "sphinx"
 
 # External links to the bug tracker and other sites.
@@ -59,10 +75,24 @@ man_pages = [
     ),
 ]
 
-# Options for pydata theme
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+
 html_theme = "pydata_sphinx_theme"
 html_theme_options = {"collapse_navigation": True, "logo": {"text": "beets"}}
 html_title = "beets"
 html_logo = "_static/beets_logo_nobg.png"
 html_static_path = ["_static"]
 html_css_files = ["beets.css"]
+
+
+def skip_member(app, what, name, obj, skip, options):
+    if name.startswith("_"):
+        return True
+    return skip
+
+
+def setup(app):
+    app.connect("autodoc-skip-member", skip_member)
diff --git a/docs/dev/index.rst b/docs/dev/index.rst
index 63335160c..10b3566c2 100644
--- a/docs/dev/index.rst
+++ b/docs/dev/index.rst
@@ -10,8 +10,18 @@ and write metadata tags in media files.
 .. _MediaFile: https://mediafile.readthedocs.io/en/latest/
 
 .. toctree::
+    :maxdepth: 1
 
     plugins
     library
     importer
     cli
+
+
+.. toctree::
+    :maxdepth: 1
+    :caption: API Reference
+
+    ../api/plugins
+    ../api/database
+
diff --git a/docs/dev/library.rst b/docs/dev/library.rst
index 9740c8b90..8c47e4dc3 100644
--- a/docs/dev/library.rst
+++ b/docs/dev/library.rst
@@ -20,30 +20,18 @@ invocation of beets usually has only one :class:`Library`. It's powered by
 abstraction, something like a very minimal `ORM`_. The library is also
 responsible for handling queries to retrieve stored objects.
 
-.. autoclass:: Library(path, directory[, path_formats[, replacements]])
+Overview 
+''''''''
 
-    .. automethod:: __init__
+You can add new items or albums to the library via the
+:py:meth:`Library.add` and :py:meth:`Library.add_album` methods.
 
-    You can add new items or albums to the library:
+You may also query the library for items and albums using the
+:py:meth:`Library.items`, :py:meth:`Library.albums`, :py:meth:`Library.get_item` and :py:meth:`Library.get_album` methods.
 
-    .. automethod:: add
-
-    .. automethod:: add_album
-
-    And there are methods for querying the database:
-
-    .. automethod:: items
-
-    .. automethod:: albums
-
-    .. automethod:: get_item
-
-    .. automethod:: get_album
-
-    Any modifications must go through a :class:`Transaction` which you get can
-    using this method:
-
-    .. automethod:: transaction
+Any modifications to the library must go through a
+:class:`Transaction` object, which you can get using the
+:py:meth:`Library.transaction` context manager.
 
 .. _SQLite: https://sqlite.org/index.html
 .. _ORM: https://en.wikipedia.org/wiki/Object-relational_mapping
@@ -54,7 +42,7 @@ Model Classes
 
 The two model entities in beets libraries, :class:`Item` and :class:`Album`,
 share a base class, :class:`LibModel`, that provides common functionality. That
-class itself specialises :class:`dbcore.Model` which provides an ORM-like
+class itself specialises :class:`beets.dbcore.Model` which provides an ORM-like
 abstraction.
 
 To get or change the metadata of a model (an item or album), either access its
@@ -68,42 +56,25 @@ Model base
 Models use dirty-flags to track when the object's metadata goes out of
 sync with the database. The dirty dictionary maps field names to booleans
 indicating whether the field has been written since the object was last
-synchronized (via load or store) with the database.
+synchronized (via load or store) with the database. This logic is implemented
+in the model base class :class:`LibModel` and is inherited by both
+:class:`Item` and :class:`Album`.
 
-.. autoclass:: LibModel
+We provide CRUD-like methods for interacting with the database:
 
-    .. automethod:: all_keys
+* :py:meth:`LibModel.store`
+* :py:meth:`LibModel.load`
+* :py:meth:`LibModel.remove` 
+* :py:meth:`LibModel.add`
 
-    .. automethod:: __init__
+The base class :class:`beets.dbcore.Model` has a ``dict``-like interface, so
+normal the normal mapping API is supported:
 
-    .. autoattribute:: _types
+* :py:meth:`LibModel.keys`
+* :py:meth:`LibModel.update`
+* :py:meth:`LibModel.items`
+* :py:meth:`LibModel.get`
 
-    .. autoattribute:: _fields
-
-    There are CRUD-like methods for interacting with the database:
-
-    .. automethod:: store
-
-    .. automethod:: load
-
-    .. automethod:: remove
-
-    .. automethod:: add
-
-    The base class :class:`dbcore.Model` has a ``dict``-like interface, so
-    normal the normal mapping API is supported:
-
-    .. automethod:: keys
-
-    .. automethod:: update
-
-    .. automethod:: items
-
-    .. note::
-       The :py:meth:`Album.items` method is not inherited from
-       :py:meth:`LibModel.items` for historical reasons.
-
-    .. automethod:: get
 
 Item
 ''''
@@ -155,38 +126,6 @@ This leads to the following implementation policy:
   * On every modification to DB metadata (``item.field = ...``), the DB mtime
     is reset to zero.
 
-
-.. autoclass:: Item
-
-    .. automethod:: __init__
-
-    .. automethod:: from_path
-
-    .. automethod:: get_album
-
-    .. automethod:: destination
-
-    .. automethod:: current_mtime
-
-    The methods ``read()`` and ``write()`` are complementary: one reads a
-    file's tags and updates the item's metadata fields accordingly while the
-    other takes the item's fields and writes them to the file's tags.
-
-    .. automethod:: read
-
-    .. automethod:: write
-
-    .. automethod:: try_write
-
-    .. automethod:: try_sync
-
-    The :class:`Item` class supplements the normal model interface so that they
-    interacting with the filesystem as well:
-
-    .. automethod:: move
-
-    .. automethod:: remove
-
 Album
 '''''
 
@@ -205,35 +144,10 @@ For those fields that are both item-level and album-level (e.g., ``year`` or
 use an SQLite table called ``albums``, in which each column is an album
 metadata field.
 
-.. autoclass:: Album
 
-    .. automethod:: __init__
-
-    .. automethod:: item_dir
-
-    .. automethod:: items
-
-    Albums extend the normal model interface to also forward changes to their
-    items:
-
-    .. autoattribute:: item_keys
-
-    .. automethod:: store
-
-    .. automethod:: try_sync
-
-    .. automethod:: move
-
-    .. automethod:: remove
-
-    Albums also manage album art, image files that are associated with each
-    album:
-
-    .. automethod:: set_art
-
-    .. automethod:: move_art
-
-    .. automethod:: art_destination
+.. note::
+    The :py:meth:`Album.items` method is not inherited from
+    :py:meth:`LibModel.items` for historical reasons.
 
 Transactions
 ''''''''''''
@@ -241,24 +155,30 @@ Transactions
 The :class:`Library` class provides the basic methods necessary to access and
 manipulate its contents. To perform more complicated operations atomically, or
 to interact directly with the underlying SQLite database, you must use a
-*transaction* (see this `blog post`_ for motivation). For example::
+*transaction* (see this `blog post`_ for motivation). For example
+
+.. code-block:: python
 
     lib = Library()
     with lib.transaction() as tx:
         items = lib.items(query)
         lib.add_album(list(items))
 
-.. _blog post: https://beets.io/blog/sqlite-nightmare.html
-
 .. currentmodule:: beets.dbcore.db
 
-.. autoclass:: Transaction
-    :members:
+The :class:`Transaction` class is a context manager that provides a
+transactional interface to the underlying SQLite database. It is
+responsible for managing the transaction's lifecycle, including
+beginning, committing, and rolling back the transaction if
+an error occurs.
 
+.. _blog post: https://beets.io/blog/sqlite-nightmare.html
 
 Queries
 -------
 
+.. currentmodule:: beets.dbcore.query
+
 To access albums and items in a library, we use :doc:`/reference/query`.
 In beets, the :class:`Query` abstract base class represents a criterion that
 matches items or albums in the database.
diff --git a/docs/dev/plugins.rst b/docs/dev/plugins.rst
index 2d30f86c9..c24a94093 100644
--- a/docs/dev/plugins.rst
+++ b/docs/dev/plugins.rst
@@ -1,3 +1,11 @@
+Plugin Development Guide
+========================
+
+Beets plugins are Python modules or packages that extend the core functionality
+of beets. The plugin system is designed to be flexible, allowing developers to
+add virtually any type of features.
+
+
 .. _writing-plugins:
 
 Writing Plugins
@@ -413,9 +421,8 @@ to extend the kinds of metadata that they can easily manage.
 
 The ``MediaFile`` class uses ``MediaField`` descriptors to provide
 access to file tags. If you have created a descriptor you can add it through
-your plugins ``add_media_field()`` method.
+your plugins :py:meth:`beets.plugins.BeetsPlugin.add_media_field()`` method.
 
-.. automethod:: beets.plugins.BeetsPlugin.add_media_field
 .. _MediaFile: https://mediafile.readthedocs.io/en/latest/
 
 

From 43b20f285029227745bc9c36c58c118e4cfdd686 Mon Sep 17 00:00:00 2001
From: David Logie <djl@mksh.org>
Date: Thu, 22 May 2025 02:37:41 +0100
Subject: [PATCH 24/49] Make `mb_albumartistids` available at the album level.

---
 beets/library.py      | 2 ++
 test/test_importer.py | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/beets/library.py b/beets/library.py
index 8fd1c8022..9be3a1602 100644
--- a/beets/library.py
+++ b/beets/library.py
@@ -1179,6 +1179,7 @@ class Album(LibModel):
         "comp": types.BOOLEAN,
         "mb_albumid": types.STRING,
         "mb_albumartistid": types.STRING,
+        "mb_albumartistids": types.MULTI_VALUE_DSV,
         "albumtype": types.STRING,
         "albumtypes": types.SEMICOLON_SPACE_DSV,
         "label": types.STRING,
@@ -1235,6 +1236,7 @@ class Album(LibModel):
         "comp",
         "mb_albumid",
         "mb_albumartistid",
+        "mb_albumartistids",
         "albumtype",
         "albumtypes",
         "label",
diff --git a/test/test_importer.py b/test/test_importer.py
index 993362254..5070a058d 100644
--- a/test/test_importer.py
+++ b/test/test_importer.py
@@ -129,6 +129,11 @@ class NonAutotaggedImportTest(AsIsImporterMixin, ImportTestCase):
         self.run_asis_importer(delete=True)
         self.assertNotExists(os.path.join(self.import_dir, b"album"))
 
+    def test_album_mb_albumartistids(self):
+        self.run_asis_importer()
+        album = self.lib.albums()[0]
+        assert album.mb_albumartistids == album.items()[0].mb_albumartistids
+
     @unittest.skipUnless(_common.HAVE_SYMLINK, "need symlinks")
     def test_import_link_arrives(self):
         self.run_asis_importer(link=True)

From c9f98fca5550add0942cbaf10d16b0e753002fd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Mon, 26 May 2025 12:40:38 +0100
Subject: [PATCH 25/49] Use unittest.TestCase for tests that don't require the
 dir setup

---
 test/plugins/test_art.py            |  5 +++--
 test/plugins/test_beatport.py       |  3 ++-
 test/plugins/test_lastgenre.py      |  4 ++--
 test/plugins/test_musicbrainz.py    |  3 ++-
 test/plugins/test_subsonicupdate.py |  4 ++--
 test/plugins/test_the.py            |  5 +++--
 test/test_files.py                  |  2 +-
 test/test_importer.py               |  4 ++--
 test/test_logging.py                | 10 +++-------
 test/test_query.py                  |  4 ++--
 test/test_sort.py                   |  2 +-
 test/test_ui.py                     |  8 ++++----
 test/test_util.py                   |  3 +--
 13 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/test/plugins/test_art.py b/test/plugins/test_art.py
index b8a9a3bda..6577b54fc 100644
--- a/test/plugins/test_art.py
+++ b/test/plugins/test_art.py
@@ -18,6 +18,7 @@ from __future__ import annotations
 
 import os
 import shutil
+import unittest
 from typing import TYPE_CHECKING
 from unittest.mock import patch
 
@@ -1012,7 +1013,7 @@ class ArtForAlbumTest(UseThePlugin):
         self._assert_image_operated(self.IMG_348x348, self.RESIZE_OP, True)
 
 
-class DeprecatedConfigTest(BeetsTestCase):
+class DeprecatedConfigTest(unittest.TestCase):
     """While refactoring the plugin, the remote_priority option was deprecated,
     and a new codepath should translate its effect. Check that it actually does
     so.
@@ -1030,7 +1031,7 @@ class DeprecatedConfigTest(BeetsTestCase):
         assert isinstance(self.plugin.sources[-1], fetchart.FileSystem)
 
 
-class EnforceRatioConfigTest(BeetsTestCase):
+class EnforceRatioConfigTest(unittest.TestCase):
     """Throw some data at the regexes."""
 
     def _load_with_config(self, values, should_raise):
diff --git a/test/plugins/test_beatport.py b/test/plugins/test_beatport.py
index d072340b5..b92a3bf15 100644
--- a/test/plugins/test_beatport.py
+++ b/test/plugins/test_beatport.py
@@ -14,6 +14,7 @@
 
 """Tests for the 'beatport' plugin."""
 
+import unittest
 from datetime import timedelta
 
 from beets.test import _common
@@ -585,7 +586,7 @@ class BeatportTest(BeetsTestCase):
             assert track.genre == test_track.genre
 
 
-class BeatportResponseEmptyTest(BeetsTestCase):
+class BeatportResponseEmptyTest(unittest.TestCase):
     def _make_tracks_response(self):
         results = [
             {
diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py
index 49d219de9..d34860774 100644
--- a/test/plugins/test_lastgenre.py
+++ b/test/plugins/test_lastgenre.py
@@ -14,17 +14,17 @@
 
 """Tests for the 'lastgenre' plugin."""
 
+import unittest
 from unittest.mock import Mock
 
 import pytest
 
 from beets import config
 from beets.test import _common
-from beets.test.helper import BeetsTestCase
 from beetsplug import lastgenre
 
 
-class LastGenrePluginTest(BeetsTestCase):
+class LastGenrePluginTest(unittest.TestCase):
     def setUp(self):
         super().setUp()
         self.plugin = lastgenre.LastGenrePlugin()
diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py
index d9a06d00a..100cfc498 100644
--- a/test/plugins/test_musicbrainz.py
+++ b/test/plugins/test_musicbrainz.py
@@ -14,6 +14,7 @@
 
 """Tests for MusicBrainz API wrapper."""
 
+import unittest
 from unittest import mock
 
 import pytest
@@ -665,7 +666,7 @@ class MBAlbumInfoTest(MusicBrainzTestCase):
         assert t[1].trackdisambig == "SECOND TRACK"
 
 
-class ArtistFlatteningTest(BeetsTestCase):
+class ArtistFlatteningTest(unittest.TestCase):
     def _credit_dict(self, suffix=""):
         return {
             "artist": {
diff --git a/test/plugins/test_subsonicupdate.py b/test/plugins/test_subsonicupdate.py
index 891f75cb7..183c2bd67 100644
--- a/test/plugins/test_subsonicupdate.py
+++ b/test/plugins/test_subsonicupdate.py
@@ -1,11 +1,11 @@
 """Tests for the 'subsonic' plugin."""
 
+import unittest
 from urllib.parse import parse_qs, urlparse
 
 import responses
 
 from beets import config
-from beets.test.helper import BeetsTestCase
 from beetsplug import subsonicupdate
 
 
@@ -24,7 +24,7 @@ def _params(url):
     return parse_qs(urlparse(url).query)
 
 
-class SubsonicPluginTest(BeetsTestCase):
+class SubsonicPluginTest(unittest.TestCase):
     """Test class for subsonicupdate."""
 
     @responses.activate
diff --git a/test/plugins/test_the.py b/test/plugins/test_the.py
index bf073301b..c8f919de2 100644
--- a/test/plugins/test_the.py
+++ b/test/plugins/test_the.py
@@ -1,11 +1,12 @@
 """Tests for the 'the' plugin"""
 
+import unittest
+
 from beets import config
-from beets.test.helper import BeetsTestCase
 from beetsplug.the import FORMAT, PATTERN_A, PATTERN_THE, ThePlugin
 
 
-class ThePluginTest(BeetsTestCase):
+class ThePluginTest(unittest.TestCase):
     def test_unthe_with_default_patterns(self):
         assert ThePlugin().unthe("", PATTERN_THE) == ""
         assert (
diff --git a/test/test_files.py b/test/test_files.py
index 72b1610c0..8be94f328 100644
--- a/test/test_files.py
+++ b/test/test_files.py
@@ -200,7 +200,7 @@ class MoveTest(BeetsTestCase):
         assert self.i.path == util.normpath(self.dest)
 
 
-class HelperTest(BeetsTestCase):
+class HelperTest(unittest.TestCase):
     def test_ancestry_works_on_file(self):
         p = "/a/b/c"
         a = ["/", "/a", "/a/b"]
diff --git a/test/test_importer.py b/test/test_importer.py
index 5070a058d..9bb0e8a63 100644
--- a/test/test_importer.py
+++ b/test/test_importer.py
@@ -924,7 +924,7 @@ class ChooseCandidateTest(AutotagImportTestCase):
         assert self.lib.albums().get().album == "Applied Album MM"
 
 
-class InferAlbumDataTest(BeetsTestCase):
+class InferAlbumDataTest(unittest.TestCase):
     def setUp(self):
         super().setUp()
 
@@ -1220,7 +1220,7 @@ class ImportDuplicateSingletonTest(ImportTestCase):
         return item
 
 
-class TagLogTest(BeetsTestCase):
+class TagLogTest(unittest.TestCase):
     def test_tag_log_line(self):
         sio = StringIO()
         handler = logging.StreamHandler(sio)
diff --git a/test/test_logging.py b/test/test_logging.py
index d95a54387..1859ea2dd 100644
--- a/test/test_logging.py
+++ b/test/test_logging.py
@@ -3,21 +3,17 @@
 import logging as log
 import sys
 import threading
+import unittest
 from io import StringIO
 
 import beets.logging as blog
 import beetsplug
 from beets import plugins, ui
 from beets.test import _common, helper
-from beets.test.helper import (
-    AsIsImporterMixin,
-    BeetsTestCase,
-    ImportTestCase,
-    PluginMixin,
-)
+from beets.test.helper import AsIsImporterMixin, ImportTestCase, PluginMixin
 
 
-class LoggingTest(BeetsTestCase):
+class LoggingTest(unittest.TestCase):
     def test_logging_management(self):
         l1 = log.getLogger("foo123")
         l2 = blog.getLogger("foo123")
diff --git a/test/test_query.py b/test/test_query.py
index f85e5c637..22c2710de 100644
--- a/test/test_query.py
+++ b/test/test_query.py
@@ -373,7 +373,7 @@ class GetTest(DummyDataTestCase):
             dbcore.query.RegexpQuery("year", "199(")
 
 
-class MatchTest(BeetsTestCase):
+class MatchTest(unittest.TestCase):
     def setUp(self):
         super().setUp()
         self.item = _common.item()
@@ -811,7 +811,7 @@ class NoneQueryTest(BeetsTestCase, AssertsMixin):
         self.assertInResult(item, matched)
 
 
-class NotQueryMatchTest(BeetsTestCase):
+class NotQueryMatchTest(unittest.TestCase):
     """Test `query.NotQuery` matching against a single item, using the same
     cases and assertions as on `MatchTest`, plus assertion on the negated
     queries (ie. assert q -> assert not NotQuery(q)).
diff --git a/test/test_sort.py b/test/test_sort.py
index d6aa5c518..25d993e30 100644
--- a/test/test_sort.py
+++ b/test/test_sort.py
@@ -378,7 +378,7 @@ class ConfigSortTest(DummyDataTestCase):
         assert results[0].albumartist > results[1].albumartist
 
 
-class CaseSensitivityTest(DummyDataTestCase, BeetsTestCase):
+class CaseSensitivityTest(DummyDataTestCase):
     """If case_insensitive is false, lower-case values should be placed
     after all upper-case values. E.g., `Foo Qux bar`
     """
diff --git a/test/test_ui.py b/test/test_ui.py
index afa16e171..8bb0218d5 100644
--- a/test/test_ui.py
+++ b/test/test_ui.py
@@ -1337,7 +1337,7 @@ class ShowChangeTest(BeetsTestCase):
 
 
 @patch("beets.library.Item.try_filesize", Mock(return_value=987))
-class SummarizeItemsTest(BeetsTestCase):
+class SummarizeItemsTest(unittest.TestCase):
     def setUp(self):
         super().setUp()
         item = library.Item()
@@ -1374,7 +1374,7 @@ class SummarizeItemsTest(BeetsTestCase):
         assert summary == "3 items, G 2, F 1, 4kbps, 32:42, 2.9 KiB"
 
 
-class PathFormatTest(BeetsTestCase):
+class PathFormatTest(unittest.TestCase):
     def test_custom_paths_prepend(self):
         default_formats = ui.get_path_formats()
 
@@ -1521,7 +1521,7 @@ class CommonOptionsParserCliTest(BeetsTestCase):
         # assert 'plugins: ' in output
 
 
-class CommonOptionsParserTest(BeetsTestCase):
+class CommonOptionsParserTest(unittest.TestCase):
     def test_album_option(self):
         parser = ui.CommonOptionsParser()
         assert not parser._album_flags
@@ -1614,7 +1614,7 @@ class CommonOptionsParserTest(BeetsTestCase):
         )
 
 
-class EncodingTest(BeetsTestCase):
+class EncodingTest(unittest.TestCase):
     """Tests for the `terminal_encoding` config option and our
     `_in_encoding` and `_out_encoding` utility functions.
     """
diff --git a/test/test_util.py b/test/test_util.py
index 6b795b957..d08868619 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -25,7 +25,6 @@ import pytest
 
 from beets import util
 from beets.test import _common
-from beets.test.helper import BeetsTestCase
 
 
 class UtilTest(unittest.TestCase):
@@ -132,7 +131,7 @@ class UtilTest(unittest.TestCase):
         pass
 
 
-class PathConversionTest(BeetsTestCase):
+class PathConversionTest(unittest.TestCase):
     def test_syspath_windows_format(self):
         with _common.platform_windows():
             path = os.path.join("a", "b", "c")

From 5900282093c484d964377fa5d40151bd18a2bd56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Mon, 26 May 2025 13:05:40 +0100
Subject: [PATCH 26/49] Use BeetsTestCase for lastgenre tests, re-use the
 defined config

---
 test/plugins/test_lastgenre.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py
index d34860774..be145d811 100644
--- a/test/plugins/test_lastgenre.py
+++ b/test/plugins/test_lastgenre.py
@@ -14,17 +14,16 @@
 
 """Tests for the 'lastgenre' plugin."""
 
-import unittest
 from unittest.mock import Mock
 
 import pytest
 
-from beets import config
 from beets.test import _common
+from beets.test.helper import BeetsTestCase
 from beetsplug import lastgenre
 
 
-class LastGenrePluginTest(unittest.TestCase):
+class LastGenrePluginTest(BeetsTestCase):
     def setUp(self):
         super().setUp()
         self.plugin = lastgenre.LastGenrePlugin()
@@ -32,12 +31,12 @@ class LastGenrePluginTest(unittest.TestCase):
     def _setup_config(
         self, whitelist=False, canonical=False, count=1, prefer_specific=False
     ):
-        config["lastgenre"]["canonical"] = canonical
-        config["lastgenre"]["count"] = count
-        config["lastgenre"]["prefer_specific"] = prefer_specific
+        self.config["lastgenre"]["canonical"] = canonical
+        self.config["lastgenre"]["count"] = count
+        self.config["lastgenre"]["prefer_specific"] = prefer_specific
         if isinstance(whitelist, (bool, (str,))):
             # Filename, default, or disabled.
-            config["lastgenre"]["whitelist"] = whitelist
+            self.config["lastgenre"]["whitelist"] = whitelist
         self.plugin.setup()
         if not isinstance(whitelist, (bool, (str,))):
             # Explicit list of genres.
@@ -463,11 +462,10 @@ def test_get_genre(config_values, item_genre, mock_genres, expected_result):
     lastgenre.LastGenrePlugin.fetch_album_genre = mock_fetch_album_genre
     lastgenre.LastGenrePlugin.fetch_artist_genre = mock_fetch_artist_genre
 
-    # Configure
-    config["lastgenre"] = config_values
-
     # Initialize plugin instance and item
     plugin = lastgenre.LastGenrePlugin()
+    # Configure
+    plugin.config.set(config_values)
     item = _common.item()
     item.genre = item_genre
 

From da5ec00aaff0f05737048f6c6e267d8e8eb80aba Mon Sep 17 00:00:00 2001
From: Ben Stolovitz <ben@stolovitz.com>
Date: Mon, 26 May 2025 13:24:57 -0400
Subject: [PATCH 27/49] [Test] Fix path tests on windows (#5803)

## Description

Fixes #5802.

Today, tests fail on most Windows machines because we hard-code `D:` as
the root drive, but most machines use `C:`. This change uses the same
normalization function in the test assertion to ensure the drives match.

## To Do

- [ ] ~~Documentation.~~
- [x] Changelog.
- [x] Tests. (this is a tests change)

## What changed?

* Updated tests to generate the drive name via normalization, instead of
hard-coding `D:`.
* Updated the `Item::destination()` method to document the
`relative_to_libdir` param.

## How tested?

* [x] Tests pass locally.
---
 beets/library.py     |  4 +++-
 docs/changelog.rst   |  2 ++
 test/test_library.py | 10 ++++++++--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/beets/library.py b/beets/library.py
index 9be3a1602..b813d2ba4 100644
--- a/beets/library.py
+++ b/beets/library.py
@@ -1084,7 +1084,9 @@ class Item(LibModel):
         (i.e., where the file ought to be).
 
         The path is returned as a bytestring. ``basedir`` can override the
-        library's base directory for the destination.
+        library's base directory for the destination. If ``relative_to_libdir``
+        is true, returns just the fragment of the path underneath the library
+        base directory.
         """
         db = self._check_db()
         basedir = basedir or db.directory
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 09259e1fa..9bc065419 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -28,6 +28,8 @@ Bug fixes:
 * :doc:`plugins/musicbrainz`: fix regression where user configured
   ``extra_tags`` have been read incorrectly.
   :bug:`5788`
+* tests: Fix library tests failing on Windows when run from outside ``D:/``.
+  :bug:`5802`
 
 For packagers:
 
diff --git a/test/test_library.py b/test/test_library.py
index 39f1d0b9e..bb014d76d 100644
--- a/test/test_library.py
+++ b/test/test_library.py
@@ -34,7 +34,7 @@ from beets.library import Album
 from beets.test import _common
 from beets.test._common import item
 from beets.test.helper import BeetsTestCase, ItemInDBTestCase
-from beets.util import as_string, bytestring_path, syspath
+from beets.util import as_string, bytestring_path, normpath, syspath
 
 # Shortcut to path normalization.
 np = util.normpath
@@ -553,6 +553,9 @@ class ItemFormattedMappingTest(ItemInDBTestCase):
 class PathFormattingMixin:
     """Utilities for testing path formatting."""
 
+    i: beets.library.Item
+    lib: beets.library.Library
+
     def _setf(self, fmt):
         self.lib.path_formats.insert(0, ("default", fmt))
 
@@ -560,9 +563,12 @@ class PathFormattingMixin:
         if i is None:
             i = self.i
 
+        # Handle paths on Windows.
         if os.path.sep != "/":
             dest = dest.replace(b"/", os.path.sep.encode())
-            dest = b"D:" + dest
+
+            # Paths are normalized based on the CWD.
+            dest = normpath(dest)
 
         actual = i.destination()
 

From dd2f203090af6f570b0c5158661d718607758c96 Mon Sep 17 00:00:00 2001
From: Uncorrupt3318 <25012611+Uncorrupt3318@users.noreply.github.com>
Date: Mon, 26 May 2025 23:17:52 +0100
Subject: [PATCH 28/49] Feat: Add replace plugin (#5644)

Adds replace plugin. The plugin allows the user to replace the audio
file of a song, while keeping the tags and file name.

Some music servers keep track of favourite songs via paths and tags. Now
there won't be a need to 'refavourite'. Plus, this skips the
import/merge steps.
---
 beetsplug/replace.py         | 122 +++++++++++++++++++++++++++++++++++
 docs/changelog.rst           |   1 +
 docs/plugins/index.rst       |   1 +
 docs/plugins/replace.rst     |  17 +++++
 test/plugins/test_replace.py | 115 +++++++++++++++++++++++++++++++++
 5 files changed, 256 insertions(+)
 create mode 100644 beetsplug/replace.py
 create mode 100644 docs/plugins/replace.rst
 create mode 100644 test/plugins/test_replace.py

diff --git a/beetsplug/replace.py b/beetsplug/replace.py
new file mode 100644
index 000000000..0c570877b
--- /dev/null
+++ b/beetsplug/replace.py
@@ -0,0 +1,122 @@
+import shutil
+from pathlib import Path
+
+import mediafile
+
+from beets import ui, util
+from beets.library import Item, Library
+from beets.plugins import BeetsPlugin
+
+
+class ReplacePlugin(BeetsPlugin):
+    def commands(self):
+        cmd = ui.Subcommand(
+            "replace", help="replace audio file while keeping tags"
+        )
+        cmd.func = self.run
+        return [cmd]
+
+    def run(self, lib: Library, args: list[str]) -> None:
+        if len(args) < 2:
+            raise ui.UserError("Usage: beet replace <query> <new_file_path>")
+
+        new_file_path: Path = Path(args[-1])
+        item_query: list[str] = args[:-1]
+
+        self.file_check(new_file_path)
+
+        item_list = list(lib.items(item_query))
+
+        if not item_list:
+            raise ui.UserError("No matching songs found.")
+
+        song = self.select_song(item_list)
+
+        if not song:
+            ui.print_("Operation cancelled.")
+            return
+
+        if not self.confirm_replacement(new_file_path, song):
+            ui.print_("Aborting replacement.")
+            return
+
+        self.replace_file(new_file_path, song)
+
+    def file_check(self, filepath: Path) -> None:
+        """Check if the file exists and is supported"""
+        if not filepath.is_file():
+            raise ui.UserError(
+                f"'{util.displayable_path(filepath)}' is not a valid file."
+            )
+
+        try:
+            mediafile.MediaFile(util.syspath(filepath))
+        except mediafile.FileTypeError as fte:
+            raise ui.UserError(fte)
+
+    def select_song(self, items: list[Item]):
+        """Present a menu of matching songs and get user selection."""
+        ui.print_("\nMatching songs:")
+        for i, item in enumerate(items, 1):
+            ui.print_(f"{i}. {util.displayable_path(item)}")
+
+        while True:
+            try:
+                index = int(
+                    input(
+                        f"Which song would you like to replace? "
+                        f"[1-{len(items)}] (0 to cancel): "
+                    )
+                )
+                if index == 0:
+                    return None
+                if 1 <= index <= len(items):
+                    return items[index - 1]
+                ui.print_(
+                    f"Invalid choice. Please enter a number "
+                    f"between 1 and {len(items)}."
+                )
+            except ValueError:
+                ui.print_("Invalid input. Please type in a number.")
+
+    def confirm_replacement(self, new_file_path: Path, song: Item):
+        """Get user confirmation for the replacement."""
+        original_file_path: Path = Path(song.path.decode())
+
+        if not original_file_path.exists():
+            raise ui.UserError("The original song file was not found.")
+
+        ui.print_(
+            f"\nReplacing: {util.displayable_path(new_file_path)} "
+            f"-> {util.displayable_path(original_file_path)}"
+        )
+        decision: str = (
+            input("Are you sure you want to replace this track? (y/N): ")
+            .strip()
+            .casefold()
+        )
+        return decision in {"yes", "y"}
+
+    def replace_file(self, new_file_path: Path, song: Item) -> None:
+        """Replace the existing file with the new one."""
+        original_file_path = Path(song.path.decode())
+        dest = original_file_path.with_suffix(new_file_path.suffix)
+
+        try:
+            shutil.move(util.syspath(new_file_path), util.syspath(dest))
+        except Exception as e:
+            raise ui.UserError(f"Error replacing file: {e}")
+
+        if (
+            new_file_path.suffix != original_file_path.suffix
+            and original_file_path.exists()
+        ):
+            try:
+                original_file_path.unlink()
+            except Exception as e:
+                raise ui.UserError(f"Could not delete original file: {e}")
+
+        song.path = str(dest).encode()
+        song.store()
+
+        ui.print_("Replacement successful.")
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 9bc065419..688717351 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -22,6 +22,7 @@ New features:
 * :doc:`plugins/discogs`: Implement ``track_for_id`` method to allow retrieving
   singletons by their Discogs ID.
   :bug:`4661`
+* :doc:`plugins/replace`: Add new plugin.
 
 Bug fixes:
 
diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst
index 82fa94281..5fbe42d9f 100644
--- a/docs/plugins/index.rst
+++ b/docs/plugins/index.rst
@@ -125,6 +125,7 @@ following to your configuration:
    playlist
    plexupdate
    random
+   replace
    replaygain
    rewrite
    scrub
diff --git a/docs/plugins/replace.rst b/docs/plugins/replace.rst
new file mode 100644
index 000000000..8695d492c
--- /dev/null
+++ b/docs/plugins/replace.rst
@@ -0,0 +1,17 @@
+Replace Plugin
+==============
+
+The ``replace`` plugin provides a command that replaces the audio file
+of a track, while keeping the name and tags intact. It should save
+some time when you get the wrong version of a song.
+
+Enable the ``replace`` plugin in your configuration (see :ref:`using-plugins`)
+and then type::
+
+    $ beet replace <query> <path>
+
+The plugin will show you a list of files for you to pick from, and then
+ask for confirmation.
+
+Consider using the `replaygain` command from the
+:doc:`/plugins/replaygain` plugin, if you usually use it during imports.
diff --git a/test/plugins/test_replace.py b/test/plugins/test_replace.py
new file mode 100644
index 000000000..a247e317a
--- /dev/null
+++ b/test/plugins/test_replace.py
@@ -0,0 +1,115 @@
+import shutil
+from pathlib import Path
+
+import pytest
+from mediafile import MediaFile
+
+from beets import ui
+from beets.test import _common
+from beetsplug.replace import ReplacePlugin
+
+replace = ReplacePlugin()
+
+
+class TestReplace:
+    @pytest.fixture(autouse=True)
+    def _fake_dir(self, tmp_path):
+        self.fake_dir = tmp_path
+
+    @pytest.fixture(autouse=True)
+    def _fake_file(self, tmp_path):
+        self.fake_file = tmp_path
+
+    def test_path_is_dir(self):
+        fake_directory = self.fake_dir / "fakeDir"
+        fake_directory.mkdir()
+        with pytest.raises(ui.UserError):
+            replace.file_check(fake_directory)
+
+    def test_path_is_unsupported_file(self):
+        fake_file = self.fake_file / "fakefile.txt"
+        fake_file.write_text("test", encoding="utf-8")
+        with pytest.raises(ui.UserError):
+            replace.file_check(fake_file)
+
+    def test_path_is_supported_file(self):
+        dest = self.fake_file / "full.mp3"
+        src = Path(_common.RSRC.decode()) / "full.mp3"
+        shutil.copyfile(src, dest)
+
+        mediafile = MediaFile(dest)
+        mediafile.albumartist = "AAA"
+        mediafile.disctitle = "DDD"
+        mediafile.genres = ["a", "b", "c"]
+        mediafile.composer = None
+        mediafile.save()
+
+        replace.file_check(Path(str(dest)))
+
+    def test_select_song_valid_choice(self, monkeypatch, capfd):
+        songs = ["Song A", "Song B", "Song C"]
+        monkeypatch.setattr("builtins.input", lambda _: "2")
+
+        selected_song = replace.select_song(songs)
+
+        captured = capfd.readouterr()
+
+        assert "1. Song A" in captured.out
+        assert "2. Song B" in captured.out
+        assert "3. Song C" in captured.out
+        assert selected_song == "Song B"
+
+    def test_select_song_cancel(self, monkeypatch):
+        songs = ["Song A", "Song B", "Song C"]
+        monkeypatch.setattr("builtins.input", lambda _: "0")
+
+        selected_song = replace.select_song(songs)
+
+        assert selected_song is None
+
+    def test_select_song_invalid_then_valid(self, monkeypatch, capfd):
+        songs = ["Song A", "Song B", "Song C"]
+        inputs = iter(["invalid", "4", "3"])
+        monkeypatch.setattr("builtins.input", lambda _: next(inputs))
+
+        selected_song = replace.select_song(songs)
+
+        captured = capfd.readouterr()
+
+        assert "Invalid input. Please type in a number." in captured.out
+        assert (
+            "Invalid choice. Please enter a number between 1 and 3."
+            in captured.out
+        )
+        assert selected_song == "Song C"
+
+    def test_confirm_replacement_file_not_exist(self):
+        class Song:
+            path = b"test123321.txt"
+
+        song = Song()
+
+        with pytest.raises(ui.UserError):
+            replace.confirm_replacement("test", song)
+
+    def test_confirm_replacement_yes(self, monkeypatch):
+        src = Path(_common.RSRC.decode()) / "full.mp3"
+        monkeypatch.setattr("builtins.input", lambda _: "YES    ")
+
+        class Song:
+            path = str(src).encode()
+
+        song = Song()
+
+        assert replace.confirm_replacement("test", song) is True
+
+    def test_confirm_replacement_no(self, monkeypatch):
+        src = Path(_common.RSRC.decode()) / "full.mp3"
+        monkeypatch.setattr("builtins.input", lambda _: "test123")
+
+        class Song:
+            path = str(src).encode()
+
+        song = Song()
+
+        assert replace.confirm_replacement("test", song) is False

From 0f76312f3120af4f2bd3b309928603ac61018976 Mon Sep 17 00:00:00 2001
From: Ian McCowan <ian@mccowan.space>
Date: Fri, 30 May 2025 06:41:29 -0700
Subject: [PATCH 29/49] Fix duplicate database change event send on Library.add
 (#5561)

## Description

Fixes #5560. Also a couple other incidental changes / improvements:
* Add `EventType` that holds the actual string literals used for event
sending. With type checking, this can prevent subtle bugs resulting from
misspelled event names.
* Fix `HiddenFileTest` by using `bytestring_path()`

## To Do

- [x] ~Documentation.~
- [x] Changelog.
- [x] Tests.

---------

Co-authored-by: J0J0 Todos <jojo@peek-a-boo.at>
Co-authored-by: J0J0 Todos <2733783+JOJ0@users.noreply.github.com>
---
 beets/event_types.py | 33 +++++++++++++++++++++++++++++++++
 beets/library.py     |  3 ++-
 beets/plugins.py     |  5 ++++-
 docs/changelog.rst   |  4 ++++
 test/test_hidden.py  |  4 ++--
 test/test_library.py | 22 +++++++++++++++++++++-
 6 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 beets/event_types.py

diff --git a/beets/event_types.py b/beets/event_types.py
new file mode 100644
index 000000000..d5fc01eec
--- /dev/null
+++ b/beets/event_types.py
@@ -0,0 +1,33 @@
+from typing import Literal
+
+EventType = Literal[
+    "pluginload",
+    "import",
+    "album_imported",
+    "album_removed",
+    "item_copied",
+    "item_imported",
+    "before_item_moved",
+    "item_moved",
+    "item_linked",
+    "item_hardlinked",
+    "item_reflinked",
+    "item_removed",
+    "write",
+    "after_write",
+    "import_task_created",
+    "import_task_start",
+    "import_task_apply",
+    "import_task_before_choice",
+    "import_task_choice",
+    "import_task_files",
+    "library_opened",
+    "database_change",
+    "cli_exit",
+    "import_begin",
+    "trackinfo_received",
+    "albuminfo_received",
+    "before_choose_candidate",
+    "mb_track_extract",
+    "mb_album_extract",
+]
diff --git a/beets/library.py b/beets/library.py
index b813d2ba4..271059c69 100644
--- a/beets/library.py
+++ b/beets/library.py
@@ -369,8 +369,9 @@ class LibModel(dbcore.Model["Library"]):
         plugins.send("database_change", lib=self._db, model=self)
 
     def add(self, lib=None):
+        # super().add() calls self.store(), which sends `database_change`,
+        # so don't do it here
         super().add(lib)
-        plugins.send("database_change", lib=self._db, model=self)
 
     def __format__(self, spec):
         if not spec:
diff --git a/beets/plugins.py b/beets/plugins.py
index 63e5d3bde..d87dd5d1e 100644
--- a/beets/plugins.py
+++ b/beets/plugins.py
@@ -39,6 +39,9 @@ import beets
 from beets import logging
 from beets.util.id_extractors import extract_release_id
 
+if TYPE_CHECKING:
+    from beets.event_types import EventType
+
 if sys.version_info >= (3, 10):
     from typing import ParamSpec
 else:
@@ -292,7 +295,7 @@ class BeetsPlugin:
     _raw_listeners: dict[str, list[Listener]] | None = None
     listeners: dict[str, list[Listener]] | None = None
 
-    def register_listener(self, event: str, func: Listener) -> None:
+    def register_listener(self, event: "EventType", func: Listener):
         """Add a function as a listener for the specified event."""
         wrapped_func = self._set_log_level_and_params(logging.WARNING, func)
 
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 688717351..b78cae78f 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -31,6 +31,10 @@ Bug fixes:
   :bug:`5788`
 * tests: Fix library tests failing on Windows when run from outside ``D:/``.
   :bug:`5802`
+* Fix an issue where calling `Library.add` would cause the `database_change`
+  event to be sent twice, not once.
+  :bug:`5560`
+* Fix ``HiddenFileTest`` by using ``bytestring_path()``.
 
 For packagers:
 
diff --git a/test/test_hidden.py b/test/test_hidden.py
index a7e6a1a10..bd974b1cb 100644
--- a/test/test_hidden.py
+++ b/test/test_hidden.py
@@ -22,7 +22,7 @@ import tempfile
 import unittest
 
 from beets import util
-from beets.util import hidden
+from beets.util import bytestring_path, hidden
 
 
 class HiddenFileTest(unittest.TestCase):
@@ -44,7 +44,7 @@ class HiddenFileTest(unittest.TestCase):
                 else:
                     raise e
 
-            assert hidden.is_hidden(f.name)
+            assert hidden.is_hidden(bytestring_path(f.name))
 
     def test_windows_hidden(self):
         if not sys.platform == "win32":
diff --git a/test/test_library.py b/test/test_library.py
index bb014d76d..36322cfec 100644
--- a/test/test_library.py
+++ b/test/test_library.py
@@ -29,11 +29,12 @@ from mediafile import MediaFile, UnreadableFileError
 
 import beets.dbcore.query
 import beets.library
+import beets.logging as blog
 from beets import config, plugins, util
 from beets.library import Album
 from beets.test import _common
 from beets.test._common import item
-from beets.test.helper import BeetsTestCase, ItemInDBTestCase
+from beets.test.helper import BeetsTestCase, ItemInDBTestCase, capture_log
 from beets.util import as_string, bytestring_path, normpath, syspath
 
 # Shortcut to path normalization.
@@ -126,6 +127,25 @@ class AddTest(BeetsTestCase):
         )
         assert new_grouping == self.i.grouping
 
+    def test_library_add_one_database_change_event(self):
+        """Test library.add emits only one database_change event."""
+        self.item = _common.item()
+        self.item.path = beets.util.normpath(
+            os.path.join(
+                self.temp_dir,
+                b"a",
+                b"b.mp3",
+            )
+        )
+        self.item.album = "a"
+        self.item.title = "b"
+
+        blog.getLogger("beets").set_global_level(blog.DEBUG)
+        with capture_log() as logs:
+            self.lib.add(self.item)
+
+        assert logs.count("Sending event: database_change") == 1
+
 
 class RemoveTest(ItemInDBTestCase):
     def test_remove_deletes_from_db(self):

From 509cbdcbe472aa3cf6559f6fba3eb2e6c9dcf47d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sat, 24 May 2025 15:16:02 +0100
Subject: [PATCH 30/49] Move sanitize_pairs/choices from plugins to util module

---
 beets/plugins.py         | 60 ------------------------------------
 beets/util/config.py     | 66 ++++++++++++++++++++++++++++++++++++++++
 beetsplug/fetchart.py    |  3 +-
 beetsplug/lyrics.py      |  3 +-
 test/test_plugins.py     | 10 ------
 test/util/test_config.py | 15 +++++++++
 6 files changed, 85 insertions(+), 72 deletions(-)
 create mode 100644 beets/util/config.py
 create mode 100644 test/util/test_config.py

diff --git a/beets/plugins.py b/beets/plugins.py
index d87dd5d1e..6d3a8447e 100644
--- a/beets/plugins.py
+++ b/beets/plugins.py
@@ -654,66 +654,6 @@ def feat_tokens(for_artist: bool = True) -> str:
     )
 
 
-def sanitize_choices(
-    choices: Sequence[str], choices_all: Sequence[str]
-) -> list[str]:
-    """Clean up a stringlist configuration attribute: keep only choices
-    elements present in choices_all, remove duplicate elements, expand '*'
-    wildcard while keeping original stringlist order.
-    """
-    seen: set[str] = set()
-    others = [x for x in choices_all if x not in choices]
-    res: list[str] = []
-    for s in choices:
-        if s not in seen:
-            if s in list(choices_all):
-                res.append(s)
-            elif s == "*":
-                res.extend(others)
-        seen.add(s)
-    return res
-
-
-def sanitize_pairs(
-    pairs: Sequence[tuple[str, str]], pairs_all: Sequence[tuple[str, str]]
-) -> list[tuple[str, str]]:
-    """Clean up a single-element mapping configuration attribute as returned
-    by Confuse's `Pairs` template: keep only two-element tuples present in
-    pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*')
-    wildcards while keeping the original order. Note that ('*', '*') and
-    ('*', 'whatever') have the same effect.
-
-    For example,
-
-    >>> sanitize_pairs(
-    ...     [('foo', 'baz bar'), ('key', '*'), ('*', '*')],
-    ...     [('foo', 'bar'), ('foo', 'baz'), ('foo', 'foobar'),
-    ...      ('key', 'value')]
-    ...     )
-    [('foo', 'baz'), ('foo', 'bar'), ('key', 'value'), ('foo', 'foobar')]
-    """
-    pairs_all = list(pairs_all)
-    seen: set[tuple[str, str]] = set()
-    others = [x for x in pairs_all if x not in pairs]
-    res: list[tuple[str, str]] = []
-    for k, values in pairs:
-        for v in values.split():
-            x = (k, v)
-            if x in pairs_all:
-                if x not in seen:
-                    seen.add(x)
-                    res.append(x)
-            elif k == "*":
-                new = [o for o in others if o not in seen]
-                seen.update(new)
-                res.extend(new)
-            elif v == "*":
-                new = [o for o in others if o not in seen and o[0] == k]
-                seen.update(new)
-                res.extend(new)
-    return res
-
-
 def get_distance(
     config: ConfigView, data_source: str, info: AlbumInfo | TrackInfo
 ) -> Distance:
diff --git a/beets/util/config.py b/beets/util/config.py
new file mode 100644
index 000000000..218a9d133
--- /dev/null
+++ b/beets/util/config.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Collection, Sequence
+
+
+def sanitize_choices(
+    choices: Sequence[str], choices_all: Collection[str]
+) -> list[str]:
+    """Clean up a stringlist configuration attribute: keep only choices
+    elements present in choices_all, remove duplicate elements, expand '*'
+    wildcard while keeping original stringlist order.
+    """
+    seen: set[str] = set()
+    others = [x for x in choices_all if x not in choices]
+    res: list[str] = []
+    for s in choices:
+        if s not in seen:
+            if s in list(choices_all):
+                res.append(s)
+            elif s == "*":
+                res.extend(others)
+        seen.add(s)
+    return res
+
+
+def sanitize_pairs(
+    pairs: Sequence[tuple[str, str]], pairs_all: Sequence[tuple[str, str]]
+) -> list[tuple[str, str]]:
+    """Clean up a single-element mapping configuration attribute as returned
+    by Confuse's `Pairs` template: keep only two-element tuples present in
+    pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*')
+    wildcards while keeping the original order. Note that ('*', '*') and
+    ('*', 'whatever') have the same effect.
+
+    For example,
+
+    >>> sanitize_pairs(
+    ...     [('foo', 'baz bar'), ('key', '*'), ('*', '*')],
+    ...     [('foo', 'bar'), ('foo', 'baz'), ('foo', 'foobar'),
+    ...      ('key', 'value')]
+    ...     )
+    [('foo', 'baz'), ('foo', 'bar'), ('key', 'value'), ('foo', 'foobar')]
+    """
+    pairs_all = list(pairs_all)
+    seen: set[tuple[str, str]] = set()
+    others = [x for x in pairs_all if x not in pairs]
+    res: list[tuple[str, str]] = []
+    for k, values in pairs:
+        for v in values.split():
+            x = (k, v)
+            if x in pairs_all:
+                if x not in seen:
+                    seen.add(x)
+                    res.append(x)
+            elif k == "*":
+                new = [o for o in others if o not in seen]
+                seen.update(new)
+                res.extend(new)
+            elif v == "*":
+                new = [o for o in others if o not in seen and o[0] == k]
+                seen.update(new)
+                res.extend(new)
+    return res
diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py
index 3473fe08b..b442633da 100644
--- a/beetsplug/fetchart.py
+++ b/beetsplug/fetchart.py
@@ -32,6 +32,7 @@ from mediafile import image_mime_type
 from beets import config, importer, plugins, ui, util
 from beets.util import bytestring_path, get_temp_filename, sorted_walk, syspath
 from beets.util.artresizer import ArtResizer
+from beets.util.config import sanitize_pairs
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator, Sequence
@@ -1396,7 +1397,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin):
             if s_cls.available(self._log, self.config)
             for c in s_cls.VALID_MATCHING_CRITERIA
         ]
-        sources = plugins.sanitize_pairs(
+        sources = sanitize_pairs(
             self.config["sources"].as_pairs(default_value="*"),
             available_sources,
         )
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index 3e979221c..e2c0c7fd2 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -39,6 +39,7 @@ from unidecode import unidecode
 import beets
 from beets import plugins, ui
 from beets.autotag.hooks import string_dist
+from beets.util.config import sanitize_choices
 
 if TYPE_CHECKING:
     from logging import Logger
@@ -957,7 +958,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
     def backends(self) -> list[Backend]:
         user_sources = self.config["sources"].get()
 
-        chosen = plugins.sanitize_choices(user_sources, self.BACKEND_BY_NAME)
+        chosen = sanitize_choices(user_sources, self.BACKEND_BY_NAME)
         if "google" in chosen and not self.config["google_API_key"].get():
             self.warn("Disabling Google source: no API key configured.")
             chosen.remove("google")
diff --git a/test/test_plugins.py b/test/test_plugins.py
index 3e809e492..207522430 100644
--- a/test/test_plugins.py
+++ b/test/test_plugins.py
@@ -15,7 +15,6 @@
 
 import itertools
 import os
-import unittest
 from unittest.mock import ANY, Mock, patch
 
 import pytest
@@ -215,15 +214,6 @@ class EventsTest(PluginImportTestCase):
         ]
 
 
-class HelpersTest(unittest.TestCase):
-    def test_sanitize_choices(self):
-        assert plugins.sanitize_choices(["A", "Z"], ("A", "B")) == ["A"]
-        assert plugins.sanitize_choices(["A", "A"], ("A")) == ["A"]
-        assert plugins.sanitize_choices(
-            ["D", "*", "A"], ("A", "B", "C", "D")
-        ) == ["D", "B", "C", "A"]
-
-
 class ListenersTest(PluginLoaderTestCase):
     def test_register(self):
         class DummyPlugin(plugins.BeetsPlugin):
diff --git a/test/util/test_config.py b/test/util/test_config.py
new file mode 100644
index 000000000..0c49f85b1
--- /dev/null
+++ b/test/util/test_config.py
@@ -0,0 +1,15 @@
+import unittest
+
+from beets.util.config import sanitize_choices
+
+
+class HelpersTest(unittest.TestCase):
+    def test_sanitize_choices(self):
+        assert sanitize_choices(["A", "Z"], ("A", "B")) == ["A"]
+        assert sanitize_choices(["A", "A"], ("A")) == ["A"]
+        assert sanitize_choices(["D", "*", "A"], ("A", "B", "C", "D")) == [
+            "D",
+            "B",
+            "C",
+            "A",
+        ]

From 1c9aebd36c4ebd9a0a08ac6a131ee06927f2bad9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 25 May 2025 10:52:39 +0100
Subject: [PATCH 31/49] match.current_metadata -> util.get_most_common_tags

---
 beets/autotag/__init__.py |  9 +----
 beets/autotag/match.py    | 44 ++-------------------
 beets/importer/tasks.py   |  2 +-
 beets/util/__init__.py    | 40 +++++++++++++++++++
 test/test_autotag.py      | 80 --------------------------------------
 test/test_util.py         | 82 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 127 insertions(+), 130 deletions(-)

diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py
index 42f957b0d..5b6a11195 100644
--- a/beets/autotag/__init__.py
+++ b/beets/autotag/__init__.py
@@ -24,13 +24,7 @@ from beets.library import Album, Item, LibModel
 from beets.util import unique_list
 
 from .hooks import AlbumInfo, AlbumMatch, Distance, TrackInfo, TrackMatch
-from .match import (
-    Proposal,
-    Recommendation,
-    current_metadata,
-    tag_album,
-    tag_item,
-)
+from .match import Proposal, Recommendation, tag_album, tag_item
 
 __all__ = [
     "AlbumInfo",
@@ -43,7 +37,6 @@ __all__ = [
     "apply_album_metadata",
     "apply_item_metadata",
     "apply_metadata",
-    "current_metadata",
     "tag_album",
     "tag_item",
 ]
diff --git a/beets/autotag/match.py b/beets/autotag/match.py
index 91a315de0..4dc4c1052 100644
--- a/beets/autotag/match.py
+++ b/beets/autotag/match.py
@@ -36,7 +36,7 @@ from beets.autotag import (
     TrackMatch,
     hooks,
 )
-from beets.util import plurality
+from beets.util import get_most_common_tags
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Sequence
@@ -80,44 +80,6 @@ class Proposal(NamedTuple):
 # Primary matching functionality.
 
 
-def current_metadata(
-    items: Iterable[Item],
-) -> tuple[dict[str, Any], dict[str, Any]]:
-    """Extract the likely current metadata for an album given a list of its
-    items. Return two dictionaries:
-     - The most common value for each field.
-     - Whether each field's value was unanimous (values are booleans).
-    """
-    assert items  # Must be nonempty.
-
-    likelies = {}
-    consensus = {}
-    fields = [
-        "artist",
-        "album",
-        "albumartist",
-        "year",
-        "disctotal",
-        "mb_albumid",
-        "label",
-        "barcode",
-        "catalognum",
-        "country",
-        "media",
-        "albumdisambig",
-    ]
-    for field in fields:
-        values = [item[field] for item in items if item]
-        likelies[field], freq = plurality(values)
-        consensus[field] = freq == len(values)
-
-    # If there's an album artist consensus, use this for the artist.
-    if consensus["albumartist"] and likelies["albumartist"]:
-        likelies["artist"] = likelies["albumartist"]
-
-    return likelies, consensus
-
-
 def assign_items(
     items: Sequence[Item],
     tracks: Sequence[TrackInfo],
@@ -231,7 +193,7 @@ def distance(
     keys are a subset of `items` and the values are a subset of
     `album_info.tracks`.
     """
-    likelies, _ = current_metadata(items)
+    likelies, _ = get_most_common_tags(items)
 
     dist = hooks.Distance()
 
@@ -499,7 +461,7 @@ def tag_album(
     candidates.
     """
     # Get current metadata.
-    likelies, consensus = current_metadata(items)
+    likelies, consensus = get_most_common_tags(items)
     cur_artist: str = likelies["artist"]
     cur_album: str = likelies["album"]
     log.debug("Tagging {0} - {1}", cur_artist, cur_album)
diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py
index d2f638c55..75f04cf5a 100644
--- a/beets/importer/tasks.py
+++ b/beets/importer/tasks.py
@@ -228,7 +228,7 @@ class ImportTask(BaseImportTask):
         or APPLY (in which case the data comes from the choice).
         """
         if self.choice_flag in (Action.ASIS, Action.RETAG):
-            likelies, consensus = autotag.current_metadata(self.items)
+            likelies, consensus = util.get_most_common_tags(self.items)
             return likelies
         elif self.choice_flag is Action.APPLY and self.match:
             return self.match.info.copy()
diff --git a/beets/util/__init__.py b/beets/util/__init__.py
index 6bc4d14ee..9bd7451f8 100644
--- a/beets/util/__init__.py
+++ b/beets/util/__init__.py
@@ -56,6 +56,8 @@ if TYPE_CHECKING:
     from collections.abc import Iterator, Sequence
     from logging import Logger
 
+    from beets.library import Item
+
 if sys.version_info >= (3, 10):
     from typing import TypeAlias
 else:
@@ -814,6 +816,44 @@ def plurality(objs: Iterable[T]) -> tuple[T, int]:
     return c.most_common(1)[0]
 
 
+def get_most_common_tags(
+    items: Sequence[Item],
+) -> tuple[dict[str, Any], dict[str, Any]]:
+    """Extract the likely current metadata for an album given a list of its
+    items. Return two dictionaries:
+     - The most common value for each field.
+     - Whether each field's value was unanimous (values are booleans).
+    """
+    assert items  # Must be nonempty.
+
+    likelies = {}
+    consensus = {}
+    fields = [
+        "artist",
+        "album",
+        "albumartist",
+        "year",
+        "disctotal",
+        "mb_albumid",
+        "label",
+        "barcode",
+        "catalognum",
+        "country",
+        "media",
+        "albumdisambig",
+    ]
+    for field in fields:
+        values = [item[field] for item in items if item]
+        likelies[field], freq = plurality(values)
+        consensus[field] = freq == len(values)
+
+    # If there's an album artist consensus, use this for the artist.
+    if consensus["albumartist"] and likelies["albumartist"]:
+        likelies["artist"] = likelies["albumartist"]
+
+    return likelies, consensus
+
+
 # stdout and stderr as bytes
 class CommandOutput(NamedTuple):
     stdout: bytes
diff --git a/test/test_autotag.py b/test/test_autotag.py
index 7f8ed3d2e..bd4205806 100644
--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@@ -24,86 +24,6 @@ from beets.autotag import AlbumInfo, TrackInfo, correct_list_fields, match
 from beets.autotag.hooks import Distance, string_dist
 from beets.library import Item
 from beets.test.helper import BeetsTestCase, ConfigMixin
-from beets.util import plurality
-
-
-class PluralityTest(BeetsTestCase):
-    def test_plurality_consensus(self):
-        objs = [1, 1, 1, 1]
-        obj, freq = plurality(objs)
-        assert obj == 1
-        assert freq == 4
-
-    def test_plurality_near_consensus(self):
-        objs = [1, 1, 2, 1]
-        obj, freq = plurality(objs)
-        assert obj == 1
-        assert freq == 3
-
-    def test_plurality_conflict(self):
-        objs = [1, 1, 2, 2, 3]
-        obj, freq = plurality(objs)
-        assert obj in (1, 2)
-        assert freq == 2
-
-    def test_plurality_empty_sequence_raises_error(self):
-        with pytest.raises(ValueError, match="must be non-empty"):
-            plurality([])
-
-    def test_current_metadata_finds_pluralities(self):
-        items = [
-            Item(artist="The Beetles", album="The White Album"),
-            Item(artist="The Beatles", album="The White Album"),
-            Item(artist="The Beatles", album="Teh White Album"),
-        ]
-        likelies, consensus = match.current_metadata(items)
-        assert likelies["artist"] == "The Beatles"
-        assert likelies["album"] == "The White Album"
-        assert not consensus["artist"]
-
-    def test_current_metadata_artist_consensus(self):
-        items = [
-            Item(artist="The Beatles", album="The White Album"),
-            Item(artist="The Beatles", album="The White Album"),
-            Item(artist="The Beatles", album="Teh White Album"),
-        ]
-        likelies, consensus = match.current_metadata(items)
-        assert likelies["artist"] == "The Beatles"
-        assert likelies["album"] == "The White Album"
-        assert consensus["artist"]
-
-    def test_albumartist_consensus(self):
-        items = [
-            Item(artist="tartist1", album="album", albumartist="aartist"),
-            Item(artist="tartist2", album="album", albumartist="aartist"),
-            Item(artist="tartist3", album="album", albumartist="aartist"),
-        ]
-        likelies, consensus = match.current_metadata(items)
-        assert likelies["artist"] == "aartist"
-        assert not consensus["artist"]
-
-    def test_current_metadata_likelies(self):
-        fields = [
-            "artist",
-            "album",
-            "albumartist",
-            "year",
-            "disctotal",
-            "mb_albumid",
-            "label",
-            "barcode",
-            "catalognum",
-            "country",
-            "media",
-            "albumdisambig",
-        ]
-        items = [Item(**{f: f"{f}_{i or 1}" for f in fields}) for i in range(5)]
-        likelies, _ = match.current_metadata(items)
-        for f in fields:
-            if isinstance(likelies[f], int):
-                assert likelies[f] == 0
-            else:
-                assert likelies[f] == f"{f}_1"
 
 
 def _make_item(title, track, artist="some artist"):
diff --git a/test/test_util.py b/test/test_util.py
index d08868619..5aa6c5dc7 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -24,7 +24,10 @@ from unittest.mock import Mock, patch
 import pytest
 
 from beets import util
+from beets.library import Item
 from beets.test import _common
+from beets.test.helper import BeetsTestCase
+from beets.util import plurality
 
 
 class UtilTest(unittest.TestCase):
@@ -217,3 +220,82 @@ class TestPathLegalization:
             expected_path,
             expected_truncated,
         )
+
+
+class PluralityTest(BeetsTestCase):
+    def test_plurality_consensus(self):
+        objs = [1, 1, 1, 1]
+        obj, freq = plurality(objs)
+        assert obj == 1
+        assert freq == 4
+
+    def test_plurality_near_consensus(self):
+        objs = [1, 1, 2, 1]
+        obj, freq = plurality(objs)
+        assert obj == 1
+        assert freq == 3
+
+    def test_plurality_conflict(self):
+        objs = [1, 1, 2, 2, 3]
+        obj, freq = plurality(objs)
+        assert obj in (1, 2)
+        assert freq == 2
+
+    def test_plurality_empty_sequence_raises_error(self):
+        with pytest.raises(ValueError, match="must be non-empty"):
+            plurality([])
+
+    def test_current_metadata_finds_pluralities(self):
+        items = [
+            Item(artist="The Beetles", album="The White Album"),
+            Item(artist="The Beatles", album="The White Album"),
+            Item(artist="The Beatles", album="Teh White Album"),
+        ]
+        likelies, consensus = util.get_most_common_tags(items)
+        assert likelies["artist"] == "The Beatles"
+        assert likelies["album"] == "The White Album"
+        assert not consensus["artist"]
+
+    def test_current_metadata_artist_consensus(self):
+        items = [
+            Item(artist="The Beatles", album="The White Album"),
+            Item(artist="The Beatles", album="The White Album"),
+            Item(artist="The Beatles", album="Teh White Album"),
+        ]
+        likelies, consensus = util.get_most_common_tags(items)
+        assert likelies["artist"] == "The Beatles"
+        assert likelies["album"] == "The White Album"
+        assert consensus["artist"]
+
+    def test_albumartist_consensus(self):
+        items = [
+            Item(artist="tartist1", album="album", albumartist="aartist"),
+            Item(artist="tartist2", album="album", albumartist="aartist"),
+            Item(artist="tartist3", album="album", albumartist="aartist"),
+        ]
+        likelies, consensus = util.get_most_common_tags(items)
+        assert likelies["artist"] == "aartist"
+        assert not consensus["artist"]
+
+    def test_current_metadata_likelies(self):
+        fields = [
+            "artist",
+            "album",
+            "albumartist",
+            "year",
+            "disctotal",
+            "mb_albumid",
+            "label",
+            "barcode",
+            "catalognum",
+            "country",
+            "media",
+            "albumdisambig",
+        ]
+        items = [Item(**{f: f"{f}_{i or 1}" for f in fields}) for i in range(5)]
+        likelies, _ = util.get_most_common_tags(items)
+        for f in fields:
+            if isinstance(likelies[f], int):
+                assert likelies[f] == 0
+            else:
+                assert likelies[f] == f"{f}_1"

From 01b6ea78987931804d08f7ae377ab54f46b95f0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 25 May 2025 02:50:57 +0100
Subject: [PATCH 32/49] Simplify and speed up plurality/album tags retrieval
 tests

---
 test/test_util.py | 95 +++++++++++++----------------------------------
 1 file changed, 26 insertions(+), 69 deletions(-)

diff --git a/test/test_util.py b/test/test_util.py
index 5aa6c5dc7..d8a4ca0db 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -26,8 +26,6 @@ import pytest
 from beets import util
 from beets.library import Item
 from beets.test import _common
-from beets.test.helper import BeetsTestCase
-from beets.util import plurality
 
 
 class UtilTest(unittest.TestCase):
@@ -222,80 +220,39 @@ class TestPathLegalization:
         )
 
 
-class PluralityTest(BeetsTestCase):
-    def test_plurality_consensus(self):
-        objs = [1, 1, 1, 1]
-        obj, freq = plurality(objs)
-        assert obj == 1
-        assert freq == 4
+class TestPlurality:
+    @pytest.mark.parametrize(
+        "objs, expected_obj, expected_freq",
+        [
+            pytest.param([1, 1, 1, 1], 1, 4, id="consensus"),
+            pytest.param([1, 1, 2, 1], 1, 3, id="near consensus"),
+            pytest.param([1, 1, 2, 2, 3], 1, 2, id="conflict-first-wins"),
+        ],
+    )
+    def test_plurality(self, objs, expected_obj, expected_freq):
+        assert (expected_obj, expected_freq) == util.plurality(objs)
 
-    def test_plurality_near_consensus(self):
-        objs = [1, 1, 2, 1]
-        obj, freq = plurality(objs)
-        assert obj == 1
-        assert freq == 3
-
-    def test_plurality_conflict(self):
-        objs = [1, 1, 2, 2, 3]
-        obj, freq = plurality(objs)
-        assert obj in (1, 2)
-        assert freq == 2
-
-    def test_plurality_empty_sequence_raises_error(self):
+    def test_empty_sequence_raises_error(self):
         with pytest.raises(ValueError, match="must be non-empty"):
-            plurality([])
+            util.plurality([])
 
-    def test_current_metadata_finds_pluralities(self):
+    def test_get_most_common_tags(self):
         items = [
-            Item(artist="The Beetles", album="The White Album"),
-            Item(artist="The Beatles", album="The White Album"),
-            Item(artist="The Beatles", album="Teh White Album"),
+            Item(albumartist="aartist", label="label 1", album="album"),
+            Item(albumartist="aartist", label="label 2", album="album"),
+            Item(albumartist="aartist", label="label 3", album="another album"),
         ]
-        likelies, consensus = util.get_most_common_tags(items)
-        assert likelies["artist"] == "The Beatles"
-        assert likelies["album"] == "The White Album"
-        assert not consensus["artist"]
 
-    def test_current_metadata_artist_consensus(self):
-        items = [
-            Item(artist="The Beatles", album="The White Album"),
-            Item(artist="The Beatles", album="The White Album"),
-            Item(artist="The Beatles", album="Teh White Album"),
-        ]
         likelies, consensus = util.get_most_common_tags(items)
-        assert likelies["artist"] == "The Beatles"
-        assert likelies["album"] == "The White Album"
-        assert consensus["artist"]
 
-    def test_albumartist_consensus(self):
-        items = [
-            Item(artist="tartist1", album="album", albumartist="aartist"),
-            Item(artist="tartist2", album="album", albumartist="aartist"),
-            Item(artist="tartist3", album="album", albumartist="aartist"),
-        ]
-        likelies, consensus = util.get_most_common_tags(items)
+        assert likelies["albumartist"] == "aartist"
+        assert likelies["album"] == "album"
+        # albumartist consensus overrides artist
         assert likelies["artist"] == "aartist"
-        assert not consensus["artist"]
+        assert likelies["label"] == "label 1"
+        assert likelies["year"] == 0
 
-    def test_current_metadata_likelies(self):
-        fields = [
-            "artist",
-            "album",
-            "albumartist",
-            "year",
-            "disctotal",
-            "mb_albumid",
-            "label",
-            "barcode",
-            "catalognum",
-            "country",
-            "media",
-            "albumdisambig",
-        ]
-        items = [Item(**{f: f"{f}_{i or 1}" for f in fields}) for i in range(5)]
-        likelies, _ = util.get_most_common_tags(items)
-        for f in fields:
-            if isinstance(likelies[f], int):
-                assert likelies[f] == 0
-            else:
-                assert likelies[f] == f"{f}_1"
+        assert consensus["year"]
+        assert consensus["albumartist"]
+        assert not consensus["album"]
+        assert not consensus["label"]

From adbd50b2374edb4fe9f9455da9401dc1225a4818 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 25 May 2025 02:59:27 +0100
Subject: [PATCH 33/49] Move distance to a separate module

---
 beets/autotag/__init__.py     |  14 +-
 beets/autotag/distance.py     | 531 ++++++++++++++++++++++++++++++++++
 beets/autotag/hooks.py        | 334 +--------------------
 beets/autotag/match.py        | 205 +------------
 beets/plugins.py              |  13 +-
 beetsplug/chroma.py           |   2 +-
 beetsplug/discogs.py          |   3 +-
 beetsplug/lyrics.py           |   2 +-
 test/autotag/test_distance.py | 476 ++++++++++++++++++++++++++++++
 test/test_autotag.py          | 472 ------------------------------
 10 files changed, 1028 insertions(+), 1024 deletions(-)
 create mode 100644 beets/autotag/distance.py
 create mode 100644 test/autotag/test_distance.py

diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py
index 5b6a11195..5b16b012e 100644
--- a/beets/autotag/__init__.py
+++ b/beets/autotag/__init__.py
@@ -14,22 +14,26 @@
 
 """Facilities for automatically determining files' correct metadata."""
 
-from collections.abc import Mapping, Sequence
-from typing import Union
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Union
 
 from beets import config, logging
-from beets.library import Album, Item, LibModel
 
 # Parts of external interface.
 from beets.util import unique_list
 
-from .hooks import AlbumInfo, AlbumMatch, Distance, TrackInfo, TrackMatch
+from .hooks import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch
 from .match import Proposal, Recommendation, tag_album, tag_item
 
+if TYPE_CHECKING:
+    from collections.abc import Mapping, Sequence
+
+    from beets.library import Album, Item, LibModel
+
 __all__ = [
     "AlbumInfo",
     "AlbumMatch",
-    "Distance",
     "TrackInfo",
     "TrackMatch",
     "Proposal",
diff --git a/beets/autotag/distance.py b/beets/autotag/distance.py
new file mode 100644
index 000000000..d146c27f0
--- /dev/null
+++ b/beets/autotag/distance.py
@@ -0,0 +1,531 @@
+from __future__ import annotations
+
+import datetime
+import re
+from functools import cache, total_ordering
+from typing import TYPE_CHECKING, Any
+
+from jellyfish import levenshtein_distance
+from unidecode import unidecode
+
+from beets import config, plugins
+from beets.util import as_string, cached_classproperty, get_most_common_tags
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator, Sequence
+
+    from beets.library import Item
+
+    from .hooks import AlbumInfo, TrackInfo
+
+# Candidate distance scoring.
+
+# Artist signals that indicate "various artists". These are used at the
+# album level to determine whether a given release is likely a VA
+# release and also on the track level to to remove the penalty for
+# differing artists.
+VA_ARTISTS = ("", "various artists", "various", "va", "unknown")
+
+# Parameters for string distance function.
+# Words that can be moved to the end of a string using a comma.
+SD_END_WORDS = ["the", "a", "an"]
+# Reduced weights for certain portions of the string.
+SD_PATTERNS = [
+    (r"^the ", 0.1),
+    (r"[\[\(]?(ep|single)[\]\)]?", 0.0),
+    (r"[\[\(]?(featuring|feat|ft)[\. :].+", 0.1),
+    (r"\(.*?\)", 0.3),
+    (r"\[.*?\]", 0.3),
+    (r"(, )?(pt\.|part) .+", 0.2),
+]
+# Replacements to use before testing distance.
+SD_REPLACE = [
+    (r"&", "and"),
+]
+
+
+def _string_dist_basic(str1: str, str2: str) -> float:
+    """Basic edit distance between two strings, ignoring
+    non-alphanumeric characters and case. Comparisons are based on a
+    transliteration/lowering to ASCII characters. Normalized by string
+    length.
+    """
+    assert isinstance(str1, str)
+    assert isinstance(str2, str)
+    str1 = as_string(unidecode(str1))
+    str2 = as_string(unidecode(str2))
+    str1 = re.sub(r"[^a-z0-9]", "", str1.lower())
+    str2 = re.sub(r"[^a-z0-9]", "", str2.lower())
+    if not str1 and not str2:
+        return 0.0
+    return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2)))
+
+
+def string_dist(str1: str | None, str2: str | None) -> float:
+    """Gives an "intuitive" edit distance between two strings. This is
+    an edit distance, normalized by the string length, with a number of
+    tweaks that reflect intuition about text.
+    """
+    if str1 is None and str2 is None:
+        return 0.0
+    if str1 is None or str2 is None:
+        return 1.0
+
+    str1 = str1.lower()
+    str2 = str2.lower()
+
+    # Don't penalize strings that move certain words to the end. For
+    # example, "the something" should be considered equal to
+    # "something, the".
+    for word in SD_END_WORDS:
+        if str1.endswith(", %s" % word):
+            str1 = "{} {}".format(word, str1[: -len(word) - 2])
+        if str2.endswith(", %s" % word):
+            str2 = "{} {}".format(word, str2[: -len(word) - 2])
+
+    # Perform a couple of basic normalizing substitutions.
+    for pat, repl in SD_REPLACE:
+        str1 = re.sub(pat, repl, str1)
+        str2 = re.sub(pat, repl, str2)
+
+    # Change the weight for certain string portions matched by a set
+    # of regular expressions. We gradually change the strings and build
+    # up penalties associated with parts of the string that were
+    # deleted.
+    base_dist = _string_dist_basic(str1, str2)
+    penalty = 0.0
+    for pat, weight in SD_PATTERNS:
+        # Get strings that drop the pattern.
+        case_str1 = re.sub(pat, "", str1)
+        case_str2 = re.sub(pat, "", str2)
+
+        if case_str1 != str1 or case_str2 != str2:
+            # If the pattern was present (i.e., it is deleted in the
+            # the current case), recalculate the distances for the
+            # modified strings.
+            case_dist = _string_dist_basic(case_str1, case_str2)
+            case_delta = max(0.0, base_dist - case_dist)
+            if case_delta == 0.0:
+                continue
+
+            # Shift our baseline strings down (to avoid rematching the
+            # same part of the string) and add a scaled distance
+            # amount to the penalties.
+            str1 = case_str1
+            str2 = case_str2
+            base_dist = case_dist
+            penalty += weight * case_delta
+
+    return base_dist + penalty
+
+
+@total_ordering
+class Distance:
+    """Keeps track of multiple distance penalties. Provides a single
+    weighted distance for all penalties as well as a weighted distance
+    for each individual penalty.
+    """
+
+    def __init__(self) -> None:
+        self._penalties: dict[str, list[float]] = {}
+        self.tracks: dict[TrackInfo, Distance] = {}
+
+    @cached_classproperty
+    def _weights(cls) -> dict[str, float]:
+        """A dictionary from keys to floating-point weights."""
+        weights_view = config["match"]["distance_weights"]
+        weights = {}
+        for key in weights_view.keys():
+            weights[key] = weights_view[key].as_number()
+        return weights
+
+    # Access the components and their aggregates.
+
+    @property
+    def distance(self) -> float:
+        """Return a weighted and normalized distance across all
+        penalties.
+        """
+        dist_max = self.max_distance
+        if dist_max:
+            return self.raw_distance / self.max_distance
+        return 0.0
+
+    @property
+    def max_distance(self) -> float:
+        """Return the maximum distance penalty (normalization factor)."""
+        dist_max = 0.0
+        for key, penalty in self._penalties.items():
+            dist_max += len(penalty) * self._weights[key]
+        return dist_max
+
+    @property
+    def raw_distance(self) -> float:
+        """Return the raw (denormalized) distance."""
+        dist_raw = 0.0
+        for key, penalty in self._penalties.items():
+            dist_raw += sum(penalty) * self._weights[key]
+        return dist_raw
+
+    def items(self) -> list[tuple[str, float]]:
+        """Return a list of (key, dist) pairs, with `dist` being the
+        weighted distance, sorted from highest to lowest. Does not
+        include penalties with a zero value.
+        """
+        list_ = []
+        for key in self._penalties:
+            dist = self[key]
+            if dist:
+                list_.append((key, dist))
+        # Convert distance into a negative float we can sort items in
+        # ascending order (for keys, when the penalty is equal) and
+        # still get the items with the biggest distance first.
+        return sorted(
+            list_, key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0])
+        )
+
+    def __hash__(self) -> int:
+        return id(self)
+
+    def __eq__(self, other) -> bool:
+        return self.distance == other
+
+    # Behave like a float.
+
+    def __lt__(self, other) -> bool:
+        return self.distance < other
+
+    def __float__(self) -> float:
+        return self.distance
+
+    def __sub__(self, other) -> float:
+        return self.distance - other
+
+    def __rsub__(self, other) -> float:
+        return other - self.distance
+
+    def __str__(self) -> str:
+        return f"{self.distance:.2f}"
+
+    # Behave like a dict.
+
+    def __getitem__(self, key) -> float:
+        """Returns the weighted distance for a named penalty."""
+        dist = sum(self._penalties[key]) * self._weights[key]
+        dist_max = self.max_distance
+        if dist_max:
+            return dist / dist_max
+        return 0.0
+
+    def __iter__(self) -> Iterator[tuple[str, float]]:
+        return iter(self.items())
+
+    def __len__(self) -> int:
+        return len(self.items())
+
+    def keys(self) -> list[str]:
+        return [key for key, _ in self.items()]
+
+    def update(self, dist: Distance):
+        """Adds all the distance penalties from `dist`."""
+        if not isinstance(dist, Distance):
+            raise ValueError(
+                "`dist` must be a Distance object, not {}".format(type(dist))
+            )
+        for key, penalties in dist._penalties.items():
+            self._penalties.setdefault(key, []).extend(penalties)
+
+    # Adding components.
+
+    def _eq(self, value1: re.Pattern[str] | Any, value2: Any) -> bool:
+        """Returns True if `value1` is equal to `value2`. `value1` may
+        be a compiled regular expression, in which case it will be
+        matched against `value2`.
+        """
+        if isinstance(value1, re.Pattern):
+            return bool(value1.match(value2))
+        return value1 == value2
+
+    def add(self, key: str, dist: float):
+        """Adds a distance penalty. `key` must correspond with a
+        configured weight setting. `dist` must be a float between 0.0
+        and 1.0, and will be added to any existing distance penalties
+        for the same key.
+        """
+        if not 0.0 <= dist <= 1.0:
+            raise ValueError(f"`dist` must be between 0.0 and 1.0, not {dist}")
+        self._penalties.setdefault(key, []).append(dist)
+
+    def add_equality(
+        self,
+        key: str,
+        value: Any,
+        options: list[Any] | tuple[Any, ...] | Any,
+    ):
+        """Adds a distance penalty of 1.0 if `value` doesn't match any
+        of the values in `options`. If an option is a compiled regular
+        expression, it will be considered equal if it matches against
+        `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        for opt in options:
+            if self._eq(opt, value):
+                dist = 0.0
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_expr(self, key: str, expr: bool):
+        """Adds a distance penalty of 1.0 if `expr` evaluates to True,
+        or 0.0.
+        """
+        if expr:
+            self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_number(self, key: str, number1: int, number2: int):
+        """Adds a distance penalty of 1.0 for each number of difference
+        between `number1` and `number2`, or 0.0 when there is no
+        difference. Use this when there is no upper limit on the
+        difference between the two numbers.
+        """
+        diff = abs(number1 - number2)
+        if diff:
+            for i in range(diff):
+                self.add(key, 1.0)
+        else:
+            self.add(key, 0.0)
+
+    def add_priority(
+        self,
+        key: str,
+        value: Any,
+        options: list[Any] | tuple[Any, ...] | Any,
+    ):
+        """Adds a distance penalty that corresponds to the position at
+        which `value` appears in `options`. A distance penalty of 0.0
+        for the first option, or 1.0 if there is no matching option. If
+        an option is a compiled regular expression, it will be
+        considered equal if it matches against `value`.
+        """
+        if not isinstance(options, (list, tuple)):
+            options = [options]
+        unit = 1.0 / (len(options) or 1)
+        for i, opt in enumerate(options):
+            if self._eq(opt, value):
+                dist = i * unit
+                break
+        else:
+            dist = 1.0
+        self.add(key, dist)
+
+    def add_ratio(
+        self,
+        key: str,
+        number1: int | float,
+        number2: int | float,
+    ):
+        """Adds a distance penalty for `number1` as a ratio of `number2`.
+        `number1` is bound at 0 and `number2`.
+        """
+        number = float(max(min(number1, number2), 0))
+        if number2:
+            dist = number / number2
+        else:
+            dist = 0.0
+        self.add(key, dist)
+
+    def add_string(self, key: str, str1: str | None, str2: str | None):
+        """Adds a distance penalty based on the edit distance between
+        `str1` and `str2`.
+        """
+        dist = string_dist(str1, str2)
+        self.add(key, dist)
+
+
+@cache
+def get_track_length_grace() -> float:
+    """Get cached grace period for track length matching."""
+    return config["match"]["track_length_grace"].as_number()
+
+
+@cache
+def get_track_length_max() -> float:
+    """Get cached maximum track length for track length matching."""
+    return config["match"]["track_length_max"].as_number()
+
+
+def track_index_changed(item: Item, track_info: TrackInfo) -> bool:
+    """Returns True if the item and track info index is different. Tolerates
+    per disc and per release numbering.
+    """
+    return item.track not in (track_info.medium_index, track_info.index)
+
+
+def track_distance(
+    item: Item,
+    track_info: TrackInfo,
+    incl_artist: bool = False,
+) -> Distance:
+    """Determines the significance of a track metadata change. Returns a
+    Distance object. `incl_artist` indicates that a distance component should
+    be included for the track artist (i.e., for various-artist releases).
+
+    ``track_length_grace`` and ``track_length_max`` configuration options are
+    cached because this function is called many times during the matching
+    process and their access comes with a performance overhead.
+    """
+    dist = Distance()
+
+    # Length.
+    if info_length := track_info.length:
+        diff = abs(item.length - info_length) - get_track_length_grace()
+        dist.add_ratio("track_length", diff, get_track_length_max())
+
+    # Title.
+    dist.add_string("track_title", item.title, track_info.title)
+
+    # Artist. Only check if there is actually an artist in the track data.
+    if (
+        incl_artist
+        and track_info.artist
+        and item.artist.lower() not in VA_ARTISTS
+    ):
+        dist.add_string("track_artist", item.artist, track_info.artist)
+
+    # Track index.
+    if track_info.index and item.track:
+        dist.add_expr("track_index", track_index_changed(item, track_info))
+
+    # Track ID.
+    if item.mb_trackid:
+        dist.add_expr("track_id", item.mb_trackid != track_info.track_id)
+
+    # Penalize mismatching disc numbers.
+    if track_info.medium and item.disc:
+        dist.add_expr("medium", item.disc != track_info.medium)
+
+    # Plugins.
+    dist.update(plugins.track_distance(item, track_info))
+
+    return dist
+
+
+def distance(
+    items: Sequence[Item],
+    album_info: AlbumInfo,
+    mapping: dict[Item, TrackInfo],
+) -> Distance:
+    """Determines how "significant" an album metadata change would be.
+    Returns a Distance object. `album_info` is an AlbumInfo object
+    reflecting the album to be compared. `items` is a sequence of all
+    Item objects that will be matched (order is not important).
+    `mapping` is a dictionary mapping Items to TrackInfo objects; the
+    keys are a subset of `items` and the values are a subset of
+    `album_info.tracks`.
+    """
+    likelies, _ = get_most_common_tags(items)
+
+    dist = Distance()
+
+    # Artist, if not various.
+    if not album_info.va:
+        dist.add_string("artist", likelies["artist"], album_info.artist)
+
+    # Album.
+    dist.add_string("album", likelies["album"], album_info.album)
+
+    preferred_config = config["match"]["preferred"]
+    # Current or preferred media.
+    if album_info.media:
+        # Preferred media options.
+        media_patterns: Sequence[str] = preferred_config["media"].as_str_seq()
+        options = [
+            re.compile(r"(\d+x)?(%s)" % pat, re.I) for pat in media_patterns
+        ]
+        if options:
+            dist.add_priority("media", album_info.media, options)
+        # Current media.
+        elif likelies["media"]:
+            dist.add_equality("media", album_info.media, likelies["media"])
+
+    # Mediums.
+    if likelies["disctotal"] and album_info.mediums:
+        dist.add_number("mediums", likelies["disctotal"], album_info.mediums)
+
+    # Prefer earliest release.
+    if album_info.year and preferred_config["original_year"]:
+        # Assume 1889 (earliest first gramophone discs) if we don't know the
+        # original year.
+        original = album_info.original_year or 1889
+        diff = abs(album_info.year - original)
+        diff_max = abs(datetime.date.today().year - original)
+        dist.add_ratio("year", diff, diff_max)
+    # Year.
+    elif likelies["year"] and album_info.year:
+        if likelies["year"] in (album_info.year, album_info.original_year):
+            # No penalty for matching release or original year.
+            dist.add("year", 0.0)
+        elif album_info.original_year:
+            # Prefer matchest closest to the release year.
+            diff = abs(likelies["year"] - album_info.year)
+            diff_max = abs(
+                datetime.date.today().year - album_info.original_year
+            )
+            dist.add_ratio("year", diff, diff_max)
+        else:
+            # Full penalty when there is no original year.
+            dist.add("year", 1.0)
+
+    # Preferred countries.
+    country_patterns: Sequence[str] = preferred_config["countries"].as_str_seq()
+    options = [re.compile(pat, re.I) for pat in country_patterns]
+    if album_info.country and options:
+        dist.add_priority("country", album_info.country, options)
+    # Country.
+    elif likelies["country"] and album_info.country:
+        dist.add_string("country", likelies["country"], album_info.country)
+
+    # Label.
+    if likelies["label"] and album_info.label:
+        dist.add_string("label", likelies["label"], album_info.label)
+
+    # Catalog number.
+    if likelies["catalognum"] and album_info.catalognum:
+        dist.add_string(
+            "catalognum", likelies["catalognum"], album_info.catalognum
+        )
+
+    # Disambiguation.
+    if likelies["albumdisambig"] and album_info.albumdisambig:
+        dist.add_string(
+            "albumdisambig", likelies["albumdisambig"], album_info.albumdisambig
+        )
+
+    # Album ID.
+    if likelies["mb_albumid"]:
+        dist.add_equality(
+            "album_id", likelies["mb_albumid"], album_info.album_id
+        )
+
+    # Tracks.
+    dist.tracks = {}
+    for item, track in mapping.items():
+        dist.tracks[track] = track_distance(item, track, album_info.va)
+        dist.add("tracks", dist.tracks[track].distance)
+
+    # Missing tracks.
+    for _ in range(len(album_info.tracks) - len(mapping)):
+        dist.add("missing_tracks", 1.0)
+
+    # Unmatched tracks.
+    for _ in range(len(items) - len(mapping)):
+        dist.add("unmatched_tracks", 1.0)
+
+    # Plugins.
+    dist.update(plugins.album_distance(items, album_info, mapping))
+
+    return dist
diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py
index 641a6cb4f..7cd215fc4 100644
--- a/beets/autotag/hooks.py
+++ b/beets/autotag/hooks.py
@@ -16,21 +16,15 @@
 
 from __future__ import annotations
 
-import re
-from functools import total_ordering
 from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar
 
-from jellyfish import levenshtein_distance
-from unidecode import unidecode
-
-from beets import config, logging
-from beets.util import as_string, cached_classproperty
+from beets import logging
 
 if TYPE_CHECKING:
-    from collections.abc import Iterator
-
     from beets.library import Item
 
+    from .distance import Distance
+
 log = logging.getLogger("beets")
 
 V = TypeVar("V")
@@ -254,328 +248,6 @@ class TrackInfo(AttrDict[Any]):
         return dupe
 
 
-# Candidate distance scoring.
-
-# Parameters for string distance function.
-# Words that can be moved to the end of a string using a comma.
-SD_END_WORDS = ["the", "a", "an"]
-# Reduced weights for certain portions of the string.
-SD_PATTERNS = [
-    (r"^the ", 0.1),
-    (r"[\[\(]?(ep|single)[\]\)]?", 0.0),
-    (r"[\[\(]?(featuring|feat|ft)[\. :].+", 0.1),
-    (r"\(.*?\)", 0.3),
-    (r"\[.*?\]", 0.3),
-    (r"(, )?(pt\.|part) .+", 0.2),
-]
-# Replacements to use before testing distance.
-SD_REPLACE = [
-    (r"&", "and"),
-]
-
-
-def _string_dist_basic(str1: str, str2: str) -> float:
-    """Basic edit distance between two strings, ignoring
-    non-alphanumeric characters and case. Comparisons are based on a
-    transliteration/lowering to ASCII characters. Normalized by string
-    length.
-    """
-    assert isinstance(str1, str)
-    assert isinstance(str2, str)
-    str1 = as_string(unidecode(str1))
-    str2 = as_string(unidecode(str2))
-    str1 = re.sub(r"[^a-z0-9]", "", str1.lower())
-    str2 = re.sub(r"[^a-z0-9]", "", str2.lower())
-    if not str1 and not str2:
-        return 0.0
-    return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2)))
-
-
-def string_dist(str1: str | None, str2: str | None) -> float:
-    """Gives an "intuitive" edit distance between two strings. This is
-    an edit distance, normalized by the string length, with a number of
-    tweaks that reflect intuition about text.
-    """
-    if str1 is None and str2 is None:
-        return 0.0
-    if str1 is None or str2 is None:
-        return 1.0
-
-    str1 = str1.lower()
-    str2 = str2.lower()
-
-    # Don't penalize strings that move certain words to the end. For
-    # example, "the something" should be considered equal to
-    # "something, the".
-    for word in SD_END_WORDS:
-        if str1.endswith(", %s" % word):
-            str1 = "{} {}".format(word, str1[: -len(word) - 2])
-        if str2.endswith(", %s" % word):
-            str2 = "{} {}".format(word, str2[: -len(word) - 2])
-
-    # Perform a couple of basic normalizing substitutions.
-    for pat, repl in SD_REPLACE:
-        str1 = re.sub(pat, repl, str1)
-        str2 = re.sub(pat, repl, str2)
-
-    # Change the weight for certain string portions matched by a set
-    # of regular expressions. We gradually change the strings and build
-    # up penalties associated with parts of the string that were
-    # deleted.
-    base_dist = _string_dist_basic(str1, str2)
-    penalty = 0.0
-    for pat, weight in SD_PATTERNS:
-        # Get strings that drop the pattern.
-        case_str1 = re.sub(pat, "", str1)
-        case_str2 = re.sub(pat, "", str2)
-
-        if case_str1 != str1 or case_str2 != str2:
-            # If the pattern was present (i.e., it is deleted in the
-            # the current case), recalculate the distances for the
-            # modified strings.
-            case_dist = _string_dist_basic(case_str1, case_str2)
-            case_delta = max(0.0, base_dist - case_dist)
-            if case_delta == 0.0:
-                continue
-
-            # Shift our baseline strings down (to avoid rematching the
-            # same part of the string) and add a scaled distance
-            # amount to the penalties.
-            str1 = case_str1
-            str2 = case_str2
-            base_dist = case_dist
-            penalty += weight * case_delta
-
-    return base_dist + penalty
-
-
-@total_ordering
-class Distance:
-    """Keeps track of multiple distance penalties. Provides a single
-    weighted distance for all penalties as well as a weighted distance
-    for each individual penalty.
-    """
-
-    def __init__(self) -> None:
-        self._penalties: dict[str, list[float]] = {}
-        self.tracks: dict[TrackInfo, Distance] = {}
-
-    @cached_classproperty
-    def _weights(cls) -> dict[str, float]:
-        """A dictionary from keys to floating-point weights."""
-        weights_view = config["match"]["distance_weights"]
-        weights = {}
-        for key in weights_view.keys():
-            weights[key] = weights_view[key].as_number()
-        return weights
-
-    # Access the components and their aggregates.
-
-    @property
-    def distance(self) -> float:
-        """Return a weighted and normalized distance across all
-        penalties.
-        """
-        dist_max = self.max_distance
-        if dist_max:
-            return self.raw_distance / self.max_distance
-        return 0.0
-
-    @property
-    def max_distance(self) -> float:
-        """Return the maximum distance penalty (normalization factor)."""
-        dist_max = 0.0
-        for key, penalty in self._penalties.items():
-            dist_max += len(penalty) * self._weights[key]
-        return dist_max
-
-    @property
-    def raw_distance(self) -> float:
-        """Return the raw (denormalized) distance."""
-        dist_raw = 0.0
-        for key, penalty in self._penalties.items():
-            dist_raw += sum(penalty) * self._weights[key]
-        return dist_raw
-
-    def items(self) -> list[tuple[str, float]]:
-        """Return a list of (key, dist) pairs, with `dist` being the
-        weighted distance, sorted from highest to lowest. Does not
-        include penalties with a zero value.
-        """
-        list_ = []
-        for key in self._penalties:
-            dist = self[key]
-            if dist:
-                list_.append((key, dist))
-        # Convert distance into a negative float we can sort items in
-        # ascending order (for keys, when the penalty is equal) and
-        # still get the items with the biggest distance first.
-        return sorted(
-            list_, key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0])
-        )
-
-    def __hash__(self) -> int:
-        return id(self)
-
-    def __eq__(self, other) -> bool:
-        return self.distance == other
-
-    # Behave like a float.
-
-    def __lt__(self, other) -> bool:
-        return self.distance < other
-
-    def __float__(self) -> float:
-        return self.distance
-
-    def __sub__(self, other) -> float:
-        return self.distance - other
-
-    def __rsub__(self, other) -> float:
-        return other - self.distance
-
-    def __str__(self) -> str:
-        return f"{self.distance:.2f}"
-
-    # Behave like a dict.
-
-    def __getitem__(self, key) -> float:
-        """Returns the weighted distance for a named penalty."""
-        dist = sum(self._penalties[key]) * self._weights[key]
-        dist_max = self.max_distance
-        if dist_max:
-            return dist / dist_max
-        return 0.0
-
-    def __iter__(self) -> Iterator[tuple[str, float]]:
-        return iter(self.items())
-
-    def __len__(self) -> int:
-        return len(self.items())
-
-    def keys(self) -> list[str]:
-        return [key for key, _ in self.items()]
-
-    def update(self, dist: Distance):
-        """Adds all the distance penalties from `dist`."""
-        if not isinstance(dist, Distance):
-            raise ValueError(
-                "`dist` must be a Distance object, not {}".format(type(dist))
-            )
-        for key, penalties in dist._penalties.items():
-            self._penalties.setdefault(key, []).extend(penalties)
-
-    # Adding components.
-
-    def _eq(self, value1: re.Pattern[str] | Any, value2: Any) -> bool:
-        """Returns True if `value1` is equal to `value2`. `value1` may
-        be a compiled regular expression, in which case it will be
-        matched against `value2`.
-        """
-        if isinstance(value1, re.Pattern):
-            return bool(value1.match(value2))
-        return value1 == value2
-
-    def add(self, key: str, dist: float):
-        """Adds a distance penalty. `key` must correspond with a
-        configured weight setting. `dist` must be a float between 0.0
-        and 1.0, and will be added to any existing distance penalties
-        for the same key.
-        """
-        if not 0.0 <= dist <= 1.0:
-            raise ValueError(f"`dist` must be between 0.0 and 1.0, not {dist}")
-        self._penalties.setdefault(key, []).append(dist)
-
-    def add_equality(
-        self,
-        key: str,
-        value: Any,
-        options: list[Any] | tuple[Any, ...] | Any,
-    ):
-        """Adds a distance penalty of 1.0 if `value` doesn't match any
-        of the values in `options`. If an option is a compiled regular
-        expression, it will be considered equal if it matches against
-        `value`.
-        """
-        if not isinstance(options, (list, tuple)):
-            options = [options]
-        for opt in options:
-            if self._eq(opt, value):
-                dist = 0.0
-                break
-        else:
-            dist = 1.0
-        self.add(key, dist)
-
-    def add_expr(self, key: str, expr: bool):
-        """Adds a distance penalty of 1.0 if `expr` evaluates to True,
-        or 0.0.
-        """
-        if expr:
-            self.add(key, 1.0)
-        else:
-            self.add(key, 0.0)
-
-    def add_number(self, key: str, number1: int, number2: int):
-        """Adds a distance penalty of 1.0 for each number of difference
-        between `number1` and `number2`, or 0.0 when there is no
-        difference. Use this when there is no upper limit on the
-        difference between the two numbers.
-        """
-        diff = abs(number1 - number2)
-        if diff:
-            for i in range(diff):
-                self.add(key, 1.0)
-        else:
-            self.add(key, 0.0)
-
-    def add_priority(
-        self,
-        key: str,
-        value: Any,
-        options: list[Any] | tuple[Any, ...] | Any,
-    ):
-        """Adds a distance penalty that corresponds to the position at
-        which `value` appears in `options`. A distance penalty of 0.0
-        for the first option, or 1.0 if there is no matching option. If
-        an option is a compiled regular expression, it will be
-        considered equal if it matches against `value`.
-        """
-        if not isinstance(options, (list, tuple)):
-            options = [options]
-        unit = 1.0 / (len(options) or 1)
-        for i, opt in enumerate(options):
-            if self._eq(opt, value):
-                dist = i * unit
-                break
-        else:
-            dist = 1.0
-        self.add(key, dist)
-
-    def add_ratio(
-        self,
-        key: str,
-        number1: int | float,
-        number2: int | float,
-    ):
-        """Adds a distance penalty for `number1` as a ratio of `number2`.
-        `number1` is bound at 0 and `number2`.
-        """
-        number = float(max(min(number1, number2), 0))
-        if number2:
-            dist = number / number2
-        else:
-            dist = 0.0
-        self.add(key, dist)
-
-    def add_string(self, key: str, str1: str | None, str2: str | None):
-        """Adds a distance penalty based on the edit distance between
-        `str1` and `str2`.
-        """
-        dist = string_dist(str1, str2)
-        self.add(key, dist)
-
-
 # Structures that compose all the information for a candidate match.
 
 
diff --git a/beets/autotag/match.py b/beets/autotag/match.py
index 4dc4c1052..64572cf3b 100644
--- a/beets/autotag/match.py
+++ b/beets/autotag/match.py
@@ -18,37 +18,23 @@ releases and tracks.
 
 from __future__ import annotations
 
-import datetime
-import re
 from enum import IntEnum
-from functools import cache
 from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar
 
 import lap
 import numpy as np
 
 from beets import config, logging, plugins
-from beets.autotag import (
-    AlbumInfo,
-    AlbumMatch,
-    Distance,
-    TrackInfo,
-    TrackMatch,
-    hooks,
-)
+from beets.autotag import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch, hooks
 from beets.util import get_most_common_tags
 
+from .distance import VA_ARTISTS, distance, track_distance
+
 if TYPE_CHECKING:
     from collections.abc import Iterable, Sequence
 
     from beets.library import Item
 
-# Artist signals that indicate "various artists". These are used at the
-# album level to determine whether a given release is likely a VA
-# release and also on the track level to to remove the penalty for
-# differing artists.
-VA_ARTISTS = ("", "various artists", "various", "va", "unknown")
-
 # Global logger.
 log = logging.getLogger("beets")
 
@@ -112,191 +98,6 @@ def assign_items(
     return mapping, extra_items, extra_tracks
 
 
-def track_index_changed(item: Item, track_info: TrackInfo) -> bool:
-    """Returns True if the item and track info index is different. Tolerates
-    per disc and per release numbering.
-    """
-    return item.track not in (track_info.medium_index, track_info.index)
-
-
-@cache
-def get_track_length_grace() -> float:
-    """Get cached grace period for track length matching."""
-    return config["match"]["track_length_grace"].as_number()
-
-
-@cache
-def get_track_length_max() -> float:
-    """Get cached maximum track length for track length matching."""
-    return config["match"]["track_length_max"].as_number()
-
-
-def track_distance(
-    item: Item,
-    track_info: TrackInfo,
-    incl_artist: bool = False,
-) -> Distance:
-    """Determines the significance of a track metadata change. Returns a
-    Distance object. `incl_artist` indicates that a distance component should
-    be included for the track artist (i.e., for various-artist releases).
-
-    ``track_length_grace`` and ``track_length_max`` configuration options are
-    cached because this function is called many times during the matching
-    process and their access comes with a performance overhead.
-    """
-    dist = hooks.Distance()
-
-    # Length.
-    if info_length := track_info.length:
-        diff = abs(item.length - info_length) - get_track_length_grace()
-        dist.add_ratio("track_length", diff, get_track_length_max())
-
-    # Title.
-    dist.add_string("track_title", item.title, track_info.title)
-
-    # Artist. Only check if there is actually an artist in the track data.
-    if (
-        incl_artist
-        and track_info.artist
-        and item.artist.lower() not in VA_ARTISTS
-    ):
-        dist.add_string("track_artist", item.artist, track_info.artist)
-
-    # Track index.
-    if track_info.index and item.track:
-        dist.add_expr("track_index", track_index_changed(item, track_info))
-
-    # Track ID.
-    if item.mb_trackid:
-        dist.add_expr("track_id", item.mb_trackid != track_info.track_id)
-
-    # Penalize mismatching disc numbers.
-    if track_info.medium and item.disc:
-        dist.add_expr("medium", item.disc != track_info.medium)
-
-    # Plugins.
-    dist.update(plugins.track_distance(item, track_info))
-
-    return dist
-
-
-def distance(
-    items: Sequence[Item],
-    album_info: AlbumInfo,
-    mapping: dict[Item, TrackInfo],
-) -> Distance:
-    """Determines how "significant" an album metadata change would be.
-    Returns a Distance object. `album_info` is an AlbumInfo object
-    reflecting the album to be compared. `items` is a sequence of all
-    Item objects that will be matched (order is not important).
-    `mapping` is a dictionary mapping Items to TrackInfo objects; the
-    keys are a subset of `items` and the values are a subset of
-    `album_info.tracks`.
-    """
-    likelies, _ = get_most_common_tags(items)
-
-    dist = hooks.Distance()
-
-    # Artist, if not various.
-    if not album_info.va:
-        dist.add_string("artist", likelies["artist"], album_info.artist)
-
-    # Album.
-    dist.add_string("album", likelies["album"], album_info.album)
-
-    preferred_config = config["match"]["preferred"]
-    # Current or preferred media.
-    if album_info.media:
-        # Preferred media options.
-        media_patterns: Sequence[str] = preferred_config["media"].as_str_seq()
-        options = [
-            re.compile(r"(\d+x)?(%s)" % pat, re.I) for pat in media_patterns
-        ]
-        if options:
-            dist.add_priority("media", album_info.media, options)
-        # Current media.
-        elif likelies["media"]:
-            dist.add_equality("media", album_info.media, likelies["media"])
-
-    # Mediums.
-    if likelies["disctotal"] and album_info.mediums:
-        dist.add_number("mediums", likelies["disctotal"], album_info.mediums)
-
-    # Prefer earliest release.
-    if album_info.year and preferred_config["original_year"]:
-        # Assume 1889 (earliest first gramophone discs) if we don't know the
-        # original year.
-        original = album_info.original_year or 1889
-        diff = abs(album_info.year - original)
-        diff_max = abs(datetime.date.today().year - original)
-        dist.add_ratio("year", diff, diff_max)
-    # Year.
-    elif likelies["year"] and album_info.year:
-        if likelies["year"] in (album_info.year, album_info.original_year):
-            # No penalty for matching release or original year.
-            dist.add("year", 0.0)
-        elif album_info.original_year:
-            # Prefer matchest closest to the release year.
-            diff = abs(likelies["year"] - album_info.year)
-            diff_max = abs(
-                datetime.date.today().year - album_info.original_year
-            )
-            dist.add_ratio("year", diff, diff_max)
-        else:
-            # Full penalty when there is no original year.
-            dist.add("year", 1.0)
-
-    # Preferred countries.
-    country_patterns: Sequence[str] = preferred_config["countries"].as_str_seq()
-    options = [re.compile(pat, re.I) for pat in country_patterns]
-    if album_info.country and options:
-        dist.add_priority("country", album_info.country, options)
-    # Country.
-    elif likelies["country"] and album_info.country:
-        dist.add_string("country", likelies["country"], album_info.country)
-
-    # Label.
-    if likelies["label"] and album_info.label:
-        dist.add_string("label", likelies["label"], album_info.label)
-
-    # Catalog number.
-    if likelies["catalognum"] and album_info.catalognum:
-        dist.add_string(
-            "catalognum", likelies["catalognum"], album_info.catalognum
-        )
-
-    # Disambiguation.
-    if likelies["albumdisambig"] and album_info.albumdisambig:
-        dist.add_string(
-            "albumdisambig", likelies["albumdisambig"], album_info.albumdisambig
-        )
-
-    # Album ID.
-    if likelies["mb_albumid"]:
-        dist.add_equality(
-            "album_id", likelies["mb_albumid"], album_info.album_id
-        )
-
-    # Tracks.
-    dist.tracks = {}
-    for item, track in mapping.items():
-        dist.tracks[track] = track_distance(item, track, album_info.va)
-        dist.add("tracks", dist.tracks[track].distance)
-
-    # Missing tracks.
-    for _ in range(len(album_info.tracks) - len(mapping)):
-        dist.add("missing_tracks", 1.0)
-
-    # Unmatched tracks.
-    for _ in range(len(items) - len(mapping)):
-        dist.add("unmatched_tracks", 1.0)
-
-    # Plugins.
-    dist.update(plugins.album_distance(items, album_info, mapping))
-
-    return dist
-
-
 def match_by_id(items: Iterable[Item]) -> AlbumInfo | None:
     """If the items are tagged with an external source ID, return an
     AlbumInfo object for the corresponding album. Otherwise, returns
diff --git a/beets/plugins.py b/beets/plugins.py
index 6d3a8447e..cd66435b5 100644
--- a/beets/plugins.py
+++ b/beets/plugins.py
@@ -37,6 +37,7 @@ import mediafile
 
 import beets
 from beets import logging
+from beets.autotag.distance import Distance
 from beets.util.id_extractors import extract_release_id
 
 if TYPE_CHECKING:
@@ -53,7 +54,7 @@ if TYPE_CHECKING:
 
     from confuse import ConfigView
 
-    from beets.autotag import AlbumInfo, Distance, TrackInfo
+    from beets.autotag import AlbumInfo, TrackInfo
     from beets.dbcore import Query
     from beets.dbcore.db import FieldQueryType
     from beets.dbcore.types import Type
@@ -224,8 +225,6 @@ class BeetsPlugin:
         """Should return a Distance object to be added to the
         distance for every track comparison.
         """
-        from beets.autotag.hooks import Distance
-
         return Distance()
 
     def album_distance(
@@ -237,8 +236,6 @@ class BeetsPlugin:
         """Should return a Distance object to be added to the
         distance for every album-level comparison.
         """
-        from beets.autotag.hooks import Distance
-
         return Distance()
 
     def candidates(
@@ -458,8 +455,6 @@ def track_distance(item: Item, info: TrackInfo) -> Distance:
     """Gets the track distance calculated by all loaded plugins.
     Returns a Distance object.
     """
-    from beets.autotag.hooks import Distance
-
     dist = Distance()
     for plugin in find_plugins():
         dist.update(plugin.track_distance(item, info))
@@ -472,8 +467,6 @@ def album_distance(
     mapping: dict[Item, TrackInfo],
 ) -> Distance:
     """Returns the album distance calculated by plugins."""
-    from beets.autotag.hooks import Distance
-
     dist = Distance()
     for plugin in find_plugins():
         dist.update(plugin.album_distance(items, album_info, mapping))
@@ -660,8 +653,6 @@ def get_distance(
     """Returns the ``data_source`` weight and the maximum source weight
     for albums or individual tracks.
     """
-    from beets.autotag.hooks import Distance
-
     dist = Distance()
     if info.data_source == data_source:
         dist.add("source", config["source_weight"].as_number())
diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py
index 518a41776..5c718154b 100644
--- a/beetsplug/chroma.py
+++ b/beetsplug/chroma.py
@@ -24,7 +24,7 @@ import acoustid
 import confuse
 
 from beets import config, plugins, ui, util
-from beets.autotag.hooks import Distance
+from beets.autotag.distance import Distance
 from beetsplug.musicbrainz import MusicBrainzPlugin
 
 API_KEY = "1vOwZtEn"
diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index 696f1d1ac..2408f3498 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -38,7 +38,8 @@ from typing_extensions import TypedDict
 import beets
 import beets.ui
 from beets import config
-from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist
+from beets.autotag.distance import string_dist
+from beets.autotag.hooks import AlbumInfo, TrackInfo
 from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
 from beets.util.id_extractors import extract_release_id
 
diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index e2c0c7fd2..f1c40ab24 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -38,7 +38,7 @@ from unidecode import unidecode
 
 import beets
 from beets import plugins, ui
-from beets.autotag.hooks import string_dist
+from beets.autotag.distance import string_dist
 from beets.util.config import sanitize_choices
 
 if TYPE_CHECKING:
diff --git a/test/autotag/test_distance.py b/test/autotag/test_distance.py
new file mode 100644
index 000000000..ec00ebcdf
--- /dev/null
+++ b/test/autotag/test_distance.py
@@ -0,0 +1,476 @@
+import re
+import unittest
+
+from beets import config
+from beets.autotag import AlbumInfo, TrackInfo, match
+from beets.autotag.distance import Distance, string_dist
+from beets.library import Item
+from beets.test.helper import BeetsTestCase
+
+
+def _make_item(title, track, artist="some artist"):
+    return Item(
+        title=title,
+        track=track,
+        artist=artist,
+        album="some album",
+        length=1,
+        mb_trackid="",
+        mb_albumid="",
+        mb_artistid="",
+    )
+
+
+def _make_trackinfo():
+    return [
+        TrackInfo(
+            title="one", track_id=None, artist="some artist", length=1, index=1
+        ),
+        TrackInfo(
+            title="two", track_id=None, artist="some artist", length=1, index=2
+        ),
+        TrackInfo(
+            title="three",
+            track_id=None,
+            artist="some artist",
+            length=1,
+            index=3,
+        ),
+    ]
+
+
+def _clear_weights():
+    """Hack around the lazy descriptor used to cache weights for
+    Distance calculations.
+    """
+    Distance.__dict__["_weights"].cache = {}
+
+
+class DistanceTest(BeetsTestCase):
+    def tearDown(self):
+        super().tearDown()
+        _clear_weights()
+
+    def test_add(self):
+        dist = Distance()
+        dist.add("add", 1.0)
+        assert dist._penalties == {"add": [1.0]}
+
+    def test_add_equality(self):
+        dist = Distance()
+        dist.add_equality("equality", "ghi", ["abc", "def", "ghi"])
+        assert dist._penalties["equality"] == [0.0]
+
+        dist.add_equality("equality", "xyz", ["abc", "def", "ghi"])
+        assert dist._penalties["equality"] == [0.0, 1.0]
+
+        dist.add_equality("equality", "abc", re.compile(r"ABC", re.I))
+        assert dist._penalties["equality"] == [0.0, 1.0, 0.0]
+
+    def test_add_expr(self):
+        dist = Distance()
+        dist.add_expr("expr", True)
+        assert dist._penalties["expr"] == [1.0]
+
+        dist.add_expr("expr", False)
+        assert dist._penalties["expr"] == [1.0, 0.0]
+
+    def test_add_number(self):
+        dist = Distance()
+        # Add a full penalty for each number of difference between two numbers.
+
+        dist.add_number("number", 1, 1)
+        assert dist._penalties["number"] == [0.0]
+
+        dist.add_number("number", 1, 2)
+        assert dist._penalties["number"] == [0.0, 1.0]
+
+        dist.add_number("number", 2, 1)
+        assert dist._penalties["number"] == [0.0, 1.0, 1.0]
+
+        dist.add_number("number", -1, 2)
+        assert dist._penalties["number"] == [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+
+    def test_add_priority(self):
+        dist = Distance()
+        dist.add_priority("priority", "abc", "abc")
+        assert dist._penalties["priority"] == [0.0]
+
+        dist.add_priority("priority", "def", ["abc", "def"])
+        assert dist._penalties["priority"] == [0.0, 0.5]
+
+        dist.add_priority(
+            "priority", "gh", ["ab", "cd", "ef", re.compile("GH", re.I)]
+        )
+        assert dist._penalties["priority"] == [0.0, 0.5, 0.75]
+
+        dist.add_priority("priority", "xyz", ["abc", "def"])
+        assert dist._penalties["priority"] == [0.0, 0.5, 0.75, 1.0]
+
+    def test_add_ratio(self):
+        dist = Distance()
+        dist.add_ratio("ratio", 25, 100)
+        assert dist._penalties["ratio"] == [0.25]
+
+        dist.add_ratio("ratio", 10, 5)
+        assert dist._penalties["ratio"] == [0.25, 1.0]
+
+        dist.add_ratio("ratio", -5, 5)
+        assert dist._penalties["ratio"] == [0.25, 1.0, 0.0]
+
+        dist.add_ratio("ratio", 5, 0)
+        assert dist._penalties["ratio"] == [0.25, 1.0, 0.0, 0.0]
+
+    def test_add_string(self):
+        dist = Distance()
+        sdist = string_dist("abc", "bcd")
+        dist.add_string("string", "abc", "bcd")
+        assert dist._penalties["string"] == [sdist]
+        assert dist._penalties["string"] != [0]
+
+    def test_add_string_none(self):
+        dist = Distance()
+        dist.add_string("string", None, "string")
+        assert dist._penalties["string"] == [1]
+
+    def test_add_string_both_none(self):
+        dist = Distance()
+        dist.add_string("string", None, None)
+        assert dist._penalties["string"] == [0]
+
+    def test_distance(self):
+        config["match"]["distance_weights"]["album"] = 2.0
+        config["match"]["distance_weights"]["medium"] = 1.0
+        _clear_weights()
+
+        dist = Distance()
+        dist.add("album", 0.5)
+        dist.add("media", 0.25)
+        dist.add("media", 0.75)
+        assert dist.distance == 0.5
+
+        # __getitem__()
+        assert dist["album"] == 0.25
+        assert dist["media"] == 0.25
+
+    def test_max_distance(self):
+        config["match"]["distance_weights"]["album"] = 3.0
+        config["match"]["distance_weights"]["medium"] = 1.0
+        _clear_weights()
+
+        dist = Distance()
+        dist.add("album", 0.5)
+        dist.add("medium", 0.0)
+        dist.add("medium", 0.0)
+        assert dist.max_distance == 5.0
+
+    def test_operators(self):
+        config["match"]["distance_weights"]["source"] = 1.0
+        config["match"]["distance_weights"]["album"] = 2.0
+        config["match"]["distance_weights"]["medium"] = 1.0
+        _clear_weights()
+
+        dist = Distance()
+        dist.add("source", 0.0)
+        dist.add("album", 0.5)
+        dist.add("medium", 0.25)
+        dist.add("medium", 0.75)
+        assert len(dist) == 2
+        assert list(dist) == [("album", 0.2), ("medium", 0.2)]
+        assert dist == 0.4
+        assert dist < 1.0
+        assert dist > 0.0
+        assert dist - 0.4 == 0.0
+        assert 0.4 - dist == 0.0
+        assert float(dist) == 0.4
+
+    def test_raw_distance(self):
+        config["match"]["distance_weights"]["album"] = 3.0
+        config["match"]["distance_weights"]["medium"] = 1.0
+        _clear_weights()
+
+        dist = Distance()
+        dist.add("album", 0.5)
+        dist.add("medium", 0.25)
+        dist.add("medium", 0.5)
+        assert dist.raw_distance == 2.25
+
+    def test_items(self):
+        config["match"]["distance_weights"]["album"] = 4.0
+        config["match"]["distance_weights"]["medium"] = 2.0
+        _clear_weights()
+
+        dist = Distance()
+        dist.add("album", 0.1875)
+        dist.add("medium", 0.75)
+        assert dist.items() == [("medium", 0.25), ("album", 0.125)]
+
+        # Sort by key if distance is equal.
+        dist = Distance()
+        dist.add("album", 0.375)
+        dist.add("medium", 0.75)
+        assert dist.items() == [("album", 0.25), ("medium", 0.25)]
+
+    def test_update(self):
+        dist1 = Distance()
+        dist1.add("album", 0.5)
+        dist1.add("media", 1.0)
+
+        dist2 = Distance()
+        dist2.add("album", 0.75)
+        dist2.add("album", 0.25)
+        dist2.add("media", 0.05)
+
+        dist1.update(dist2)
+
+        assert dist1._penalties == {
+            "album": [0.5, 0.75, 0.25],
+            "media": [1.0, 0.05],
+        }
+
+
+class TrackDistanceTest(BeetsTestCase):
+    def test_identical_tracks(self):
+        item = _make_item("one", 1)
+        info = _make_trackinfo()[0]
+        dist = match.track_distance(item, info, incl_artist=True)
+        assert dist == 0.0
+
+    def test_different_title(self):
+        item = _make_item("foo", 1)
+        info = _make_trackinfo()[0]
+        dist = match.track_distance(item, info, incl_artist=True)
+        assert dist != 0.0
+
+    def test_different_artist(self):
+        item = _make_item("one", 1)
+        item.artist = "foo"
+        info = _make_trackinfo()[0]
+        dist = match.track_distance(item, info, incl_artist=True)
+        assert dist != 0.0
+
+    def test_various_artists_tolerated(self):
+        item = _make_item("one", 1)
+        item.artist = "Various Artists"
+        info = _make_trackinfo()[0]
+        dist = match.track_distance(item, info, incl_artist=True)
+        assert dist == 0.0
+
+
+class AlbumDistanceTest(BeetsTestCase):
+    def _mapping(self, items, info):
+        out = {}
+        for i, t in zip(items, info.tracks):
+            out[i] = t
+        return out
+
+    def _dist(self, items, info):
+        return match.distance(items, info, self._mapping(items, info))
+
+    def test_identical_albums(self):
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("two", 2))
+        items.append(_make_item("three", 3))
+        info = AlbumInfo(
+            artist="some artist",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=False,
+        )
+        assert self._dist(items, info) == 0
+
+    def test_incomplete_album(self):
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("three", 3))
+        info = AlbumInfo(
+            artist="some artist",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=False,
+        )
+        dist = self._dist(items, info)
+        assert dist != 0
+        # Make sure the distance is not too great
+        assert dist < 0.2
+
+    def test_global_artists_differ(self):
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("two", 2))
+        items.append(_make_item("three", 3))
+        info = AlbumInfo(
+            artist="someone else",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=False,
+        )
+        assert self._dist(items, info) != 0
+
+    def test_comp_track_artists_match(self):
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("two", 2))
+        items.append(_make_item("three", 3))
+        info = AlbumInfo(
+            artist="should be ignored",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=True,
+        )
+        assert self._dist(items, info) == 0
+
+    def test_comp_no_track_artists(self):
+        # Some VA releases don't have track artists (incomplete metadata).
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("two", 2))
+        items.append(_make_item("three", 3))
+        info = AlbumInfo(
+            artist="should be ignored",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=True,
+        )
+        info.tracks[0].artist = None
+        info.tracks[1].artist = None
+        info.tracks[2].artist = None
+        assert self._dist(items, info) == 0
+
+    def test_comp_track_artists_do_not_match(self):
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("two", 2, "someone else"))
+        items.append(_make_item("three", 3))
+        info = AlbumInfo(
+            artist="some artist",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=True,
+        )
+        assert self._dist(items, info) != 0
+
+    def test_tracks_out_of_order(self):
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("three", 2))
+        items.append(_make_item("two", 3))
+        info = AlbumInfo(
+            artist="some artist",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=False,
+        )
+        dist = self._dist(items, info)
+        assert 0 < dist < 0.2
+
+    def test_two_medium_release(self):
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("two", 2))
+        items.append(_make_item("three", 3))
+        info = AlbumInfo(
+            artist="some artist",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=False,
+        )
+        info.tracks[0].medium_index = 1
+        info.tracks[1].medium_index = 2
+        info.tracks[2].medium_index = 1
+        dist = self._dist(items, info)
+        assert dist == 0
+
+    def test_per_medium_track_numbers(self):
+        items = []
+        items.append(_make_item("one", 1))
+        items.append(_make_item("two", 2))
+        items.append(_make_item("three", 1))
+        info = AlbumInfo(
+            artist="some artist",
+            album="some album",
+            tracks=_make_trackinfo(),
+            va=False,
+        )
+        info.tracks[0].medium_index = 1
+        info.tracks[1].medium_index = 2
+        info.tracks[2].medium_index = 1
+        dist = self._dist(items, info)
+        assert dist == 0
+
+
+class StringDistanceTest(unittest.TestCase):
+    def test_equal_strings(self):
+        dist = string_dist("Some String", "Some String")
+        assert dist == 0.0
+
+    def test_different_strings(self):
+        dist = string_dist("Some String", "Totally Different")
+        assert dist != 0.0
+
+    def test_punctuation_ignored(self):
+        dist = string_dist("Some String", "Some.String!")
+        assert dist == 0.0
+
+    def test_case_ignored(self):
+        dist = string_dist("Some String", "sOME sTring")
+        assert dist == 0.0
+
+    def test_leading_the_has_lower_weight(self):
+        dist1 = string_dist("XXX Band Name", "Band Name")
+        dist2 = string_dist("The Band Name", "Band Name")
+        assert dist2 < dist1
+
+    def test_parens_have_lower_weight(self):
+        dist1 = string_dist("One .Two.", "One")
+        dist2 = string_dist("One (Two)", "One")
+        assert dist2 < dist1
+
+    def test_brackets_have_lower_weight(self):
+        dist1 = string_dist("One .Two.", "One")
+        dist2 = string_dist("One [Two]", "One")
+        assert dist2 < dist1
+
+    def test_ep_label_has_zero_weight(self):
+        dist = string_dist("My Song (EP)", "My Song")
+        assert dist == 0.0
+
+    def test_featured_has_lower_weight(self):
+        dist1 = string_dist("My Song blah Someone", "My Song")
+        dist2 = string_dist("My Song feat Someone", "My Song")
+        assert dist2 < dist1
+
+    def test_postfix_the(self):
+        dist = string_dist("The Song Title", "Song Title, The")
+        assert dist == 0.0
+
+    def test_postfix_a(self):
+        dist = string_dist("A Song Title", "Song Title, A")
+        assert dist == 0.0
+
+    def test_postfix_an(self):
+        dist = string_dist("An Album Title", "Album Title, An")
+        assert dist == 0.0
+
+    def test_empty_strings(self):
+        dist = string_dist("", "")
+        assert dist == 0.0
+
+    def test_solo_pattern(self):
+        # Just make sure these don't crash.
+        string_dist("The ", "")
+        string_dist("(EP)", "(EP)")
+        string_dist(", An", "")
+
+    def test_heuristic_does_not_harm_distance(self):
+        dist = string_dist("Untitled", "[Untitled]")
+        assert dist == 0.0
+
+    def test_ampersand_expansion(self):
+        dist = string_dist("And", "&")
+        assert dist == 0.0
+
+    def test_accented_characters(self):
+        dist = string_dist("\xe9\xe1\xf1", "ean")
+        assert dist == 0.0
diff --git a/test/test_autotag.py b/test/test_autotag.py
index bd4205806..8d467e5ed 100644
--- a/test/test_autotag.py
+++ b/test/test_autotag.py
@@ -14,410 +14,14 @@
 
 """Tests for autotagging functionality."""
 
-import re
-import unittest
-
 import pytest
 
 from beets import autotag, config
 from beets.autotag import AlbumInfo, TrackInfo, correct_list_fields, match
-from beets.autotag.hooks import Distance, string_dist
 from beets.library import Item
 from beets.test.helper import BeetsTestCase, ConfigMixin
 
 
-def _make_item(title, track, artist="some artist"):
-    return Item(
-        title=title,
-        track=track,
-        artist=artist,
-        album="some album",
-        length=1,
-        mb_trackid="",
-        mb_albumid="",
-        mb_artistid="",
-    )
-
-
-def _make_trackinfo():
-    return [
-        TrackInfo(
-            title="one", track_id=None, artist="some artist", length=1, index=1
-        ),
-        TrackInfo(
-            title="two", track_id=None, artist="some artist", length=1, index=2
-        ),
-        TrackInfo(
-            title="three",
-            track_id=None,
-            artist="some artist",
-            length=1,
-            index=3,
-        ),
-    ]
-
-
-def _clear_weights():
-    """Hack around the lazy descriptor used to cache weights for
-    Distance calculations.
-    """
-    Distance.__dict__["_weights"].cache = {}
-
-
-class DistanceTest(BeetsTestCase):
-    def tearDown(self):
-        super().tearDown()
-        _clear_weights()
-
-    def test_add(self):
-        dist = Distance()
-        dist.add("add", 1.0)
-        assert dist._penalties == {"add": [1.0]}
-
-    def test_add_equality(self):
-        dist = Distance()
-        dist.add_equality("equality", "ghi", ["abc", "def", "ghi"])
-        assert dist._penalties["equality"] == [0.0]
-
-        dist.add_equality("equality", "xyz", ["abc", "def", "ghi"])
-        assert dist._penalties["equality"] == [0.0, 1.0]
-
-        dist.add_equality("equality", "abc", re.compile(r"ABC", re.I))
-        assert dist._penalties["equality"] == [0.0, 1.0, 0.0]
-
-    def test_add_expr(self):
-        dist = Distance()
-        dist.add_expr("expr", True)
-        assert dist._penalties["expr"] == [1.0]
-
-        dist.add_expr("expr", False)
-        assert dist._penalties["expr"] == [1.0, 0.0]
-
-    def test_add_number(self):
-        dist = Distance()
-        # Add a full penalty for each number of difference between two numbers.
-
-        dist.add_number("number", 1, 1)
-        assert dist._penalties["number"] == [0.0]
-
-        dist.add_number("number", 1, 2)
-        assert dist._penalties["number"] == [0.0, 1.0]
-
-        dist.add_number("number", 2, 1)
-        assert dist._penalties["number"] == [0.0, 1.0, 1.0]
-
-        dist.add_number("number", -1, 2)
-        assert dist._penalties["number"] == [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]
-
-    def test_add_priority(self):
-        dist = Distance()
-        dist.add_priority("priority", "abc", "abc")
-        assert dist._penalties["priority"] == [0.0]
-
-        dist.add_priority("priority", "def", ["abc", "def"])
-        assert dist._penalties["priority"] == [0.0, 0.5]
-
-        dist.add_priority(
-            "priority", "gh", ["ab", "cd", "ef", re.compile("GH", re.I)]
-        )
-        assert dist._penalties["priority"] == [0.0, 0.5, 0.75]
-
-        dist.add_priority("priority", "xyz", ["abc", "def"])
-        assert dist._penalties["priority"] == [0.0, 0.5, 0.75, 1.0]
-
-    def test_add_ratio(self):
-        dist = Distance()
-        dist.add_ratio("ratio", 25, 100)
-        assert dist._penalties["ratio"] == [0.25]
-
-        dist.add_ratio("ratio", 10, 5)
-        assert dist._penalties["ratio"] == [0.25, 1.0]
-
-        dist.add_ratio("ratio", -5, 5)
-        assert dist._penalties["ratio"] == [0.25, 1.0, 0.0]
-
-        dist.add_ratio("ratio", 5, 0)
-        assert dist._penalties["ratio"] == [0.25, 1.0, 0.0, 0.0]
-
-    def test_add_string(self):
-        dist = Distance()
-        sdist = string_dist("abc", "bcd")
-        dist.add_string("string", "abc", "bcd")
-        assert dist._penalties["string"] == [sdist]
-        assert dist._penalties["string"] != [0]
-
-    def test_add_string_none(self):
-        dist = Distance()
-        dist.add_string("string", None, "string")
-        assert dist._penalties["string"] == [1]
-
-    def test_add_string_both_none(self):
-        dist = Distance()
-        dist.add_string("string", None, None)
-        assert dist._penalties["string"] == [0]
-
-    def test_distance(self):
-        config["match"]["distance_weights"]["album"] = 2.0
-        config["match"]["distance_weights"]["medium"] = 1.0
-        _clear_weights()
-
-        dist = Distance()
-        dist.add("album", 0.5)
-        dist.add("media", 0.25)
-        dist.add("media", 0.75)
-        assert dist.distance == 0.5
-
-        # __getitem__()
-        assert dist["album"] == 0.25
-        assert dist["media"] == 0.25
-
-    def test_max_distance(self):
-        config["match"]["distance_weights"]["album"] = 3.0
-        config["match"]["distance_weights"]["medium"] = 1.0
-        _clear_weights()
-
-        dist = Distance()
-        dist.add("album", 0.5)
-        dist.add("medium", 0.0)
-        dist.add("medium", 0.0)
-        assert dist.max_distance == 5.0
-
-    def test_operators(self):
-        config["match"]["distance_weights"]["source"] = 1.0
-        config["match"]["distance_weights"]["album"] = 2.0
-        config["match"]["distance_weights"]["medium"] = 1.0
-        _clear_weights()
-
-        dist = Distance()
-        dist.add("source", 0.0)
-        dist.add("album", 0.5)
-        dist.add("medium", 0.25)
-        dist.add("medium", 0.75)
-        assert len(dist) == 2
-        assert list(dist) == [("album", 0.2), ("medium", 0.2)]
-        assert dist == 0.4
-        assert dist < 1.0
-        assert dist > 0.0
-        assert dist - 0.4 == 0.0
-        assert 0.4 - dist == 0.0
-        assert float(dist) == 0.4
-
-    def test_raw_distance(self):
-        config["match"]["distance_weights"]["album"] = 3.0
-        config["match"]["distance_weights"]["medium"] = 1.0
-        _clear_weights()
-
-        dist = Distance()
-        dist.add("album", 0.5)
-        dist.add("medium", 0.25)
-        dist.add("medium", 0.5)
-        assert dist.raw_distance == 2.25
-
-    def test_items(self):
-        config["match"]["distance_weights"]["album"] = 4.0
-        config["match"]["distance_weights"]["medium"] = 2.0
-        _clear_weights()
-
-        dist = Distance()
-        dist.add("album", 0.1875)
-        dist.add("medium", 0.75)
-        assert dist.items() == [("medium", 0.25), ("album", 0.125)]
-
-        # Sort by key if distance is equal.
-        dist = Distance()
-        dist.add("album", 0.375)
-        dist.add("medium", 0.75)
-        assert dist.items() == [("album", 0.25), ("medium", 0.25)]
-
-    def test_update(self):
-        dist1 = Distance()
-        dist1.add("album", 0.5)
-        dist1.add("media", 1.0)
-
-        dist2 = Distance()
-        dist2.add("album", 0.75)
-        dist2.add("album", 0.25)
-        dist2.add("media", 0.05)
-
-        dist1.update(dist2)
-
-        assert dist1._penalties == {
-            "album": [0.5, 0.75, 0.25],
-            "media": [1.0, 0.05],
-        }
-
-
-class TrackDistanceTest(BeetsTestCase):
-    def test_identical_tracks(self):
-        item = _make_item("one", 1)
-        info = _make_trackinfo()[0]
-        dist = match.track_distance(item, info, incl_artist=True)
-        assert dist == 0.0
-
-    def test_different_title(self):
-        item = _make_item("foo", 1)
-        info = _make_trackinfo()[0]
-        dist = match.track_distance(item, info, incl_artist=True)
-        assert dist != 0.0
-
-    def test_different_artist(self):
-        item = _make_item("one", 1)
-        item.artist = "foo"
-        info = _make_trackinfo()[0]
-        dist = match.track_distance(item, info, incl_artist=True)
-        assert dist != 0.0
-
-    def test_various_artists_tolerated(self):
-        item = _make_item("one", 1)
-        item.artist = "Various Artists"
-        info = _make_trackinfo()[0]
-        dist = match.track_distance(item, info, incl_artist=True)
-        assert dist == 0.0
-
-
-class AlbumDistanceTest(BeetsTestCase):
-    def _mapping(self, items, info):
-        out = {}
-        for i, t in zip(items, info.tracks):
-            out[i] = t
-        return out
-
-    def _dist(self, items, info):
-        return match.distance(items, info, self._mapping(items, info))
-
-    def test_identical_albums(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        assert self._dist(items, info) == 0
-
-    def test_incomplete_album(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        dist = self._dist(items, info)
-        assert dist != 0
-        # Make sure the distance is not too great
-        assert dist < 0.2
-
-    def test_global_artists_differ(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="someone else",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        assert self._dist(items, info) != 0
-
-    def test_comp_track_artists_match(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="should be ignored",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=True,
-        )
-        assert self._dist(items, info) == 0
-
-    def test_comp_no_track_artists(self):
-        # Some VA releases don't have track artists (incomplete metadata).
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="should be ignored",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=True,
-        )
-        info.tracks[0].artist = None
-        info.tracks[1].artist = None
-        info.tracks[2].artist = None
-        assert self._dist(items, info) == 0
-
-    def test_comp_track_artists_do_not_match(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2, "someone else"))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=True,
-        )
-        assert self._dist(items, info) != 0
-
-    def test_tracks_out_of_order(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("three", 2))
-        items.append(_make_item("two", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        dist = self._dist(items, info)
-        assert 0 < dist < 0.2
-
-    def test_two_medium_release(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        info.tracks[0].medium_index = 1
-        info.tracks[1].medium_index = 2
-        info.tracks[2].medium_index = 1
-        dist = self._dist(items, info)
-        assert dist == 0
-
-    def test_per_medium_track_numbers(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 1))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        info.tracks[0].medium_index = 1
-        info.tracks[1].medium_index = 2
-        info.tracks[2].medium_index = 1
-        dist = self._dist(items, info)
-        assert dist == 0
-
-
 class TestAssignment(ConfigMixin):
     A = "one"
     B = "two"
@@ -840,82 +444,6 @@ class ApplyCompilationTest(BeetsTestCase, ApplyTestUtil):
         assert self.items[1].comp
 
 
-class StringDistanceTest(unittest.TestCase):
-    def test_equal_strings(self):
-        dist = string_dist("Some String", "Some String")
-        assert dist == 0.0
-
-    def test_different_strings(self):
-        dist = string_dist("Some String", "Totally Different")
-        assert dist != 0.0
-
-    def test_punctuation_ignored(self):
-        dist = string_dist("Some String", "Some.String!")
-        assert dist == 0.0
-
-    def test_case_ignored(self):
-        dist = string_dist("Some String", "sOME sTring")
-        assert dist == 0.0
-
-    def test_leading_the_has_lower_weight(self):
-        dist1 = string_dist("XXX Band Name", "Band Name")
-        dist2 = string_dist("The Band Name", "Band Name")
-        assert dist2 < dist1
-
-    def test_parens_have_lower_weight(self):
-        dist1 = string_dist("One .Two.", "One")
-        dist2 = string_dist("One (Two)", "One")
-        assert dist2 < dist1
-
-    def test_brackets_have_lower_weight(self):
-        dist1 = string_dist("One .Two.", "One")
-        dist2 = string_dist("One [Two]", "One")
-        assert dist2 < dist1
-
-    def test_ep_label_has_zero_weight(self):
-        dist = string_dist("My Song (EP)", "My Song")
-        assert dist == 0.0
-
-    def test_featured_has_lower_weight(self):
-        dist1 = string_dist("My Song blah Someone", "My Song")
-        dist2 = string_dist("My Song feat Someone", "My Song")
-        assert dist2 < dist1
-
-    def test_postfix_the(self):
-        dist = string_dist("The Song Title", "Song Title, The")
-        assert dist == 0.0
-
-    def test_postfix_a(self):
-        dist = string_dist("A Song Title", "Song Title, A")
-        assert dist == 0.0
-
-    def test_postfix_an(self):
-        dist = string_dist("An Album Title", "Album Title, An")
-        assert dist == 0.0
-
-    def test_empty_strings(self):
-        dist = string_dist("", "")
-        assert dist == 0.0
-
-    def test_solo_pattern(self):
-        # Just make sure these don't crash.
-        string_dist("The ", "")
-        string_dist("(EP)", "(EP)")
-        string_dist(", An", "")
-
-    def test_heuristic_does_not_harm_distance(self):
-        dist = string_dist("Untitled", "[Untitled]")
-        assert dist == 0.0
-
-    def test_ampersand_expansion(self):
-        dist = string_dist("And", "&")
-        assert dist == 0.0
-
-    def test_accented_characters(self):
-        dist = string_dist("\xe9\xe1\xf1", "ean")
-        assert dist == 0.0
-
-
 @pytest.mark.parametrize(
     "single_field,list_field",
     [

From 318a840af2e4e80f5e8c6465a0d0d44d31dad4a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 25 May 2025 13:43:20 +0100
Subject: [PATCH 34/49] Rewrite distance tests

---
 test/autotag/test_distance.py | 597 ++++++++++++----------------------
 1 file changed, 210 insertions(+), 387 deletions(-)

diff --git a/test/autotag/test_distance.py b/test/autotag/test_distance.py
index ec00ebcdf..e3ce9f891 100644
--- a/test/autotag/test_distance.py
+++ b/test/autotag/test_distance.py
@@ -1,176 +1,108 @@
 import re
-import unittest
 
-from beets import config
-from beets.autotag import AlbumInfo, TrackInfo, match
-from beets.autotag.distance import Distance, string_dist
+import pytest
+
+from beets.autotag import AlbumInfo, TrackInfo
+from beets.autotag.distance import (
+    Distance,
+    distance,
+    string_dist,
+    track_distance,
+)
 from beets.library import Item
-from beets.test.helper import BeetsTestCase
+from beets.test.helper import ConfigMixin
+
+_p = pytest.param
 
 
-def _make_item(title, track, artist="some artist"):
-    return Item(
-        title=title,
-        track=track,
-        artist=artist,
-        album="some album",
-        length=1,
-        mb_trackid="",
-        mb_albumid="",
-        mb_artistid="",
-    )
+class TestDistance:
+    @pytest.fixture(scope="class")
+    def config(self):
+        return ConfigMixin().config
 
+    @pytest.fixture
+    def dist(self, config):
+        config["match"]["distance_weights"]["source"] = 2.0
+        config["match"]["distance_weights"]["album"] = 4.0
+        config["match"]["distance_weights"]["medium"] = 2.0
 
-def _make_trackinfo():
-    return [
-        TrackInfo(
-            title="one", track_id=None, artist="some artist", length=1, index=1
-        ),
-        TrackInfo(
-            title="two", track_id=None, artist="some artist", length=1, index=2
-        ),
-        TrackInfo(
-            title="three",
-            track_id=None,
-            artist="some artist",
-            length=1,
-            index=3,
-        ),
-    ]
+        Distance.__dict__["_weights"].cache = {}
 
+        return Distance()
 
-def _clear_weights():
-    """Hack around the lazy descriptor used to cache weights for
-    Distance calculations.
-    """
-    Distance.__dict__["_weights"].cache = {}
-
-
-class DistanceTest(BeetsTestCase):
-    def tearDown(self):
-        super().tearDown()
-        _clear_weights()
-
-    def test_add(self):
-        dist = Distance()
+    def test_add(self, dist):
         dist.add("add", 1.0)
+
         assert dist._penalties == {"add": [1.0]}
 
-    def test_add_equality(self):
-        dist = Distance()
-        dist.add_equality("equality", "ghi", ["abc", "def", "ghi"])
-        assert dist._penalties["equality"] == [0.0]
+    @pytest.mark.parametrize(
+        "key, args_with_expected",
+        [
+            (
+                "equality",
+                [
+                    (("ghi", ["abc", "def", "ghi"]), [0.0]),
+                    (("xyz", ["abc", "def", "ghi"]), [0.0, 1.0]),
+                    (("abc", re.compile(r"ABC", re.I)), [0.0, 1.0, 0.0]),
+                ],
+            ),
+            ("expr", [((True,), [1.0]), ((False,), [1.0, 0.0])]),
+            (
+                "number",
+                [
+                    ((1, 1), [0.0]),
+                    ((1, 2), [0.0, 1.0]),
+                    ((2, 1), [0.0, 1.0, 1.0]),
+                    ((-1, 2), [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]),
+                ],
+            ),
+            (
+                "priority",
+                [
+                    (("abc", "abc"), [0.0]),
+                    (("def", ["abc", "def"]), [0.0, 0.5]),
+                    (("gh", ["ab", "cd", "ef", re.compile("GH", re.I)]), [0.0, 0.5, 0.75]),  # noqa: E501
+                    (("xyz", ["abc", "def"]), [0.0, 0.5, 0.75, 1.0]),
+                ],
+            ),
+            (
+                "ratio",
+                [
+                    ((25, 100), [0.25]),
+                    ((10, 5), [0.25, 1.0]),
+                    ((-5, 5), [0.25, 1.0, 0.0]),
+                    ((5, 0), [0.25, 1.0, 0.0, 0.0]),
+                ],
+            ),
+            (
+                "string",
+                [
+                    (("abc", "bcd"), [2 / 3]),
+                    (("abc", None), [2 / 3, 1]),
+                    ((None, None), [2 / 3, 1, 0]),
+                ],
+            ),
+        ],
+    )  # fmt: skip
+    def test_add_methods(self, dist, key, args_with_expected):
+        method = getattr(dist, f"add_{key}")
+        for arg_set, expected in args_with_expected:
+            method(key, *arg_set)
+            assert dist._penalties[key] == expected
 
-        dist.add_equality("equality", "xyz", ["abc", "def", "ghi"])
-        assert dist._penalties["equality"] == [0.0, 1.0]
-
-        dist.add_equality("equality", "abc", re.compile(r"ABC", re.I))
-        assert dist._penalties["equality"] == [0.0, 1.0, 0.0]
-
-    def test_add_expr(self):
-        dist = Distance()
-        dist.add_expr("expr", True)
-        assert dist._penalties["expr"] == [1.0]
-
-        dist.add_expr("expr", False)
-        assert dist._penalties["expr"] == [1.0, 0.0]
-
-    def test_add_number(self):
-        dist = Distance()
-        # Add a full penalty for each number of difference between two numbers.
-
-        dist.add_number("number", 1, 1)
-        assert dist._penalties["number"] == [0.0]
-
-        dist.add_number("number", 1, 2)
-        assert dist._penalties["number"] == [0.0, 1.0]
-
-        dist.add_number("number", 2, 1)
-        assert dist._penalties["number"] == [0.0, 1.0, 1.0]
-
-        dist.add_number("number", -1, 2)
-        assert dist._penalties["number"] == [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]
-
-    def test_add_priority(self):
-        dist = Distance()
-        dist.add_priority("priority", "abc", "abc")
-        assert dist._penalties["priority"] == [0.0]
-
-        dist.add_priority("priority", "def", ["abc", "def"])
-        assert dist._penalties["priority"] == [0.0, 0.5]
-
-        dist.add_priority(
-            "priority", "gh", ["ab", "cd", "ef", re.compile("GH", re.I)]
-        )
-        assert dist._penalties["priority"] == [0.0, 0.5, 0.75]
-
-        dist.add_priority("priority", "xyz", ["abc", "def"])
-        assert dist._penalties["priority"] == [0.0, 0.5, 0.75, 1.0]
-
-    def test_add_ratio(self):
-        dist = Distance()
-        dist.add_ratio("ratio", 25, 100)
-        assert dist._penalties["ratio"] == [0.25]
-
-        dist.add_ratio("ratio", 10, 5)
-        assert dist._penalties["ratio"] == [0.25, 1.0]
-
-        dist.add_ratio("ratio", -5, 5)
-        assert dist._penalties["ratio"] == [0.25, 1.0, 0.0]
-
-        dist.add_ratio("ratio", 5, 0)
-        assert dist._penalties["ratio"] == [0.25, 1.0, 0.0, 0.0]
-
-    def test_add_string(self):
-        dist = Distance()
-        sdist = string_dist("abc", "bcd")
-        dist.add_string("string", "abc", "bcd")
-        assert dist._penalties["string"] == [sdist]
-        assert dist._penalties["string"] != [0]
-
-    def test_add_string_none(self):
-        dist = Distance()
-        dist.add_string("string", None, "string")
-        assert dist._penalties["string"] == [1]
-
-    def test_add_string_both_none(self):
-        dist = Distance()
-        dist.add_string("string", None, None)
-        assert dist._penalties["string"] == [0]
-
-    def test_distance(self):
-        config["match"]["distance_weights"]["album"] = 2.0
-        config["match"]["distance_weights"]["medium"] = 1.0
-        _clear_weights()
-
-        dist = Distance()
+    def test_distance(self, dist):
         dist.add("album", 0.5)
         dist.add("media", 0.25)
         dist.add("media", 0.75)
+
         assert dist.distance == 0.5
+        assert dist.max_distance == 6.0
+        assert dist.raw_distance == 3.0
 
-        # __getitem__()
-        assert dist["album"] == 0.25
-        assert dist["media"] == 0.25
+        assert dist["album"] == 1 / 3
+        assert dist["media"] == 1 / 6
 
-    def test_max_distance(self):
-        config["match"]["distance_weights"]["album"] = 3.0
-        config["match"]["distance_weights"]["medium"] = 1.0
-        _clear_weights()
-
-        dist = Distance()
-        dist.add("album", 0.5)
-        dist.add("medium", 0.0)
-        dist.add("medium", 0.0)
-        assert dist.max_distance == 5.0
-
-    def test_operators(self):
-        config["match"]["distance_weights"]["source"] = 1.0
-        config["match"]["distance_weights"]["album"] = 2.0
-        config["match"]["distance_weights"]["medium"] = 1.0
-        _clear_weights()
-
-        dist = Distance()
+    def test_operators(self, dist):
         dist.add("source", 0.0)
         dist.add("album", 0.5)
         dist.add("medium", 0.25)
@@ -184,23 +116,7 @@ class DistanceTest(BeetsTestCase):
         assert 0.4 - dist == 0.0
         assert float(dist) == 0.4
 
-    def test_raw_distance(self):
-        config["match"]["distance_weights"]["album"] = 3.0
-        config["match"]["distance_weights"]["medium"] = 1.0
-        _clear_weights()
-
-        dist = Distance()
-        dist.add("album", 0.5)
-        dist.add("medium", 0.25)
-        dist.add("medium", 0.5)
-        assert dist.raw_distance == 2.25
-
-    def test_items(self):
-        config["match"]["distance_weights"]["album"] = 4.0
-        config["match"]["distance_weights"]["medium"] = 2.0
-        _clear_weights()
-
-        dist = Distance()
+    def test_penalties_sort(self, dist):
         dist.add("album", 0.1875)
         dist.add("medium", 0.75)
         assert dist.items() == [("medium", 0.25), ("album", 0.125)]
@@ -211,8 +127,8 @@ class DistanceTest(BeetsTestCase):
         dist.add("medium", 0.75)
         assert dist.items() == [("album", 0.25), ("medium", 0.25)]
 
-    def test_update(self):
-        dist1 = Distance()
+    def test_update(self, dist):
+        dist1 = dist
         dist1.add("album", 0.5)
         dist1.add("media", 1.0)
 
@@ -229,248 +145,155 @@ class DistanceTest(BeetsTestCase):
         }
 
 
-class TrackDistanceTest(BeetsTestCase):
-    def test_identical_tracks(self):
-        item = _make_item("one", 1)
-        info = _make_trackinfo()[0]
-        dist = match.track_distance(item, info, incl_artist=True)
-        assert dist == 0.0
+class TestTrackDistance:
+    @pytest.fixture(scope="class")
+    def info(self):
+        return TrackInfo(title="title", artist="artist")
 
-    def test_different_title(self):
-        item = _make_item("foo", 1)
-        info = _make_trackinfo()[0]
-        dist = match.track_distance(item, info, incl_artist=True)
-        assert dist != 0.0
+    @pytest.mark.parametrize(
+        "title, artist, expected_penalty",
+        [
+            _p("title", "artist", False, id="identical"),
+            _p("title", "Various Artists", False, id="tolerate-va"),
+            _p("title", "different artist", True, id="different-artist"),
+            _p("different title", "artist", True, id="different-title"),
+        ],
+    )
+    def test_track_distance(self, info, title, artist, expected_penalty):
+        item = Item(artist=artist, title=title)
 
-    def test_different_artist(self):
-        item = _make_item("one", 1)
-        item.artist = "foo"
-        info = _make_trackinfo()[0]
-        dist = match.track_distance(item, info, incl_artist=True)
-        assert dist != 0.0
-
-    def test_various_artists_tolerated(self):
-        item = _make_item("one", 1)
-        item.artist = "Various Artists"
-        info = _make_trackinfo()[0]
-        dist = match.track_distance(item, info, incl_artist=True)
-        assert dist == 0.0
+        assert (
+            bool(track_distance(item, info, incl_artist=True))
+            == expected_penalty
+        )
 
 
-class AlbumDistanceTest(BeetsTestCase):
-    def _mapping(self, items, info):
-        out = {}
-        for i, t in zip(items, info.tracks):
-            out[i] = t
-        return out
+class TestAlbumDistance:
+    @pytest.fixture(scope="class")
+    def items(self):
+        return [
+            Item(
+                title=title,
+                track=track,
+                artist="artist",
+                album="album",
+                length=1,
+            )
+            for title, track in [("one", 1), ("two", 2), ("three", 3)]
+        ]
 
-    def _dist(self, items, info):
-        return match.distance(items, info, self._mapping(items, info))
+    @pytest.fixture
+    def get_dist(self, items):
+        def inner(info: AlbumInfo):
+            return distance(items, info, dict(zip(items, info.tracks)))
 
-    def test_identical_albums(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
+        return inner
+
+    @pytest.fixture
+    def info(self, items):
+        return AlbumInfo(
+            artist="artist",
+            album="album",
+            tracks=[
+                TrackInfo(
+                    title=i.title,
+                    artist=i.artist,
+                    index=i.track,
+                    length=i.length,
+                )
+                for i in items
+            ],
             va=False,
         )
-        assert self._dist(items, info) == 0
 
-    def test_incomplete_album(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
+    def test_identical_albums(self, get_dist, info):
+        assert get_dist(info) == 0
+
+    def test_incomplete_album(self, get_dist, info):
+        info.tracks.pop(2)
+
+        assert 0 < float(get_dist(info)) < 0.2
+
+    def test_overly_complete_album(self, get_dist, info):
+        info.tracks.append(
+            Item(index=4, title="four", artist="artist", length=1)
         )
-        dist = self._dist(items, info)
-        assert dist != 0
-        # Make sure the distance is not too great
-        assert dist < 0.2
 
-    def test_global_artists_differ(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="someone else",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        assert self._dist(items, info) != 0
+        assert 0 < float(get_dist(info)) < 0.2
 
-    def test_comp_track_artists_match(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="should be ignored",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=True,
-        )
-        assert self._dist(items, info) == 0
+    @pytest.mark.parametrize("va", [True, False])
+    def test_albumartist(self, get_dist, info, va):
+        info.artist = "another artist"
+        info.va = va
 
-    def test_comp_no_track_artists(self):
+        assert bool(get_dist(info)) is not va
+
+    def test_comp_no_track_artists(self, get_dist, info):
         # Some VA releases don't have track artists (incomplete metadata).
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="should be ignored",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=True,
-        )
-        info.tracks[0].artist = None
-        info.tracks[1].artist = None
-        info.tracks[2].artist = None
-        assert self._dist(items, info) == 0
+        info.artist = "another artist"
+        info.va = True
+        for track in info.tracks:
+            track.artist = None
 
-    def test_comp_track_artists_do_not_match(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2, "someone else"))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=True,
-        )
-        assert self._dist(items, info) != 0
+        assert get_dist(info) == 0
 
-    def test_tracks_out_of_order(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("three", 2))
-        items.append(_make_item("two", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        dist = self._dist(items, info)
-        assert 0 < dist < 0.2
+    def test_comp_track_artists_do_not_match(self, get_dist, info):
+        info.va = True
+        info.tracks[0].artist = "another artist"
 
-    def test_two_medium_release(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 3))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
+        assert get_dist(info) != 0
+
+    def test_tracks_out_of_order(self, get_dist, info):
+        tracks = info.tracks
+        tracks[1].title, tracks[2].title = tracks[2].title, tracks[1].title
+
+        assert 0 < float(get_dist(info)) < 0.2
+
+    def test_two_medium_release(self, get_dist, info):
         info.tracks[0].medium_index = 1
         info.tracks[1].medium_index = 2
         info.tracks[2].medium_index = 1
-        dist = self._dist(items, info)
-        assert dist == 0
 
-    def test_per_medium_track_numbers(self):
-        items = []
-        items.append(_make_item("one", 1))
-        items.append(_make_item("two", 2))
-        items.append(_make_item("three", 1))
-        info = AlbumInfo(
-            artist="some artist",
-            album="some album",
-            tracks=_make_trackinfo(),
-            va=False,
-        )
-        info.tracks[0].medium_index = 1
-        info.tracks[1].medium_index = 2
-        info.tracks[2].medium_index = 1
-        dist = self._dist(items, info)
-        assert dist == 0
+        assert get_dist(info) == 0
 
 
-class StringDistanceTest(unittest.TestCase):
-    def test_equal_strings(self):
-        dist = string_dist("Some String", "Some String")
-        assert dist == 0.0
+class TestStringDistance:
+    @pytest.mark.parametrize(
+        "string1, string2",
+        [
+            ("Some String", "Some String"),
+            ("Some String", "Some.String!"),
+            ("Some String", "sOME sTring"),
+            ("My Song (EP)", "My Song"),
+            ("The Song Title", "Song Title, The"),
+            ("A Song Title", "Song Title, A"),
+            ("An Album Title", "Album Title, An"),
+            ("", ""),
+            ("Untitled", "[Untitled]"),
+            ("And", "&"),
+            ("\xe9\xe1\xf1", "ean"),
+        ],
+    )
+    def test_matching_distance(self, string1, string2):
+        assert string_dist(string1, string2) == 0.0
 
-    def test_different_strings(self):
-        dist = string_dist("Some String", "Totally Different")
-        assert dist != 0.0
+    def test_different_distance(self):
+        assert string_dist("Some String", "Totally Different") != 0.0
 
-    def test_punctuation_ignored(self):
-        dist = string_dist("Some String", "Some.String!")
-        assert dist == 0.0
-
-    def test_case_ignored(self):
-        dist = string_dist("Some String", "sOME sTring")
-        assert dist == 0.0
-
-    def test_leading_the_has_lower_weight(self):
-        dist1 = string_dist("XXX Band Name", "Band Name")
-        dist2 = string_dist("The Band Name", "Band Name")
-        assert dist2 < dist1
-
-    def test_parens_have_lower_weight(self):
-        dist1 = string_dist("One .Two.", "One")
-        dist2 = string_dist("One (Two)", "One")
-        assert dist2 < dist1
-
-    def test_brackets_have_lower_weight(self):
-        dist1 = string_dist("One .Two.", "One")
-        dist2 = string_dist("One [Two]", "One")
-        assert dist2 < dist1
-
-    def test_ep_label_has_zero_weight(self):
-        dist = string_dist("My Song (EP)", "My Song")
-        assert dist == 0.0
-
-    def test_featured_has_lower_weight(self):
-        dist1 = string_dist("My Song blah Someone", "My Song")
-        dist2 = string_dist("My Song feat Someone", "My Song")
-        assert dist2 < dist1
-
-    def test_postfix_the(self):
-        dist = string_dist("The Song Title", "Song Title, The")
-        assert dist == 0.0
-
-    def test_postfix_a(self):
-        dist = string_dist("A Song Title", "Song Title, A")
-        assert dist == 0.0
-
-    def test_postfix_an(self):
-        dist = string_dist("An Album Title", "Album Title, An")
-        assert dist == 0.0
-
-    def test_empty_strings(self):
-        dist = string_dist("", "")
-        assert dist == 0.0
+    @pytest.mark.parametrize(
+        "string1, string2, reference",
+        [
+            ("XXX Band Name", "The Band Name", "Band Name"),
+            ("One .Two.", "One (Two)", "One"),
+            ("One .Two.", "One [Two]", "One"),
+            ("My Song blah Someone", "My Song feat Someone", "My Song"),
+        ],
+    )
+    def test_relative_weights(self, string1, string2, reference):
+        assert string_dist(string2, reference) < string_dist(string1, reference)
 
     def test_solo_pattern(self):
         # Just make sure these don't crash.
         string_dist("The ", "")
         string_dist("(EP)", "(EP)")
         string_dist(", An", "")
-
-    def test_heuristic_does_not_harm_distance(self):
-        dist = string_dist("Untitled", "[Untitled]")
-        assert dist == 0.0
-
-    def test_ampersand_expansion(self):
-        dist = string_dist("And", "&")
-        assert dist == 0.0
-
-    def test_accented_characters(self):
-        dist = string_dist("\xe9\xe1\xf1", "ean")
-        assert dist == 0.0

From 0da6192a4ab799196ff49e403e8cd91fbaaef4bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 25 May 2025 14:01:29 +0100
Subject: [PATCH 35/49] Test sanitize_pairs

---
 test/util/test_config.py | 47 ++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/test/util/test_config.py b/test/util/test_config.py
index 0c49f85b1..7105844dd 100644
--- a/test/util/test_config.py
+++ b/test/util/test_config.py
@@ -1,15 +1,38 @@
-import unittest
+import pytest
 
-from beets.util.config import sanitize_choices
+from beets.util.config import sanitize_choices, sanitize_pairs
 
 
-class HelpersTest(unittest.TestCase):
-    def test_sanitize_choices(self):
-        assert sanitize_choices(["A", "Z"], ("A", "B")) == ["A"]
-        assert sanitize_choices(["A", "A"], ("A")) == ["A"]
-        assert sanitize_choices(["D", "*", "A"], ("A", "B", "C", "D")) == [
-            "D",
-            "B",
-            "C",
-            "A",
-        ]
+@pytest.mark.parametrize(
+    "input_choices, valid_choices, expected",
+    [
+        (["A", "Z"], ("A", "B"), ["A"]),
+        (["A", "A"], ("A"), ["A"]),
+        (["D", "*", "A"], ("A", "B", "C", "D"), ["D", "B", "C", "A"]),
+    ],
+)
+def test_sanitize_choices(input_choices, valid_choices, expected):
+    assert sanitize_choices(input_choices, valid_choices) == expected
+
+
+def test_sanitize_pairs():
+    assert sanitize_pairs(
+        [
+            ("foo", "baz bar"),
+            ("foo", "baz bar"),
+            ("key", "*"),
+            ("*", "*"),
+            ("discard", "bye"),
+        ],
+        [
+            ("foo", "bar"),
+            ("foo", "baz"),
+            ("foo", "foobar"),
+            ("key", "value"),
+        ],
+    ) == [
+        ("foo", "baz"),
+        ("foo", "bar"),
+        ("key", "value"),
+        ("foo", "foobar"),
+    ]

From cb246c28bc69986a702fa683f1f7c3c1d94425c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Mon, 26 May 2025 11:49:20 +0100
Subject: [PATCH 36/49] Remove dead chartlyrics

This integration test failed because `chartlyrics.com` website is no
longer available, so I'm removing it.
---
 test/plugins/lyrics_pages.py | 39 ------------------------------------
 1 file changed, 39 deletions(-)

diff --git a/test/plugins/lyrics_pages.py b/test/plugins/lyrics_pages.py
index ef2eeb1a2..e1806b167 100644
--- a/test/plugins/lyrics_pages.py
+++ b/test/plugins/lyrics_pages.py
@@ -108,45 +108,6 @@ lyrics_pages = [
         url_title="The Beatles - Lady Madonna Lyrics | AZLyrics.com",
         marks=[xfail_on_ci("AZLyrics is blocked by Cloudflare")],
     ),
-    LyricsPage.make(
-        "http://www.chartlyrics.com/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx",
-        """
-        Lady Madonna,
-        Children at your feet
-        Wonder how you manage to make ends meet.
-
-        Who finds the money
-        When you pay the rent?
-        Did you think that money was heaven-sent?
-
-        Friday night arrives without a suitcase.
-        Sunday morning creeping like a nun.
-        Monday's child has learned to tie his bootlace.
-
-        See how they run.
-
-        Lady Madonna,
-        Baby at your breast
-        Wonders how you manage to feed the rest.
-
-        See how they run.
-
-        Lady Madonna,
-        Lying on the bed.
-        Listen to the music playing in your head.
-
-        Tuesday afternoon is never ending.
-        Wednesday morning papers didn't come.
-        Thursday night your stockings needed mending.
-
-        See how they run.
-
-        Lady Madonna,
-        Children at your feet
-        Wonder how you manage to make ends meet.
-        """,
-        url_title="The Beatles Lady Madonna lyrics",
-    ),
     LyricsPage.make(
         "https://www.dainuzodziai.lt/m/mergaites-nori-mylet-atlanta/",
         """

From 99f7e94b594314c39709125ba5e6f898c50592f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Mon, 26 May 2025 13:37:23 +0100
Subject: [PATCH 37/49] Add Distance and current_metadata to autotag.__init__
 for backward compat

---
 beets/autotag/__init__.py |  8 ++++++--
 beets/plugins.py          | 12 +++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py
index 5b16b012e..8cfe534ab 100644
--- a/beets/autotag/__init__.py
+++ b/beets/autotag/__init__.py
@@ -19,10 +19,12 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Union
 
 from beets import config, logging
+from beets.util import get_most_common_tags as current_metadata
 
 # Parts of external interface.
 from beets.util import unique_list
 
+from .distance import Distance
 from .hooks import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch
 from .match import Proposal, Recommendation, tag_album, tag_item
 
@@ -34,13 +36,15 @@ if TYPE_CHECKING:
 __all__ = [
     "AlbumInfo",
     "AlbumMatch",
-    "TrackInfo",
-    "TrackMatch",
+    "Distance",  # for backwards compatibility
     "Proposal",
     "Recommendation",
+    "TrackInfo",
+    "TrackMatch",
     "apply_album_metadata",
     "apply_item_metadata",
     "apply_metadata",
+    "current_metadata",  # for backwards compatibility
     "tag_album",
     "tag_item",
 ]
diff --git a/beets/plugins.py b/beets/plugins.py
index cd66435b5..1ae672e20 100644
--- a/beets/plugins.py
+++ b/beets/plugins.py
@@ -37,7 +37,6 @@ import mediafile
 
 import beets
 from beets import logging
-from beets.autotag.distance import Distance
 from beets.util.id_extractors import extract_release_id
 
 if TYPE_CHECKING:
@@ -55,6 +54,7 @@ if TYPE_CHECKING:
     from confuse import ConfigView
 
     from beets.autotag import AlbumInfo, TrackInfo
+    from beets.autotag.distance import Distance
     from beets.dbcore import Query
     from beets.dbcore.db import FieldQueryType
     from beets.dbcore.types import Type
@@ -225,6 +225,8 @@ class BeetsPlugin:
         """Should return a Distance object to be added to the
         distance for every track comparison.
         """
+        from beets.autotag.distance import Distance
+
         return Distance()
 
     def album_distance(
@@ -236,6 +238,8 @@ class BeetsPlugin:
         """Should return a Distance object to be added to the
         distance for every album-level comparison.
         """
+        from beets.autotag.distance import Distance
+
         return Distance()
 
     def candidates(
@@ -455,6 +459,8 @@ def track_distance(item: Item, info: TrackInfo) -> Distance:
     """Gets the track distance calculated by all loaded plugins.
     Returns a Distance object.
     """
+    from beets.autotag.distance import Distance
+
     dist = Distance()
     for plugin in find_plugins():
         dist.update(plugin.track_distance(item, info))
@@ -467,6 +473,8 @@ def album_distance(
     mapping: dict[Item, TrackInfo],
 ) -> Distance:
     """Returns the album distance calculated by plugins."""
+    from beets.autotag.distance import Distance
+
     dist = Distance()
     for plugin in find_plugins():
         dist.update(plugin.album_distance(items, album_info, mapping))
@@ -653,6 +661,8 @@ def get_distance(
     """Returns the ``data_source`` weight and the maximum source weight
     for albums or individual tracks.
     """
+    from beets.autotag.distance import Distance
+
     dist = Distance()
     if info.data_source == data_source:
         dist.add("source", config["source_weight"].as_number())

From 2f98f11d576601a87431ce1941893249f2ffe4d2 Mon Sep 17 00:00:00 2001
From: Ben Stolovitz <ben@stolovitz.com>
Date: Sun, 25 May 2025 14:38:44 -0400
Subject: [PATCH 38/49] fix local langdetect test failures

avoid linter error

avoid other linter error

fix format

changing deps (no lock!)

poetry lock?

lint & format

attempt 2 at poetry lock

crlf -> lf line endings

changelog!
---
 docs/changelog.rst          | 2 ++
 poetry.lock                 | 7 +++++--
 pyproject.toml              | 1 +
 test/plugins/test_lyrics.py | 7 -------
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index b78cae78f..03a2a72ea 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -35,6 +35,8 @@ Bug fixes:
   event to be sent twice, not once.
   :bug:`5560`
 * Fix ``HiddenFileTest`` by using ``bytestring_path()``.
+* tests: Fix tests failing without ``langdetect`` (by making it required).
+  :bug:`5797`
 
 For packagers:
 
diff --git a/poetry.lock b/poetry.lock
index bdd0ee0ca..752953e1d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1030,7 +1030,7 @@ files = [
 name = "langdetect"
 version = "1.0.9"
 description = "Language detection library ported from Google's language-detection."
-optional = true
+optional = false
 python-versions = "*"
 files = [
     {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"},
@@ -1271,8 +1271,11 @@ files = [
     {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
     {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
+    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
+    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
+    {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
     {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
     {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
     {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
@@ -3276,4 +3279,4 @@ web = ["flask", "flask-cors"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4"
-content-hash = "d609e83f7ffeefc12e28d627e5646aa5c1a6f5a56d7013bb649a468069550dba"
+content-hash = "b3f2746a43227fe639d17eb22d7924e30c9d83eef53dce2c10388c602f0c6665"
diff --git a/pyproject.toml b/pyproject.toml
index 8b817a078..ea69240d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,7 @@ sphinx = { version = "*", optional = true }
 beautifulsoup4 = "*"
 codecov = ">=2.1.13"
 flask = "*"
+langdetect = "*"
 mock = "*"
 pylast = "*"
 pytest = "*"
diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py
index 74e727099..945a7158c 100644
--- a/test/plugins/test_lyrics.py
+++ b/test/plugins/test_lyrics.py
@@ -14,8 +14,6 @@
 
 """Tests for the 'lyrics' plugin."""
 
-import importlib.util
-import os
 import re
 import textwrap
 from functools import partial
@@ -30,11 +28,6 @@ from beetsplug import lyrics
 
 from .lyrics_pages import LyricsPage, lyrics_pages
 
-github_ci = os.environ.get("GITHUB_ACTIONS") == "true"
-if not github_ci and not importlib.util.find_spec("langdetect"):
-    pytest.skip("langdetect isn't available", allow_module_level=True)
-
-
 PHRASE_BY_TITLE = {
     "Lady Madonna": "friday night arrives without a suitcase",
     "Jazz'n'blues": "as i check my balance i kiss the screen",

From 66864fcc27756c43566ffa998c137e241744f422 Mon Sep 17 00:00:00 2001
From: Sebastian Mohr <39738318+semohr@users.noreply.github.com>
Date: Wed, 11 Jun 2025 15:19:46 +0200
Subject: [PATCH 39/49] Minor improvements to spotify plugin typing. (#5815)

## Description

Added some more typehints to the spotify plugin. Also added a method to
get the tokenfile and changed to logic for the handle_response to use
`requests.request`.

This is done mainly to prepare for
https://github.com/beetbox/beets/pull/5787, see also
https://github.com/beetbox/beets/pull/5814
---
 beetsplug/spotify.py | 161 +++++++++++++++++++++----------------------
 1 file changed, 79 insertions(+), 82 deletions(-)

diff --git a/beetsplug/spotify.py b/beetsplug/spotify.py
index c0d212971..9d285928a 100644
--- a/beetsplug/spotify.py
+++ b/beetsplug/spotify.py
@@ -25,6 +25,7 @@ import json
 import re
 import time
 import webbrowser
+from typing import TYPE_CHECKING, Any, Literal, Sequence
 
 import confuse
 import requests
@@ -33,8 +34,11 @@ import unidecode
 from beets import ui
 from beets.autotag.hooks import AlbumInfo, TrackInfo
 from beets.dbcore import types
-from beets.library import DateType
-from beets.plugins import BeetsPlugin, MetadataSourcePlugin
+from beets.library import DateType, Library
+from beets.plugins import BeetsPlugin, MetadataSourcePlugin, Response
+
+if TYPE_CHECKING:
+    from beetsplug._typing import JSONDict
 
 DEFAULT_WAITING_TIME = 5
 
@@ -107,33 +111,33 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         self.config["client_id"].redact = True
         self.config["client_secret"].redact = True
 
-        self.tokenfile = self.config["tokenfile"].get(
-            confuse.Filename(in_app_dir=True)
-        )  # Path to the JSON file for storing the OAuth access token.
         self.setup()
 
     def setup(self):
         """Retrieve previously saved OAuth token or generate a new one."""
+
         try:
-            with open(self.tokenfile) as f:
+            with open(self._tokenfile()) as f:
                 token_data = json.load(f)
         except OSError:
             self._authenticate()
         else:
             self.access_token = token_data["access_token"]
 
+    def _tokenfile(self) -> str:
+        """Get the path to the JSON file for storing the OAuth token."""
+        return self.config["tokenfile"].get(confuse.Filename(in_app_dir=True))
+
     def _authenticate(self):
         """Request an access token via the Client Credentials Flow:
         https://developer.spotify.com/documentation/general/guides/authorization-guide/#client-credentials-flow
         """
+        c_id: str = self.config["client_id"].as_str()
+        c_secret: str = self.config["client_secret"].as_str()
+
         headers = {
             "Authorization": "Basic {}".format(
-                base64.b64encode(
-                    ":".join(
-                        self.config[k].as_str()
-                        for k in ("client_id", "client_secret")
-                    ).encode()
-                ).decode()
+                base64.b64encode(f"{c_id}:{c_secret}".encode()).decode()
             )
         }
         response = requests.post(
@@ -154,27 +158,32 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         self._log.debug(
             "{} access token: {}", self.data_source, self.access_token
         )
-        with open(self.tokenfile, "w") as f:
+        with open(self._tokenfile(), "w") as f:
             json.dump({"access_token": self.access_token}, f)
 
     def _handle_response(
-        self, request_type, url, params=None, retry_count=0, max_retries=3
-    ):
+        self,
+        method: Literal["get", "post", "put", "delete"],
+        url: str,
+        params: Any = None,
+        retry_count: int = 0,
+        max_retries: int = 3,
+    ) -> JSONDict:
         """Send a request, reauthenticating if necessary.
 
-        :param request_type: Type of :class:`Request` constructor,
-            e.g. ``requests.get``, ``requests.post``, etc.
-        :type request_type: function
+        :param method: HTTP method to use for the request.
         :param url: URL for the new :class:`Request` object.
-        :type url: str
         :param params: (optional) list of tuples or bytes to send
             in the query string for the :class:`Request`.
         :type params: dict
-        :return: JSON data for the class:`Response <Response>` object.
-        :rtype: dict
         """
+
+        if retry_count > max_retries:
+            raise SpotifyAPIError("Maximum retries reached.")
+
         try:
-            response = request_type(
+            response = requests.request(
+                method,
                 url,
                 headers={"Authorization": f"Bearer {self.access_token}"},
                 params=params,
@@ -189,22 +198,28 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
             self._log.error(f"Network error: {e}")
             raise SpotifyAPIError("Network error.")
         except requests.exceptions.RequestException as e:
+            if e.response is None:
+                self._log.error(f"Request failed: {e}")
+                raise SpotifyAPIError("Request failed.")
             if e.response.status_code == 401:
                 self._log.debug(
                     f"{self.data_source} access token has expired. "
                     f"Reauthenticating."
                 )
                 self._authenticate()
-                return self._handle_response(request_type, url, params=params)
+                return self._handle_response(
+                    method,
+                    url,
+                    params=params,
+                    retry_count=retry_count + 1,
+                )
             elif e.response.status_code == 404:
                 raise SpotifyAPIError(
                     f"API Error: {e.response.status_code}\n"
                     f"URL: {url}\nparams: {params}"
                 )
             elif e.response.status_code == 429:
-                if retry_count >= max_retries:
-                    raise SpotifyAPIError("Maximum retries reached.")
-                seconds = response.headers.get(
+                seconds = e.response.headers.get(
                     "Retry-After", DEFAULT_WAITING_TIME
                 )
                 self._log.debug(
@@ -212,7 +227,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
                 )
                 time.sleep(int(seconds) + 1)
                 return self._handle_response(
-                    request_type,
+                    method,
                     url,
                     params=params,
                     retry_count=retry_count + 1,
@@ -244,9 +259,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         if not (spotify_id := self._get_id(album_id)):
             return None
 
-        album_data = self._handle_response(
-            requests.get, self.album_url + spotify_id
-        )
+        album_data = self._handle_response("get", self.album_url + spotify_id)
         if album_data["name"] == "":
             self._log.debug("Album removed from Spotify: {}", album_id)
             return None
@@ -277,9 +290,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         tracks_data = album_data["tracks"]
         tracks_items = tracks_data["items"]
         while tracks_data["next"]:
-            tracks_data = self._handle_response(
-                requests.get, tracks_data["next"]
-            )
+            tracks_data = self._handle_response("get", tracks_data["next"])
             tracks_items.extend(tracks_data["items"])
 
         tracks = []
@@ -312,14 +323,12 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
             data_url=album_data["external_urls"]["spotify"],
         )
 
-    def _get_track(self, track_data):
+    def _get_track(self, track_data: JSONDict) -> TrackInfo:
         """Convert a Spotify track object dict to a TrackInfo object.
 
         :param track_data: Simplified track object
             (https://developer.spotify.com/documentation/web-api/reference/object-model/#track-object-simplified)
-        :type track_data: dict
         :return: TrackInfo object for track
-        :rtype: beets.autotag.hooks.TrackInfo
         """
         artist, artist_id = self.get_artist(track_data["artists"])
 
@@ -344,26 +353,23 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
             data_url=track_data["external_urls"]["spotify"],
         )
 
-    def track_for_id(self, track_id=None, track_data=None):
-        """Fetch a track by its Spotify ID or URL and return a
-        TrackInfo object or None if the track is not found.
+    def track_for_id(self, track_id: str) -> None | TrackInfo:
+        """Fetch a track by its Spotify ID or URL.
 
-        :param track_id: (Optional) Spotify ID or URL for the track. Either
-            ``track_id`` or ``track_data`` must be provided.
-        :type track_id: str
-        :param track_data: (Optional) Simplified track object dict. May be
-            provided instead of ``track_id`` to avoid unnecessary API calls.
-        :type track_data: dict
-        :return: TrackInfo object for track
-        :rtype: beets.autotag.hooks.TrackInfo or None
+        Returns a TrackInfo object or None if the track is not found.
         """
-        if not track_data:
-            if not (spotify_id := self._get_id(track_id)) or not (
-                track_data := self._handle_response(
-                    requests.get, f"{self.track_url}{spotify_id}"
-                )
-            ):
-                return None
+
+        if not (spotify_id := self._get_id(track_id)):
+            self._log.debug("Invalid Spotify ID: {}", track_id)
+            return None
+
+        if not (
+            track_data := self._handle_response(
+                "get", f"{self.track_url}{spotify_id}"
+            )
+        ):
+            self._log.debug("Track not found: {}", track_id)
+            return None
 
         track = self._get_track(track_data)
 
@@ -371,7 +377,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         # release) and `track.medium_total` (total number of tracks on
         # the track's disc).
         album_data = self._handle_response(
-            requests.get, self.album_url + track_data["album"]["id"]
+            "get", self.album_url + track_data["album"]["id"]
         )
         medium_total = 0
         for i, track_data in enumerate(album_data["tracks"]["items"], start=1):
@@ -383,17 +389,16 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         return track
 
     @staticmethod
-    def _construct_search_query(filters=None, keywords=""):
+    def _construct_search_query(
+        filters: dict[str, str], keywords: str = ""
+    ) -> str:
         """Construct a query string with the specified filters and keywords to
         be provided to the Spotify Search API
-        (https://developer.spotify.com/documentation/web-api/reference/search/search/#writing-a-query---guidelines).
+        (https://developer.spotify.com/documentation/web-api/reference/search).
 
         :param filters: (Optional) Field filters to apply.
-        :type filters: dict
         :param keywords: (Optional) Query keywords to use.
-        :type keywords: str
         :return: Query string to be provided to the Search API.
-        :rtype: str
         """
         query_components = [
             keywords,
@@ -404,34 +409,31 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
             query = query.decode("utf8")
         return unidecode.unidecode(query)
 
-    def _search_api(self, query_type, filters=None, keywords=""):
+    def _search_api(
+        self,
+        query_type: Literal["album", "track"],
+        filters: dict[str, str],
+        keywords: str = "",
+    ) -> Sequence[Response]:
         """Query the Spotify Search API for the specified ``keywords``,
         applying the provided ``filters``.
 
         :param query_type: Item type to search across. Valid types are:
             'album', 'artist', 'playlist', and 'track'.
-        :type query_type: str
         :param filters: (Optional) Field filters to apply.
-        :type filters: dict
         :param keywords: (Optional) Query keywords to use.
-        :type keywords: str
-        :return: JSON data for the class:`Response <Response>` object or None
-            if no search results are returned.
-        :rtype: dict or None
         """
         query = self._construct_search_query(keywords=keywords, filters=filters)
-        if not query:
-            return None
         self._log.debug(f"Searching {self.data_source} for '{query}'")
         try:
             response = self._handle_response(
-                requests.get,
+                "get",
                 self.search_url,
                 params={"q": query, "type": query_type},
             )
         except SpotifyAPIError as e:
             self._log.debug("Spotify API error: {}", e)
-            return []
+            return ()
         response_data = response.get(query_type + "s", {}).get("items", [])
         self._log.debug(
             "Found {} result(s) from {} for '{}'",
@@ -441,7 +443,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         )
         return response_data
 
-    def commands(self):
+    def commands(self) -> list[ui.Subcommand]:
         # autotagger import command
         def queries(lib, opts, args):
             success = self._parse_opts(opts)
@@ -506,17 +508,14 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         self.opts = opts
         return True
 
-    def _match_library_tracks(self, library, keywords):
+    def _match_library_tracks(self, library: Library, keywords: str):
         """Get a list of simplified track object dicts for library tracks
         matching the specified ``keywords``.
 
         :param library: beets library object to query.
-        :type library: beets.library.Library
         :param keywords: Query to match library items against.
-        :type keywords: str
         :return: List of simplified track object dicts for library items
             matching the specified query.
-        :rtype: list[dict]
         """
         results = []
         failures = []
@@ -683,11 +682,9 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
             if write:
                 item.try_write()
 
-    def track_info(self, track_id=None):
+    def track_info(self, track_id: str):
         """Fetch a track's popularity and external IDs using its Spotify ID."""
-        track_data = self._handle_response(
-            requests.get, self.track_url + track_id
-        )
+        track_data = self._handle_response("get", self.track_url + track_id)
         self._log.debug(
             "track_popularity: {} and track_isrc: {}",
             track_data.get("popularity"),
@@ -700,11 +697,11 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
             track_data.get("external_ids").get("upc"),
         )
 
-    def track_audio_features(self, track_id=None):
+    def track_audio_features(self, track_id: str):
         """Fetch track audio features by its Spotify ID."""
         try:
             return self._handle_response(
-                requests.get, self.audio_features_url + track_id
+                "get", self.audio_features_url + track_id
             )
         except SpotifyAPIError as e:
             self._log.debug("Spotify API error: {}", e)

From 4893cee5e5149c4de05bcd200d167ecda10f7503 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= <git@nicomem.com>
Date: Fri, 13 Jun 2025 20:12:03 +0000
Subject: [PATCH 40/49] Fix the MusicBrainz search not taking into account the
 album/recording aliases

---
 beetsplug/musicbrainz.py         | 3 ++-
 docs/changelog.rst               | 2 ++
 test/plugins/test_musicbrainz.py | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py
index ceb931179..e33cc4fce 100644
--- a/beetsplug/musicbrainz.py
+++ b/beetsplug/musicbrainz.py
@@ -771,6 +771,7 @@ class MusicBrainzPlugin(BeetsPlugin):
     ) -> dict[str, str]:
         criteria = {
             "release": album,
+            "alias": album,
             "tracks": str(len(items)),
         } | ({"arid": VARIOUS_ARTISTS_ID} if va_likely else {"artist": artist})
 
@@ -826,7 +827,7 @@ class MusicBrainzPlugin(BeetsPlugin):
     def item_candidates(
         self, item: Item, artist: str, title: str
     ) -> Iterator[beets.autotag.hooks.TrackInfo]:
-        criteria = {"artist": artist, "recording": title}
+        criteria = {"artist": artist, "recording": title, "alias": title}
 
         yield from filter(
             None, map(self.track_info, self._search_api("recording", criteria))
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 03a2a72ea..88b82e4da 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -37,6 +37,8 @@ Bug fixes:
 * Fix ``HiddenFileTest`` by using ``bytestring_path()``.
 * tests: Fix tests failing without ``langdetect`` (by making it required).
   :bug:`5797`
+* :doc:`plugins/musicbrainz`: Fix the MusicBrainz search not taking into
+  account the album/recording aliases
 
 For packagers:
 
diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py
index 100cfc498..aea05bc20 100644
--- a/test/plugins/test_musicbrainz.py
+++ b/test/plugins/test_musicbrainz.py
@@ -1025,6 +1025,7 @@ class TestMusicBrainzPlugin(PluginMixin):
 
         assert mb.get_album_criteria(items, "Artist ", " Album", va_likely) == {
             "release": " Album",
+            "alias": " Album",
             "tracks": str(len(items)),
             **expected_additional_criteria,
         }

From d5bd24bb648dea3880f32d40b4a7c73422a8b6ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 22 Jun 2025 17:47:30 +0100
Subject: [PATCH 41/49] Update pipx-install-action to fix caching errors

---
 .github/workflows/ci.yaml               | 2 +-
 .github/workflows/integration_test.yaml | 2 +-
 .github/workflows/lint.yml              | 8 ++++----
 .github/workflows/make_release.yaml     | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 333706dc7..ac3263bcd 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,7 +21,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.1
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - name: Setup Python with poetry caching
         # poetry cache requires poetry to already be installed, weirdly
         uses: actions/setup-python@v5
diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml
index eae04d1d4..f88864c48 100644
--- a/.github/workflows/integration_test.yaml
+++ b/.github/workflows/integration_test.yaml
@@ -9,7 +9,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.1
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - uses: actions/setup-python@v5
         with:
           python-version: 3.9
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 16757da27..c9b66f402 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -53,7 +53,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.1
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -74,7 +74,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.1
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -94,7 +94,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.1
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -118,7 +118,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.1
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
diff --git a/.github/workflows/make_release.yaml b/.github/workflows/make_release.yaml
index 7ea2d631c..b18dded8d 100644
--- a/.github/workflows/make_release.yaml
+++ b/.github/workflows/make_release.yaml
@@ -19,7 +19,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.1
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -50,7 +50,7 @@ jobs:
           ref: ${{ env.NEW_TAG }}
 
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.1
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}

From ab9b2e0b69210d733b1bf80d63380a279233c7d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 22 Jun 2025 18:07:22 +0100
Subject: [PATCH 42/49] Try using threeal/pipx-install-action@v1.0.0 for CI
 tests

I have reported the issue with BrandonLWhite/pipx-install-action@v1.0.3
failing on Windows here:
https://github.com/BrandonLWhite/pipx-install-action/issues/62
---
 .github/workflows/ci.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index ac3263bcd..9d0e67d5f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,7 +21,10 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        uses: BrandonLWhite/pipx-install-action@v1.0.3
+        # BrandonLWhite/pipx-install-action@v1.0.3 fails on Windows, thus we're using an alternative action here
+        uses: threeal/pipx-install-action@v1.0.0
+        with:
+          packages: poethepoet>=0.26 poetry<2
       - name: Setup Python with poetry caching
         # poetry cache requires poetry to already be installed, weirdly
         uses: actions/setup-python@v5

From 9926a1ac3cb8d4877061df2fae94ddb8b2aab7b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= <snejus@protonmail.com>
Date: Sun, 29 Jun 2025 13:43:48 +0100
Subject: [PATCH 43/49] Revert "Try using threeal/pipx-install-action@v1.0.0
 for CI tests"

This reverts commit ab9b2e0b69210d733b1bf80d63380a279233c7d9.
---
 .github/workflows/ci.yaml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 9d0e67d5f..ac3263bcd 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,10 +21,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install Python tools
-        # BrandonLWhite/pipx-install-action@v1.0.3 fails on Windows, thus we're using an alternative action here
-        uses: threeal/pipx-install-action@v1.0.0
-        with:
-          packages: poethepoet>=0.26 poetry<2
+        uses: BrandonLWhite/pipx-install-action@v1.0.3
       - name: Setup Python with poetry caching
         # poetry cache requires poetry to already be installed, weirdly
         uses: actions/setup-python@v5

From dd6cb538ac343c837348a0e4b404bc994ed97db1 Mon Sep 17 00:00:00 2001
From: dhruvravii <122979040+dhruvravii@users.noreply.github.com>
Date: Tue, 1 Jul 2025 14:38:54 +0530
Subject: [PATCH 44/49] Fix: Spotify plugin unable to recognize Chinese and
 Japanese albums. (#5705)

Fixes an issue where each spotify query was converted to ascii before sending. Adds a
new config option to enable legacy behaviour.

A file called japanese_track_request.json was made to mimic the Spotify
API response since I don't have the credentials. Entries in that will
need to be modified with the actual entries.

Co-authored-by: Sebastian Mohr <sebastian@mohrenclan.de>
Co-authored-by: Sebastian Mohr <39738318+semohr@users.noreply.github.com>
Co-authored-by: J0J0 Todos <2733783+JOJ0@users.noreply.github.com>
---
 beetsplug/spotify.py                          | 12 ++-
 docs/changelog.rst                            |  7 +-
 docs/plugins/spotify.rst                      |  8 ++
 test/plugins/test_spotify.py                  | 79 +++++++++++++++-
 test/rsrc/spotify/japanese_track_request.json | 89 +++++++++++++++++++
 5 files changed, 188 insertions(+), 7 deletions(-)
 create mode 100644 test/rsrc/spotify/japanese_track_request.json

diff --git a/beetsplug/spotify.py b/beetsplug/spotify.py
index 9d285928a..76ceeed68 100644
--- a/beetsplug/spotify.py
+++ b/beetsplug/spotify.py
@@ -106,6 +106,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
                 "client_id": "4e414367a1d14c75a5c5129a627fcab8",
                 "client_secret": "f82bdc09b2254f1a8286815d02fd46dc",
                 "tokenfile": "spotify_token.json",
+                "search_query_ascii": False,
             }
         )
         self.config["client_id"].redact = True
@@ -388,9 +389,8 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         track.medium_total = medium_total
         return track
 
-    @staticmethod
     def _construct_search_query(
-        filters: dict[str, str], keywords: str = ""
+        self, filters: dict[str, str], keywords: str = ""
     ) -> str:
         """Construct a query string with the specified filters and keywords to
         be provided to the Spotify Search API
@@ -407,7 +407,11 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         query = " ".join([q for q in query_components if q])
         if not isinstance(query, str):
             query = query.decode("utf8")
-        return unidecode.unidecode(query)
+
+        if self.config["search_query_ascii"].get():
+            query = unidecode.unidecode(query)
+
+        return query
 
     def _search_api(
         self,
@@ -424,6 +428,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
         :param keywords: (Optional) Query keywords to use.
         """
         query = self._construct_search_query(keywords=keywords, filters=filters)
+
         self._log.debug(f"Searching {self.data_source} for '{query}'")
         try:
             response = self._handle_response(
@@ -560,6 +565,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
                 query = self._construct_search_query(
                     keywords=keywords, filters=query_filters
                 )
+
                 failures.append(query)
                 continue
 
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 88b82e4da..1baa54011 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -39,7 +39,12 @@ Bug fixes:
   :bug:`5797`
 * :doc:`plugins/musicbrainz`: Fix the MusicBrainz search not taking into
   account the album/recording aliases
-
+* :doc:`/plugins/spotify`: Fix the issue with that every query to spotify was
+  ascii encoded. This resulted in bad matches for queries that contained special
+  e.g. non latin characters as 盗作. If you want to keep the legacy behavior
+  set the config option ``spotify.search_query_ascii: yes``.
+  :bug:`5699`
+  
 For packagers:
 
 * Optional ``extra_tags`` parameter has been removed from
diff --git a/docs/plugins/spotify.rst b/docs/plugins/spotify.rst
index 233d00726..c5aff8ef3 100644
--- a/docs/plugins/spotify.rst
+++ b/docs/plugins/spotify.rst
@@ -83,6 +83,13 @@ in config.yaml under the ``spotify:`` section:
   track/album/artist fields before sending them to Spotify.  Can be useful for
   changing certain abbreviations, like ft. -> feat.  See the examples below.
   Default: None.
+- **search_query_ascii**: If set to ``yes``, the search query will be converted to
+  ASCII before being sent to Spotify. Converting searches to ASCII can
+  enhance search results in some cases, but in general, it is not recommended. 
+  For instance `artist:deadmau5 album:4×4` will be converted to 
+  `artist:deadmau5 album:4x4` (notice `×!=x`).
+  Default: ``no``.
+
 
 Here's an example::
 
@@ -92,6 +99,7 @@ Here's an example::
         region_filter: US
         show_failures: on
         tiebreak: first
+        search_query_ascii: no
 
         regex: [
             {
diff --git a/test/plugins/test_spotify.py b/test/plugins/test_spotify.py
index a2336df10..a2fb26f4b 100644
--- a/test/plugins/test_spotify.py
+++ b/test/plugins/test_spotify.py
@@ -7,7 +7,7 @@ import responses
 
 from beets.library import Item
 from beets.test import _common
-from beets.test.helper import BeetsTestCase
+from beets.test.helper import PluginTestCase
 from beetsplug import spotify
 
 
@@ -23,10 +23,11 @@ def _params(url):
     return parse_qs(urlparse(url).query)
 
 
-class SpotifyPluginTest(BeetsTestCase):
+class SpotifyPluginTest(PluginTestCase):
+    plugin = "spotify"
+
     @responses.activate
     def setUp(self):
-        super().setUp()
         responses.add(
             responses.POST,
             spotify.SpotifyPlugin.oauth_token_url,
@@ -39,6 +40,7 @@ class SpotifyPluginTest(BeetsTestCase):
                 "scope": "",
             },
         )
+        super().setUp()
         self.spotify = spotify.SpotifyPlugin()
         opts = ArgumentsMock("list", False)
         self.spotify._parse_opts(opts)
@@ -176,3 +178,74 @@ class SpotifyPluginTest(BeetsTestCase):
         results = self.spotify._match_library_tracks(self.lib, "Happy")
         assert 1 == len(results)
         assert "6NPVjNh8Jhru9xOmyQigds" == results[0]["id"]
+
+    @responses.activate
+    def test_japanese_track(self):
+        """Ensure non-ASCII characters remain unchanged in search queries"""
+
+        # Path to the mock JSON file for the Japanese track
+        json_file = os.path.join(
+            _common.RSRC, b"spotify", b"japanese_track_request.json"
+        )
+
+        # Load the mock JSON response
+        with open(json_file, "rb") as f:
+            response_body = f.read()
+
+        # Mock Spotify Search API response
+        responses.add(
+            responses.GET,
+            spotify.SpotifyPlugin.search_url,
+            body=response_body,
+            status=200,
+            content_type="application/json",
+        )
+
+        # Create a mock item with Japanese metadata
+        item = Item(
+            mb_trackid="56789",
+            album="盗作",
+            albumartist="ヨルシカ",
+            title="思想犯",
+            length=10,
+        )
+        item.add(self.lib)
+
+        # Search without ascii encoding
+
+        with self.configure_plugin(
+            {
+                "search_query_ascii": False,
+            }
+        ):
+            assert self.spotify.config["search_query_ascii"].get() is False
+            # Call the method to match library tracks
+            results = self.spotify._match_library_tracks(self.lib, item.title)
+
+            # Assertions to verify results
+            assert results is not None
+            assert 1 == len(results)
+            assert results[0]["name"] == item.title
+            assert results[0]["artists"][0]["name"] == item.albumartist
+            assert results[0]["album"]["name"] == item.album
+
+            # Verify search query parameters
+            params = _params(responses.calls[0].request.url)
+            query = params["q"][0]
+            assert item.title in query
+            assert f"artist:{item.albumartist}" in query
+            assert f"album:{item.album}" in query
+            assert not query.isascii()
+
+        # Is not found in the library if ascii encoding is enabled
+        with self.configure_plugin(
+            {
+                "search_query_ascii": True,
+            }
+        ):
+            assert self.spotify.config["search_query_ascii"].get() is True
+            results = self.spotify._match_library_tracks(self.lib, item.title)
+            params = _params(responses.calls[1].request.url)
+            query = params["q"][0]
+
+            assert query.isascii()
diff --git a/test/rsrc/spotify/japanese_track_request.json b/test/rsrc/spotify/japanese_track_request.json
new file mode 100644
index 000000000..04559588e
--- /dev/null
+++ b/test/rsrc/spotify/japanese_track_request.json
@@ -0,0 +1,89 @@
+{
+    "tracks":{
+        "href":"https://api.spotify.com/v1/search?query=Happy+album%3ADespicable+Me+2+artist%3APharrell+Williams&offset=0&limit=20&type=track",
+        "items":[
+            {
+                "album":{
+                    "album_type":"compilation",
+                    "available_markets":[
+                        "AD", "AR", "AT", "AU", "BE", "BG", "BO", "BR", "CA",
+                        "CH", "CL", "CO", "CR", "CY", "CZ", "DE", "DK", "DO",
+                        "EC", "EE", "ES", "FI", "FR", "GB", "GR", "GT", "HK",
+                        "HN", "HU", "IE", "IS", "IT", "LI", "LT", "LU", "LV",
+                        "MC", "MT", "MX", "MY", "NI", "NL", "NO", "NZ", "PA",
+                        "PE", "PH", "PL", "PT", "PY", "RO", "SE", "SG", "SI",
+                        "SK", "SV", "TR", "TW", "US", "UY"
+                    ],
+                    "external_urls":{
+                        "spotify":"https://open.spotify.com/album/5l3zEmMrOhOzG8d8s83GOL"
+                    },
+                    "href":"https://api.spotify.com/v1/albums/5l3zEmMrOhOzG8d8s83GOL",
+                    "id":"5l3zEmMrOhOzG8d8s83GOL",
+                    "images":[
+                        {
+                            "height":640,
+                            "width":640,
+                            "url":"https://i.scdn.co/image/cb7905340c132365bbaee3f17498f062858382e8"
+                        },
+                        {
+                            "height":300,
+                            "width":300,
+                            "url":"https://i.scdn.co/image/af369120f0b20099d6784ab31c88256113f10ffb"
+                        },
+                        {
+                            "height":64,
+                            "width":64,
+                            "url":"https://i.scdn.co/image/9dad385ddf2e7db0bef20cec1fcbdb08689d9ae8"
+                        }
+                    ],
+                    "name":"盗作",
+                    "type":"album",
+                    "uri":"spotify:album:5l3zEmMrOhOzG8d8s83GOL"
+                },
+                "artists":[
+                    {
+                        "external_urls":{
+                            "spotify":"https://open.spotify.com/artist/2RdwBSPQiwcmiDo9kixcl8"
+                        },
+                        "href":"https://api.spotify.com/v1/artists/2RdwBSPQiwcmiDo9kixcl8",
+                        "id":"2RdwBSPQiwcmiDo9kixcl8",
+                        "name":"ヨルシカ",
+                        "type":"artist",
+                        "uri":"spotify:artist:2RdwBSPQiwcmiDo9kixcl8"
+                    }
+                ],
+                "available_markets":[
+                    "AD", "AR", "AT", "AU", "BE", "BG", "BO", "BR", "CA",
+                    "CH", "CL", "CO", "CR", "CY", "CZ", "DE", "DK", "DO",
+                    "EC", "EE", "ES", "FI", "FR", "GB", "GR", "GT", "HK",
+                    "HN", "HU", "IE", "IS", "IT", "LI", "LT", "LU", "LV",
+                    "MC", "MT", "MX", "MY", "NI", "NL", "NO", "NZ", "PA",
+                    "PE", "PH", "PL", "PT", "PY", "RO", "SE", "SG", "SI",
+                    "SK", "SV", "TR", "TW", "US", "UY"
+                ],
+                "disc_number":1,
+                "duration_ms":233305,
+                "explicit":false,
+                "external_ids":{
+                    "isrc":"USQ4E1300686"
+                },
+                "external_urls":{
+                    "spotify":"https://open.spotify.com/track/6NPVjNh8Jhru9xOmyQigds"
+                },
+                "href":"https://api.spotify.com/v1/tracks/6NPVjNh8Jhru9xOmyQigds",
+                "id":"6NPVjNh8Jhru9xOmyQigds",
+                "name":"思想犯",
+                "popularity":89,
+                "preview_url":"https://p.scdn.co/mp3-preview/6b00000be293e6b25f61c33e206a0c522b5cbc87",
+                "track_number":4,
+                "type":"track",
+                "uri":"spotify:track:6NPVjNh8Jhru9xOmyQigds"
+            }
+        ],
+        "limit":20,
+        "next":null,
+        "offset":0,
+        "previous":null,
+        "total":1
+    }
+}

From ac96b9b64e97cdca0be6305e939e902e8742f147 Mon Sep 17 00:00:00 2001
From: Noor <kchr@fripost.org>
Date: Wed, 2 Jul 2025 20:40:37 +0200
Subject: [PATCH 45/49] Preserve line breaks for example cases in substitution
 plugin docs (#5846)

## Description

Adds line block markup to example substitutions in the plugin
documentation, so that each case is shown on a separate line:

> The replacement can be an expression utilising the matched regex,
allowing us to create more general rules. Say for example, we want to
sort all albums by multiple artists into the directory of the first
artist. We can thus capture everything before the first ,, `` &`` or ``
and``, and use this capture group in the output, discarding the rest of
the string.
>
> ```yaml
> substitute:
>  ^(.*?)(,| &| and).*: \1
> ```
>
> This would handle all the below cases in a single rule:
>
>> Bob Dylan and The Band -> Bob Dylan
>> Neil Young & Crazy Horse -> Neil Young
>> James Yorkston, Nina Persson & The Second Hand Orchestra -> James
Yorkston
---
 docs/changelog.rst          | 2 ++
 docs/plugins/substitute.rst | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 1baa54011..0e5799846 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -61,6 +61,8 @@ Other changes:
 
 * Documentation structure for auto generated API references changed slightly.
   Autogenerated API references are now located in the `docs/api` subdirectory.
+* :doc:`/plugins/substitute`: Fix rST formatting for example cases so that each
+  case is shown on separate lines.
 
 2.3.1 (May 14, 2025)
 --------------------
diff --git a/docs/plugins/substitute.rst b/docs/plugins/substitute.rst
index 87ee2ad45..c6fec8054 100644
--- a/docs/plugins/substitute.rst
+++ b/docs/plugins/substitute.rst
@@ -31,9 +31,9 @@ group in the output, discarding the rest of the string.
 
 This would handle all the below cases in a single rule:
 
-    Bob Dylan and The Band -> Bob Dylan
-    Neil Young & Crazy Horse -> Neil Young
-    James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston
+    | Bob Dylan and The Band -> Bob Dylan
+    | Neil Young & Crazy Horse -> Neil Young
+    | James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston
 
 
 To apply the substitution, you have to call the function ``%substitute{}`` in the paths section. For example:

From 537a71ff8286461440ee883ff2c077636bf7e120 Mon Sep 17 00:00:00 2001
From: J0J0 Todos <jojo@peek-a-boo.at>
Date: Mon, 6 Nov 2023 14:47:34 +0100
Subject: [PATCH 46/49] duplicates: Add --remove option to duplicates plugin

Removes from library but keeps files.
---
 beetsplug/duplicates.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/beetsplug/duplicates.py b/beetsplug/duplicates.py
index fadb29845..76441133f 100644
--- a/beetsplug/duplicates.py
+++ b/beetsplug/duplicates.py
@@ -53,6 +53,7 @@ class DuplicatesPlugin(BeetsPlugin):
                 "tiebreak": {},
                 "strict": False,
                 "tag": "",
+                "remove": False,
             }
         )
 
@@ -131,6 +132,13 @@ class DuplicatesPlugin(BeetsPlugin):
             action="store",
             help="tag matched items with 'k=v' attribute",
         )
+        self._command.parser.add_option(
+            "-r",
+            "--remove",
+            dest="remove",
+            action="store_true",
+            help="remove items from library",
+        )
         self._command.parser.add_all_common_options()
 
     def commands(self):
@@ -141,6 +149,7 @@ class DuplicatesPlugin(BeetsPlugin):
             copy = bytestring_path(self.config["copy"].as_str())
             count = self.config["count"].get(bool)
             delete = self.config["delete"].get(bool)
+            remove = self.config["remove"].get(bool)
             fmt = self.config["format"].get(str)
             full = self.config["full"].get(bool)
             keys = self.config["keys"].as_str_seq()
@@ -196,6 +205,7 @@ class DuplicatesPlugin(BeetsPlugin):
                             copy=copy,
                             move=move,
                             delete=delete,
+                            remove=remove,
                             tag=tag,
                             fmt=fmt.format(obj_count),
                         )
@@ -204,7 +214,14 @@ class DuplicatesPlugin(BeetsPlugin):
         return [self._command]
 
     def _process_item(
-        self, item, copy=False, move=False, delete=False, tag=False, fmt=""
+        self,
+        item,
+        copy=False,
+        move=False,
+        delete=False,
+        tag=False,
+        fmt="",
+        remove=False,
     ):
         """Process Item `item`."""
         print_(format(item, fmt))
@@ -216,6 +233,8 @@ class DuplicatesPlugin(BeetsPlugin):
             item.store()
         if delete:
             item.remove(delete=True)
+        if remove:
+            item.remove(delete=False)
         if tag:
             try:
                 k, v = tag.split("=")

From 549847bfd8c64ef68c65b36f652a5d01031e4294 Mon Sep 17 00:00:00 2001
From: J0J0 Todos <jojo@peek-a-boo.at>
Date: Sun, 22 Jun 2025 08:48:10 +0200
Subject: [PATCH 47/49] duplicates: Add docs for --remove option

---
 docs/plugins/duplicates.rst | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/plugins/duplicates.rst b/docs/plugins/duplicates.rst
index 8b11b6661..8ce0e4578 100644
--- a/docs/plugins/duplicates.rst
+++ b/docs/plugins/duplicates.rst
@@ -34,6 +34,7 @@ duplicates themselves via command-line switches ::
   -o DEST, --copy=DEST  copy items to dest
   -p, --path            print paths for matched items or albums
   -t TAG, --tag=TAG     tag matched items with 'k=v' attribute
+  -r, --remove          remove items from library
 
 Configuration
 -------------
@@ -57,7 +58,7 @@ file. The available options mirror the command-line options:
   ``$albumartist - $album - $title: $count`` (for tracks) or ``$albumartist -
   $album: $count`` (for albums).
   Default: ``no``.
-- **delete**: Removes matched items from the library and from the disk.
+- **delete**: Remove matched items from the library and from the disk.
   Default: ``no``
 - **format**: A specific format with which to print every track
   or album. This uses the same template syntax as beets'
@@ -92,6 +93,8 @@ file. The available options mirror the command-line options:
   set. If you would like to consider the lower bitrates as duplicates,
   for example, set ``tiebreak: items: [bitrate]``.
   Default: ``{}``.
+- **remove**: Remove matched items from the library, but not from the disk.
+  Default: ``no``.
 
 Examples
 --------

From 47eee070ba426fc61bfd3e72d166917c60076b8a Mon Sep 17 00:00:00 2001
From: J0J0 Todos <2733783+JOJ0@users.noreply.github.com>
Date: Wed, 2 Jul 2025 07:10:46 +0200
Subject: [PATCH 48/49] duplicates: remove or delete options mutually exclusive

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 beetsplug/duplicates.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/beetsplug/duplicates.py b/beetsplug/duplicates.py
index 76441133f..5a2be0cd2 100644
--- a/beetsplug/duplicates.py
+++ b/beetsplug/duplicates.py
@@ -233,7 +233,7 @@ class DuplicatesPlugin(BeetsPlugin):
             item.store()
         if delete:
             item.remove(delete=True)
-        if remove:
+        elif remove:
             item.remove(delete=False)
         if tag:
             try:

From 7c22cd635c502e2f3ccb71803c3034bc2bb2ec11 Mon Sep 17 00:00:00 2001
From: J0J0 Todos <jojo@peek-a-boo.at>
Date: Sat, 5 Jul 2025 07:24:26 +0200
Subject: [PATCH 49/49] duplicates: Add changelog for --remove option

---
 docs/changelog.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 0e5799846..d1a477cb5 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -23,6 +23,9 @@ New features:
   singletons by their Discogs ID.
   :bug:`4661`
 * :doc:`plugins/replace`: Add new plugin.
+* :doc:`plugins/duplicates`: Add ``--remove`` option, allowing to remove from
+  the library without deleting media files.
+  :bug:`5832`
 
 Bug fixes: