Merge branch 'master' into feature/add-artist-to-item-entry-template

Martin Atukunda 2025-07-06 12:14:48 +03:00 committed by GitHub
commit 8ea7346575
GPG key ID: B5690EEEBB952194
72 changed files with 2702 additions and 2092 deletions


@ -21,7 +21,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install Python tools
uses: BrandonLWhite/pipx-install-action@v1.0.1
uses: BrandonLWhite/pipx-install-action@v1.0.3
- name: Setup Python with poetry caching
# poetry cache requires poetry to already be installed, weirdly
uses: actions/setup-python@v5
@ -33,7 +33,7 @@ jobs:
if: matrix.platform == 'ubuntu-latest'
run: |
sudo apt update
sudo apt install ffmpeg gobject-introspection libcairo2-dev libgirepository-2.0-dev pandoc
sudo apt install ffmpeg gobject-introspection libcairo2-dev libgirepository-2.0-dev pandoc imagemagick
- name: Get changed lyrics files
id: lyrics-update
@ -60,7 +60,7 @@ jobs:
env:
LYRICS_UPDATED: ${{ steps.lyrics-update.outputs.any_changed }}
run: |
poetry install --extras=autobpm --extras=lyrics --extras=docs --extras=replaygain --extras=reflink
poetry install --extras=autobpm --extras=lyrics --extras=docs --extras=replaygain --extras=reflink --extras=fetchart
poe docs
poe test-with-coverage


@ -9,7 +9,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install Python tools
uses: BrandonLWhite/pipx-install-action@v1.0.1
uses: BrandonLWhite/pipx-install-action@v1.0.3
- uses: actions/setup-python@v5
with:
python-version: 3.9


@ -53,7 +53,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install Python tools
uses: BrandonLWhite/pipx-install-action@v1.0.1
uses: BrandonLWhite/pipx-install-action@v1.0.3
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@ -74,7 +74,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install Python tools
uses: BrandonLWhite/pipx-install-action@v1.0.1
uses: BrandonLWhite/pipx-install-action@v1.0.3
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@ -94,7 +94,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install Python tools
uses: BrandonLWhite/pipx-install-action@v1.0.1
uses: BrandonLWhite/pipx-install-action@v1.0.3
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@ -118,7 +118,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install Python tools
uses: BrandonLWhite/pipx-install-action@v1.0.1
uses: BrandonLWhite/pipx-install-action@v1.0.3
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}


@ -19,7 +19,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install Python tools
uses: BrandonLWhite/pipx-install-action@v1.0.1
uses: BrandonLWhite/pipx-install-action@v1.0.3
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@ -50,7 +50,7 @@ jobs:
ref: ${{ env.NEW_TAG }}
- name: Install Python tools
uses: BrandonLWhite/pipx-install-action@v1.0.1
uses: BrandonLWhite/pipx-install-action@v1.0.3
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}


@ -154,7 +154,7 @@ Code Contribution Ideas
^^^^^^^^^^^^^^^^^^^^^^^
- We maintain a set of `issues marked as
“bite-sized” <https://github.com/beetbox/beets/labels/bitesize>`__.
“good first issue” <https://github.com/beetbox/beets/labels/good%20first%20issue>`__.
These are issues that would serve as a good introduction to the
codebase. Claim one and start exploring!
- Like testing? Our `test


@ -14,36 +14,37 @@
"""Facilities for automatically determining files' correct metadata."""
from collections.abc import Mapping, Sequence
from typing import Union
from __future__ import annotations
from typing import TYPE_CHECKING, Union
from beets import config, logging
from beets.library import Album, Item, LibModel
from beets.util import get_most_common_tags as current_metadata
# Parts of external interface.
from beets.util import unique_list
from .hooks import AlbumInfo, AlbumMatch, Distance, TrackInfo, TrackMatch
from .match import (
Proposal,
Recommendation,
current_metadata,
tag_album,
tag_item,
)
from .distance import Distance
from .hooks import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch
from .match import Proposal, Recommendation, tag_album, tag_item
if TYPE_CHECKING:
from collections.abc import Mapping, Sequence
from beets.library import Album, Item, LibModel
__all__ = [
"AlbumInfo",
"AlbumMatch",
"Distance",
"TrackInfo",
"TrackMatch",
"Distance", # for backwards compatibility
"Proposal",
"Recommendation",
"TrackInfo",
"TrackMatch",
"apply_album_metadata",
"apply_item_metadata",
"apply_metadata",
"current_metadata",
"current_metadata", # for backwards compatibility
"tag_album",
"tag_item",
]
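
As a quick illustration of the re-exports above (a sketch, assuming beets with this change applied is importable), the old flat import path keeps working while new code can target the dedicated modules:

from beets.autotag import Distance as LegacyDistance, current_metadata
from beets.autotag.distance import Distance
from beets.util import get_most_common_tags

# The legacy names resolve to the same objects, so plugin code that
# imported them from beets.autotag keeps working unchanged.
assert LegacyDistance is Distance
assert current_metadata is get_most_common_tags
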

beets/autotag/distance.py (new file, +531 lines)

@ -0,0 +1,531 @@
from __future__ import annotations
import datetime
import re
from functools import cache, total_ordering
from typing import TYPE_CHECKING, Any
from jellyfish import levenshtein_distance
from unidecode import unidecode
from beets import config, plugins
from beets.util import as_string, cached_classproperty, get_most_common_tags
if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
from beets.library import Item
from .hooks import AlbumInfo, TrackInfo
# Candidate distance scoring.
# Artist signals that indicate "various artists". These are used at the
# album level to determine whether a given release is likely a VA
# release and also on the track level to remove the penalty for
# differing artists.
VA_ARTISTS = ("", "various artists", "various", "va", "unknown")
# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma.
SD_END_WORDS = ["the", "a", "an"]
# Reduced weights for certain portions of the string.
SD_PATTERNS = [
(r"^the ", 0.1),
(r"[\[\(]?(ep|single)[\]\)]?", 0.0),
(r"[\[\(]?(featuring|feat|ft)[\. :].+", 0.1),
(r"\(.*?\)", 0.3),
(r"\[.*?\]", 0.3),
(r"(, )?(pt\.|part) .+", 0.2),
]
# Replacements to use before testing distance.
SD_REPLACE = [
(r"&", "and"),
]
def _string_dist_basic(str1: str, str2: str) -> float:
"""Basic edit distance between two strings, ignoring
non-alphanumeric characters and case. Comparisons are based on a
transliteration/lowering to ASCII characters. Normalized by string
length.
"""
assert isinstance(str1, str)
assert isinstance(str2, str)
str1 = as_string(unidecode(str1))
str2 = as_string(unidecode(str2))
str1 = re.sub(r"[^a-z0-9]", "", str1.lower())
str2 = re.sub(r"[^a-z0-9]", "", str2.lower())
if not str1 and not str2:
return 0.0
return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2)))
def string_dist(str1: str | None, str2: str | None) -> float:
"""Gives an "intuitive" edit distance between two strings. This is
an edit distance, normalized by the string length, with a number of
tweaks that reflect intuition about text.
"""
if str1 is None and str2 is None:
return 0.0
if str1 is None or str2 is None:
return 1.0
str1 = str1.lower()
str2 = str2.lower()
# Don't penalize strings that move certain words to the end. For
# example, "the something" should be considered equal to
# "something, the".
for word in SD_END_WORDS:
if str1.endswith(", %s" % word):
str1 = "{} {}".format(word, str1[: -len(word) - 2])
if str2.endswith(", %s" % word):
str2 = "{} {}".format(word, str2[: -len(word) - 2])
# Perform a couple of basic normalizing substitutions.
for pat, repl in SD_REPLACE:
str1 = re.sub(pat, repl, str1)
str2 = re.sub(pat, repl, str2)
# Change the weight for certain string portions matched by a set
# of regular expressions. We gradually change the strings and build
# up penalties associated with parts of the string that were
# deleted.
base_dist = _string_dist_basic(str1, str2)
penalty = 0.0
for pat, weight in SD_PATTERNS:
# Get strings that drop the pattern.
case_str1 = re.sub(pat, "", str1)
case_str2 = re.sub(pat, "", str2)
if case_str1 != str1 or case_str2 != str2:
# If the pattern was present (i.e., it is deleted in the
# current case), recalculate the distances for the
# modified strings.
case_dist = _string_dist_basic(case_str1, case_str2)
case_delta = max(0.0, base_dist - case_dist)
if case_delta == 0.0:
continue
# Shift our baseline strings down (to avoid rematching the
# same part of the string) and add a scaled distance
# amount to the penalties.
str1 = case_str1
str2 = case_str2
base_dist = case_dist
penalty += weight * case_delta
return base_dist + penalty
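
To give a concrete feel for the tweaks above, a small sketch (assuming beets and its jellyfish/unidecode dependencies are installed; the exact values are approximate):

from beets.autotag.distance import string_dist

# Moving an SD_END_WORDS article behind a comma costs nothing.
print(string_dist("The Beatles", "Beatles, The"))               # 0.0
# "&" is rewritten to "and" before comparison (SD_REPLACE).
print(string_dist("Simon & Garfunkel", "Simon and Garfunkel"))  # 0.0
# A "feat." suffix is only lightly penalised (SD_PATTERNS).
print(string_dist("Umbrella", "Umbrella (feat. Jay-Z)"))        # ~0.05
# Unrelated strings approach the maximum of 1.0.
print(string_dist("Abbey Road", "Nevermind"))                   # ~0.9
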
@total_ordering
class Distance:
"""Keeps track of multiple distance penalties. Provides a single
weighted distance for all penalties as well as a weighted distance
for each individual penalty.
"""
def __init__(self) -> None:
self._penalties: dict[str, list[float]] = {}
self.tracks: dict[TrackInfo, Distance] = {}
@cached_classproperty
def _weights(cls) -> dict[str, float]:
"""A dictionary from keys to floating-point weights."""
weights_view = config["match"]["distance_weights"]
weights = {}
for key in weights_view.keys():
weights[key] = weights_view[key].as_number()
return weights
# Access the components and their aggregates.
@property
def distance(self) -> float:
"""Return a weighted and normalized distance across all
penalties.
"""
dist_max = self.max_distance
if dist_max:
return self.raw_distance / self.max_distance
return 0.0
@property
def max_distance(self) -> float:
"""Return the maximum distance penalty (normalization factor)."""
dist_max = 0.0
for key, penalty in self._penalties.items():
dist_max += len(penalty) * self._weights[key]
return dist_max
@property
def raw_distance(self) -> float:
"""Return the raw (denormalized) distance."""
dist_raw = 0.0
for key, penalty in self._penalties.items():
dist_raw += sum(penalty) * self._weights[key]
return dist_raw
def items(self) -> list[tuple[str, float]]:
"""Return a list of (key, dist) pairs, with `dist` being the
weighted distance, sorted from highest to lowest. Does not
include penalties with a zero value.
"""
list_ = []
for key in self._penalties:
dist = self[key]
if dist:
list_.append((key, dist))
# Convert distance into a negative float so we can sort items in
# ascending order (for keys, when the penalty is equal) and
# still get the items with the biggest distance first.
return sorted(
list_, key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0])
)
def __hash__(self) -> int:
return id(self)
def __eq__(self, other) -> bool:
return self.distance == other
# Behave like a float.
def __lt__(self, other) -> bool:
return self.distance < other
def __float__(self) -> float:
return self.distance
def __sub__(self, other) -> float:
return self.distance - other
def __rsub__(self, other) -> float:
return other - self.distance
def __str__(self) -> str:
return f"{self.distance:.2f}"
# Behave like a dict.
def __getitem__(self, key) -> float:
"""Returns the weighted distance for a named penalty."""
dist = sum(self._penalties[key]) * self._weights[key]
dist_max = self.max_distance
if dist_max:
return dist / dist_max
return 0.0
def __iter__(self) -> Iterator[tuple[str, float]]:
return iter(self.items())
def __len__(self) -> int:
return len(self.items())
def keys(self) -> list[str]:
return [key for key, _ in self.items()]
def update(self, dist: Distance):
"""Adds all the distance penalties from `dist`."""
if not isinstance(dist, Distance):
raise ValueError(
"`dist` must be a Distance object, not {}".format(type(dist))
)
for key, penalties in dist._penalties.items():
self._penalties.setdefault(key, []).extend(penalties)
# Adding components.
def _eq(self, value1: re.Pattern[str] | Any, value2: Any) -> bool:
"""Returns True if `value1` is equal to `value2`. `value1` may
be a compiled regular expression, in which case it will be
matched against `value2`.
"""
if isinstance(value1, re.Pattern):
return bool(value1.match(value2))
return value1 == value2
def add(self, key: str, dist: float):
"""Adds a distance penalty. `key` must correspond with a
configured weight setting. `dist` must be a float between 0.0
and 1.0, and will be added to any existing distance penalties
for the same key.
"""
if not 0.0 <= dist <= 1.0:
raise ValueError(f"`dist` must be between 0.0 and 1.0, not {dist}")
self._penalties.setdefault(key, []).append(dist)
def add_equality(
self,
key: str,
value: Any,
options: list[Any] | tuple[Any, ...] | Any,
):
"""Adds a distance penalty of 1.0 if `value` doesn't match any
of the values in `options`. If an option is a compiled regular
expression, it will be considered equal if it matches against
`value`.
"""
if not isinstance(options, (list, tuple)):
options = [options]
for opt in options:
if self._eq(opt, value):
dist = 0.0
break
else:
dist = 1.0
self.add(key, dist)
def add_expr(self, key: str, expr: bool):
"""Adds a distance penalty of 1.0 if `expr` evaluates to True,
or 0.0.
"""
if expr:
self.add(key, 1.0)
else:
self.add(key, 0.0)
def add_number(self, key: str, number1: int, number2: int):
"""Adds a distance penalty of 1.0 for each number of difference
between `number1` and `number2`, or 0.0 when there is no
difference. Use this when there is no upper limit on the
difference between the two numbers.
"""
diff = abs(number1 - number2)
if diff:
for i in range(diff):
self.add(key, 1.0)
else:
self.add(key, 0.0)
def add_priority(
self,
key: str,
value: Any,
options: list[Any] | tuple[Any, ...] | Any,
):
"""Adds a distance penalty that corresponds to the position at
which `value` appears in `options`. A distance penalty of 0.0
for the first option, or 1.0 if there is no matching option. If
an option is a compiled regular expression, it will be
considered equal if it matches against `value`.
"""
if not isinstance(options, (list, tuple)):
options = [options]
unit = 1.0 / (len(options) or 1)
for i, opt in enumerate(options):
if self._eq(opt, value):
dist = i * unit
break
else:
dist = 1.0
self.add(key, dist)
def add_ratio(
self,
key: str,
number1: int | float,
number2: int | float,
):
"""Adds a distance penalty for `number1` as a ratio of `number2`.
`number1` is clamped between 0 and `number2`.
"""
number = float(max(min(number1, number2), 0))
if number2:
dist = number / number2
else:
dist = 0.0
self.add(key, dist)
def add_string(self, key: str, str1: str | None, str2: str | None):
"""Adds a distance penalty based on the edit distance between
`str1` and `str2`.
"""
dist = string_dist(str1, str2)
self.add(key, dist)
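
A minimal sketch of how the class is used, assuming the default match.distance_weights configuration that ships with beets:

from beets.autotag.distance import Distance

dist = Distance()
dist.add_string("album", "Abbey Road", "Abbey Road (Remastered)")
dist.add_expr("year", 1969 != 2019)  # boolean mismatch: full 1.0 penalty
dist.add("country", 0.0)             # explicit zero penalty

print(float(dist))        # weighted, normalised distance in [0, 1]
print(dist.items())       # non-zero penalties, largest first
print(dist.max_distance)  # normalisation factor derived from the weights
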
@cache
def get_track_length_grace() -> float:
"""Get cached grace period for track length matching."""
return config["match"]["track_length_grace"].as_number()
@cache
def get_track_length_max() -> float:
"""Get cached maximum track length for track length matching."""
return config["match"]["track_length_max"].as_number()
def track_index_changed(item: Item, track_info: TrackInfo) -> bool:
"""Returns True if the item and track info index is different. Tolerates
per disc and per release numbering.
"""
return item.track not in (track_info.medium_index, track_info.index)
def track_distance(
item: Item,
track_info: TrackInfo,
incl_artist: bool = False,
) -> Distance:
"""Determines the significance of a track metadata change. Returns a
Distance object. `incl_artist` indicates that a distance component should
be included for the track artist (i.e., for various-artist releases).
``track_length_grace`` and ``track_length_max`` configuration options are
cached because this function is called many times during the matching
process and their access comes with a performance overhead.
"""
dist = Distance()
# Length.
if info_length := track_info.length:
diff = abs(item.length - info_length) - get_track_length_grace()
dist.add_ratio("track_length", diff, get_track_length_max())
# Title.
dist.add_string("track_title", item.title, track_info.title)
# Artist. Only check if there is actually an artist in the track data.
if (
incl_artist
and track_info.artist
and item.artist.lower() not in VA_ARTISTS
):
dist.add_string("track_artist", item.artist, track_info.artist)
# Track index.
if track_info.index and item.track:
dist.add_expr("track_index", track_index_changed(item, track_info))
# Track ID.
if item.mb_trackid:
dist.add_expr("track_id", item.mb_trackid != track_info.track_id)
# Penalize mismatching disc numbers.
if track_info.medium and item.disc:
dist.add_expr("medium", item.disc != track_info.medium)
# Plugins.
dist.update(plugins.track_distance(item, track_info))
return dist
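
For example, a rough sketch of calling track_distance on in-memory objects (constructed the way the beets test suite does; the score depends on the configured weights, and no plugins are loaded here):

from beets.autotag.distance import track_distance
from beets.autotag.hooks import TrackInfo
from beets.library import Item

item = Item(title="Come Together", artist="The Beatles", track=1, length=259.0)
info = TrackInfo(title="Come Together", artist="The Beatles",
                 index=1, medium_index=1, length=260.0)

dist = track_distance(item, info, incl_artist=True)
print(float(dist))   # ~0.0: title and artist match, length is within the grace period
print(dist.items())  # any non-zero penalty components
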
def distance(
items: Sequence[Item],
album_info: AlbumInfo,
mapping: dict[Item, TrackInfo],
) -> Distance:
"""Determines how "significant" an album metadata change would be.
Returns a Distance object. `album_info` is an AlbumInfo object
reflecting the album to be compared. `items` is a sequence of all
Item objects that will be matched (order is not important).
`mapping` is a dictionary mapping Items to TrackInfo objects; the
keys are a subset of `items` and the values are a subset of
`album_info.tracks`.
"""
likelies, _ = get_most_common_tags(items)
dist = Distance()
# Artist, if not various.
if not album_info.va:
dist.add_string("artist", likelies["artist"], album_info.artist)
# Album.
dist.add_string("album", likelies["album"], album_info.album)
preferred_config = config["match"]["preferred"]
# Current or preferred media.
if album_info.media:
# Preferred media options.
media_patterns: Sequence[str] = preferred_config["media"].as_str_seq()
options = [
re.compile(r"(\d+x)?(%s)" % pat, re.I) for pat in media_patterns
]
if options:
dist.add_priority("media", album_info.media, options)
# Current media.
elif likelies["media"]:
dist.add_equality("media", album_info.media, likelies["media"])
# Mediums.
if likelies["disctotal"] and album_info.mediums:
dist.add_number("mediums", likelies["disctotal"], album_info.mediums)
# Prefer earliest release.
if album_info.year and preferred_config["original_year"]:
# Assume 1889 (earliest first gramophone discs) if we don't know the
# original year.
original = album_info.original_year or 1889
diff = abs(album_info.year - original)
diff_max = abs(datetime.date.today().year - original)
dist.add_ratio("year", diff, diff_max)
# Year.
elif likelies["year"] and album_info.year:
if likelies["year"] in (album_info.year, album_info.original_year):
# No penalty for matching release or original year.
dist.add("year", 0.0)
elif album_info.original_year:
# Prefer matches closest to the release year.
diff = abs(likelies["year"] - album_info.year)
diff_max = abs(
datetime.date.today().year - album_info.original_year
)
dist.add_ratio("year", diff, diff_max)
else:
# Full penalty when there is no original year.
dist.add("year", 1.0)
# Preferred countries.
country_patterns: Sequence[str] = preferred_config["countries"].as_str_seq()
options = [re.compile(pat, re.I) for pat in country_patterns]
if album_info.country and options:
dist.add_priority("country", album_info.country, options)
# Country.
elif likelies["country"] and album_info.country:
dist.add_string("country", likelies["country"], album_info.country)
# Label.
if likelies["label"] and album_info.label:
dist.add_string("label", likelies["label"], album_info.label)
# Catalog number.
if likelies["catalognum"] and album_info.catalognum:
dist.add_string(
"catalognum", likelies["catalognum"], album_info.catalognum
)
# Disambiguation.
if likelies["albumdisambig"] and album_info.albumdisambig:
dist.add_string(
"albumdisambig", likelies["albumdisambig"], album_info.albumdisambig
)
# Album ID.
if likelies["mb_albumid"]:
dist.add_equality(
"album_id", likelies["mb_albumid"], album_info.album_id
)
# Tracks.
dist.tracks = {}
for item, track in mapping.items():
dist.tracks[track] = track_distance(item, track, album_info.va)
dist.add("tracks", dist.tracks[track].distance)
# Missing tracks.
for _ in range(len(album_info.tracks) - len(mapping)):
dist.add("missing_tracks", 1.0)
# Unmatched tracks.
for _ in range(len(items) - len(mapping)):
dist.add("unmatched_tracks", 1.0)
# Plugins.
dist.update(plugins.album_distance(items, album_info, mapping))
return dist
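
Putting the pieces together, an album-level sketch (illustrative only; field names follow the AlbumInfo/TrackInfo constructors used elsewhere in this diff, the IDs are placeholders, and the score again depends on the configured weights):

from beets.autotag.distance import distance
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.library import Item

items = [Item(title="Come Together", artist="The Beatles",
              album="Abbey Road", track=1, length=259.0)]
tracks = [TrackInfo(title="Come Together", artist="The Beatles",
                    index=1, length=260.0)]
info = AlbumInfo(tracks=tracks, album="Abbey Road", artist="The Beatles",
                 album_id="release-id", artist_id="artist-id", va=False)

dist = distance(items, info, mapping={items[0]: tracks[0]})
print(float(dist), dist.items())
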


@ -16,21 +16,15 @@
from __future__ import annotations
import re
from functools import total_ordering
from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar
from jellyfish import levenshtein_distance
from unidecode import unidecode
from beets import config, logging
from beets.util import as_string, cached_classproperty
from beets import logging
if TYPE_CHECKING:
from collections.abc import Iterator
from beets.library import Item
from .distance import Distance
log = logging.getLogger("beets")
V = TypeVar("V")
@ -254,328 +248,6 @@ class TrackInfo(AttrDict[Any]):
return dupe
# Candidate distance scoring.
# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma.
SD_END_WORDS = ["the", "a", "an"]
# Reduced weights for certain portions of the string.
SD_PATTERNS = [
(r"^the ", 0.1),
(r"[\[\(]?(ep|single)[\]\)]?", 0.0),
(r"[\[\(]?(featuring|feat|ft)[\. :].+", 0.1),
(r"\(.*?\)", 0.3),
(r"\[.*?\]", 0.3),
(r"(, )?(pt\.|part) .+", 0.2),
]
# Replacements to use before testing distance.
SD_REPLACE = [
(r"&", "and"),
]
def _string_dist_basic(str1: str, str2: str) -> float:
"""Basic edit distance between two strings, ignoring
non-alphanumeric characters and case. Comparisons are based on a
transliteration/lowering to ASCII characters. Normalized by string
length.
"""
assert isinstance(str1, str)
assert isinstance(str2, str)
str1 = as_string(unidecode(str1))
str2 = as_string(unidecode(str2))
str1 = re.sub(r"[^a-z0-9]", "", str1.lower())
str2 = re.sub(r"[^a-z0-9]", "", str2.lower())
if not str1 and not str2:
return 0.0
return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2)))
def string_dist(str1: str | None, str2: str | None) -> float:
"""Gives an "intuitive" edit distance between two strings. This is
an edit distance, normalized by the string length, with a number of
tweaks that reflect intuition about text.
"""
if str1 is None and str2 is None:
return 0.0
if str1 is None or str2 is None:
return 1.0
str1 = str1.lower()
str2 = str2.lower()
# Don't penalize strings that move certain words to the end. For
# example, "the something" should be considered equal to
# "something, the".
for word in SD_END_WORDS:
if str1.endswith(", %s" % word):
str1 = "{} {}".format(word, str1[: -len(word) - 2])
if str2.endswith(", %s" % word):
str2 = "{} {}".format(word, str2[: -len(word) - 2])
# Perform a couple of basic normalizing substitutions.
for pat, repl in SD_REPLACE:
str1 = re.sub(pat, repl, str1)
str2 = re.sub(pat, repl, str2)
# Change the weight for certain string portions matched by a set
# of regular expressions. We gradually change the strings and build
# up penalties associated with parts of the string that were
# deleted.
base_dist = _string_dist_basic(str1, str2)
penalty = 0.0
for pat, weight in SD_PATTERNS:
# Get strings that drop the pattern.
case_str1 = re.sub(pat, "", str1)
case_str2 = re.sub(pat, "", str2)
if case_str1 != str1 or case_str2 != str2:
# If the pattern was present (i.e., it is deleted in the
# current case), recalculate the distances for the
# modified strings.
case_dist = _string_dist_basic(case_str1, case_str2)
case_delta = max(0.0, base_dist - case_dist)
if case_delta == 0.0:
continue
# Shift our baseline strings down (to avoid rematching the
# same part of the string) and add a scaled distance
# amount to the penalties.
str1 = case_str1
str2 = case_str2
base_dist = case_dist
penalty += weight * case_delta
return base_dist + penalty
@total_ordering
class Distance:
"""Keeps track of multiple distance penalties. Provides a single
weighted distance for all penalties as well as a weighted distance
for each individual penalty.
"""
def __init__(self) -> None:
self._penalties: dict[str, list[float]] = {}
self.tracks: dict[TrackInfo, Distance] = {}
@cached_classproperty
def _weights(cls) -> dict[str, float]:
"""A dictionary from keys to floating-point weights."""
weights_view = config["match"]["distance_weights"]
weights = {}
for key in weights_view.keys():
weights[key] = weights_view[key].as_number()
return weights
# Access the components and their aggregates.
@property
def distance(self) -> float:
"""Return a weighted and normalized distance across all
penalties.
"""
dist_max = self.max_distance
if dist_max:
return self.raw_distance / self.max_distance
return 0.0
@property
def max_distance(self) -> float:
"""Return the maximum distance penalty (normalization factor)."""
dist_max = 0.0
for key, penalty in self._penalties.items():
dist_max += len(penalty) * self._weights[key]
return dist_max
@property
def raw_distance(self) -> float:
"""Return the raw (denormalized) distance."""
dist_raw = 0.0
for key, penalty in self._penalties.items():
dist_raw += sum(penalty) * self._weights[key]
return dist_raw
def items(self) -> list[tuple[str, float]]:
"""Return a list of (key, dist) pairs, with `dist` being the
weighted distance, sorted from highest to lowest. Does not
include penalties with a zero value.
"""
list_ = []
for key in self._penalties:
dist = self[key]
if dist:
list_.append((key, dist))
# Convert distance into a negative float so we can sort items in
# ascending order (for keys, when the penalty is equal) and
# still get the items with the biggest distance first.
return sorted(
list_, key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0])
)
def __hash__(self) -> int:
return id(self)
def __eq__(self, other) -> bool:
return self.distance == other
# Behave like a float.
def __lt__(self, other) -> bool:
return self.distance < other
def __float__(self) -> float:
return self.distance
def __sub__(self, other) -> float:
return self.distance - other
def __rsub__(self, other) -> float:
return other - self.distance
def __str__(self) -> str:
return f"{self.distance:.2f}"
# Behave like a dict.
def __getitem__(self, key) -> float:
"""Returns the weighted distance for a named penalty."""
dist = sum(self._penalties[key]) * self._weights[key]
dist_max = self.max_distance
if dist_max:
return dist / dist_max
return 0.0
def __iter__(self) -> Iterator[tuple[str, float]]:
return iter(self.items())
def __len__(self) -> int:
return len(self.items())
def keys(self) -> list[str]:
return [key for key, _ in self.items()]
def update(self, dist: Distance):
"""Adds all the distance penalties from `dist`."""
if not isinstance(dist, Distance):
raise ValueError(
"`dist` must be a Distance object, not {}".format(type(dist))
)
for key, penalties in dist._penalties.items():
self._penalties.setdefault(key, []).extend(penalties)
# Adding components.
def _eq(self, value1: re.Pattern[str] | Any, value2: Any) -> bool:
"""Returns True if `value1` is equal to `value2`. `value1` may
be a compiled regular expression, in which case it will be
matched against `value2`.
"""
if isinstance(value1, re.Pattern):
return bool(value1.match(value2))
return value1 == value2
def add(self, key: str, dist: float):
"""Adds a distance penalty. `key` must correspond with a
configured weight setting. `dist` must be a float between 0.0
and 1.0, and will be added to any existing distance penalties
for the same key.
"""
if not 0.0 <= dist <= 1.0:
raise ValueError(f"`dist` must be between 0.0 and 1.0, not {dist}")
self._penalties.setdefault(key, []).append(dist)
def add_equality(
self,
key: str,
value: Any,
options: list[Any] | tuple[Any, ...] | Any,
):
"""Adds a distance penalty of 1.0 if `value` doesn't match any
of the values in `options`. If an option is a compiled regular
expression, it will be considered equal if it matches against
`value`.
"""
if not isinstance(options, (list, tuple)):
options = [options]
for opt in options:
if self._eq(opt, value):
dist = 0.0
break
else:
dist = 1.0
self.add(key, dist)
def add_expr(self, key: str, expr: bool):
"""Adds a distance penalty of 1.0 if `expr` evaluates to True,
or 0.0.
"""
if expr:
self.add(key, 1.0)
else:
self.add(key, 0.0)
def add_number(self, key: str, number1: int, number2: int):
"""Adds a distance penalty of 1.0 for each number of difference
between `number1` and `number2`, or 0.0 when there is no
difference. Use this when there is no upper limit on the
difference between the two numbers.
"""
diff = abs(number1 - number2)
if diff:
for i in range(diff):
self.add(key, 1.0)
else:
self.add(key, 0.0)
def add_priority(
self,
key: str,
value: Any,
options: list[Any] | tuple[Any, ...] | Any,
):
"""Adds a distance penalty that corresponds to the position at
which `value` appears in `options`. A distance penalty of 0.0
for the first option, or 1.0 if there is no matching option. If
an option is a compiled regular expression, it will be
considered equal if it matches against `value`.
"""
if not isinstance(options, (list, tuple)):
options = [options]
unit = 1.0 / (len(options) or 1)
for i, opt in enumerate(options):
if self._eq(opt, value):
dist = i * unit
break
else:
dist = 1.0
self.add(key, dist)
def add_ratio(
self,
key: str,
number1: int | float,
number2: int | float,
):
"""Adds a distance penalty for `number1` as a ratio of `number2`.
`number1` is clamped between 0 and `number2`.
"""
number = float(max(min(number1, number2), 0))
if number2:
dist = number / number2
else:
dist = 0.0
self.add(key, dist)
def add_string(self, key: str, str1: str | None, str2: str | None):
"""Adds a distance penalty based on the edit distance between
`str1` and `str2`.
"""
dist = string_dist(str1, str2)
self.add(key, dist)
# Structures that compose all the information for a candidate match.


@ -18,37 +18,23 @@ releases and tracks.
from __future__ import annotations
import datetime
import re
from enum import IntEnum
from functools import cache
from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar
import lap
import numpy as np
from beets import config, logging, plugins
from beets.autotag import (
AlbumInfo,
AlbumMatch,
Distance,
TrackInfo,
TrackMatch,
hooks,
)
from beets.util import plurality
from beets.autotag import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch, hooks
from beets.util import get_most_common_tags
from .distance import VA_ARTISTS, distance, track_distance
if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
from beets.library import Item
# Artist signals that indicate "various artists". These are used at the
# album level to determine whether a given release is likely a VA
# release and also on the track level to remove the penalty for
# differing artists.
VA_ARTISTS = ("", "various artists", "various", "va", "unknown")
# Global logger.
log = logging.getLogger("beets")
@ -80,44 +66,6 @@ class Proposal(NamedTuple):
# Primary matching functionality.
def current_metadata(
items: Iterable[Item],
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Extract the likely current metadata for an album given a list of its
items. Return two dictionaries:
- The most common value for each field.
- Whether each field's value was unanimous (values are booleans).
"""
assert items # Must be nonempty.
likelies = {}
consensus = {}
fields = [
"artist",
"album",
"albumartist",
"year",
"disctotal",
"mb_albumid",
"label",
"barcode",
"catalognum",
"country",
"media",
"albumdisambig",
]
for field in fields:
values = [item[field] for item in items if item]
likelies[field], freq = plurality(values)
consensus[field] = freq == len(values)
# If there's an album artist consensus, use this for the artist.
if consensus["albumartist"] and likelies["albumartist"]:
likelies["artist"] = likelies["albumartist"]
return likelies, consensus
def assign_items(
items: Sequence[Item],
tracks: Sequence[TrackInfo],
@ -150,191 +98,6 @@ def assign_items(
return mapping, extra_items, extra_tracks
def track_index_changed(item: Item, track_info: TrackInfo) -> bool:
"""Returns True if the item and track info index is different. Tolerates
per disc and per release numbering.
"""
return item.track not in (track_info.medium_index, track_info.index)
@cache
def get_track_length_grace() -> float:
"""Get cached grace period for track length matching."""
return config["match"]["track_length_grace"].as_number()
@cache
def get_track_length_max() -> float:
"""Get cached maximum track length for track length matching."""
return config["match"]["track_length_max"].as_number()
def track_distance(
item: Item,
track_info: TrackInfo,
incl_artist: bool = False,
) -> Distance:
"""Determines the significance of a track metadata change. Returns a
Distance object. `incl_artist` indicates that a distance component should
be included for the track artist (i.e., for various-artist releases).
``track_length_grace`` and ``track_length_max`` configuration options are
cached because this function is called many times during the matching
process and their access comes with a performance overhead.
"""
dist = hooks.Distance()
# Length.
if info_length := track_info.length:
diff = abs(item.length - info_length) - get_track_length_grace()
dist.add_ratio("track_length", diff, get_track_length_max())
# Title.
dist.add_string("track_title", item.title, track_info.title)
# Artist. Only check if there is actually an artist in the track data.
if (
incl_artist
and track_info.artist
and item.artist.lower() not in VA_ARTISTS
):
dist.add_string("track_artist", item.artist, track_info.artist)
# Track index.
if track_info.index and item.track:
dist.add_expr("track_index", track_index_changed(item, track_info))
# Track ID.
if item.mb_trackid:
dist.add_expr("track_id", item.mb_trackid != track_info.track_id)
# Penalize mismatching disc numbers.
if track_info.medium and item.disc:
dist.add_expr("medium", item.disc != track_info.medium)
# Plugins.
dist.update(plugins.track_distance(item, track_info))
return dist
def distance(
items: Sequence[Item],
album_info: AlbumInfo,
mapping: dict[Item, TrackInfo],
) -> Distance:
"""Determines how "significant" an album metadata change would be.
Returns a Distance object. `album_info` is an AlbumInfo object
reflecting the album to be compared. `items` is a sequence of all
Item objects that will be matched (order is not important).
`mapping` is a dictionary mapping Items to TrackInfo objects; the
keys are a subset of `items` and the values are a subset of
`album_info.tracks`.
"""
likelies, _ = current_metadata(items)
dist = hooks.Distance()
# Artist, if not various.
if not album_info.va:
dist.add_string("artist", likelies["artist"], album_info.artist)
# Album.
dist.add_string("album", likelies["album"], album_info.album)
preferred_config = config["match"]["preferred"]
# Current or preferred media.
if album_info.media:
# Preferred media options.
media_patterns: Sequence[str] = preferred_config["media"].as_str_seq()
options = [
re.compile(r"(\d+x)?(%s)" % pat, re.I) for pat in media_patterns
]
if options:
dist.add_priority("media", album_info.media, options)
# Current media.
elif likelies["media"]:
dist.add_equality("media", album_info.media, likelies["media"])
# Mediums.
if likelies["disctotal"] and album_info.mediums:
dist.add_number("mediums", likelies["disctotal"], album_info.mediums)
# Prefer earliest release.
if album_info.year and preferred_config["original_year"]:
# Assume 1889 (earliest first gramophone discs) if we don't know the
# original year.
original = album_info.original_year or 1889
diff = abs(album_info.year - original)
diff_max = abs(datetime.date.today().year - original)
dist.add_ratio("year", diff, diff_max)
# Year.
elif likelies["year"] and album_info.year:
if likelies["year"] in (album_info.year, album_info.original_year):
# No penalty for matching release or original year.
dist.add("year", 0.0)
elif album_info.original_year:
# Prefer matches closest to the release year.
diff = abs(likelies["year"] - album_info.year)
diff_max = abs(
datetime.date.today().year - album_info.original_year
)
dist.add_ratio("year", diff, diff_max)
else:
# Full penalty when there is no original year.
dist.add("year", 1.0)
# Preferred countries.
country_patterns: Sequence[str] = preferred_config["countries"].as_str_seq()
options = [re.compile(pat, re.I) for pat in country_patterns]
if album_info.country and options:
dist.add_priority("country", album_info.country, options)
# Country.
elif likelies["country"] and album_info.country:
dist.add_string("country", likelies["country"], album_info.country)
# Label.
if likelies["label"] and album_info.label:
dist.add_string("label", likelies["label"], album_info.label)
# Catalog number.
if likelies["catalognum"] and album_info.catalognum:
dist.add_string(
"catalognum", likelies["catalognum"], album_info.catalognum
)
# Disambiguation.
if likelies["albumdisambig"] and album_info.albumdisambig:
dist.add_string(
"albumdisambig", likelies["albumdisambig"], album_info.albumdisambig
)
# Album ID.
if likelies["mb_albumid"]:
dist.add_equality(
"album_id", likelies["mb_albumid"], album_info.album_id
)
# Tracks.
dist.tracks = {}
for item, track in mapping.items():
dist.tracks[track] = track_distance(item, track, album_info.va)
dist.add("tracks", dist.tracks[track].distance)
# Missing tracks.
for _ in range(len(album_info.tracks) - len(mapping)):
dist.add("missing_tracks", 1.0)
# Unmatched tracks.
for _ in range(len(items) - len(mapping)):
dist.add("unmatched_tracks", 1.0)
# Plugins.
dist.update(plugins.album_distance(items, album_info, mapping))
return dist
def match_by_id(items: Iterable[Item]) -> AlbumInfo | None:
"""If the items are tagged with an external source ID, return an
AlbumInfo object for the corresponding album. Otherwise, returns
@ -499,7 +262,7 @@ def tag_album(
candidates.
"""
# Get current metadata.
likelies, consensus = current_metadata(items)
likelies, consensus = get_most_common_tags(items)
cur_artist: str = likelies["artist"]
cur_album: str = likelies["album"]
log.debug("Tagging {0} - {1}", cur_artist, cur_album)

beets/event_types.py (new file, +33 lines)

@ -0,0 +1,33 @@
from typing import Literal
EventType = Literal[
"pluginload",
"import",
"album_imported",
"album_removed",
"item_copied",
"item_imported",
"before_item_moved",
"item_moved",
"item_linked",
"item_hardlinked",
"item_reflinked",
"item_removed",
"write",
"after_write",
"import_task_created",
"import_task_start",
"import_task_apply",
"import_task_before_choice",
"import_task_choice",
"import_task_files",
"library_opened",
"database_change",
"cli_exit",
"import_begin",
"trackinfo_received",
"albuminfo_received",
"before_choose_candidate",
"mb_track_extract",
"mb_album_extract",
]
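
This Literal feeds the typed register_listener hook in plugins.py (see that change further down); a sketch of a hypothetical plugin using it, where a misspelled event name becomes a type-checker error instead of a silently ignored listener:

from beets.plugins import BeetsPlugin


class ExampleListenerPlugin(BeetsPlugin):
    def __init__(self):
        super().__init__()
        # "import_task_start" is a member of EventType, so mypy accepts it;
        # a typo such as "import_task_strat" would be flagged.
        self.register_listener("import_task_start", self.announce)

    def announce(self, task, session):
        self._log.info("import task started")
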


@ -228,7 +228,7 @@ class ImportTask(BaseImportTask):
or APPLY (in which case the data comes from the choice).
"""
if self.choice_flag in (Action.ASIS, Action.RETAG):
likelies, consensus = autotag.current_metadata(self.items)
likelies, consensus = util.get_most_common_tags(self.items)
return likelies
elif self.choice_flag is Action.APPLY and self.match:
return self.match.info.copy()


@ -369,8 +369,9 @@ class LibModel(dbcore.Model["Library"]):
plugins.send("database_change", lib=self._db, model=self)
def add(self, lib=None):
# super().add() calls self.store(), which sends `database_change`,
# so don't do it here
super().add(lib)
plugins.send("database_change", lib=self._db, model=self)
def __format__(self, spec):
if not spec:
@ -1084,7 +1085,9 @@ class Item(LibModel):
(i.e., where the file ought to be).
The path is returned as a bytestring. ``basedir`` can override the
library's base directory for the destination.
library's base directory for the destination. If ``relative_to_libdir``
is true, returns just the fragment of the path underneath the library
base directory.
"""
db = self._check_db()
basedir = basedir or db.directory
@ -1179,6 +1182,7 @@ class Album(LibModel):
"comp": types.BOOLEAN,
"mb_albumid": types.STRING,
"mb_albumartistid": types.STRING,
"mb_albumartistids": types.MULTI_VALUE_DSV,
"albumtype": types.STRING,
"albumtypes": types.SEMICOLON_SPACE_DSV,
"label": types.STRING,
@ -1235,6 +1239,7 @@ class Album(LibModel):
"comp",
"mb_albumid",
"mb_albumartistid",
"mb_albumartistids",
"albumtype",
"albumtypes",
"label",


@ -39,6 +39,9 @@ import beets
from beets import logging
from beets.util.id_extractors import extract_release_id
if TYPE_CHECKING:
from beets.event_types import EventType
if sys.version_info >= (3, 10):
from typing import ParamSpec
else:
@ -46,11 +49,12 @@ else:
if TYPE_CHECKING:
from collections.abc import Iterator
from collections.abc import Iterable
from confuse import ConfigView
from beets.autotag import AlbumInfo, Distance, TrackInfo
from beets.autotag import AlbumInfo, TrackInfo
from beets.autotag.distance import Distance
from beets.dbcore import Query
from beets.dbcore.db import FieldQueryType
from beets.dbcore.types import Type
@ -70,7 +74,7 @@ if TYPE_CHECKING:
P = ParamSpec("P")
Ret = TypeVar("Ret", bound=Any)
Listener = Callable[..., None]
IterF = Callable[P, Iterator[Ret]]
IterF = Callable[P, Iterable[Ret]]
PLUGIN_NAMESPACE = "beetsplug"
@ -221,7 +225,7 @@ class BeetsPlugin:
"""Should return a Distance object to be added to the
distance for every track comparison.
"""
from beets.autotag.hooks import Distance
from beets.autotag.distance import Distance
return Distance()
@ -234,33 +238,25 @@ class BeetsPlugin:
"""Should return a Distance object to be added to the
distance for every album-level comparison.
"""
from beets.autotag.hooks import Distance
from beets.autotag.distance import Distance
return Distance()
def candidates(
self,
items: list[Item],
artist: str,
album: str,
va_likely: bool,
extra_tags: dict[str, Any] | None = None,
) -> Iterator[AlbumInfo]:
self, items: list[Item], artist: str, album: str, va_likely: bool
) -> Iterable[AlbumInfo]:
"""Return :py:class:`AlbumInfo` candidates that match the given album.
:param items: List of items in the album
:param artist: Album artist
:param album: Album name
:param va_likely: Whether the album is likely to be by various artists
:param extra_tags: an optional dictionary of extra tags to search.
Only relevant to :py:class:`MusicBrainzPlugin` autotagger and can be
ignored by other plugins
"""
yield from ()
def item_candidates(
self, item: Item, artist: str, title: str
) -> Iterator[TrackInfo]:
) -> Iterable[TrackInfo]:
"""Return :py:class:`TrackInfo` candidates that match the given track.
:param item: Track item
@ -300,7 +296,7 @@ class BeetsPlugin:
_raw_listeners: dict[str, list[Listener]] | None = None
listeners: dict[str, list[Listener]] | None = None
def register_listener(self, event: str, func: Listener) -> None:
def register_listener(self, event: "EventType", func: Listener):
"""Add a function as a listener for the specified event."""
wrapped_func = self._set_log_level_and_params(logging.WARNING, func)
@ -463,7 +459,7 @@ def track_distance(item: Item, info: TrackInfo) -> Distance:
"""Gets the track distance calculated by all loaded plugins.
Returns a Distance object.
"""
from beets.autotag.hooks import Distance
from beets.autotag.distance import Distance
dist = Distance()
for plugin in find_plugins():
@ -477,7 +473,7 @@ def album_distance(
mapping: dict[Item, TrackInfo],
) -> Distance:
"""Returns the album distance calculated by plugins."""
from beets.autotag.hooks import Distance
from beets.autotag.distance import Distance
dist = Distance()
for plugin in find_plugins():
@ -495,7 +491,7 @@ def notify_info_yielded(event: str) -> Callable[[IterF[P, Ret]], IterF[P, Ret]]:
def decorator(func: IterF[P, Ret]) -> IterF[P, Ret]:
@wraps(func)
def wrapper(*args: P.args, **kwargs: P.kwargs) -> Iterator[Ret]:
def wrapper(*args: P.args, **kwargs: P.kwargs) -> Iterable[Ret]:
for v in func(*args, **kwargs):
send(event, info=v)
yield v
@ -506,14 +502,14 @@ def notify_info_yielded(event: str) -> Callable[[IterF[P, Ret]], IterF[P, Ret]]:
@notify_info_yielded("albuminfo_received")
def candidates(*args, **kwargs) -> Iterator[AlbumInfo]:
def candidates(*args, **kwargs) -> Iterable[AlbumInfo]:
"""Return matching album candidates from all plugins."""
for plugin in find_plugins():
yield from plugin.candidates(*args, **kwargs)
@notify_info_yielded("trackinfo_received")
def item_candidates(*args, **kwargs) -> Iterator[TrackInfo]:
def item_candidates(*args, **kwargs) -> Iterable[TrackInfo]:
"""Return matching track candidates from all plugins."""
for plugin in find_plugins():
yield from plugin.item_candidates(*args, **kwargs)
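
To illustrate the slimmed-down hook signatures above (extra_tags is gone and the declared return type is now Iterable), a hypothetical metadata source plugin would look roughly like this:

from collections.abc import Iterable

from beets.autotag import AlbumInfo, TrackInfo
from beets.library import Item
from beets.plugins import BeetsPlugin


class ExampleSourcePlugin(BeetsPlugin):
    def candidates(
        self, items: list[Item], artist: str, album: str, va_likely: bool
    ) -> Iterable[AlbumInfo]:
        # Query a backend here; a list or a generator both satisfy Iterable.
        return []

    def item_candidates(
        self, item: Item, artist: str, title: str
    ) -> Iterable[TrackInfo]:
        return []
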
@ -659,73 +655,13 @@ def feat_tokens(for_artist: bool = True) -> str:
)
def sanitize_choices(
choices: Sequence[str], choices_all: Sequence[str]
) -> list[str]:
"""Clean up a stringlist configuration attribute: keep only choices
elements present in choices_all, remove duplicate elements, expand '*'
wildcard while keeping original stringlist order.
"""
seen: set[str] = set()
others = [x for x in choices_all if x not in choices]
res: list[str] = []
for s in choices:
if s not in seen:
if s in list(choices_all):
res.append(s)
elif s == "*":
res.extend(others)
seen.add(s)
return res
def sanitize_pairs(
pairs: Sequence[tuple[str, str]], pairs_all: Sequence[tuple[str, str]]
) -> list[tuple[str, str]]:
"""Clean up a single-element mapping configuration attribute as returned
by Confuse's `Pairs` template: keep only two-element tuples present in
pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*')
wildcards while keeping the original order. Note that ('*', '*') and
('*', 'whatever') have the same effect.
For example,
>>> sanitize_pairs(
... [('foo', 'baz bar'), ('key', '*'), ('*', '*')],
... [('foo', 'bar'), ('foo', 'baz'), ('foo', 'foobar'),
... ('key', 'value')]
... )
[('foo', 'baz'), ('foo', 'bar'), ('key', 'value'), ('foo', 'foobar')]
"""
pairs_all = list(pairs_all)
seen: set[tuple[str, str]] = set()
others = [x for x in pairs_all if x not in pairs]
res: list[tuple[str, str]] = []
for k, values in pairs:
for v in values.split():
x = (k, v)
if x in pairs_all:
if x not in seen:
seen.add(x)
res.append(x)
elif k == "*":
new = [o for o in others if o not in seen]
seen.update(new)
res.extend(new)
elif v == "*":
new = [o for o in others if o not in seen and o[0] == k]
seen.update(new)
res.extend(new)
return res
def get_distance(
config: ConfigView, data_source: str, info: AlbumInfo | TrackInfo
) -> Distance:
"""Returns the ``data_source`` weight and the maximum source weight
for albums or individual tracks.
"""
from beets.autotag.hooks import Distance
from beets.autotag.distance import Distance
dist = Distance()
if info.data_source == data_source:
@ -872,13 +808,8 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta):
return extract_release_id(self.data_source.lower(), id_string)
def candidates(
self,
items: list[Item],
artist: str,
album: str,
va_likely: bool,
extra_tags: dict[str, Any] | None = None,
) -> Iterator[AlbumInfo]:
self, items: list[Item], artist: str, album: str, va_likely: bool
) -> Iterable[AlbumInfo]:
query_filters = {"album": album}
if not va_likely:
query_filters["artist"] = artist
@ -888,7 +819,7 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta):
def item_candidates(
self, item: Item, artist: str, title: str
) -> Iterator[TrackInfo]:
) -> Iterable[TrackInfo]:
for result in self._search_api(
"track", {"artist": artist}, keywords=title
):


@ -806,7 +806,7 @@ class AutotagStub:
for p in self.patchers:
p.stop()
def candidates(self, items, artist, album, va_likely, extra_tags=None):
def candidates(self, items, artist, album, va_likely):
if self.matching == self.IDENT:
yield self._make_album_match(artist, album, len(items))
@ -886,20 +886,43 @@ class FetchImageHelper:
def run(self, *args, **kwargs):
super().run(*args, **kwargs)
IMAGEHEADER = {
IMAGEHEADER: dict[str, bytes] = {
"image/jpeg": b"\xff\xd8\xff" + b"\x00" * 3 + b"JFIF",
"image/png": b"\211PNG\r\n\032\n",
"image/gif": b"GIF89a",
# dummy type that is definitely not a valid image content type
"image/watercolour": b"watercolour",
"text/html": (
b"<!DOCTYPE html>\n<html>\n<head>\n</head>\n"
b"<body>\n</body>\n</html>"
),
}
def mock_response(self, url, content_type="image/jpeg", file_type=None):
def mock_response(
self,
url: str,
content_type: str = "image/jpeg",
file_type: None | str = None,
) -> None:
# Potentially return a file of a type that differs from the
# server-advertised content type to mimic misbehaving servers.
if file_type is None:
file_type = content_type
try:
# imghdr reads 32 bytes
header = self.IMAGEHEADER[file_type].ljust(32, b"\x00")
except KeyError:
# If we can't return a file that looks like a real file of the requested
# type, it's better to fail the test than to return something else, which
# might violate assumptions made when writing the test.
raise AssertionError(f"Mocking {file_type} responses not supported")
responses.add(
responses.GET,
url,
content_type=content_type,
# imghdr reads 32 bytes
body=self.IMAGEHEADER.get(file_type, b"").ljust(32, b"\x00"),
body=header,
)
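
A sketch of how a test might drive the stricter helper (assuming it lives in beets.test.helper as in current beets, and that the responses mock is activated by the helper's run() wrapper):

import unittest

from beets.test.helper import FetchImageHelper


class CoverFetchTest(FetchImageHelper, unittest.TestCase):
    def test_mismatching_content_type(self):
        # Server advertises JPEG but actually serves PNG bytes.
        self.mock_response(
            "http://example.com/cover",
            content_type="image/jpeg",
            file_type="image/png",
        )
        ...  # fetch the URL in the code under test and assert the mismatch is handled

    def test_unsupported_type_fails_loudly(self):
        # Types missing from IMAGEHEADER now raise instead of returning junk.
        with self.assertRaises(AssertionError):
            self.mock_response("http://example.com/cover", file_type="image/bmp")
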


@ -56,6 +56,8 @@ if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
from logging import Logger
from beets.library import Item
if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
@ -803,7 +805,7 @@ def as_string(value: Any) -> str:
return str(value)
def plurality(objs: Sequence[T]) -> tuple[T, int]:
def plurality(objs: Iterable[T]) -> tuple[T, int]:
"""Given a sequence of hashble objects, returns the object that
is most common in the set and the its number of appearance. The
sequence must contain at least one object.
@ -814,6 +816,44 @@ def plurality(objs: Sequence[T]) -> tuple[T, int]:
return c.most_common(1)[0]
def get_most_common_tags(
items: Sequence[Item],
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Extract the likely current metadata for an album given a list of its
items. Return two dictionaries:
- The most common value for each field.
- Whether each field's value was unanimous (values are booleans).
"""
assert items # Must be nonempty.
likelies = {}
consensus = {}
fields = [
"artist",
"album",
"albumartist",
"year",
"disctotal",
"mb_albumid",
"label",
"barcode",
"catalognum",
"country",
"media",
"albumdisambig",
]
for field in fields:
values = [item[field] for item in items if item]
likelies[field], freq = plurality(values)
consensus[field] = freq == len(values)
# If there's an album artist consensus, use this for the artist.
if consensus["albumartist"] and likelies["albumartist"]:
likelies["artist"] = likelies["albumartist"]
return likelies, consensus
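
A short sketch of what the relocated helper returns for a few in-memory items (field values are made up for illustration):

from beets.library import Item
from beets.util import get_most_common_tags, plurality

items = [
    Item(artist="Single Artist", albumartist="Single Artist", album="Some Album"),
    Item(artist="Single Artist", albumartist="Single Artist", album="Some Album"),
    Item(artist="Single Artist feat. Guest", albumartist="Single Artist",
         album="Some Album"),
]

likelies, consensus = get_most_common_tags(items)
print(likelies["album"])      # 'Some Album'
print(consensus["artist"])    # False: one item disagrees
print(likelies["artist"])     # 'Single Artist', taken from the albumartist consensus
print(plurality(["a", "a", "b"]))  # ('a', 2)
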
# stdout and stderr as bytes
class CommandOutput(NamedTuple):
stdout: bytes

beets/util/config.py (new file, +66 lines)

@ -0,0 +1,66 @@
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Collection, Sequence
def sanitize_choices(
choices: Sequence[str], choices_all: Collection[str]
) -> list[str]:
"""Clean up a stringlist configuration attribute: keep only choices
elements present in choices_all, remove duplicate elements, expand '*'
wildcard while keeping original stringlist order.
"""
seen: set[str] = set()
others = [x for x in choices_all if x not in choices]
res: list[str] = []
for s in choices:
if s not in seen:
if s in list(choices_all):
res.append(s)
elif s == "*":
res.extend(others)
seen.add(s)
return res
def sanitize_pairs(
pairs: Sequence[tuple[str, str]], pairs_all: Sequence[tuple[str, str]]
) -> list[tuple[str, str]]:
"""Clean up a single-element mapping configuration attribute as returned
by Confuse's `Pairs` template: keep only two-element tuples present in
pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*')
wildcards while keeping the original order. Note that ('*', '*') and
('*', 'whatever') have the same effect.
For example,
>>> sanitize_pairs(
... [('foo', 'baz bar'), ('key', '*'), ('*', '*')],
... [('foo', 'bar'), ('foo', 'baz'), ('foo', 'foobar'),
... ('key', 'value')]
... )
[('foo', 'baz'), ('foo', 'bar'), ('key', 'value'), ('foo', 'foobar')]
"""
pairs_all = list(pairs_all)
seen: set[tuple[str, str]] = set()
others = [x for x in pairs_all if x not in pairs]
res: list[tuple[str, str]] = []
for k, values in pairs:
for v in values.split():
x = (k, v)
if x in pairs_all:
if x not in seen:
seen.add(x)
res.append(x)
elif k == "*":
new = [o for o in others if o not in seen]
seen.update(new)
res.extend(new)
elif v == "*":
new = [o for o in others if o not in seen and o[0] == k]
seen.update(new)
res.extend(new)
return res
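
sanitize_choices, above it, behaves analogously for flat string lists; a brief sketch with made-up stage names:

from beets.util.config import sanitize_choices

all_stages = ["fetch", "embed", "clean", "verify"]  # hypothetical choice set

# Known entries are kept in order, '*' expands to everything not yet named,
# and duplicates or unknown entries are dropped.
print(sanitize_choices(["embed", "*", "embed", "bogus"], all_stages))
# ['embed', 'fetch', 'clean', 'verify']
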


@ -361,7 +361,7 @@ class BeatportPlugin(BeetsPlugin):
data_source=self.data_source, info=track_info, config=self.config
)
def candidates(self, items, artist, release, va_likely, extra_tags=None):
def candidates(self, items, artist, release, va_likely):
"""Returns a list of AlbumInfo objects for beatport search results
matching release and artist (if not various).
"""


@ -24,7 +24,7 @@ import acoustid
import confuse
from beets import config, plugins, ui, util
from beets.autotag.hooks import Distance
from beets.autotag.distance import Distance
from beetsplug.musicbrainz import MusicBrainzPlugin
API_KEY = "1vOwZtEn"
@ -200,7 +200,7 @@ class AcoustidPlugin(plugins.BeetsPlugin):
dist.add_expr("track_id", info.track_id not in recording_ids)
return dist
def candidates(self, items, artist, album, va_likely, extra_tags=None):
def candidates(self, items, artist, album, va_likely):
albums = []
for relid in prefix(_all_releases(items), MAX_RELEASES):
album = self.mb.album_for_id(relid)


@ -25,7 +25,9 @@ import re
import socket
import time
import traceback
from functools import cache
from string import ascii_lowercase
from typing import TYPE_CHECKING
import confuse
from discogs_client import Client, Master, Release
@ -36,10 +38,16 @@ from typing_extensions import TypedDict
import beets
import beets.ui
from beets import config
from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist
from beets.autotag.distance import string_dist
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
from beets.util.id_extractors import extract_release_id
if TYPE_CHECKING:
from collections.abc import Callable, Iterable
from beets.library import Item
USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
API_KEY = "rAzVUQYRaoFjeBjyWuWZ"
API_SECRET = "plxtUTqoCzwxZpqdPysCwGuBSmZNdZVy"
@ -54,6 +62,22 @@ CONNECTION_ERRORS = (
)
TRACK_INDEX_RE = re.compile(
r"""
(.*?) # medium: everything before medium_index.
(\d*?) # medium_index: a number at the end of
# `position`, except if followed by a subtrack index.
# subtrack_index: can only be matched if medium
# or medium_index have been matched, and can be
(
(?<=\w)\.[\w]+ # a dot followed by a string (A.1, 2.A)
| (?<=\d)[A-Z]+ # a string that follows a number (1A, B2a)
)?
""",
re.VERBOSE,
)
class ReleaseFormat(TypedDict):
name: str
qty: int
@ -73,6 +97,7 @@ class DiscogsPlugin(BeetsPlugin):
"separator": ", ",
"index_tracks": False,
"append_style_genre": False,
"search_limit": 5,
}
)
self.config["apikey"].redact = True
@ -156,111 +181,37 @@ class DiscogsPlugin(BeetsPlugin):
data_source="Discogs", info=track_info, config=self.config
)
def candidates(self, items, artist, album, va_likely, extra_tags=None):
"""Returns a list of AlbumInfo objects for discogs search results
matching an album and artist (if not various).
"""
if not album and not artist:
self._log.debug(
"Skipping Discogs query. Files missing album and artist tags."
)
return []
def candidates(
self, items: list[Item], artist: str, album: str, va_likely: bool
) -> Iterable[AlbumInfo]:
return self.get_albums(f"{artist} {album}" if va_likely else album)
if va_likely:
query = album
else:
query = f"{artist} {album}"
try:
return self.get_albums(query)
except DiscogsAPIError as e:
self._log.debug("API Error: {0} (query: {1})", e, query)
if e.status_code == 401:
self.reset_auth()
return self.candidates(items, artist, album, va_likely)
else:
return []
except CONNECTION_ERRORS:
self._log.debug("Connection error in album search", exc_info=True)
return []
def get_track_from_album_by_title(
self, album_info, title, dist_threshold=0.3
):
def compare_func(track_info):
track_title = getattr(track_info, "title", None)
dist = string_dist(track_title, title)
return track_title and dist < dist_threshold
return self.get_track_from_album(album_info, compare_func)
def get_track_from_album(self, album_info, compare_func):
"""Return the first track of the release where `compare_func` returns
true.
:return: TrackInfo object.
:rtype: beets.autotag.hooks.TrackInfo
"""
if not album_info:
def get_track_from_album(
self, album_info: AlbumInfo, compare: Callable[[TrackInfo], float]
) -> TrackInfo | None:
"""Return the best matching track of the release."""
scores_and_tracks = [(compare(t), t) for t in album_info.tracks]
score, track_info = min(scores_and_tracks, key=lambda x: x[0])
if score > 0.3:
return None
for track_info in album_info.tracks:
# check for matching position
if not compare_func(track_info):
continue
track_info["artist"] = album_info.artist
track_info["artist_id"] = album_info.artist_id
track_info["album"] = album_info.album
return track_info
# attach artist info if not provided
if not track_info["artist"]:
track_info["artist"] = album_info.artist
track_info["artist_id"] = album_info.artist_id
# attach album info
track_info["album"] = album_info.album
def item_candidates(
self, item: Item, artist: str, title: str
) -> Iterable[TrackInfo]:
albums = self.candidates([item], artist, title, False)
return track_info
def compare_func(track_info: TrackInfo) -> float:
return string_dist(track_info.title, title)
return None
tracks = (self.get_track_from_album(a, compare_func) for a in albums)
return list(filter(None, tracks))
def item_candidates(self, item, artist, title):
"""Returns a list of TrackInfo objects for Search API results
matching ``title`` and ``artist``.
:param item: Singleton item to be matched.
:type item: beets.library.Item
:param artist: The artist of the track to be matched.
:type artist: str
:param title: The title of the track to be matched.
:type title: str
:return: Candidate TrackInfo objects.
:rtype: list[beets.autotag.hooks.TrackInfo]
"""
if not artist and not title:
self._log.debug(
"Skipping Discogs query. File missing artist and title tags."
)
return []
query = f"{artist} {title}"
try:
albums = self.get_albums(query)
except DiscogsAPIError as e:
self._log.debug("API Error: {0} (query: {1})", e, query)
if e.status_code == 401:
self.reset_auth()
return self.item_candidates(item, artist, title)
else:
return []
except CONNECTION_ERRORS:
self._log.debug("Connection error in track search", exc_info=True)
candidates = []
for album_cur in albums:
self._log.debug("searching within album {0}", album_cur.album)
track_result = self.get_track_from_album_by_title(
album_cur, item["title"]
)
if track_result:
candidates.append(track_result)
# first 10 results, don't overwhelm with options
return candidates[:10]
def album_for_id(self, album_id):
def album_for_id(self, album_id: str) -> AlbumInfo | None:
"""Fetches an album by its Discogs ID and returns an AlbumInfo object
or None if the album is not found.
"""
@ -291,7 +242,15 @@ class DiscogsPlugin(BeetsPlugin):
return None
return self.get_album_info(result)
def get_albums(self, query):
def track_for_id(self, track_id: str) -> TrackInfo | None:
if album := self.album_for_id(track_id):
for track in album.tracks:
if track.track_id == track_id:
return track
return None
def get_albums(self, query: str) -> Iterable[AlbumInfo]:
"""Returns a list of AlbumInfo objects for a discogs search query."""
# Strip non-word characters from query. Things like "!" and "-" can
# cause a query to return no results, even if they match the artist or
@ -303,8 +262,9 @@ class DiscogsPlugin(BeetsPlugin):
query = re.sub(r"(?i)\b(CD|disc|vinyl)\s*\d+", "", query)
try:
releases = self.discogs_client.search(query, type="release").page(1)
results = self.discogs_client.search(query, type="release")
results.per_page = self.config["search_limit"].as_number()
releases = results.page(1)
except CONNECTION_ERRORS:
self._log.debug(
"Communication error while searching for {0!r}",
@ -312,20 +272,18 @@ class DiscogsPlugin(BeetsPlugin):
exc_info=True,
)
return []
return [
album for album in map(self.get_album_info, releases[:5]) if album
]
return map(self.get_album_info, releases)
def get_master_year(self, master_id):
@cache
def get_master_year(self, master_id: str) -> int | None:
"""Fetches a master release given its Discogs ID and returns its year
or None if the master release is not found.
"""
self._log.debug("Searching for master release {0}", master_id)
self._log.debug("Getting master release {0}", master_id)
result = Master(self.discogs_client, {"id": master_id})
try:
year = result.fetch("year")
return year
return result.fetch("year")
except DiscogsAPIError as e:
if e.status_code != 404:
self._log.debug(
@ -695,33 +653,21 @@ class DiscogsPlugin(BeetsPlugin):
medium_index=medium_index,
)
def get_track_index(self, position):
@staticmethod
def get_track_index(
position: str,
) -> tuple[str | None, str | None, str | None]:
"""Returns the medium, medium index and subtrack index for a discogs
track position."""
# Match the standard Discogs positions (12.2.9), which can have several
# forms (1, 1-1, A1, A1.1, A1a, ...).
match = re.match(
r"^(.*?)" # medium: everything before medium_index.
r"(\d*?)" # medium_index: a number at the end of
# `position`, except if followed by a subtrack
# index.
# subtrack_index: can only be matched if medium
# or medium_index have been matched, and can be
r"((?<=\w)\.[\w]+" # - a dot followed by a string (A.1, 2.A)
r"|(?<=\d)[A-Z]+" # - a string that follows a number (1A, B2a)
r")?"
r"$",
position.upper(),
)
if match:
medium = index = subindex = None
if match := TRACK_INDEX_RE.fullmatch(position.upper()):
medium, index, subindex = match.groups()
if subindex and subindex.startswith("."):
subindex = subindex[1:]
else:
self._log.debug("Invalid position: {0}", position)
medium = index = subindex = None
return medium or None, index or None, subindex or None
def get_track_length(self, duration):

View file

@ -53,6 +53,7 @@ class DuplicatesPlugin(BeetsPlugin):
"tiebreak": {},
"strict": False,
"tag": "",
"remove": False,
}
)
@ -131,6 +132,13 @@ class DuplicatesPlugin(BeetsPlugin):
action="store",
help="tag matched items with 'k=v' attribute",
)
self._command.parser.add_option(
"-r",
"--remove",
dest="remove",
action="store_true",
help="remove items from library",
)
self._command.parser.add_all_common_options()
def commands(self):
@ -141,6 +149,7 @@ class DuplicatesPlugin(BeetsPlugin):
copy = bytestring_path(self.config["copy"].as_str())
count = self.config["count"].get(bool)
delete = self.config["delete"].get(bool)
remove = self.config["remove"].get(bool)
fmt = self.config["format"].get(str)
full = self.config["full"].get(bool)
keys = self.config["keys"].as_str_seq()
@ -196,6 +205,7 @@ class DuplicatesPlugin(BeetsPlugin):
copy=copy,
move=move,
delete=delete,
remove=remove,
tag=tag,
fmt=fmt.format(obj_count),
)
@ -204,7 +214,14 @@ class DuplicatesPlugin(BeetsPlugin):
return [self._command]
def _process_item(
self, item, copy=False, move=False, delete=False, tag=False, fmt=""
self,
item,
copy=False,
move=False,
delete=False,
tag=False,
fmt="",
remove=False,
):
"""Process Item `item`."""
print_(format(item, fmt))
@ -216,6 +233,8 @@ class DuplicatesPlugin(BeetsPlugin):
item.store()
if delete:
item.remove(delete=True)
elif remove:
item.remove(delete=False)
if tag:
try:
k, v = tag.split("=")

File diff suppressed because it is too large

View file

@ -38,7 +38,8 @@ from unidecode import unidecode
import beets
from beets import plugins, ui
from beets.autotag.hooks import string_dist
from beets.autotag.distance import string_dist
from beets.util.config import sanitize_choices
if TYPE_CHECKING:
from logging import Logger
@ -957,7 +958,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin):
def backends(self) -> list[Backend]:
user_sources = self.config["sources"].get()
chosen = plugins.sanitize_choices(user_sources, self.BACKEND_BY_NAME)
chosen = sanitize_choices(user_sources, self.BACKEND_BY_NAME)
if "google" in chosen and not self.config["google_API_key"].get():
self.warn("Disabling Google source: no API key configured.")
chosen.remove("google")

View file

@ -18,6 +18,7 @@ from __future__ import annotations
import traceback
from collections import Counter
from functools import cached_property
from itertools import product
from typing import TYPE_CHECKING, Any
from urllib.parse import urljoin
@ -32,6 +33,7 @@ from beets.util.id_extractors import extract_release_id
if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
from typing import Literal
from beets.library import Item
@ -44,10 +46,10 @@ BASE_URL = "https://musicbrainz.org/"
SKIPPED_TRACKS = ["[data track]"]
FIELDS_TO_MB_KEYS = {
"barcode": "barcode",
"catalognum": "catno",
"country": "country",
"label": "label",
"barcode": "barcode",
"media": "format",
"year": "date",
}
@ -383,7 +385,7 @@ class MusicBrainzPlugin(BeetsPlugin):
"deezer": False,
"tidal": False,
},
"extra_tags": {},
"extra_tags": [],
},
)
hostname = self.config["host"].as_str()
@ -747,6 +749,68 @@ class MusicBrainzPlugin(BeetsPlugin):
return info
@cached_property
def extra_mb_field_by_tag(self) -> dict[str, str]:
"""Map configured extra tags to their MusicBrainz API field names.
Process user configuration to determine which additional MusicBrainz
fields should be included in search queries.
"""
mb_field_by_tag = {
t: FIELDS_TO_MB_KEYS[t]
for t in self.config["extra_tags"].as_str_seq()
if t in FIELDS_TO_MB_KEYS
}
if mb_field_by_tag:
self._log.debug("Additional search terms: {}", mb_field_by_tag)
return mb_field_by_tag
def get_album_criteria(
self, items: list[Item], artist: str, album: str, va_likely: bool
) -> dict[str, str]:
criteria = {
"release": album,
"alias": album,
"tracks": str(len(items)),
} | ({"arid": VARIOUS_ARTISTS_ID} if va_likely else {"artist": artist})
for tag, mb_field in self.extra_mb_field_by_tag.items():
most_common, _ = util.plurality(i.get(tag) for i in items)
value = str(most_common)
if tag == "catalognum":
value = value.replace(" ", "")
criteria[mb_field] = value
return criteria
def _search_api(
self,
query_type: Literal["recording", "release"],
filters: dict[str, str],
) -> list[JSONDict]:
"""Perform MusicBrainz API search and return results.
Execute a search against the MusicBrainz API for recordings or releases
using the provided criteria. Handles API errors by converting them into
MusicBrainzAPIError exceptions with contextual information.
"""
filters = {
k: _v for k, v in filters.items() if (_v := v.lower().strip())
}
self._log.debug(
"Searching for MusicBrainz {}s with: {!r}", query_type, filters
)
try:
method = getattr(musicbrainzngs, f"search_{query_type}s")
res = method(limit=self.config["searchlimit"].get(int), **filters)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(
exc, f"{query_type} search", filters, traceback.format_exc()
)
return res[f"{query_type}-list"]
def candidates(
self,
items: list[Item],
@ -755,80 +819,19 @@ class MusicBrainzPlugin(BeetsPlugin):
va_likely: bool,
extra_tags: dict[str, Any] | None = None,
) -> Iterator[beets.autotag.hooks.AlbumInfo]:
"""Searches for a single album ("release" in MusicBrainz parlance)
and returns an iterator over AlbumInfo objects. May raise a
MusicBrainzAPIError.
criteria = self.get_album_criteria(items, artist, album, va_likely)
release_ids = (r["id"] for r in self._search_api("release", criteria))
The query consists of an artist name, an album name, and,
optionally, a number of tracks on the album and any other extra tags.
"""
# Build search criteria.
criteria = {"release": album.lower().strip()}
if artist is not None:
criteria["artist"] = artist.lower().strip()
else:
# Various Artists search.
criteria["arid"] = VARIOUS_ARTISTS_ID
if track_count := len(items):
criteria["tracks"] = str(track_count)
if self.config["extra_tags"]:
tag_list = self.config["extra_tags"].get()
self._log.debug("Additional search terms: {0}", tag_list)
for tag, value in tag_list.items():
if key := FIELDS_TO_MB_KEYS.get(tag):
value = str(value).lower().strip()
if key == "catno":
value = value.replace(" ", "")
if value:
criteria[key] = value
# Abort if we have no search terms.
if not any(criteria.values()):
return
try:
self._log.debug(
"Searching for MusicBrainz releases with: {!r}", criteria
)
res = musicbrainzngs.search_releases(
limit=self.config["searchlimit"].get(int), **criteria
)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(
exc, "release search", criteria, traceback.format_exc()
)
for release in res["release-list"]:
# The search result is missing some data (namely, the tracks),
# so we just use the ID and fetch the rest of the information.
albuminfo = self.album_for_id(release["id"])
if albuminfo is not None:
yield albuminfo
yield from filter(None, map(self.album_for_id, release_ids))
def item_candidates(
self, item: Item, artist: str, title: str
) -> Iterator[beets.autotag.hooks.TrackInfo]:
"""Searches for a single track and returns an iterable of TrackInfo
objects. May raise a MusicBrainzAPIError.
"""
criteria = {
"artist": artist.lower().strip(),
"recording": title.lower().strip(),
}
criteria = {"artist": artist, "recording": title, "alias": title}
if not any(criteria.values()):
return
try:
res = musicbrainzngs.search_recordings(
limit=self.config["searchlimit"].get(int), **criteria
)
except musicbrainzngs.MusicBrainzError as exc:
raise MusicBrainzAPIError(
exc, "recording search", criteria, traceback.format_exc()
)
for recording in res["recording-list"]:
yield self.track_info(recording)
yield from filter(
None, map(self.track_info, self._search_api("recording", criteria))
)
def album_for_id(
self, album_id: str

122
beetsplug/replace.py Normal file
View file

@ -0,0 +1,122 @@
import shutil
from pathlib import Path
import mediafile
from beets import ui, util
from beets.library import Item, Library
from beets.plugins import BeetsPlugin
class ReplacePlugin(BeetsPlugin):
def commands(self):
cmd = ui.Subcommand(
"replace", help="replace audio file while keeping tags"
)
cmd.func = self.run
return [cmd]
def run(self, lib: Library, args: list[str]) -> None:
if len(args) < 2:
raise ui.UserError("Usage: beet replace <query> <new_file_path>")
new_file_path: Path = Path(args[-1])
item_query: list[str] = args[:-1]
self.file_check(new_file_path)
item_list = list(lib.items(item_query))
if not item_list:
raise ui.UserError("No matching songs found.")
song = self.select_song(item_list)
if not song:
ui.print_("Operation cancelled.")
return
if not self.confirm_replacement(new_file_path, song):
ui.print_("Aborting replacement.")
return
self.replace_file(new_file_path, song)
def file_check(self, filepath: Path) -> None:
"""Check if the file exists and is supported"""
if not filepath.is_file():
raise ui.UserError(
f"'{util.displayable_path(filepath)}' is not a valid file."
)
try:
mediafile.MediaFile(util.syspath(filepath))
except mediafile.FileTypeError as fte:
raise ui.UserError(fte)
def select_song(self, items: list[Item]):
"""Present a menu of matching songs and get user selection."""
ui.print_("\nMatching songs:")
for i, item in enumerate(items, 1):
ui.print_(f"{i}. {util.displayable_path(item)}")
while True:
try:
index = int(
input(
f"Which song would you like to replace? "
f"[1-{len(items)}] (0 to cancel): "
)
)
if index == 0:
return None
if 1 <= index <= len(items):
return items[index - 1]
ui.print_(
f"Invalid choice. Please enter a number "
f"between 1 and {len(items)}."
)
except ValueError:
ui.print_("Invalid input. Please type in a number.")
def confirm_replacement(self, new_file_path: Path, song: Item):
"""Get user confirmation for the replacement."""
original_file_path: Path = Path(song.path.decode())
if not original_file_path.exists():
raise ui.UserError("The original song file was not found.")
ui.print_(
f"\nReplacing: {util.displayable_path(new_file_path)} "
f"-> {util.displayable_path(original_file_path)}"
)
decision: str = (
input("Are you sure you want to replace this track? (y/N): ")
.strip()
.casefold()
)
return decision in {"yes", "y"}
def replace_file(self, new_file_path: Path, song: Item) -> None:
"""Replace the existing file with the new one."""
original_file_path = Path(song.path.decode())
dest = original_file_path.with_suffix(new_file_path.suffix)
try:
shutil.move(util.syspath(new_file_path), util.syspath(dest))
except Exception as e:
raise ui.UserError(f"Error replacing file: {e}")
if (
new_file_path.suffix != original_file_path.suffix
and original_file_path.exists()
):
try:
original_file_path.unlink()
except Exception as e:
raise ui.UserError(f"Could not delete original file: {e}")
song.path = str(dest).encode()
song.store()
ui.print_("Replacement successful.")

View file

@ -25,6 +25,7 @@ import json
import re
import time
import webbrowser
from typing import TYPE_CHECKING, Any, Literal, Sequence
import confuse
import requests
@ -33,8 +34,11 @@ import unidecode
from beets import ui
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.dbcore import types
from beets.library import DateType
from beets.plugins import BeetsPlugin, MetadataSourcePlugin
from beets.library import DateType, Library
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, Response
if TYPE_CHECKING:
from beetsplug._typing import JSONDict
DEFAULT_WAITING_TIME = 5
@ -102,38 +106,39 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
"client_id": "4e414367a1d14c75a5c5129a627fcab8",
"client_secret": "f82bdc09b2254f1a8286815d02fd46dc",
"tokenfile": "spotify_token.json",
"search_query_ascii": False,
}
)
self.config["client_id"].redact = True
self.config["client_secret"].redact = True
self.tokenfile = self.config["tokenfile"].get(
confuse.Filename(in_app_dir=True)
) # Path to the JSON file for storing the OAuth access token.
self.setup()
def setup(self):
"""Retrieve previously saved OAuth token or generate a new one."""
try:
with open(self.tokenfile) as f:
with open(self._tokenfile()) as f:
token_data = json.load(f)
except OSError:
self._authenticate()
else:
self.access_token = token_data["access_token"]
def _tokenfile(self) -> str:
"""Get the path to the JSON file for storing the OAuth token."""
return self.config["tokenfile"].get(confuse.Filename(in_app_dir=True))
def _authenticate(self):
"""Request an access token via the Client Credentials Flow:
https://developer.spotify.com/documentation/general/guides/authorization-guide/#client-credentials-flow
"""
c_id: str = self.config["client_id"].as_str()
c_secret: str = self.config["client_secret"].as_str()
headers = {
"Authorization": "Basic {}".format(
base64.b64encode(
":".join(
self.config[k].as_str()
for k in ("client_id", "client_secret")
).encode()
).decode()
base64.b64encode(f"{c_id}:{c_secret}".encode()).decode()
)
}
response = requests.post(
@ -154,27 +159,32 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
self._log.debug(
"{} access token: {}", self.data_source, self.access_token
)
with open(self.tokenfile, "w") as f:
with open(self._tokenfile(), "w") as f:
json.dump({"access_token": self.access_token}, f)
def _handle_response(
self, request_type, url, params=None, retry_count=0, max_retries=3
):
self,
method: Literal["get", "post", "put", "delete"],
url: str,
params: Any = None,
retry_count: int = 0,
max_retries: int = 3,
) -> JSONDict:
"""Send a request, reauthenticating if necessary.
:param request_type: Type of :class:`Request` constructor,
e.g. ``requests.get``, ``requests.post``, etc.
:type request_type: function
:param method: HTTP method to use for the request.
:param url: URL for the new :class:`Request` object.
:type url: str
:param params: (optional) list of tuples or bytes to send
in the query string for the :class:`Request`.
:type params: dict
:return: JSON data for the class:`Response <Response>` object.
:rtype: dict
"""
if retry_count > max_retries:
raise SpotifyAPIError("Maximum retries reached.")
try:
response = request_type(
response = requests.request(
method,
url,
headers={"Authorization": f"Bearer {self.access_token}"},
params=params,
@ -189,22 +199,28 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
self._log.error(f"Network error: {e}")
raise SpotifyAPIError("Network error.")
except requests.exceptions.RequestException as e:
if e.response is None:
self._log.error(f"Request failed: {e}")
raise SpotifyAPIError("Request failed.")
if e.response.status_code == 401:
self._log.debug(
f"{self.data_source} access token has expired. "
f"Reauthenticating."
)
self._authenticate()
return self._handle_response(request_type, url, params=params)
return self._handle_response(
method,
url,
params=params,
retry_count=retry_count + 1,
)
elif e.response.status_code == 404:
raise SpotifyAPIError(
f"API Error: {e.response.status_code}\n"
f"URL: {url}\nparams: {params}"
)
elif e.response.status_code == 429:
if retry_count >= max_retries:
raise SpotifyAPIError("Maximum retries reached.")
seconds = response.headers.get(
seconds = e.response.headers.get(
"Retry-After", DEFAULT_WAITING_TIME
)
self._log.debug(
@ -212,7 +228,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
)
time.sleep(int(seconds) + 1)
return self._handle_response(
request_type,
method,
url,
params=params,
retry_count=retry_count + 1,
@ -244,9 +260,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
if not (spotify_id := self._get_id(album_id)):
return None
album_data = self._handle_response(
requests.get, self.album_url + spotify_id
)
album_data = self._handle_response("get", self.album_url + spotify_id)
if album_data["name"] == "":
self._log.debug("Album removed from Spotify: {}", album_id)
return None
@ -277,9 +291,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
tracks_data = album_data["tracks"]
tracks_items = tracks_data["items"]
while tracks_data["next"]:
tracks_data = self._handle_response(
requests.get, tracks_data["next"]
)
tracks_data = self._handle_response("get", tracks_data["next"])
tracks_items.extend(tracks_data["items"])
tracks = []
@ -312,14 +324,12 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
data_url=album_data["external_urls"]["spotify"],
)
def _get_track(self, track_data):
def _get_track(self, track_data: JSONDict) -> TrackInfo:
"""Convert a Spotify track object dict to a TrackInfo object.
:param track_data: Simplified track object
(https://developer.spotify.com/documentation/web-api/reference/object-model/#track-object-simplified)
:type track_data: dict
:return: TrackInfo object for track
:rtype: beets.autotag.hooks.TrackInfo
"""
artist, artist_id = self.get_artist(track_data["artists"])
@ -344,26 +354,23 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
data_url=track_data["external_urls"]["spotify"],
)
def track_for_id(self, track_id=None, track_data=None):
"""Fetch a track by its Spotify ID or URL and return a
TrackInfo object or None if the track is not found.
def track_for_id(self, track_id: str) -> None | TrackInfo:
"""Fetch a track by its Spotify ID or URL.
:param track_id: (Optional) Spotify ID or URL for the track. Either
``track_id`` or ``track_data`` must be provided.
:type track_id: str
:param track_data: (Optional) Simplified track object dict. May be
provided instead of ``track_id`` to avoid unnecessary API calls.
:type track_data: dict
:return: TrackInfo object for track
:rtype: beets.autotag.hooks.TrackInfo or None
Returns a TrackInfo object or None if the track is not found.
"""
if not track_data:
if not (spotify_id := self._get_id(track_id)) or not (
track_data := self._handle_response(
requests.get, f"{self.track_url}{spotify_id}"
)
):
return None
if not (spotify_id := self._get_id(track_id)):
self._log.debug("Invalid Spotify ID: {}", track_id)
return None
if not (
track_data := self._handle_response(
"get", f"{self.track_url}{spotify_id}"
)
):
self._log.debug("Track not found: {}", track_id)
return None
track = self._get_track(track_data)
@ -371,7 +378,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
# release) and `track.medium_total` (total number of tracks on
# the track's disc).
album_data = self._handle_response(
requests.get, self.album_url + track_data["album"]["id"]
"get", self.album_url + track_data["album"]["id"]
)
medium_total = 0
for i, track_data in enumerate(album_data["tracks"]["items"], start=1):
@ -382,18 +389,16 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
track.medium_total = medium_total
return track
@staticmethod
def _construct_search_query(filters=None, keywords=""):
def _construct_search_query(
self, filters: dict[str, str], keywords: str = ""
) -> str:
"""Construct a query string with the specified filters and keywords to
be provided to the Spotify Search API
(https://developer.spotify.com/documentation/web-api/reference/search/search/#writing-a-query---guidelines).
(https://developer.spotify.com/documentation/web-api/reference/search).
:param filters: (Optional) Field filters to apply.
:type filters: dict
:param keywords: (Optional) Query keywords to use.
:type keywords: str
:return: Query string to be provided to the Search API.
:rtype: str
"""
query_components = [
keywords,
@ -402,36 +407,38 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
query = " ".join([q for q in query_components if q])
if not isinstance(query, str):
query = query.decode("utf8")
return unidecode.unidecode(query)
def _search_api(self, query_type, filters=None, keywords=""):
if self.config["search_query_ascii"].get():
query = unidecode.unidecode(query)
return query
def _search_api(
self,
query_type: Literal["album", "track"],
filters: dict[str, str],
keywords: str = "",
) -> Sequence[Response]:
"""Query the Spotify Search API for the specified ``keywords``,
applying the provided ``filters``.
:param query_type: Item type to search across. Valid types are:
'album', 'artist', 'playlist', and 'track'.
:type query_type: str
:param filters: (Optional) Field filters to apply.
:type filters: dict
:param keywords: (Optional) Query keywords to use.
:type keywords: str
:return: JSON data for the class:`Response <Response>` object or None
if no search results are returned.
:rtype: dict or None
"""
query = self._construct_search_query(keywords=keywords, filters=filters)
if not query:
return None
self._log.debug(f"Searching {self.data_source} for '{query}'")
try:
response = self._handle_response(
requests.get,
"get",
self.search_url,
params={"q": query, "type": query_type},
)
except SpotifyAPIError as e:
self._log.debug("Spotify API error: {}", e)
return []
return ()
response_data = response.get(query_type + "s", {}).get("items", [])
self._log.debug(
"Found {} result(s) from {} for '{}'",
@ -441,7 +448,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
)
return response_data
def commands(self):
def commands(self) -> list[ui.Subcommand]:
# autotagger import command
def queries(lib, opts, args):
success = self._parse_opts(opts)
@ -506,17 +513,14 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
self.opts = opts
return True
def _match_library_tracks(self, library, keywords):
def _match_library_tracks(self, library: Library, keywords: str):
"""Get a list of simplified track object dicts for library tracks
matching the specified ``keywords``.
:param library: beets library object to query.
:type library: beets.library.Library
:param keywords: Query to match library items against.
:type keywords: str
:return: List of simplified track object dicts for library items
matching the specified query.
:rtype: list[dict]
"""
results = []
failures = []
@ -561,6 +565,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
query = self._construct_search_query(
keywords=keywords, filters=query_filters
)
failures.append(query)
continue
@ -683,11 +688,9 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
if write:
item.try_write()
def track_info(self, track_id=None):
def track_info(self, track_id: str):
"""Fetch a track's popularity and external IDs using its Spotify ID."""
track_data = self._handle_response(
requests.get, self.track_url + track_id
)
track_data = self._handle_response("get", self.track_url + track_id)
self._log.debug(
"track_popularity: {} and track_isrc: {}",
track_data.get("popularity"),
@ -700,11 +703,11 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
track_data.get("external_ids").get("upc"),
)
def track_audio_features(self, track_id=None):
def track_audio_features(self, track_id: str):
"""Fetch track audio features by its Spotify ID."""
try:
return self._handle_response(
requests.get, self.audio_features_url + track_id
"get", self.audio_features_url + track_id
)
except SpotifyAPIError as e:
self._log.debug("Spotify API error: {}", e)

2
docs/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
_build
generated/

View file

@ -6,6 +6,7 @@ SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
SOURCEDIR = .
# When both are available, use Sphinx 2.x for autodoc compatibility.
ifeq ($(shell which sphinx-build2 >/dev/null 2>&1 ; echo $$?),0)
@ -39,7 +40,7 @@ help:
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
-rm -rf $(BUILDDIR)/*
-rm -rf $(BUILDDIR)/* $(SOURCEDIR)/api/generated/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html

3
docs/_templates/autosummary/base.rst vendored Normal file
View file

@ -0,0 +1,3 @@
{{ fullname | escape | underline}}
.. currentmodule:: {{ module }}
.. auto{{ objtype }}:: {{ objname }}

28
docs/_templates/autosummary/class.rst vendored Normal file
View file

@ -0,0 +1,28 @@
{{ fullname | escape | underline}}
.. currentmodule:: {{ module }}
.. autoclass:: {{ objname }}
:members:
:private-members:
:show-inheritance:
:inherited-members:
{% block methods %}
.. automethod:: __init__
{% if methods %}
.. rubric:: {{ _('Public methods summary') }}
.. autosummary::
{% for item in methods %}
~{{ name }}.{{ item }}
{%- endfor %}
{% for item in _methods %}
~{{ name }}.{{ item }}
{%- endfor %}
{% endif %}
{% endblock %}
.. rubric:: {{ _('Methods definition') }}

11
docs/_templates/autosummary/module.rst vendored Normal file
View file

@ -0,0 +1,11 @@
{{ fullname | escape | underline}}
{% block modules %}
{% if modules %}
.. rubric:: Modules
{% for item in modules %}
{{ item }}
{%- endfor %}
{% endif %}
{% endblock %}

47
docs/api/database.rst Normal file
View file

@ -0,0 +1,47 @@
Database
--------
.. currentmodule:: beets.library
Library
'''''''
.. autosummary::
:toctree: generated/
Library
Models
''''''
.. autosummary::
:toctree: generated/
LibModel
Album
Item
Transactions
''''''''''''
.. currentmodule:: beets.dbcore.db
.. autosummary::
:toctree: generated/
Transaction
Queries
'''''''
.. currentmodule:: beets.dbcore.query
.. autosummary::
:toctree: generated/
Query
FieldQuery
AndQuery

11
docs/api/plugins.rst Normal file
View file

@ -0,0 +1,11 @@
Plugins
-------
.. currentmodule:: beets.plugins
.. autosummary::
:toctree: generated/
BeetsPlugin

View file

@ -8,25 +8,77 @@ Unreleased
New features:
* :doc:`plugins/musicbrainz`: The MusicBrainz autotagger has been moved to
a separate plugin. The default :ref:`plugins-config` includes `musicbrainz`,
but if you've customized your `plugins` list in your configuration, you'll
need to explicitly add `musicbrainz` to continue using this functionality.
Configuration option `musicbrainz.enabled` has thus been deprecated.
:bug:`2686`
:bug:`4605`
* :doc:`plugins/web`: Show notifications when a track plays. This uses the
Media Session API to customize media notifications.
* :doc:`plugins/discogs`: Add a configurable ``search_limit`` option to
limit the number of results returned by Discogs metadata search queries.
* :doc:`plugins/discogs`: Implement the ``track_for_id`` method to allow
retrieving singletons by their Discogs ID.
:bug:`4661`
* :doc:`plugins/replace`: Add new plugin.
* :doc:`plugins/duplicates`: Add the ``--remove`` option, which removes matched
items from the library without deleting the media files.
:bug:`5832`
Bug fixes:
* :doc:`plugins/musicbrainz`: Fix a regression where user-configured
``extra_tags`` were read incorrectly.
:bug:`5788`
* tests: Fix library tests failing on Windows when run from outside ``D:/``.
:bug:`5802`
* Fix an issue where calling `Library.add` would cause the `database_change`
event to be sent twice instead of once.
:bug:`5560`
* Fix ``HiddenFileTest`` by using ``bytestring_path()``.
* tests: Fix tests failing without ``langdetect`` (by making it required).
:bug:`5797`
* :doc:`plugins/musicbrainz`: Fix the MusicBrainz search not taking into
account the album and recording aliases.
* :doc:`/plugins/spotify`: Fix an issue where every query sent to Spotify was
ASCII-encoded. This resulted in bad matches for queries containing special
(e.g. non-Latin) characters such as 盗作. If you want to keep the legacy
behavior, set the config option ``spotify.search_query_ascii: yes``.
:bug:`5699`
For packagers:
* The optional ``extra_tags`` parameter has been removed from the
``BeetsPlugin.candidates`` method signature since it is never passed in. If
you override this method in your plugin, you can remove this parameter.
For plugin developers:
* The `fetchart` plugin has seen a few changes to function signatures and
source registration in the process of introducing type annotations to the code.
Custom art sources might need to be adapted.
Other changes:
* The documentation structure for auto-generated API references changed
slightly; they are now located in the `docs/api` subdirectory.
* :doc:`/plugins/substitute`: Fix rST formatting for example cases so that each
case is shown on a separate line.
2.3.1 (May 14, 2025)
--------------------
Bug fixes:
* :doc:`/reference/pathformat`: Fixed a regression where path legalization
incorrectly removed parts of user-configured path formats that followed a dot
(**.**).
:bug:`5771`
For packagers:
* Force ``poetry`` version below 2 to avoid it mangling file modification times
in ``sdist`` package.
:bug:`5770`
@ -39,13 +91,6 @@ been dropped.
New features:
* :doc:`plugins/musicbrainz`: The MusicBrainz autotagger has been moved to
a separate plugin. The default :ref:`plugins-config` includes `musicbrainz`,
but if you've customized your `plugins` list in your configuration, you'll
need to explicitly add `musicbrainz` to continue using this functionality.
Configuration option `musicbrainz.enabled` has thus been deprecated.
:bug:`2686`
:bug:`4605`
* :doc:`plugins/lastgenre`: The new configuration option, ``keep_existing``,
provides more fine-grained control over how pre-populated genre tags are
handled. The ``force`` option now behaves in a more conventional manner.

View file

@ -1,19 +1,35 @@
AUTHOR = "Adrian Sampson"
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# General configuration
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
extensions = ["sphinx.ext.autodoc", "sphinx.ext.extlinks"]
exclude_patterns = ["_build"]
source_suffix = {".rst": "restructuredtext"}
master_doc = "index"
project = "beets"
AUTHOR = "Adrian Sampson"
copyright = "2016, Adrian Sampson"
master_doc = "index"
language = "en"
version = "2.3"
release = "2.3.1"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.extlinks",
]
autosummary_generate = True
exclude_patterns = ["_build"]
templates_path = ["_templates"]
source_suffix = {".rst": "restructuredtext", ".md": "markdown"}
pygments_style = "sphinx"
# External links to the bug tracker and other sites.
@ -59,10 +75,24 @@ man_pages = [
),
]
# Options for pydata theme
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "pydata_sphinx_theme"
html_theme_options = {"collapse_navigation": True, "logo": {"text": "beets"}}
html_title = "beets"
html_logo = "_static/beets_logo_nobg.png"
html_static_path = ["_static"]
html_css_files = ["beets.css"]
def skip_member(app, what, name, obj, skip, options):
if name.startswith("_"):
return True
return skip
def setup(app):
app.connect("autodoc-skip-member", skip_member)

View file

@ -10,8 +10,18 @@ and write metadata tags in media files.
.. _MediaFile: https://mediafile.readthedocs.io/en/latest/
.. toctree::
:maxdepth: 1
plugins
library
importer
cli
.. toctree::
:maxdepth: 1
:caption: API Reference
../api/plugins
../api/database

View file

@ -20,30 +20,18 @@ invocation of beets usually has only one :class:`Library`. It's powered by
abstraction, something like a very minimal `ORM`_. The library is also
responsible for handling queries to retrieve stored objects.
.. autoclass:: Library(path, directory[, path_formats[, replacements]])
Overview
''''''''
.. automethod:: __init__
You can add new items or albums to the library via the
:py:meth:`Library.add` and :py:meth:`Library.add_album` methods.
You can add new items or albums to the library:
You may also query the library for items and albums using the
:py:meth:`Library.items`, :py:meth:`Library.albums`, :py:meth:`Library.get_item` and :py:meth:`Library.get_album` methods.
.. automethod:: add
.. automethod:: add_album
And there are methods for querying the database:
.. automethod:: items
.. automethod:: albums
.. automethod:: get_item
.. automethod:: get_album
Any modifications must go through a :class:`Transaction` which you get can
using this method:
.. automethod:: transaction
Any modifications to the library must go through a
:class:`Transaction` object, which you can get using the
:py:meth:`Library.transaction` context manager.
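A minimal sketch tying these methods together (the paths and the query string
are placeholders, and error handling is omitted):

.. code-block:: python

    from beets.library import Item, Library

    lib = Library("/path/to/library.db", directory="/path/to/music")

    # Create an item from an existing audio file and add it to the library.
    item = Item.from_path(b"/path/to/music/song.mp3")
    lib.add(item)

    # Query the library; the string syntax is the same as on the command line.
    for found in lib.items("artist:Beatles"):
        print(found.title)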
.. _SQLite: https://sqlite.org/index.html
.. _ORM: https://en.wikipedia.org/wiki/Object-relational_mapping
@ -54,7 +42,7 @@ Model Classes
The two model entities in beets libraries, :class:`Item` and :class:`Album`,
share a base class, :class:`LibModel`, that provides common functionality. That
class itself specialises :class:`dbcore.Model` which provides an ORM-like
class itself specialises :class:`beets.dbcore.Model` which provides an ORM-like
abstraction.
To get or change the metadata of a model (an item or album), either access its
@ -68,42 +56,25 @@ Model base
Models use dirty-flags to track when the object's metadata goes out of
sync with the database. The dirty dictionary maps field names to booleans
indicating whether the field has been written since the object was last
synchronized (via load or store) with the database.
synchronized (via load or store) with the database. This logic is implemented
in the model base class :class:`LibModel` and is inherited by both
:class:`Item` and :class:`Album`.
.. autoclass:: LibModel
We provide CRUD-like methods for interacting with the database:
.. automethod:: all_keys
* :py:meth:`LibModel.store`
* :py:meth:`LibModel.load`
* :py:meth:`LibModel.remove`
* :py:meth:`LibModel.add`
.. automethod:: __init__
The base class :class:`beets.dbcore.Model` has a ``dict``-like interface, so
the normal mapping API is supported:
.. autoattribute:: _types
* :py:meth:`LibModel.keys`
* :py:meth:`LibModel.update`
* :py:meth:`LibModel.items`
* :py:meth:`LibModel.get`
.. autoattribute:: _fields
There are CRUD-like methods for interacting with the database:
.. automethod:: store
.. automethod:: load
.. automethod:: remove
.. automethod:: add
The base class :class:`dbcore.Model` has a ``dict``-like interface, so
normal the normal mapping API is supported:
.. automethod:: keys
.. automethod:: update
.. automethod:: items
.. note::
The :py:meth:`Album.items` method is not inherited from
:py:meth:`LibModel.items` for historical reasons.
.. automethod:: get
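Putting these together, a rough sketch (``lib`` is the library from the
overview sketch above and the item id is a placeholder):

.. code-block:: python

    item = lib.get_item(item_id)

    item.genre = "Post-Rock"   # the ``genre`` field is now marked dirty
    item.store()               # write the dirty fields back to the database
    item.load()                # re-read the record, discarding unsaved edits

    # The mapping-style interface behaves like a regular dict.
    print(item.get("genre"), sorted(item.keys())[:3])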
Item
''''
@ -155,38 +126,6 @@ This leads to the following implementation policy:
* On every modification to DB metadata (``item.field = ...``), the DB mtime
is reset to zero.
.. autoclass:: Item
.. automethod:: __init__
.. automethod:: from_path
.. automethod:: get_album
.. automethod:: destination
.. automethod:: current_mtime
The methods ``read()`` and ``write()`` are complementary: one reads a
file's tags and updates the item's metadata fields accordingly while the
other takes the item's fields and writes them to the file's tags.
.. automethod:: read
.. automethod:: write
.. automethod:: try_write
.. automethod:: try_sync
The :class:`Item` class supplements the normal model interface so that they
interacting with the filesystem as well:
.. automethod:: move
.. automethod:: remove
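A short sketch of this flow (``lib`` as above; the path is a placeholder):

.. code-block:: python

    item = Item.from_path(b"/music/track.flac")  # reads the file's tags
    lib.add(item)                                # attach the item to a library

    item.title = "Corrected Title"
    item.write()   # push the in-memory fields into the file's tags
    item.store()   # and persist them to the database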
Album
'''''
@ -205,35 +144,10 @@ For those fields that are both item-level and album-level (e.g., ``year`` or
use an SQLite table called ``albums``, in which each column is an album
metadata field.
.. autoclass:: Album
.. automethod:: __init__
.. automethod:: item_dir
.. automethod:: items
Albums extend the normal model interface to also forward changes to their
items:
.. autoattribute:: item_keys
.. automethod:: store
.. automethod:: try_sync
.. automethod:: move
.. automethod:: remove
Albums also manage album art, image files that are associated with each
album:
.. automethod:: set_art
.. automethod:: move_art
.. automethod:: art_destination
.. note::
The :py:meth:`Album.items` method is not inherited from
:py:meth:`LibModel.items` for historical reasons.
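A short sketch (``lib`` as above; the id and the image path are placeholders):

.. code-block:: python

    album = lib.get_album(album_id)       # also accepts an Item

    print(album.item_dir())
    for item in album.items():
        print(item.track, item.title)

    album.set_art(b"/path/to/cover.jpg")  # associate an image with the album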
Transactions
''''''''''''
@ -241,24 +155,30 @@ Transactions
The :class:`Library` class provides the basic methods necessary to access and
manipulate its contents. To perform more complicated operations atomically, or
to interact directly with the underlying SQLite database, you must use a
*transaction* (see this `blog post`_ for motivation). For example::
*transaction* (see this `blog post`_ for motivation). For example:
.. code-block:: python
lib = Library()
with lib.transaction() as tx:
items = lib.items(query)
lib.add_album(list(items))
.. _blog post: https://beets.io/blog/sqlite-nightmare.html
.. currentmodule:: beets.dbcore.db
.. autoclass:: Transaction
:members:
The :class:`Transaction` class is a context manager that provides a
transactional interface to the underlying SQLite database. It is
responsible for managing the transaction's lifecycle, including
beginning, committing, and rolling back the transaction if
an error occurs.
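For direct SQL access, something along these lines should work (the statement
is only illustrative; ``query()`` runs a ``SELECT`` inside the transaction and
returns the fetched rows):

.. code-block:: python

    with lib.transaction() as tx:
        rows = tx.query("SELECT id, title FROM items LIMIT 5")
    for row in rows:
        print(row[0], row[1])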
.. _blog post: https://beets.io/blog/sqlite-nightmare.html
Queries
-------
.. currentmodule:: beets.dbcore.query
To access albums and items in a library, we use :doc:`/reference/query`.
In beets, the :class:`Query` abstract base class represents a criterion that
matches items or albums in the database.
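As a sketch, a query object can also be built programmatically and passed to
the library methods directly (``MatchQuery`` is one concrete ``FieldQuery``
subclass, used here purely for illustration):

.. code-block:: python

    from beets.dbcore.query import AndQuery, MatchQuery

    query = AndQuery([
        MatchQuery("albumartist", "The Beatles"),
        MatchQuery("album", "Revolver"),
    ])
    for album in lib.albums(query):
        print(album.album, album.year)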

View file

@ -1,3 +1,11 @@
Plugin Development Guide
========================
Beets plugins are Python modules or packages that extend the core functionality
of beets. The plugin system is designed to be flexible, allowing developers to
add virtually any kind of feature.
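As a minimal sketch, a plugin module can be as small as this (the class name
and log message are illustrative):

.. code-block:: python

    from beets.plugins import BeetsPlugin


    class HelloPlugin(BeetsPlugin):
        def __init__(self):
            super().__init__()
            self._log.info("hello from a minimal plugin")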
.. _writing-plugins:
Writing Plugins
@ -413,9 +421,8 @@ to extend the kinds of metadata that they can easily manage.
The ``MediaFile`` class uses ``MediaField`` descriptors to provide
access to file tags. If you have created a descriptor you can add it through
your plugins ``add_media_field()`` method.
your plugin's :py:meth:`beets.plugins.BeetsPlugin.add_media_field` method.
.. automethod:: beets.plugins.BeetsPlugin.add_media_field
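A hedged sketch of how such a field might be registered (the field name and
storage styles are illustrative, not a prescribed set):

.. code-block:: python

    import mediafile

    from beets.plugins import BeetsPlugin


    class CustomFieldPlugin(BeetsPlugin):
        def __init__(self):
            super().__init__()
            field = mediafile.MediaField(
                mediafile.MP3DescStorageStyle("my_custom_tag"),
                mediafile.StorageStyle("my_custom_tag"),
            )
            self.add_media_field("my_custom_tag", field)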
.. _MediaFile: https://mediafile.readthedocs.io/en/latest/

View file

@ -101,11 +101,20 @@ This option is useful when importing classical music.
Other configurations available under ``discogs:`` are:
- **append_style_genre**: Appends the Discogs style (if found) to the genre tag. This can be useful if you want more granular genres to categorize your music.
For example, a release in Discogs might have a genre of "Electronic" and a style of "Techno": enabling this setting would set the genre to be "Electronic, Techno" (assuming default separator of ``", "``) instead of just "Electronic".
- **append_style_genre**: Appends the Discogs style (if found) to the genre
tag. This can be useful if you want more granular genres to categorize your
music. For example, a release in Discogs might have a genre of "Electronic"
and a style of "Techno": enabling this setting would set the genre to be
"Electronic, Techno" (assuming default separator of ``", "``) instead of just
"Electronic".
Default: ``False``
- **separator**: How to join multiple genre and style values from Discogs into a string.
- **separator**: How to join multiple genre and style values from Discogs into
a string.
Default: ``", "``
- **search_limit**: The maximum number of results to return from Discogs.
Lowering this limit can speed up searches by reducing the number of
candidates considered.
Default: ``5``
Troubleshooting

View file

@ -34,6 +34,7 @@ duplicates themselves via command-line switches ::
-o DEST, --copy=DEST copy items to dest
-p, --path print paths for matched items or albums
-t TAG, --tag=TAG tag matched items with 'k=v' attribute
-r, --remove remove items from library
Configuration
-------------
@ -57,7 +58,7 @@ file. The available options mirror the command-line options:
``$albumartist - $album - $title: $count`` (for tracks) or ``$albumartist -
$album: $count`` (for albums).
Default: ``no``.
- **delete**: Removes matched items from the library and from the disk.
- **delete**: Remove matched items from the library and from the disk.
Default: ``no``
- **format**: A specific format with which to print every track
or album. This uses the same template syntax as beets'
@ -92,6 +93,8 @@ file. The available options mirror the command-line options:
set. If you would like to consider the lower bitrates as duplicates,
for example, set ``tiebreak: items: [bitrate]``.
Default: ``{}``.
- **remove**: Remove matched items from the library, but not from the disk.
Default: ``no``.
Examples
--------

View file

@ -125,6 +125,7 @@ following to your configuration:
playlist
plexupdate
random
replace
replaygain
rewrite
scrub

View file

@ -102,7 +102,7 @@ MusicBrainz. Additional tags to be queried can be supplied with the
.. code-block:: yaml
musicbrainz:
extra_tags: [year, catalognum, country, media, label]
extra_tags: [barcode, catalognum, country, label, media, year]
This setting should improve the autotagger results if the metadata with the
given tags match the metadata returned by MusicBrainz.

17
docs/plugins/replace.rst Normal file
View file

@ -0,0 +1,17 @@
Replace Plugin
==============
The ``replace`` plugin provides a command that replaces the audio file
of a track while keeping its name and tags intact. It can save some time
when you have imported the wrong version of a song.
Enable the ``replace`` plugin in your configuration (see :ref:`using-plugins`)
and then type::
$ beet replace <query> <path>
The plugin will show you a list of files for you to pick from, and then
ask for confirmation.
If you usually run ReplayGain analysis during imports, consider running the
``replaygain`` command from the :doc:`/plugins/replaygain` plugin on the
replaced track as well.

View file

@ -83,6 +83,13 @@ in config.yaml under the ``spotify:`` section:
track/album/artist fields before sending them to Spotify. Can be useful for
changing certain abbreviations, like ft. -> feat. See the examples below.
Default: None.
- **search_query_ascii**: If set to ``yes``, the search query will be converted
to ASCII before being sent to Spotify. Converting searches to ASCII can
enhance search results in some cases, but in general it is not recommended.
For instance, ``artist:deadmau5 album:4×4`` would be converted to
``artist:deadmau5 album:4x4`` (note that ``×`` and ``x`` are different characters).
Default: ``no``.
Here's an example::
@ -92,6 +99,7 @@ Here's an example::
region_filter: US
show_failures: on
tiebreak: first
search_query_ascii: no
regex: [
{

View file

@ -31,9 +31,9 @@ group in the output, discarding the rest of the string.
This would handle all the below cases in a single rule:
Bob Dylan and The Band -> Bob Dylan
Neil Young & Crazy Horse -> Neil Young
James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston
| Bob Dylan and The Band -> Bob Dylan
| Neil Young & Crazy Horse -> Neil Young
| James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston
To apply the substitution, you have to call the function ``%substitute{}`` in the paths section. For example:

7
poetry.lock generated
View file

@ -1030,7 +1030,7 @@ files = [
name = "langdetect"
version = "1.0.9"
description = "Language detection library ported from Google's language-detection."
optional = true
optional = false
python-versions = "*"
files = [
{file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"},
@ -1271,8 +1271,11 @@ files = [
{file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
{file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
{file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
{file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
@ -3276,4 +3279,4 @@ web = ["flask", "flask-cors"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<4"
content-hash = "d609e83f7ffeefc12e28d627e5646aa5c1a6f5a56d7013bb649a468069550dba"
content-hash = "b3f2746a43227fe639d17eb22d7924e30c9d83eef53dce2c10388c602f0c6665"

View file

@ -84,6 +84,7 @@ sphinx = { version = "*", optional = true }
beautifulsoup4 = "*"
codecov = ">=2.1.13"
flask = "*"
langdetect = "*"
mock = "*"
pylast = "*"
pytest = "*"

View file

@ -0,0 +1,299 @@
import re
import pytest
from beets.autotag import AlbumInfo, TrackInfo
from beets.autotag.distance import (
Distance,
distance,
string_dist,
track_distance,
)
from beets.library import Item
from beets.test.helper import ConfigMixin
_p = pytest.param
class TestDistance:
@pytest.fixture(scope="class")
def config(self):
return ConfigMixin().config
@pytest.fixture
def dist(self, config):
config["match"]["distance_weights"]["source"] = 2.0
config["match"]["distance_weights"]["album"] = 4.0
config["match"]["distance_weights"]["medium"] = 2.0
Distance.__dict__["_weights"].cache = {}
return Distance()
def test_add(self, dist):
dist.add("add", 1.0)
assert dist._penalties == {"add": [1.0]}
@pytest.mark.parametrize(
"key, args_with_expected",
[
(
"equality",
[
(("ghi", ["abc", "def", "ghi"]), [0.0]),
(("xyz", ["abc", "def", "ghi"]), [0.0, 1.0]),
(("abc", re.compile(r"ABC", re.I)), [0.0, 1.0, 0.0]),
],
),
("expr", [((True,), [1.0]), ((False,), [1.0, 0.0])]),
(
"number",
[
((1, 1), [0.0]),
((1, 2), [0.0, 1.0]),
((2, 1), [0.0, 1.0, 1.0]),
((-1, 2), [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]),
],
),
(
"priority",
[
(("abc", "abc"), [0.0]),
(("def", ["abc", "def"]), [0.0, 0.5]),
(("gh", ["ab", "cd", "ef", re.compile("GH", re.I)]), [0.0, 0.5, 0.75]), # noqa: E501
(("xyz", ["abc", "def"]), [0.0, 0.5, 0.75, 1.0]),
],
),
(
"ratio",
[
((25, 100), [0.25]),
((10, 5), [0.25, 1.0]),
((-5, 5), [0.25, 1.0, 0.0]),
((5, 0), [0.25, 1.0, 0.0, 0.0]),
],
),
(
"string",
[
(("abc", "bcd"), [2 / 3]),
(("abc", None), [2 / 3, 1]),
((None, None), [2 / 3, 1, 0]),
],
),
],
) # fmt: skip
def test_add_methods(self, dist, key, args_with_expected):
method = getattr(dist, f"add_{key}")
for arg_set, expected in args_with_expected:
method(key, *arg_set)
assert dist._penalties[key] == expected
def test_distance(self, dist):
dist.add("album", 0.5)
dist.add("media", 0.25)
dist.add("media", 0.75)
assert dist.distance == 0.5
assert dist.max_distance == 6.0
assert dist.raw_distance == 3.0
assert dist["album"] == 1 / 3
assert dist["media"] == 1 / 6
def test_operators(self, dist):
dist.add("source", 0.0)
dist.add("album", 0.5)
dist.add("medium", 0.25)
dist.add("medium", 0.75)
assert len(dist) == 2
assert list(dist) == [("album", 0.2), ("medium", 0.2)]
assert dist == 0.4
assert dist < 1.0
assert dist > 0.0
assert dist - 0.4 == 0.0
assert 0.4 - dist == 0.0
assert float(dist) == 0.4
def test_penalties_sort(self, dist):
dist.add("album", 0.1875)
dist.add("medium", 0.75)
assert dist.items() == [("medium", 0.25), ("album", 0.125)]
# Sort by key if distance is equal.
dist = Distance()
dist.add("album", 0.375)
dist.add("medium", 0.75)
assert dist.items() == [("album", 0.25), ("medium", 0.25)]
def test_update(self, dist):
dist1 = dist
dist1.add("album", 0.5)
dist1.add("media", 1.0)
dist2 = Distance()
dist2.add("album", 0.75)
dist2.add("album", 0.25)
dist2.add("media", 0.05)
dist1.update(dist2)
assert dist1._penalties == {
"album": [0.5, 0.75, 0.25],
"media": [1.0, 0.05],
}
class TestTrackDistance:
@pytest.fixture(scope="class")
def info(self):
return TrackInfo(title="title", artist="artist")
@pytest.mark.parametrize(
"title, artist, expected_penalty",
[
_p("title", "artist", False, id="identical"),
_p("title", "Various Artists", False, id="tolerate-va"),
_p("title", "different artist", True, id="different-artist"),
_p("different title", "artist", True, id="different-title"),
],
)
def test_track_distance(self, info, title, artist, expected_penalty):
item = Item(artist=artist, title=title)
assert (
bool(track_distance(item, info, incl_artist=True))
== expected_penalty
)
class TestAlbumDistance:
@pytest.fixture(scope="class")
def items(self):
return [
Item(
title=title,
track=track,
artist="artist",
album="album",
length=1,
)
for title, track in [("one", 1), ("two", 2), ("three", 3)]
]
@pytest.fixture
def get_dist(self, items):
def inner(info: AlbumInfo):
return distance(items, info, dict(zip(items, info.tracks)))
return inner
@pytest.fixture
def info(self, items):
return AlbumInfo(
artist="artist",
album="album",
tracks=[
TrackInfo(
title=i.title,
artist=i.artist,
index=i.track,
length=i.length,
)
for i in items
],
va=False,
)
def test_identical_albums(self, get_dist, info):
assert get_dist(info) == 0
def test_incomplete_album(self, get_dist, info):
info.tracks.pop(2)
assert 0 < float(get_dist(info)) < 0.2
def test_overly_complete_album(self, get_dist, info):
info.tracks.append(
Item(index=4, title="four", artist="artist", length=1)
)
assert 0 < float(get_dist(info)) < 0.2
@pytest.mark.parametrize("va", [True, False])
def test_albumartist(self, get_dist, info, va):
info.artist = "another artist"
info.va = va
assert bool(get_dist(info)) is not va
def test_comp_no_track_artists(self, get_dist, info):
# Some VA releases don't have track artists (incomplete metadata).
info.artist = "another artist"
info.va = True
for track in info.tracks:
track.artist = None
assert get_dist(info) == 0
def test_comp_track_artists_do_not_match(self, get_dist, info):
info.va = True
info.tracks[0].artist = "another artist"
assert get_dist(info) != 0
def test_tracks_out_of_order(self, get_dist, info):
tracks = info.tracks
tracks[1].title, tracks[2].title = tracks[2].title, tracks[1].title
assert 0 < float(get_dist(info)) < 0.2
def test_two_medium_release(self, get_dist, info):
info.tracks[0].medium_index = 1
info.tracks[1].medium_index = 2
info.tracks[2].medium_index = 1
assert get_dist(info) == 0
class TestStringDistance:
@pytest.mark.parametrize(
"string1, string2",
[
("Some String", "Some String"),
("Some String", "Some.String!"),
("Some String", "sOME sTring"),
("My Song (EP)", "My Song"),
("The Song Title", "Song Title, The"),
("A Song Title", "Song Title, A"),
("An Album Title", "Album Title, An"),
("", ""),
("Untitled", "[Untitled]"),
("And", "&"),
("\xe9\xe1\xf1", "ean"),
],
)
def test_matching_distance(self, string1, string2):
assert string_dist(string1, string2) == 0.0
def test_different_distance(self):
assert string_dist("Some String", "Totally Different") != 0.0
@pytest.mark.parametrize(
"string1, string2, reference",
[
("XXX Band Name", "The Band Name", "Band Name"),
("One .Two.", "One (Two)", "One"),
("One .Two.", "One [Two]", "One"),
("My Song blah Someone", "My Song feat Someone", "My Song"),
],
)
def test_relative_weights(self, string1, string2, reference):
assert string_dist(string2, reference) < string_dist(string1, reference)
def test_solo_pattern(self):
# Just make sure these don't crash.
string_dist("The ", "")
string_dist("(EP)", "(EP)")
string_dist(", An", "")

View file

@ -108,45 +108,6 @@ lyrics_pages = [
url_title="The Beatles - Lady Madonna Lyrics | AZLyrics.com",
marks=[xfail_on_ci("AZLyrics is blocked by Cloudflare")],
),
LyricsPage.make(
"http://www.chartlyrics.com/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx",
"""
Lady Madonna,
Children at your feet
Wonder how you manage to make ends meet.
Who finds the money
When you pay the rent?
Did you think that money was heaven-sent?
Friday night arrives without a suitcase.
Sunday morning creeping like a nun.
Monday's child has learned to tie his bootlace.
See how they run.
Lady Madonna,
Baby at your breast
Wonders how you manage to feed the rest.
See how they run.
Lady Madonna,
Lying on the bed.
Listen to the music playing in your head.
Tuesday afternoon is never ending.
Wednesday morning papers didn't come.
Thursday night your stockings needed mending.
See how they run.
Lady Madonna,
Children at your feet
Wonder how you manage to make ends meet.
""",
url_title="The Beatles Lady Madonna lyrics",
),
LyricsPage.make(
"https://www.dainuzodziai.lt/m/mergaites-nori-mylet-atlanta/",
"""

View file

@ -14,8 +14,12 @@
"""Tests for the album art fetchers."""
from __future__ import annotations
import os
import shutil
import unittest
from typing import TYPE_CHECKING
from unittest.mock import patch
import confuse
@ -37,6 +41,11 @@ from beetsplug import fetchart
logger = logging.getLogger("beets.test_art")
if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
from beets.library import Album
class Settings:
"""Used to pass settings to the ArtSources when the plugin isn't fully
@ -48,6 +57,19 @@ class Settings:
setattr(self, k, v)
class DummyRemoteArtSource(fetchart.RemoteArtSource):
NAME = "Dummy Art Source"
ID = "dummy"
def get(
self,
album: Album,
plugin: fetchart.FetchArtPlugin,
paths: None | Sequence[bytes],
) -> Iterator[fetchart.Candidate]:
return iter(())
class UseThePlugin(CleanupModulesMixin, BeetsTestCase):
modules = (fetchart.__name__, ArtResizer.__module__)
@ -202,9 +224,11 @@ class FetchImageTest(FetchImageTestCase):
def setUp(self):
super().setUp()
self.dpath = os.path.join(self.temp_dir, b"arttest")
self.source = fetchart.RemoteArtSource(logger, self.plugin.config)
self.source = DummyRemoteArtSource(logger, self.plugin.config)
self.settings = Settings(maxwidth=0)
self.candidate = fetchart.Candidate(logger, url=self.URL)
self.candidate = fetchart.Candidate(
logger, self.source.ID, url=self.URL
)
def test_invalid_type_returns_none(self):
self.mock_response(self.URL, "image/watercolour")
@ -432,7 +456,7 @@ class ITunesStoreTest(UseThePlugin):
self.mock_response(fetchart.ITunesStore.API_URL, json)
candidate = next(self.source.get(self.album, self.settings, []))
assert candidate.url == "url_to_the_image"
assert candidate.match == fetchart.Candidate.MATCH_EXACT
assert candidate.match == fetchart.MetadataMatch.EXACT
def test_itunesstore_no_result(self):
json = '{"results": []}'
@ -471,7 +495,7 @@ class ITunesStoreTest(UseThePlugin):
self.mock_response(fetchart.ITunesStore.API_URL, json)
candidate = next(self.source.get(self.album, self.settings, []))
assert candidate.url == "url_to_the_image"
assert candidate.match == fetchart.Candidate.MATCH_FALLBACK
assert candidate.match == fetchart.MetadataMatch.FALLBACK
def test_itunesstore_returns_result_without_artwork(self):
json = """{
@ -727,7 +751,11 @@ class ArtImporterTest(UseThePlugin):
self.art_file = os.path.join(self.temp_dir, b"tmpcover.jpg")
_common.touch(self.art_file)
self.old_afa = self.plugin.art_for_album
self.afa_response = fetchart.Candidate(logger, path=self.art_file)
self.afa_response = fetchart.Candidate(
logger,
source_name="test",
path=self.art_file,
)
def art_for_album(i, p, local_only=False):
return self.afa_response
@ -814,7 +842,11 @@ class ArtImporterTest(UseThePlugin):
def test_do_not_delete_original_if_already_in_place(self):
artdest = os.path.join(os.path.dirname(self.i.path), b"cover.jpg")
shutil.copyfile(syspath(self.art_file), syspath(artdest))
self.afa_response = fetchart.Candidate(logger, path=artdest)
self.afa_response = fetchart.Candidate(
logger,
source_name="test",
path=artdest,
)
self._fetch_art(True)
def test_fetch_art_if_imported_file_deleted(self):
@ -855,7 +887,9 @@ class ArtForAlbumTest(UseThePlugin):
def fs_source_get(_self, album, settings, paths):
if paths:
yield fetchart.Candidate(logger, path=self.image_file)
yield fetchart.Candidate(
logger, source_name=_self.ID, path=self.image_file
)
fetchart.FileSystem.get = fs_source_get
@ -979,7 +1013,7 @@ class ArtForAlbumTest(UseThePlugin):
self._assert_image_operated(self.IMG_348x348, self.RESIZE_OP, True)
class DeprecatedConfigTest(BeetsTestCase):
class DeprecatedConfigTest(unittest.TestCase):
"""While refactoring the plugin, the remote_priority option was deprecated,
and a new codepath should translate its effect. Check that it actually does
so.
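For context, a hedged sketch of the behaviour this test exercises (the option name comes from the docstring above; the translation logic itself lives in the plugin and is not reproduced here):

# Sketch only, not the plugin's actual code: with the deprecated flag set,
# remote sources take priority and the local FileSystem source should end
# up last in the configured source list.
from beets import config
from beetsplug import fetchart

config["fetchart"]["remote_priority"] = True
plugin = fetchart.FetchArtPlugin()
assert isinstance(plugin.sources[-1], fetchart.FileSystem)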
@ -997,7 +1031,7 @@ class DeprecatedConfigTest(BeetsTestCase):
assert isinstance(self.plugin.sources[-1], fetchart.FileSystem)
class EnforceRatioConfigTest(BeetsTestCase):
class EnforceRatioConfigTest(unittest.TestCase):
"""Throw some data at the regexes."""
def _load_with_config(self, values, should_raise):

View file

@ -14,6 +14,7 @@
"""Tests for the 'beatport' plugin."""
import unittest
from datetime import timedelta
from beets.test import _common
@ -585,7 +586,7 @@ class BeatportTest(BeetsTestCase):
assert track.genre == test_track.genre
class BeatportResponseEmptyTest(BeetsTestCase):
class BeatportResponseEmptyTest(unittest.TestCase):
def _make_tracks_response(self):
results = [
{

View file

@ -171,27 +171,6 @@ class DGAlbumInfoTest(BeetsTestCase):
assert t[3].index == 4
assert t[3].medium_total == 1
def test_parse_position(self):
"""Test the conversion of discogs `position` to medium, medium_index
and subtrack_index."""
# List of tuples (discogs_position, (medium, medium_index, subindex))
positions = [
("1", (None, "1", None)),
("A12", ("A", "12", None)),
("12-34", ("12-", "34", None)),
("CD1-1", ("CD1-", "1", None)),
("1.12", (None, "1", "12")),
("12.a", (None, "12", "A")),
("12.34", (None, "12", "34")),
("1ab", (None, "1", "AB")),
# Non-standard
("IV", ("IV", None, None)),
]
d = DiscogsPlugin()
for position, expected in positions:
assert d.get_track_index(position) == expected
def test_parse_tracklist_without_sides(self):
"""Test standard Discogs position 12.2.9#1: "without sides"."""
release = self._make_release_from_positions(["1", "2", "3"])
@ -417,3 +396,22 @@ def test_get_media_and_albumtype(formats, expected_media, expected_albumtype):
result = DiscogsPlugin.get_media_and_albumtype(formats)
assert result == (expected_media, expected_albumtype)
@pytest.mark.parametrize(
"position, medium, index, subindex",
[
("1", None, "1", None),
("A12", "A", "12", None),
("12-34", "12-", "34", None),
("CD1-1", "CD1-", "1", None),
("1.12", None, "1", "12"),
("12.a", None, "12", "A"),
("12.34", None, "12", "34"),
("1ab", None, "1", "AB"),
# Non-standard
("IV", "IV", None, None),
],
)
def test_get_track_index(position, medium, index, subindex):
assert DiscogsPlugin.get_track_index(position) == (medium, index, subindex)

View file

@ -34,8 +34,35 @@ def require_artresizer_compare(test):
def wrapper(*args, **kwargs):
if not ArtResizer.shared.can_compare:
raise unittest.SkipTest("compare not available")
else:
return test(*args, **kwargs)
# PHASH computation in ImageMagick changed at some point in an
# undocumented way. Check at a low level that comparisons of our
# fixtures give the expected results; only then are the plugin logic
# tests below meaningful.
# cf. https://github.com/ImageMagick/ImageMagick/discussions/5191
# It would be better to investigate what exactly changed in IM and
# handle that in ArtResizer.IMBackend.{can_compare,compare}.
# Skipping the tests as below is a quick fix for CI, but users may
# still see unexpected behaviour.
abbey_artpath = os.path.join(_common.RSRC, b"abbey.jpg")
abbey_similarpath = os.path.join(_common.RSRC, b"abbey-similar.jpg")
abbey_differentpath = os.path.join(_common.RSRC, b"abbey-different.jpg")
compare_threshold = 20
similar_compares_ok = ArtResizer.shared.compare(
abbey_artpath,
abbey_similarpath,
compare_threshold,
)
different_compares_ok = ArtResizer.shared.compare(
abbey_artpath,
abbey_differentpath,
compare_threshold,
)
if not similar_compares_ok or different_compares_ok:
raise unittest.SkipTest("IM version with broken compare")
return test(*args, **kwargs)
wrapper.__name__ = test.__name__
return wrapper
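A brief usage sketch for the decorator above (the class and test names are hypothetical): it wraps comparison-dependent tests so they are skipped whenever the installed ImageMagick cannot reproduce the expected fixture comparisons.

# Hypothetical usage; the class and test names are illustrative only.
class ImageComparisonExampleTest(unittest.TestCase):
    @require_artresizer_compare
    def test_compare_backend_is_usable(self):
        # Reaching this point means ArtResizer.shared.compare returned the
        # expected results for the bundled fixture images.
        assert ArtResizer.shared.can_compare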

View file

@ -18,7 +18,6 @@ from unittest.mock import Mock
import pytest
from beets import config
from beets.test import _common
from beets.test.helper import BeetsTestCase
from beetsplug import lastgenre
@ -32,12 +31,12 @@ class LastGenrePluginTest(BeetsTestCase):
def _setup_config(
self, whitelist=False, canonical=False, count=1, prefer_specific=False
):
config["lastgenre"]["canonical"] = canonical
config["lastgenre"]["count"] = count
config["lastgenre"]["prefer_specific"] = prefer_specific
self.config["lastgenre"]["canonical"] = canonical
self.config["lastgenre"]["count"] = count
self.config["lastgenre"]["prefer_specific"] = prefer_specific
if isinstance(whitelist, (bool, (str,))):
# Filename, default, or disabled.
config["lastgenre"]["whitelist"] = whitelist
self.config["lastgenre"]["whitelist"] = whitelist
self.plugin.setup()
if not isinstance(whitelist, (bool, (str,))):
# Explicit list of genres.
@ -463,11 +462,10 @@ def test_get_genre(config_values, item_genre, mock_genres, expected_result):
lastgenre.LastGenrePlugin.fetch_album_genre = mock_fetch_album_genre
lastgenre.LastGenrePlugin.fetch_artist_genre = mock_fetch_artist_genre
# Configure
config["lastgenre"] = config_values
# Initialize plugin instance and item
plugin = lastgenre.LastGenrePlugin()
# Configure
plugin.config.set(config_values)
item = _common.item()
item.genre = item_genre

View file

@ -14,8 +14,6 @@
"""Tests for the 'lyrics' plugin."""
import importlib.util
import os
import re
import textwrap
from functools import partial
@ -30,11 +28,6 @@ from beetsplug import lyrics
from .lyrics_pages import LyricsPage, lyrics_pages
github_ci = os.environ.get("GITHUB_ACTIONS") == "true"
if not github_ci and not importlib.util.find_spec("langdetect"):
pytest.skip("langdetect isn't available", allow_module_level=True)
PHRASE_BY_TITLE = {
"Lady Madonna": "friday night arrives without a suitcase",
"Jazz'n'blues": "as i check my balance i kiss the screen",

View file

@ -14,10 +14,14 @@
"""Tests for MusicBrainz API wrapper."""
import unittest
from unittest import mock
import pytest
from beets import config
from beets.test.helper import BeetsTestCase
from beets.library import Item
from beets.test.helper import BeetsTestCase, PluginMixin
from beetsplug import musicbrainz
@ -662,7 +666,7 @@ class MBAlbumInfoTest(MusicBrainzTestCase):
assert t[1].trackdisambig == "SECOND TRACK"
class ArtistFlatteningTest(BeetsTestCase):
class ArtistFlatteningTest(unittest.TestCase):
def _credit_dict(self, suffix=""):
return {
"artist": {
@ -754,89 +758,6 @@ class ArtistFlatteningTest(BeetsTestCase):
class MBLibraryTest(MusicBrainzTestCase):
def test_match_track(self):
with mock.patch("musicbrainzngs.search_recordings") as p:
p.return_value = {
"recording-list": [
{
"title": "foo",
"id": "bar",
"length": 42,
}
],
}
ti = list(self.mb.item_candidates(None, "hello", "there"))[0]
p.assert_called_with(artist="hello", recording="there", limit=5)
assert ti.title == "foo"
assert ti.track_id == "bar"
def test_candidates(self):
mbid = "d2a6f856-b553-40a0-ac54-a321e8e2da99"
with mock.patch("musicbrainzngs.search_releases") as sp:
sp.return_value = {
"release-list": [
{
"id": mbid,
}
],
}
with mock.patch("musicbrainzngs.get_release_by_id") as gp:
gp.return_value = {
"release": {
"title": "hi",
"id": mbid,
"status": "status",
"medium-list": [
{
"track-list": [
{
"id": "baz",
"recording": {
"title": "foo",
"id": "bar",
"length": 42,
},
"position": 9,
"number": "A1",
}
],
"position": 5,
}
],
"artist-credit": [
{
"artist": {
"name": "some-artist",
"id": "some-id",
},
}
],
"release-group": {
"id": "another-id",
},
}
}
ai = list(self.mb.candidates([], "hello", "there", False))[0]
sp.assert_called_with(artist="hello", release="there", limit=5)
gp.assert_called_with(mbid, mock.ANY)
assert ai.tracks[0].title == "foo"
assert ai.album == "hi"
def test_match_track_empty(self):
with mock.patch("musicbrainzngs.search_recordings") as p:
til = list(self.mb.item_candidates(None, " ", " "))
assert not p.called
assert til == []
def test_candidates_empty(self):
with mock.patch("musicbrainzngs.search_releases") as p:
ail = list(self.mb.candidates([], " ", " ", False))
assert not p.called
assert ail == []
def test_follow_pseudo_releases(self):
side_effect = [
{
@ -1063,3 +984,97 @@ class MBLibraryTest(MusicBrainzTestCase):
gp.side_effect = side_effect
album = self.mb.album_for_id("d2a6f856-b553-40a0-ac54-a321e8e2da02")
assert album.country is None
class TestMusicBrainzPlugin(PluginMixin):
plugin = "musicbrainz"
mbid = "d2a6f856-b553-40a0-ac54-a321e8e2da99"
RECORDING = {"title": "foo", "id": "bar", "length": 42}
@pytest.fixture
def plugin_config(self):
return {}
@pytest.fixture
def mb(self, plugin_config):
self.config[self.plugin].set(plugin_config)
return musicbrainz.MusicBrainzPlugin()
@pytest.mark.parametrize(
"plugin_config,va_likely,expected_additional_criteria",
[
({}, False, {"artist": "Artist "}),
({}, True, {"arid": "89ad4ac3-39f7-470e-963a-56509c546377"}),
(
{"extra_tags": ["label", "catalognum"]},
False,
{"artist": "Artist ", "label": "abc", "catno": "ABC123"},
),
],
)
def test_get_album_criteria(
self, mb, va_likely, expected_additional_criteria
):
items = [
Item(catalognum="ABC 123", label="abc"),
Item(catalognum="ABC 123", label="abc"),
Item(catalognum="ABC 123", label="def"),
]
assert mb.get_album_criteria(items, "Artist ", " Album", va_likely) == {
"release": " Album",
"alias": " Album",
"tracks": str(len(items)),
**expected_additional_criteria,
}
def test_item_candidates(self, monkeypatch, mb):
monkeypatch.setattr(
"musicbrainzngs.search_recordings",
lambda *_, **__: {"recording-list": [self.RECORDING]},
)
candidates = list(mb.item_candidates(Item(), "hello", "there"))
assert len(candidates) == 1
assert candidates[0].track_id == self.RECORDING["id"]
def test_candidates(self, monkeypatch, mb):
monkeypatch.setattr(
"musicbrainzngs.search_releases",
lambda *_, **__: {"release-list": [{"id": self.mbid}]},
)
monkeypatch.setattr(
"musicbrainzngs.get_release_by_id",
lambda *_, **__: {
"release": {
"title": "hi",
"id": self.mbid,
"status": "status",
"medium-list": [
{
"track-list": [
{
"id": "baz",
"recording": self.RECORDING,
"position": 9,
"number": "A1",
}
],
"position": 5,
}
],
"artist-credit": [
{"artist": {"name": "some-artist", "id": "some-id"}}
],
"release-group": {"id": "another-id"},
}
},
)
candidates = list(mb.candidates([], "hello", "there", False))
assert len(candidates) == 1
assert candidates[0].tracks[0].track_id == self.RECORDING["id"]
assert candidates[0].album == "hi"

View file

@ -0,0 +1,115 @@
import shutil
from pathlib import Path
import pytest
from mediafile import MediaFile
from beets import ui
from beets.test import _common
from beetsplug.replace import ReplacePlugin
replace = ReplacePlugin()
class TestReplace:
@pytest.fixture(autouse=True)
def _fake_dir(self, tmp_path):
self.fake_dir = tmp_path
@pytest.fixture(autouse=True)
def _fake_file(self, tmp_path):
self.fake_file = tmp_path
def test_path_is_dir(self):
fake_directory = self.fake_dir / "fakeDir"
fake_directory.mkdir()
with pytest.raises(ui.UserError):
replace.file_check(fake_directory)
def test_path_is_unsupported_file(self):
fake_file = self.fake_file / "fakefile.txt"
fake_file.write_text("test", encoding="utf-8")
with pytest.raises(ui.UserError):
replace.file_check(fake_file)
def test_path_is_supported_file(self):
dest = self.fake_file / "full.mp3"
src = Path(_common.RSRC.decode()) / "full.mp3"
shutil.copyfile(src, dest)
mediafile = MediaFile(dest)
mediafile.albumartist = "AAA"
mediafile.disctitle = "DDD"
mediafile.genres = ["a", "b", "c"]
mediafile.composer = None
mediafile.save()
replace.file_check(Path(str(dest)))
def test_select_song_valid_choice(self, monkeypatch, capfd):
songs = ["Song A", "Song B", "Song C"]
monkeypatch.setattr("builtins.input", lambda _: "2")
selected_song = replace.select_song(songs)
captured = capfd.readouterr()
assert "1. Song A" in captured.out
assert "2. Song B" in captured.out
assert "3. Song C" in captured.out
assert selected_song == "Song B"
def test_select_song_cancel(self, monkeypatch):
songs = ["Song A", "Song B", "Song C"]
monkeypatch.setattr("builtins.input", lambda _: "0")
selected_song = replace.select_song(songs)
assert selected_song is None
def test_select_song_invalid_then_valid(self, monkeypatch, capfd):
songs = ["Song A", "Song B", "Song C"]
inputs = iter(["invalid", "4", "3"])
monkeypatch.setattr("builtins.input", lambda _: next(inputs))
selected_song = replace.select_song(songs)
captured = capfd.readouterr()
assert "Invalid input. Please type in a number." in captured.out
assert (
"Invalid choice. Please enter a number between 1 and 3."
in captured.out
)
assert selected_song == "Song C"
def test_confirm_replacement_file_not_exist(self):
class Song:
path = b"test123321.txt"
song = Song()
with pytest.raises(ui.UserError):
replace.confirm_replacement("test", song)
def test_confirm_replacement_yes(self, monkeypatch):
src = Path(_common.RSRC.decode()) / "full.mp3"
monkeypatch.setattr("builtins.input", lambda _: "YES ")
class Song:
path = str(src).encode()
song = Song()
assert replace.confirm_replacement("test", song) is True
def test_confirm_replacement_no(self, monkeypatch):
src = Path(_common.RSRC.decode()) / "full.mp3"
monkeypatch.setattr("builtins.input", lambda _: "test123")
class Song:
path = str(src).encode()
song = Song()
assert replace.confirm_replacement("test", song) is False

View file

@ -7,7 +7,7 @@ import responses
from beets.library import Item
from beets.test import _common
from beets.test.helper import BeetsTestCase
from beets.test.helper import PluginTestCase
from beetsplug import spotify
@ -23,10 +23,11 @@ def _params(url):
return parse_qs(urlparse(url).query)
class SpotifyPluginTest(BeetsTestCase):
class SpotifyPluginTest(PluginTestCase):
plugin = "spotify"
@responses.activate
def setUp(self):
super().setUp()
responses.add(
responses.POST,
spotify.SpotifyPlugin.oauth_token_url,
@ -39,6 +40,7 @@ class SpotifyPluginTest(BeetsTestCase):
"scope": "",
},
)
super().setUp()
self.spotify = spotify.SpotifyPlugin()
opts = ArgumentsMock("list", False)
self.spotify._parse_opts(opts)
@ -176,3 +178,74 @@ class SpotifyPluginTest(BeetsTestCase):
results = self.spotify._match_library_tracks(self.lib, "Happy")
assert 1 == len(results)
assert "6NPVjNh8Jhru9xOmyQigds" == results[0]["id"]
@responses.activate
def test_japanese_track(self):
"""Ensure non-ASCII characters remain unchanged in search queries"""
# Path to the mock JSON file for the Japanese track
json_file = os.path.join(
_common.RSRC, b"spotify", b"japanese_track_request.json"
)
# Load the mock JSON response
with open(json_file, "rb") as f:
response_body = f.read()
# Mock Spotify Search API response
responses.add(
responses.GET,
spotify.SpotifyPlugin.search_url,
body=response_body,
status=200,
content_type="application/json",
)
# Create a mock item with Japanese metadata
item = Item(
mb_trackid="56789",
album="盗作",
albumartist="ヨルシカ",
title="思想犯",
length=10,
)
item.add(self.lib)
# Search without ascii encoding
with self.configure_plugin(
{
"search_query_ascii": False,
}
):
assert self.spotify.config["search_query_ascii"].get() is False
# Call the method to match library tracks
results = self.spotify._match_library_tracks(self.lib, item.title)
# Assertions to verify results
assert results is not None
assert 1 == len(results)
assert results[0]["name"] == item.title
assert results[0]["artists"][0]["name"] == item.albumartist
assert results[0]["album"]["name"] == item.album
# Verify search query parameters
params = _params(responses.calls[0].request.url)
query = params["q"][0]
assert item.title in query
assert f"artist:{item.albumartist}" in query
assert f"album:{item.album}" in query
assert not query.isascii()
# The track is not found in the library if ASCII encoding is enabled
with self.configure_plugin(
{
"search_query_ascii": True,
}
):
assert self.spotify.config["search_query_ascii"].get() is True
results = self.spotify._match_library_tracks(self.lib, item.title)
params = _params(responses.calls[1].request.url)
query = params["q"][0]
assert query.isascii()

View file

@ -1,11 +1,11 @@
"""Tests for the 'subsonic' plugin."""
import unittest
from urllib.parse import parse_qs, urlparse
import responses
from beets import config
from beets.test.helper import BeetsTestCase
from beetsplug import subsonicupdate
@ -24,7 +24,7 @@ def _params(url):
return parse_qs(urlparse(url).query)
class SubsonicPluginTest(BeetsTestCase):
class SubsonicPluginTest(unittest.TestCase):
"""Test class for subsonicupdate."""
@responses.activate

View file

@ -1,11 +1,12 @@
"""Tests for the 'the' plugin"""
import unittest
from beets import config
from beets.test.helper import BeetsTestCase
from beetsplug.the import FORMAT, PATTERN_A, PATTERN_THE, ThePlugin
class ThePluginTest(BeetsTestCase):
class ThePluginTest(unittest.TestCase):
def test_unthe_with_default_patterns(self):
assert ThePlugin().unthe("", PATTERN_THE) == ""
assert (

View file

@ -0,0 +1,89 @@
{
"tracks":{
"href":"https://api.spotify.com/v1/search?query=Happy+album%3ADespicable+Me+2+artist%3APharrell+Williams&offset=0&limit=20&type=track",
"items":[
{
"album":{
"album_type":"compilation",
"available_markets":[
"AD", "AR", "AT", "AU", "BE", "BG", "BO", "BR", "CA",
"CH", "CL", "CO", "CR", "CY", "CZ", "DE", "DK", "DO",
"EC", "EE", "ES", "FI", "FR", "GB", "GR", "GT", "HK",
"HN", "HU", "IE", "IS", "IT", "LI", "LT", "LU", "LV",
"MC", "MT", "MX", "MY", "NI", "NL", "NO", "NZ", "PA",
"PE", "PH", "PL", "PT", "PY", "RO", "SE", "SG", "SI",
"SK", "SV", "TR", "TW", "US", "UY"
],
"external_urls":{
"spotify":"https://open.spotify.com/album/5l3zEmMrOhOzG8d8s83GOL"
},
"href":"https://api.spotify.com/v1/albums/5l3zEmMrOhOzG8d8s83GOL",
"id":"5l3zEmMrOhOzG8d8s83GOL",
"images":[
{
"height":640,
"width":640,
"url":"https://i.scdn.co/image/cb7905340c132365bbaee3f17498f062858382e8"
},
{
"height":300,
"width":300,
"url":"https://i.scdn.co/image/af369120f0b20099d6784ab31c88256113f10ffb"
},
{
"height":64,
"width":64,
"url":"https://i.scdn.co/image/9dad385ddf2e7db0bef20cec1fcbdb08689d9ae8"
}
],
"name":"盗作",
"type":"album",
"uri":"spotify:album:5l3zEmMrOhOzG8d8s83GOL"
},
"artists":[
{
"external_urls":{
"spotify":"https://open.spotify.com/artist/2RdwBSPQiwcmiDo9kixcl8"
},
"href":"https://api.spotify.com/v1/artists/2RdwBSPQiwcmiDo9kixcl8",
"id":"2RdwBSPQiwcmiDo9kixcl8",
"name":"ヨルシカ",
"type":"artist",
"uri":"spotify:artist:2RdwBSPQiwcmiDo9kixcl8"
}
],
"available_markets":[
"AD", "AR", "AT", "AU", "BE", "BG", "BO", "BR", "CA",
"CH", "CL", "CO", "CR", "CY", "CZ", "DE", "DK", "DO",
"EC", "EE", "ES", "FI", "FR", "GB", "GR", "GT", "HK",
"HN", "HU", "IE", "IS", "IT", "LI", "LT", "LU", "LV",
"MC", "MT", "MX", "MY", "NI", "NL", "NO", "NZ", "PA",
"PE", "PH", "PL", "PT", "PY", "RO", "SE", "SG", "SI",
"SK", "SV", "TR", "TW", "US", "UY"
],
"disc_number":1,
"duration_ms":233305,
"explicit":false,
"external_ids":{
"isrc":"USQ4E1300686"
},
"external_urls":{
"spotify":"https://open.spotify.com/track/6NPVjNh8Jhru9xOmyQigds"
},
"href":"https://api.spotify.com/v1/tracks/6NPVjNh8Jhru9xOmyQigds",
"id":"6NPVjNh8Jhru9xOmyQigds",
"name":"思想犯",
"popularity":89,
"preview_url":"https://p.scdn.co/mp3-preview/6b00000be293e6b25f61c33e206a0c522b5cbc87",
"track_number":4,
"type":"track",
"uri":"spotify:track:6NPVjNh8Jhru9xOmyQigds"
}
],
"limit":20,
"next":null,
"offset":0,
"previous":null,
"total":1
}
}

View file

@ -14,488 +14,12 @@
"""Tests for autotagging functionality."""
import re
import unittest
import pytest
from beets import autotag, config
from beets.autotag import AlbumInfo, TrackInfo, correct_list_fields, match
from beets.autotag.hooks import Distance, string_dist
from beets.library import Item
from beets.test.helper import BeetsTestCase, ConfigMixin
from beets.util import plurality
class PluralityTest(BeetsTestCase):
def test_plurality_consensus(self):
objs = [1, 1, 1, 1]
obj, freq = plurality(objs)
assert obj == 1
assert freq == 4
def test_plurality_near_consensus(self):
objs = [1, 1, 2, 1]
obj, freq = plurality(objs)
assert obj == 1
assert freq == 3
def test_plurality_conflict(self):
objs = [1, 1, 2, 2, 3]
obj, freq = plurality(objs)
assert obj in (1, 2)
assert freq == 2
def test_plurality_empty_sequence_raises_error(self):
with pytest.raises(ValueError, match="must be non-empty"):
plurality([])
def test_current_metadata_finds_pluralities(self):
items = [
Item(artist="The Beetles", album="The White Album"),
Item(artist="The Beatles", album="The White Album"),
Item(artist="The Beatles", album="Teh White Album"),
]
likelies, consensus = match.current_metadata(items)
assert likelies["artist"] == "The Beatles"
assert likelies["album"] == "The White Album"
assert not consensus["artist"]
def test_current_metadata_artist_consensus(self):
items = [
Item(artist="The Beatles", album="The White Album"),
Item(artist="The Beatles", album="The White Album"),
Item(artist="The Beatles", album="Teh White Album"),
]
likelies, consensus = match.current_metadata(items)
assert likelies["artist"] == "The Beatles"
assert likelies["album"] == "The White Album"
assert consensus["artist"]
def test_albumartist_consensus(self):
items = [
Item(artist="tartist1", album="album", albumartist="aartist"),
Item(artist="tartist2", album="album", albumartist="aartist"),
Item(artist="tartist3", album="album", albumartist="aartist"),
]
likelies, consensus = match.current_metadata(items)
assert likelies["artist"] == "aartist"
assert not consensus["artist"]
def test_current_metadata_likelies(self):
fields = [
"artist",
"album",
"albumartist",
"year",
"disctotal",
"mb_albumid",
"label",
"barcode",
"catalognum",
"country",
"media",
"albumdisambig",
]
items = [Item(**{f: f"{f}_{i or 1}" for f in fields}) for i in range(5)]
likelies, _ = match.current_metadata(items)
for f in fields:
if isinstance(likelies[f], int):
assert likelies[f] == 0
else:
assert likelies[f] == f"{f}_1"
def _make_item(title, track, artist="some artist"):
return Item(
title=title,
track=track,
artist=artist,
album="some album",
length=1,
mb_trackid="",
mb_albumid="",
mb_artistid="",
)
def _make_trackinfo():
return [
TrackInfo(
title="one", track_id=None, artist="some artist", length=1, index=1
),
TrackInfo(
title="two", track_id=None, artist="some artist", length=1, index=2
),
TrackInfo(
title="three",
track_id=None,
artist="some artist",
length=1,
index=3,
),
]
def _clear_weights():
"""Hack around the lazy descriptor used to cache weights for
Distance calculations.
"""
Distance.__dict__["_weights"].cache = {}
class DistanceTest(BeetsTestCase):
def tearDown(self):
super().tearDown()
_clear_weights()
def test_add(self):
dist = Distance()
dist.add("add", 1.0)
assert dist._penalties == {"add": [1.0]}
def test_add_equality(self):
dist = Distance()
dist.add_equality("equality", "ghi", ["abc", "def", "ghi"])
assert dist._penalties["equality"] == [0.0]
dist.add_equality("equality", "xyz", ["abc", "def", "ghi"])
assert dist._penalties["equality"] == [0.0, 1.0]
dist.add_equality("equality", "abc", re.compile(r"ABC", re.I))
assert dist._penalties["equality"] == [0.0, 1.0, 0.0]
def test_add_expr(self):
dist = Distance()
dist.add_expr("expr", True)
assert dist._penalties["expr"] == [1.0]
dist.add_expr("expr", False)
assert dist._penalties["expr"] == [1.0, 0.0]
def test_add_number(self):
dist = Distance()
# Add a full penalty for each number of difference between two numbers.
dist.add_number("number", 1, 1)
assert dist._penalties["number"] == [0.0]
dist.add_number("number", 1, 2)
assert dist._penalties["number"] == [0.0, 1.0]
dist.add_number("number", 2, 1)
assert dist._penalties["number"] == [0.0, 1.0, 1.0]
dist.add_number("number", -1, 2)
assert dist._penalties["number"] == [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]
def test_add_priority(self):
dist = Distance()
dist.add_priority("priority", "abc", "abc")
assert dist._penalties["priority"] == [0.0]
dist.add_priority("priority", "def", ["abc", "def"])
assert dist._penalties["priority"] == [0.0, 0.5]
dist.add_priority(
"priority", "gh", ["ab", "cd", "ef", re.compile("GH", re.I)]
)
assert dist._penalties["priority"] == [0.0, 0.5, 0.75]
dist.add_priority("priority", "xyz", ["abc", "def"])
assert dist._penalties["priority"] == [0.0, 0.5, 0.75, 1.0]
def test_add_ratio(self):
dist = Distance()
dist.add_ratio("ratio", 25, 100)
assert dist._penalties["ratio"] == [0.25]
dist.add_ratio("ratio", 10, 5)
assert dist._penalties["ratio"] == [0.25, 1.0]
dist.add_ratio("ratio", -5, 5)
assert dist._penalties["ratio"] == [0.25, 1.0, 0.0]
dist.add_ratio("ratio", 5, 0)
assert dist._penalties["ratio"] == [0.25, 1.0, 0.0, 0.0]
def test_add_string(self):
dist = Distance()
sdist = string_dist("abc", "bcd")
dist.add_string("string", "abc", "bcd")
assert dist._penalties["string"] == [sdist]
assert dist._penalties["string"] != [0]
def test_add_string_none(self):
dist = Distance()
dist.add_string("string", None, "string")
assert dist._penalties["string"] == [1]
def test_add_string_both_none(self):
dist = Distance()
dist.add_string("string", None, None)
assert dist._penalties["string"] == [0]
def test_distance(self):
config["match"]["distance_weights"]["album"] = 2.0
config["match"]["distance_weights"]["medium"] = 1.0
_clear_weights()
dist = Distance()
dist.add("album", 0.5)
dist.add("media", 0.25)
dist.add("media", 0.75)
assert dist.distance == 0.5
# __getitem__()
assert dist["album"] == 0.25
assert dist["media"] == 0.25
def test_max_distance(self):
config["match"]["distance_weights"]["album"] = 3.0
config["match"]["distance_weights"]["medium"] = 1.0
_clear_weights()
dist = Distance()
dist.add("album", 0.5)
dist.add("medium", 0.0)
dist.add("medium", 0.0)
assert dist.max_distance == 5.0
def test_operators(self):
config["match"]["distance_weights"]["source"] = 1.0
config["match"]["distance_weights"]["album"] = 2.0
config["match"]["distance_weights"]["medium"] = 1.0
_clear_weights()
dist = Distance()
dist.add("source", 0.0)
dist.add("album", 0.5)
dist.add("medium", 0.25)
dist.add("medium", 0.75)
assert len(dist) == 2
assert list(dist) == [("album", 0.2), ("medium", 0.2)]
assert dist == 0.4
assert dist < 1.0
assert dist > 0.0
assert dist - 0.4 == 0.0
assert 0.4 - dist == 0.0
assert float(dist) == 0.4
def test_raw_distance(self):
config["match"]["distance_weights"]["album"] = 3.0
config["match"]["distance_weights"]["medium"] = 1.0
_clear_weights()
dist = Distance()
dist.add("album", 0.5)
dist.add("medium", 0.25)
dist.add("medium", 0.5)
assert dist.raw_distance == 2.25
def test_items(self):
config["match"]["distance_weights"]["album"] = 4.0
config["match"]["distance_weights"]["medium"] = 2.0
_clear_weights()
dist = Distance()
dist.add("album", 0.1875)
dist.add("medium", 0.75)
assert dist.items() == [("medium", 0.25), ("album", 0.125)]
# Sort by key if distance is equal.
dist = Distance()
dist.add("album", 0.375)
dist.add("medium", 0.75)
assert dist.items() == [("album", 0.25), ("medium", 0.25)]
def test_update(self):
dist1 = Distance()
dist1.add("album", 0.5)
dist1.add("media", 1.0)
dist2 = Distance()
dist2.add("album", 0.75)
dist2.add("album", 0.25)
dist2.add("media", 0.05)
dist1.update(dist2)
assert dist1._penalties == {
"album": [0.5, 0.75, 0.25],
"media": [1.0, 0.05],
}
class TrackDistanceTest(BeetsTestCase):
def test_identical_tracks(self):
item = _make_item("one", 1)
info = _make_trackinfo()[0]
dist = match.track_distance(item, info, incl_artist=True)
assert dist == 0.0
def test_different_title(self):
item = _make_item("foo", 1)
info = _make_trackinfo()[0]
dist = match.track_distance(item, info, incl_artist=True)
assert dist != 0.0
def test_different_artist(self):
item = _make_item("one", 1)
item.artist = "foo"
info = _make_trackinfo()[0]
dist = match.track_distance(item, info, incl_artist=True)
assert dist != 0.0
def test_various_artists_tolerated(self):
item = _make_item("one", 1)
item.artist = "Various Artists"
info = _make_trackinfo()[0]
dist = match.track_distance(item, info, incl_artist=True)
assert dist == 0.0
class AlbumDistanceTest(BeetsTestCase):
def _mapping(self, items, info):
out = {}
for i, t in zip(items, info.tracks):
out[i] = t
return out
def _dist(self, items, info):
return match.distance(items, info, self._mapping(items, info))
def test_identical_albums(self):
items = []
items.append(_make_item("one", 1))
items.append(_make_item("two", 2))
items.append(_make_item("three", 3))
info = AlbumInfo(
artist="some artist",
album="some album",
tracks=_make_trackinfo(),
va=False,
)
assert self._dist(items, info) == 0
def test_incomplete_album(self):
items = []
items.append(_make_item("one", 1))
items.append(_make_item("three", 3))
info = AlbumInfo(
artist="some artist",
album="some album",
tracks=_make_trackinfo(),
va=False,
)
dist = self._dist(items, info)
assert dist != 0
# Make sure the distance is not too great
assert dist < 0.2
def test_global_artists_differ(self):
items = []
items.append(_make_item("one", 1))
items.append(_make_item("two", 2))
items.append(_make_item("three", 3))
info = AlbumInfo(
artist="someone else",
album="some album",
tracks=_make_trackinfo(),
va=False,
)
assert self._dist(items, info) != 0
def test_comp_track_artists_match(self):
items = []
items.append(_make_item("one", 1))
items.append(_make_item("two", 2))
items.append(_make_item("three", 3))
info = AlbumInfo(
artist="should be ignored",
album="some album",
tracks=_make_trackinfo(),
va=True,
)
assert self._dist(items, info) == 0
def test_comp_no_track_artists(self):
# Some VA releases don't have track artists (incomplete metadata).
items = []
items.append(_make_item("one", 1))
items.append(_make_item("two", 2))
items.append(_make_item("three", 3))
info = AlbumInfo(
artist="should be ignored",
album="some album",
tracks=_make_trackinfo(),
va=True,
)
info.tracks[0].artist = None
info.tracks[1].artist = None
info.tracks[2].artist = None
assert self._dist(items, info) == 0
def test_comp_track_artists_do_not_match(self):
items = []
items.append(_make_item("one", 1))
items.append(_make_item("two", 2, "someone else"))
items.append(_make_item("three", 3))
info = AlbumInfo(
artist="some artist",
album="some album",
tracks=_make_trackinfo(),
va=True,
)
assert self._dist(items, info) != 0
def test_tracks_out_of_order(self):
items = []
items.append(_make_item("one", 1))
items.append(_make_item("three", 2))
items.append(_make_item("two", 3))
info = AlbumInfo(
artist="some artist",
album="some album",
tracks=_make_trackinfo(),
va=False,
)
dist = self._dist(items, info)
assert 0 < dist < 0.2
def test_two_medium_release(self):
items = []
items.append(_make_item("one", 1))
items.append(_make_item("two", 2))
items.append(_make_item("three", 3))
info = AlbumInfo(
artist="some artist",
album="some album",
tracks=_make_trackinfo(),
va=False,
)
info.tracks[0].medium_index = 1
info.tracks[1].medium_index = 2
info.tracks[2].medium_index = 1
dist = self._dist(items, info)
assert dist == 0
def test_per_medium_track_numbers(self):
items = []
items.append(_make_item("one", 1))
items.append(_make_item("two", 2))
items.append(_make_item("three", 1))
info = AlbumInfo(
artist="some artist",
album="some album",
tracks=_make_trackinfo(),
va=False,
)
info.tracks[0].medium_index = 1
info.tracks[1].medium_index = 2
info.tracks[2].medium_index = 1
dist = self._dist(items, info)
assert dist == 0
class TestAssignment(ConfigMixin):
@ -920,82 +444,6 @@ class ApplyCompilationTest(BeetsTestCase, ApplyTestUtil):
assert self.items[1].comp
class StringDistanceTest(unittest.TestCase):
def test_equal_strings(self):
dist = string_dist("Some String", "Some String")
assert dist == 0.0
def test_different_strings(self):
dist = string_dist("Some String", "Totally Different")
assert dist != 0.0
def test_punctuation_ignored(self):
dist = string_dist("Some String", "Some.String!")
assert dist == 0.0
def test_case_ignored(self):
dist = string_dist("Some String", "sOME sTring")
assert dist == 0.0
def test_leading_the_has_lower_weight(self):
dist1 = string_dist("XXX Band Name", "Band Name")
dist2 = string_dist("The Band Name", "Band Name")
assert dist2 < dist1
def test_parens_have_lower_weight(self):
dist1 = string_dist("One .Two.", "One")
dist2 = string_dist("One (Two)", "One")
assert dist2 < dist1
def test_brackets_have_lower_weight(self):
dist1 = string_dist("One .Two.", "One")
dist2 = string_dist("One [Two]", "One")
assert dist2 < dist1
def test_ep_label_has_zero_weight(self):
dist = string_dist("My Song (EP)", "My Song")
assert dist == 0.0
def test_featured_has_lower_weight(self):
dist1 = string_dist("My Song blah Someone", "My Song")
dist2 = string_dist("My Song feat Someone", "My Song")
assert dist2 < dist1
def test_postfix_the(self):
dist = string_dist("The Song Title", "Song Title, The")
assert dist == 0.0
def test_postfix_a(self):
dist = string_dist("A Song Title", "Song Title, A")
assert dist == 0.0
def test_postfix_an(self):
dist = string_dist("An Album Title", "Album Title, An")
assert dist == 0.0
def test_empty_strings(self):
dist = string_dist("", "")
assert dist == 0.0
def test_solo_pattern(self):
# Just make sure these don't crash.
string_dist("The ", "")
string_dist("(EP)", "(EP)")
string_dist(", An", "")
def test_heuristic_does_not_harm_distance(self):
dist = string_dist("Untitled", "[Untitled]")
assert dist == 0.0
def test_ampersand_expansion(self):
dist = string_dist("And", "&")
assert dist == 0.0
def test_accented_characters(self):
dist = string_dist("\xe9\xe1\xf1", "ean")
assert dist == 0.0
@pytest.mark.parametrize(
"single_field,list_field",
[

View file

@ -200,7 +200,7 @@ class MoveTest(BeetsTestCase):
assert self.i.path == util.normpath(self.dest)
class HelperTest(BeetsTestCase):
class HelperTest(unittest.TestCase):
def test_ancestry_works_on_file(self):
p = "/a/b/c"
a = ["/", "/a", "/a/b"]

View file

@ -22,7 +22,7 @@ import tempfile
import unittest
from beets import util
from beets.util import hidden
from beets.util import bytestring_path, hidden
class HiddenFileTest(unittest.TestCase):
@ -44,7 +44,7 @@ class HiddenFileTest(unittest.TestCase):
else:
raise e
assert hidden.is_hidden(f.name)
assert hidden.is_hidden(bytestring_path(f.name))
def test_windows_hidden(self):
if not sys.platform == "win32":

View file

@ -129,6 +129,11 @@ class NonAutotaggedImportTest(AsIsImporterMixin, ImportTestCase):
self.run_asis_importer(delete=True)
self.assertNotExists(os.path.join(self.import_dir, b"album"))
def test_album_mb_albumartistids(self):
self.run_asis_importer()
album = self.lib.albums()[0]
assert album.mb_albumartistids == album.items()[0].mb_albumartistids
@unittest.skipUnless(_common.HAVE_SYMLINK, "need symlinks")
def test_import_link_arrives(self):
self.run_asis_importer(link=True)
@ -919,7 +924,7 @@ class ChooseCandidateTest(AutotagImportTestCase):
assert self.lib.albums().get().album == "Applied Album MM"
class InferAlbumDataTest(BeetsTestCase):
class InferAlbumDataTest(unittest.TestCase):
def setUp(self):
super().setUp()
@ -1215,7 +1220,7 @@ class ImportDuplicateSingletonTest(ImportTestCase):
return item
class TagLogTest(BeetsTestCase):
class TagLogTest(unittest.TestCase):
def test_tag_log_line(self):
sio = StringIO()
handler = logging.StreamHandler(sio)

View file

@ -29,12 +29,13 @@ from mediafile import MediaFile, UnreadableFileError
import beets.dbcore.query
import beets.library
import beets.logging as blog
from beets import config, plugins, util
from beets.library import Album
from beets.test import _common
from beets.test._common import item
from beets.test.helper import BeetsTestCase, ItemInDBTestCase
from beets.util import as_string, bytestring_path, syspath
from beets.test.helper import BeetsTestCase, ItemInDBTestCase, capture_log
from beets.util import as_string, bytestring_path, normpath, syspath
# Shortcut to path normalization.
np = util.normpath
@ -126,6 +127,25 @@ class AddTest(BeetsTestCase):
)
assert new_grouping == self.i.grouping
def test_library_add_one_database_change_event(self):
"""Test library.add emits only one database_change event."""
self.item = _common.item()
self.item.path = beets.util.normpath(
os.path.join(
self.temp_dir,
b"a",
b"b.mp3",
)
)
self.item.album = "a"
self.item.title = "b"
blog.getLogger("beets").set_global_level(blog.DEBUG)
with capture_log() as logs:
self.lib.add(self.item)
assert logs.count("Sending event: database_change") == 1
class RemoveTest(ItemInDBTestCase):
def test_remove_deletes_from_db(self):
@ -553,6 +573,9 @@ class ItemFormattedMappingTest(ItemInDBTestCase):
class PathFormattingMixin:
"""Utilities for testing path formatting."""
i: beets.library.Item
lib: beets.library.Library
def _setf(self, fmt):
self.lib.path_formats.insert(0, ("default", fmt))
@ -560,9 +583,12 @@ class PathFormattingMixin:
if i is None:
i = self.i
# Handle paths on Windows.
if os.path.sep != "/":
dest = dest.replace(b"/", os.path.sep.encode())
dest = b"D:" + dest
# Paths are normalized based on the CWD.
dest = normpath(dest)
actual = i.destination()

View file

@ -3,21 +3,17 @@
import logging as log
import sys
import threading
import unittest
from io import StringIO
import beets.logging as blog
import beetsplug
from beets import plugins, ui
from beets.test import _common, helper
from beets.test.helper import (
AsIsImporterMixin,
BeetsTestCase,
ImportTestCase,
PluginMixin,
)
from beets.test.helper import AsIsImporterMixin, ImportTestCase, PluginMixin
class LoggingTest(BeetsTestCase):
class LoggingTest(unittest.TestCase):
def test_logging_management(self):
l1 = log.getLogger("foo123")
l2 = blog.getLogger("foo123")

View file

@ -15,7 +15,6 @@
import itertools
import os
import unittest
from unittest.mock import ANY, Mock, patch
import pytest
@ -215,15 +214,6 @@ class EventsTest(PluginImportTestCase):
]
class HelpersTest(unittest.TestCase):
def test_sanitize_choices(self):
assert plugins.sanitize_choices(["A", "Z"], ("A", "B")) == ["A"]
assert plugins.sanitize_choices(["A", "A"], ("A")) == ["A"]
assert plugins.sanitize_choices(
["D", "*", "A"], ("A", "B", "C", "D")
) == ["D", "B", "C", "A"]
class ListenersTest(PluginLoaderTestCase):
def test_register(self):
class DummyPlugin(plugins.BeetsPlugin):

View file

@ -373,7 +373,7 @@ class GetTest(DummyDataTestCase):
dbcore.query.RegexpQuery("year", "199(")
class MatchTest(BeetsTestCase):
class MatchTest(unittest.TestCase):
def setUp(self):
super().setUp()
self.item = _common.item()
@ -811,7 +811,7 @@ class NoneQueryTest(BeetsTestCase, AssertsMixin):
self.assertInResult(item, matched)
class NotQueryMatchTest(BeetsTestCase):
class NotQueryMatchTest(unittest.TestCase):
"""Test `query.NotQuery` matching against a single item, using the same
cases and assertions as on `MatchTest`, plus assertion on the negated
queries (ie. assert q -> assert not NotQuery(q)).
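A minimal sketch of the negation property described in the docstring, using the module's existing imports (the query field and pattern are illustrative): for any query q and any item, q and NotQuery(q) must disagree.

# Illustrative only: the invariant behind the negated assertions.
item = _common.item()
q = dbcore.query.SubstringQuery("title", "the")
assert q.match(item) != dbcore.query.NotQuery(q).match(item)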

View file

@ -378,7 +378,7 @@ class ConfigSortTest(DummyDataTestCase):
assert results[0].albumartist > results[1].albumartist
class CaseSensitivityTest(DummyDataTestCase, BeetsTestCase):
class CaseSensitivityTest(DummyDataTestCase):
"""If case_insensitive is false, lower-case values should be placed
after all upper-case values. E.g., `Foo Qux bar`
"""

View file

@ -1337,7 +1337,7 @@ class ShowChangeTest(BeetsTestCase):
@patch("beets.library.Item.try_filesize", Mock(return_value=987))
class SummarizeItemsTest(BeetsTestCase):
class SummarizeItemsTest(unittest.TestCase):
def setUp(self):
super().setUp()
item = library.Item()
@ -1374,7 +1374,7 @@ class SummarizeItemsTest(BeetsTestCase):
assert summary == "3 items, G 2, F 1, 4kbps, 32:42, 2.9 KiB"
class PathFormatTest(BeetsTestCase):
class PathFormatTest(unittest.TestCase):
def test_custom_paths_prepend(self):
default_formats = ui.get_path_formats()
@ -1521,7 +1521,7 @@ class CommonOptionsParserCliTest(BeetsTestCase):
# assert 'plugins: ' in output
class CommonOptionsParserTest(BeetsTestCase):
class CommonOptionsParserTest(unittest.TestCase):
def test_album_option(self):
parser = ui.CommonOptionsParser()
assert not parser._album_flags
@ -1614,7 +1614,7 @@ class CommonOptionsParserTest(BeetsTestCase):
)
class EncodingTest(BeetsTestCase):
class EncodingTest(unittest.TestCase):
"""Tests for the `terminal_encoding` config option and our
`_in_encoding` and `_out_encoding` utility functions.
"""

View file

@ -24,8 +24,8 @@ from unittest.mock import Mock, patch
import pytest
from beets import util
from beets.library import Item
from beets.test import _common
from beets.test.helper import BeetsTestCase
class UtilTest(unittest.TestCase):
@ -132,7 +132,7 @@ class UtilTest(unittest.TestCase):
pass
class PathConversionTest(BeetsTestCase):
class PathConversionTest(unittest.TestCase):
def test_syspath_windows_format(self):
with _common.platform_windows():
path = os.path.join("a", "b", "c")
@ -218,3 +218,41 @@ class TestPathLegalization:
expected_path,
expected_truncated,
)
class TestPlurality:
@pytest.mark.parametrize(
"objs, expected_obj, expected_freq",
[
pytest.param([1, 1, 1, 1], 1, 4, id="consensus"),
pytest.param([1, 1, 2, 1], 1, 3, id="near consensus"),
pytest.param([1, 1, 2, 2, 3], 1, 2, id="conflict-first-wins"),
],
)
def test_plurality(self, objs, expected_obj, expected_freq):
assert (expected_obj, expected_freq) == util.plurality(objs)
def test_empty_sequence_raises_error(self):
with pytest.raises(ValueError, match="must be non-empty"):
util.plurality([])
def test_get_most_common_tags(self):
items = [
Item(albumartist="aartist", label="label 1", album="album"),
Item(albumartist="aartist", label="label 2", album="album"),
Item(albumartist="aartist", label="label 3", album="another album"),
]
likelies, consensus = util.get_most_common_tags(items)
assert likelies["albumartist"] == "aartist"
assert likelies["album"] == "album"
# albumartist consensus overrides artist
assert likelies["artist"] == "aartist"
assert likelies["label"] == "label 1"
assert likelies["year"] == 0
assert consensus["year"]
assert consensus["albumartist"]
assert not consensus["album"]
assert not consensus["label"]

38
test/util/test_config.py Normal file
View file

@ -0,0 +1,38 @@
import pytest
from beets.util.config import sanitize_choices, sanitize_pairs
@pytest.mark.parametrize(
"input_choices, valid_choices, expected",
[
(["A", "Z"], ("A", "B"), ["A"]),
(["A", "A"], ("A"), ["A"]),
(["D", "*", "A"], ("A", "B", "C", "D"), ["D", "B", "C", "A"]),
],
)
def test_sanitize_choices(input_choices, valid_choices, expected):
assert sanitize_choices(input_choices, valid_choices) == expected
def test_sanitize_pairs():
assert sanitize_pairs(
[
("foo", "baz bar"),
("foo", "baz bar"),
("key", "*"),
("*", "*"),
("discard", "bye"),
],
[
("foo", "bar"),
("foo", "baz"),
("foo", "foobar"),
("key", "value"),
],
) == [
("foo", "baz"),
("foo", "bar"),
("key", "value"),
("foo", "foobar"),
]