Match substrings fuzzily (#6408)

Fixes #2043
Fixes #5638

Improve the `fuzzy` plugin in two ways:

1. Make short fuzzy queries behave more like substring matching.
2. Force fuzzy-prefixed queries to use slow evaluation so fuzzy logic is
always applied.

## Problem
Fuzzy prefix queries (for example `~foo` or custom prefixes like
`%%foo`) could take the fast DB query path on normal fields, which
bypassed fuzzy semantics and produced overly broad or unrelated matches
([#5638](https://github.com/beetbox/beets/issues/5638)).

Also, when the query pattern was shorter than the field value, fuzzy
matching was too strict for substring-style use cases
[#2043](https://github.com/beetbox/beets/issues/2043).

Thanks to @carreter for the original work on this in #5140. I opened a
new PR because I did not have permission to push to their fork.

Supersedes #5140.
This commit is contained in:
Šarūnas Nejus 2026-03-03 14:23:03 +00:00 committed by GitHub
commit 842354ee6b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 79 additions and 3 deletions

View file

@ -22,18 +22,34 @@ from beets.plugins import BeetsPlugin
class FuzzyQuery(StringFieldQuery[str]):
    """String field query that matches values approximately.

    Matching is done with :class:`difflib.SequenceMatcher` and compared
    against the ``fuzzy.threshold`` configuration value.
    """

    def __init__(self, field_name: str, pattern: str, *_) -> None:
        """Create a fuzzy query for ``pattern`` on ``field_name``.

        Any extra positional arguments are accepted and ignored; the query
        is always constructed with ``fast=False``.
        """
        # Fuzzy matching is only available via `string_match`.
        super().__init__(field_name, pattern, fast=False)

    @classmethod
    def string_match(cls, pattern: str, val: str) -> bool:
        """Return whether ``pattern`` fuzzily matches ``val``.

        The similarity ratio of the two strings must reach the configured
        threshold. When ``pattern`` is shorter than ``val`` the threshold
        is scaled down so that the pattern can match a substring of the
        value rather than only the whole value.
        """
        # smartcase: an all-lowercase pattern matches case-insensitively.
        if pattern.islower():
            val = val.lower()

        query_matcher = difflib.SequenceMatcher(None, pattern, val)
        threshold = config["fuzzy"]["threshold"].as_number()

        # Adjust match threshold for the case that the pattern is shorter
        # than the value being matched. This allows the pattern to match
        # substrings of the value, not just the entire value.
        if len(pattern) < len(val):
            # The ratio is 2 * matches / total length and at most
            # len(pattern) characters can match, so this is the best
            # ratio the pattern could possibly reach against this value.
            max_possible_ratio = 2 * len(pattern) / (len(pattern) + len(val))
            threshold *= max_possible_ratio

        # `quick_ratio` is a cheap upper bound on `ratio`, so the more
        # expensive exact ratio is only calculated when the upper bound
        # already meets the threshold.
        return (
            query_matcher.quick_ratio() >= threshold
            and query_matcher.ratio() >= threshold
        )
class FuzzyPlugin(BeetsPlugin):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.config.add(
{

View file

@ -47,6 +47,12 @@ Bug fixes
- In autotagging, initialise empty multi-valued fields with ``None`` instead of
empty list, which caused beets to overwrite existing metadata with empty list
values instead of leaving them unchanged. :bug:`6403`
- :doc:`plugins/fuzzy`: Improve fuzzy matching when the query is shorter than
the field value so substring-style searches produce more useful results.
:bug:`2043`
- :doc:`plugins/fuzzy`: Force slow query evaluation whenever the fuzzy prefix is
used (for example ``~foo`` or ``%%foo``), so fuzzy matching is applied
consistently. :bug:`5638`
For plugin developers
~~~~~~~~~~~~~~~~~~~~~

View file

@ -0,0 +1,54 @@
# This file is part of beets.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Tests for the fuzzy query plugin."""
import pytest
from beets.test.helper import PluginMixin, TestHelper
@pytest.fixture
def helper(request):
    """Give the requesting test instance a library and an item factory.

    A ``TestHelper`` is set up before the test, its ``lib`` and
    ``add_item`` attributes are attached to the test instance, and the
    helper is torn down once the test has finished.
    """
    beets_env = TestHelper()
    beets_env.setup_beets()

    # Expose the pieces the tests use directly on the test class instance.
    test_instance = request.instance
    test_instance.lib = beets_env.lib
    test_instance.add_item = beets_env.add_item

    yield

    beets_env.teardown_beets()
@pytest.mark.usefixtures("helper")
class TestFuzzyPlugin(PluginMixin):
    """Check fuzzy-prefixed queries against a small two-item library."""

    plugin = "fuzzy"

    @pytest.mark.parametrize(
        "query,expected_titles",
        [
            pytest.param("~foo", ["seafood"], id="all-fields-substring"),
            pytest.param("title:~foo", ["seafood"], id="field-substring"),
            pytest.param("~seafood", ["seafood"], id="all-fields-equal-length"),
            pytest.param("~zzz", [], id="all-fields-no-match"),
        ],
    )
    def test_fuzzy_queries(self, query, expected_titles):
        """Run ``query`` and compare the titles of the returned items."""
        # Library content: one item each query may match, one it must not.
        for title, artist in (("seafood", "alpha"), ("bread", "beta")):
            self.add_item(title=title, artist=artist)

        # Results are read while the plugin is still configured.
        with self.configure_plugin({}):
            matched = self.lib.items(query)
            found_titles = [entry.title for entry in matched]
            assert found_titles == expected_titles