diff --git a/beetsplug/fuzzy.py b/beetsplug/fuzzy.py
index 959544ed3..56bce8b2c 100644
--- a/beetsplug/fuzzy.py
+++ b/beetsplug/fuzzy.py
@@ -22,18 +22,53 @@ from beets.plugins import BeetsPlugin
 
 
 class FuzzyQuery(StringFieldQuery[str]):
+    """Match string fields approximately, using ``difflib`` similarity."""
+
+    def __init__(self, field_name: str, pattern: str, *_) -> None:
+        """Create a fuzzy query that never uses the fast query path.
+
+        Extra positional arguments are accepted and ignored; the parent is
+        always initialised with ``fast=False``.
+        """
+        # Fuzzy matching is only available via `string_match`.
+        super().__init__(field_name, pattern, fast=False)
+
     @classmethod
-    def string_match(cls, pattern: str, val: str):
+    def string_match(cls, pattern: str, val: str) -> bool:
+        """Return whether ``val`` is a close enough match for ``pattern``.
+
+        The comparison is case-insensitive when ``pattern`` is all lowercase
+        ("smartcase"). The similarity ratio is compared against the
+        ``fuzzy.threshold`` setting; when ``pattern`` is shorter than ``val``,
+        the threshold is scaled down so the pattern can match a substring.
+        """
         # smartcase
         if pattern.islower():
             val = val.lower()
-        query_matcher = difflib.SequenceMatcher(None, pattern, val)
+        # Turn off difflib's "autojunk" heuristic: for values of 200 or more
+        # characters it ignores frequently occurring characters, which would
+        # keep short patterns from matching long values.
+        query_matcher = difflib.SequenceMatcher(
+            None, pattern, val, autojunk=False
+        )
         threshold = config["fuzzy"]["threshold"].as_number()
-        return query_matcher.quick_ratio() >= threshold
+        # Adjust match threshold for the case that the pattern is shorter
+        # than the value being matched. This allows the pattern to match
+        # substrings of the value, not just the entire value.
+        if len(pattern) < len(val):
+            max_possible_ratio = 2 * len(pattern) / (len(pattern) + len(val))
+            threshold *= max_possible_ratio
+
+        # `quick_ratio` is an upper bound on `ratio`, so the more expensive
+        # exact ratio is only computed when that bound meets the threshold.
+        return (
+            query_matcher.quick_ratio() >= threshold
+            and query_matcher.ratio() >= threshold
+        )
 
 
 class FuzzyPlugin(BeetsPlugin):
-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__()
         self.config.add(
             {
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 80ed763bf..6a0fa5d7d 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -47,6 +47,12 @@ Bug fixes
 - In autotagging, initialise empty multi-valued fields with ``None`` instead of
   empty list, which caused beets to overwrite existing metadata with empty list
   values instead of leaving them unchanged. :bug:`6403`
+- :doc:`plugins/fuzzy`: Improve fuzzy matching when the query is shorter than
+  the field value so substring-style searches produce more useful results.
+  :bug:`2043`
+- :doc:`plugins/fuzzy`: Force slow query evaluation whenever the fuzzy prefix is
+  used (for example ``~foo`` or ``%%foo``), so fuzzy matching is applied
+  consistently. :bug:`5638`
 
 For plugin developers
 ~~~~~~~~~~~~~~~~~~~~~
diff --git a/test/plugins/test_fuzzy.py b/test/plugins/test_fuzzy.py
new file mode 100644
index 000000000..be55b76ee
--- /dev/null
+++ b/test/plugins/test_fuzzy.py
@@ -0,0 +1,64 @@
+# This file is part of beets.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Tests for the fuzzy query plugin."""
+
+import pytest
+
+from beets.test.helper import PluginMixin, TestHelper
+
+
+@pytest.fixture
+def helper(request):
+    """Give the requesting test instance a temporary beets library.
+
+    Sets ``lib`` and ``add_item`` on the test instance and tears the beets
+    environment down again once the test has finished.
+    """
+    beets_helper = TestHelper()
+    beets_helper.setup_beets()
+
+    test_instance = request.instance
+    test_instance.lib = beets_helper.lib
+    test_instance.add_item = beets_helper.add_item
+
+    yield
+
+    beets_helper.teardown_beets()
+
+
+@pytest.mark.usefixtures("helper")
+class TestFuzzyPlugin(PluginMixin):
+    """Run fuzzy-prefixed queries against a small two-item library."""
+
+    plugin = "fuzzy"
+
+    @pytest.mark.parametrize(
+        "query,expected_titles",
+        [
+            pytest.param("~foo", ["seafood"], id="all-fields-substring"),
+            pytest.param("title:~foo", ["seafood"], id="field-substring"),
+            pytest.param("~seafood", ["seafood"], id="all-fields-equal-length"),
+            pytest.param("~zzz", [], id="all-fields-no-match"),
+        ],
+    )
+    def test_fuzzy_queries(self, query, expected_titles):
+        """Check which item titles each fuzzy query returns."""
+        for title, artist in (("seafood", "alpha"), ("bread", "beta")):
+            self.add_item(title=title, artist=artist)
+
+        with self.configure_plugin({}):
+            results = self.lib.items(query)
+
+        titles = [item.title for item in results]
+        assert titles == expected_titles