mirror of
https://github.com/beetbox/beets.git
synced 2026-03-05 12:42:12 +01:00
Match substrings fuzzily (#6408)
Fixes #2043
Fixes #5638

Improve the `fuzzy` plugin in two ways:

1. Make short fuzzy queries behave more like substring matching.
2. Force fuzzy-prefixed queries to use slow evaluation so fuzzy logic is always applied.

## Problem

Fuzzy prefix queries (for example `~foo` or custom prefixes like `%%foo`) could take a fast DB query path on normal fields, which bypassed fuzzy semantics and produced broad/unrelated matches [#5638](https://github.com/beetbox/beets/issues/5638).

Also, when the query pattern was shorter than the field value, fuzzy matching was too strict for substring-style use cases [#2043](https://github.com/beetbox/beets/issues/2043).

Thanks to @carreter for this in #5140. Opened a new PR since I did not have permissions to push to their fork. Supersedes #5140.
This commit is contained in:
commit
842354ee6b
3 changed files with 79 additions and 3 deletions
|
|
@ -22,18 +22,34 @@ from beets.plugins import BeetsPlugin
|
|||
|
||||
|
||||
class FuzzyQuery(StringFieldQuery[str]):
    """String field query that accepts approximate matches.

    A value matches when its `difflib` similarity ratio against the pattern
    reaches the configured ``fuzzy.threshold`` (scaled down when the pattern
    is shorter than the value, so that substrings can match).
    """

    def __init__(self, field_name: str, pattern: str, *_) -> None:
        # Fuzzy matching is only available via `string_match`, so always
        # request the slow (non-``fast``) evaluation path from the base class.
        super().__init__(field_name, pattern, fast=False)

    @classmethod
    def string_match(cls, pattern: str, val: str) -> bool:
        """Return whether ``val`` is similar enough to ``pattern``.

        Args:
            pattern: The query text entered by the user.
            val: The field value being tested.

        Returns:
            ``True`` when the similarity ratio of the two strings is at
            least the (possibly length-adjusted) threshold.
        """
        # smartcase: an all-lowercase pattern is compared case-insensitively
        # by lowercasing the value; otherwise the value is used as-is.
        if pattern.islower():
            val = val.lower()
        query_matcher = difflib.SequenceMatcher(None, pattern, val)
        threshold = config["fuzzy"]["threshold"].as_number()

        # Adjust match threshold for the case that the pattern is shorter
        # than the value being matched. This allows the pattern to match
        # substrings of the value, not just the entire value.
        if len(pattern) < len(val):
            # The ratio is 2 * matches / total length, and there can be at
            # most len(pattern) matching characters.
            max_possible_ratio = 2 * len(pattern) / (len(pattern) + len(val))
            threshold *= max_possible_ratio

        # If upper bound of the ratio meets threshold, then calculate
        # the actual ratio.
        if query_matcher.quick_ratio() >= threshold:
            return query_matcher.ratio() >= threshold

        return False
|
||||
|
||||
|
||||
class FuzzyPlugin(BeetsPlugin):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.config.add(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -47,6 +47,12 @@ Bug fixes
|
|||
- In autotagging, initialise empty multi-valued fields with ``None`` instead of
|
||||
empty list, which caused beets to overwrite existing metadata with empty list
|
||||
values instead of leaving them unchanged. :bug:`6403`
|
||||
- :doc:`plugins/fuzzy`: Improve fuzzy matching when the query is shorter than
|
||||
the field value so substring-style searches produce more useful results.
|
||||
:bug:`2043`
|
||||
- :doc:`plugins/fuzzy`: Force slow query evaluation whenever the fuzzy prefix is
|
||||
used (for example ``~foo`` or ``%%foo``), so fuzzy matching is applied
|
||||
consistently. :bug:`5638`
|
||||
|
||||
For plugin developers
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
|
|||
54
test/plugins/test_fuzzy.py
Normal file
54
test/plugins/test_fuzzy.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# This file is part of beets.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
# "Software"), to deal in the Software without restriction, including
|
||||
# without limitation the rights to use, copy, modify, merge, publish,
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so, subject to
|
||||
# the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
|
||||
"""Tests for the fuzzy query plugin."""
|
||||
|
||||
import pytest
|
||||
|
||||
from beets.test.helper import PluginMixin, TestHelper
|
||||
|
||||
|
||||
@pytest.fixture
def helper(request):
    """Set up a beets test environment and expose it on the test instance.

    Attaches ``lib`` and ``add_item`` from a fresh ``TestHelper`` to
    ``request.instance``, then tears the environment down after the test.
    """
    beets_env = TestHelper()
    beets_env.setup_beets()

    # Make the library and the item factory reachable as ``self.lib`` and
    # ``self.add_item`` from within the test methods.
    test_instance = request.instance
    test_instance.lib = beets_env.lib
    test_instance.add_item = beets_env.add_item

    yield

    beets_env.teardown_beets()
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("helper")
class TestFuzzyPlugin(PluginMixin):
    """Check which item titles fuzzy-prefixed queries return."""

    plugin = "fuzzy"

    @pytest.mark.parametrize(
        "query,expected_titles",
        [
            pytest.param("~foo", ["seafood"], id="all-fields-substring"),
            pytest.param("title:~foo", ["seafood"], id="field-substring"),
            pytest.param("~seafood", ["seafood"], id="all-fields-equal-length"),
            pytest.param("~zzz", [], id="all-fields-no-match"),
        ],
    )
    def test_fuzzy_queries(self, query, expected_titles):
        """Run ``query`` against a two-item library and compare titles."""
        # Library fixture: one item that should match "foo", one that
        # should not.
        for title, artist in (("seafood", "alpha"), ("bread", "beta")):
            self.add_item(title=title, artist=artist)

        with self.configure_plugin({}):
            matches = self.lib.items(query)

        found_titles = [match.title for match in matches]
        assert found_titles == expected_titles
|
||||
Loading…
Reference in a new issue