Format MusicBrainz search terms and escape Lucene special chars

Add a helper to lower/strip and escape Lucene query syntax.
Use it when building search queries and add unit tests.
This commit is contained in:
Šarūnas Nejus 2026-02-07 22:26:17 +00:00
parent cdfb813910
commit 1271b711f7
No known key found for this signature in database
3 changed files with 38 additions and 5 deletions

View file

@ -11,9 +11,10 @@ logic throughout the codebase.
from __future__ import annotations
import operator
import re
from dataclasses import dataclass, field
from functools import cached_property, singledispatchmethod, wraps
from itertools import groupby
from itertools import groupby, starmap
from typing import TYPE_CHECKING, Any, Literal, ParamSpec, TypedDict, TypeVar
from requests_ratelimiter import LimiterMixin
@ -33,6 +34,9 @@ if TYPE_CHECKING:
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# Matches any single character that is special in Lucene query syntax
# (- + & | ! ( ) { } [ ] ^ " ~ * ? : \ /) and captures it in group 1, so a
# substitution can re-emit the character with a backslash in front of it.
LUCENE_SPECIAL_CHAR_PAT = re.compile(r'([-+&|!(){}[\]^"~*?:\\/])')
class LimiterTimeoutSession(LimiterMixin, TimeoutAndRetrySession):
"""HTTP session that enforces rate limits."""
@ -181,6 +185,21 @@ class MusicBrainzAPI(RequestHandler):
def _browse(self, entity: Entity, **kwargs) -> list[JSONDict]:
    """Return the entries stored under the pluralised entity key.

    ``entity`` and ``kwargs`` are forwarded unchanged to
    ``self._get_resource``. The value under the ``"<entity>s"`` key of its
    result is returned, or an empty list when that key is absent.
    """
    resource = self._get_resource(entity, **kwargs)
    plural_key = f"{entity}s"
    return resource.get(plural_key, [])
@staticmethod
def format_search_term(field: str, term: str) -> str:
    """Build one escaped clause for a MusicBrainz search query.

    The term is lowercased and stripped of surrounding whitespace, then every
    character matched by ``LUCENE_SPECIAL_CHAR_PAT`` is prefixed with a
    backslash. When ``field`` is non-empty the escaped term is wrapped as
    ``field:(term)``; otherwise the escaped term is returned on its own.

    Returns an empty string when the term is empty after normalisation,
    regardless of ``field``.

    See https://lucene.apache.org/core/4_3_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html
    """
    normalized = term.lower().strip()
    if not normalized:
        return ""

    # The replacement template emits a literal backslash followed by the
    # captured special character.
    escaped = LUCENE_SPECIAL_CHAR_PAT.sub(r"\\\1", normalized)
    return f"{field}:({escaped})" if field else escaped
def search(
self,
entity: Entity,
@ -195,10 +214,8 @@ class MusicBrainzAPI(RequestHandler):
- 'value' is empty, in which case the filter is ignored
* Values are lowercased and stripped of whitespace.
"""
query = " AND ".join(
":".join(filter(None, (k, f'"{_v}"')))
for k, v in filters.items()
if (_v := v.lower().strip())
query = " ".join(
filter(None, starmap(self.format_search_term, filters.items()))
)
log.debug("Searching for MusicBrainz {}s with: {!r}", entity, query)
kwargs["query"] = query

View file

@ -11,6 +11,8 @@ New features:
Bug fixes:
- :doc:`plugins/musicbrainz`: Fix escaping of Lucene special characters in search terms. :bug:`6347`
For packagers:
Other changes:

View file

@ -1,3 +1,5 @@
import pytest
from beetsplug._utils.musicbrainz import MusicBrainzAPI
@ -80,3 +82,15 @@ def test_group_relations():
},
],
}
@pytest.mark.parametrize(
    "field, term, expected",
    [
        # Special characters are escaped, and the term is lowercased,
        # stripped and wrapped in the field group.
        (
            "artist",
            ' AC/DC + "[Live]" ',
            r"artist:(ac\/dc \+ \"\[live\]\")",
        ),
        # Without a field the escaped term is returned bare.
        ("", "Foo:Bar", r"foo\:bar"),
        # A whitespace-only term produces no clause at all.
        ("artist", " ", ""),
    ],
)
def test_format_search_term(field, term, expected):
    """Check normalisation, escaping and field wrapping of search terms."""
    formatted = MusicBrainzAPI.format_search_term(field, term)
    assert formatted == expected