mirror of
https://github.com/beetbox/beets.git
synced 2026-02-08 08:25:23 +01:00
Format MusicBrainz search terms and escape Lucene special chars
Add a helper to lower/strip and escape Lucene query syntax. Use it when building search queries and add unit tests.
This commit is contained in:
parent
cdfb813910
commit
1271b711f7
3 changed files with 38 additions and 5 deletions
|
|
@ -11,9 +11,10 @@ logic throughout the codebase.
|
|||
from __future__ import annotations
|
||||
|
||||
import operator
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from functools import cached_property, singledispatchmethod, wraps
|
||||
from itertools import groupby
|
||||
from itertools import groupby, starmap
|
||||
from typing import TYPE_CHECKING, Any, Literal, ParamSpec, TypedDict, TypeVar
|
||||
|
||||
from requests_ratelimiter import LimiterMixin
|
||||
|
|
@ -33,6 +34,9 @@ if TYPE_CHECKING:
|
|||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Matches a single character that has special meaning in Lucene query syntax
# and captures it in group 1, so a substitution can re-emit it behind a
# backslash (see ``MusicBrainzAPI.format_search_term``).
LUCENE_SPECIAL_CHAR_PAT = re.compile(r'([-+&|!(){}[\]^"~*?:\\/])')
|
||||
|
||||
|
||||
class LimiterTimeoutSession(LimiterMixin, TimeoutAndRetrySession):
|
||||
"""HTTP session that enforces rate limits."""
|
||||
|
||||
|
|
@ -181,6 +185,21 @@ class MusicBrainzAPI(RequestHandler):
|
|||
def _browse(self, entity: Entity, **kwargs) -> list[JSONDict]:
|
||||
return self._get_resource(entity, **kwargs).get(f"{entity}s", [])
|
||||
|
||||
@staticmethod
|
||||
def format_search_term(field: str, term: str) -> str:
|
||||
"""Format a search term for the MusicBrainz API.
|
||||
|
||||
See https://lucene.apache.org/core/4_3_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html
|
||||
"""
|
||||
if not (term := term.lower().strip()):
|
||||
return ""
|
||||
|
||||
term = LUCENE_SPECIAL_CHAR_PAT.sub(r"\\\1", term)
|
||||
if field:
|
||||
term = f"{field}:({term})"
|
||||
|
||||
return term
|
||||
|
||||
def search(
|
||||
self,
|
||||
entity: Entity,
|
||||
|
|
@ -195,10 +214,8 @@ class MusicBrainzAPI(RequestHandler):
|
|||
- 'value' is empty, in which case the filter is ignored
|
||||
* Values are lowercased and stripped of whitespace.
|
||||
"""
|
||||
query = " AND ".join(
|
||||
":".join(filter(None, (k, f'"{_v}"')))
|
||||
for k, v in filters.items()
|
||||
if (_v := v.lower().strip())
|
||||
query = " ".join(
|
||||
filter(None, starmap(self.format_search_term, filters.items()))
|
||||
)
|
||||
log.debug("Searching for MusicBrainz {}s with: {!r}", entity, query)
|
||||
kwargs["query"] = query
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ New features:
|
|||
|
||||
Bug fixes:
|
||||
|
||||
- :doc:`plugins/musicbrainz`: Fix escaping of search terms. :bug:`6347`
|
||||
|
||||
For packagers:
|
||||
|
||||
Other changes:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import pytest
|
||||
|
||||
from beetsplug._utils.musicbrainz import MusicBrainzAPI
|
||||
|
||||
|
||||
|
|
@ -80,3 +82,15 @@ def test_group_relations():
|
|||
},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "field, term, expected",
    [
        # Mixed case, surrounding whitespace and several special characters.
        ("artist", ' AC/DC + "[Live]" ', r"artist:(ac\/dc \+ \"\[live\]\")"),
        # Empty field: the escaped term comes back without a field prefix.
        ("", "Foo:Bar", r"foo\:bar"),
        # Whitespace-only term: nothing to search for, so an empty string.
        ("artist", " ", ""),
    ],
)
def test_format_search_term(field, term, expected):
    """Check lowercasing, stripping, escaping and field wrapping of terms."""
    formatted = MusicBrainzAPI.format_search_term(field, term)

    assert formatted == expected
|
||||
|
|
|
|||
Loading…
Reference in a new issue