diff --git a/beetsplug/_utils/musicbrainz.py b/beetsplug/_utils/musicbrainz.py index 2fc821df9..bb278b954 100644 --- a/beetsplug/_utils/musicbrainz.py +++ b/beetsplug/_utils/musicbrainz.py @@ -11,9 +11,10 @@ logic throughout the codebase. from __future__ import annotations import operator +import re from dataclasses import dataclass, field from functools import cached_property, singledispatchmethod, wraps -from itertools import groupby +from itertools import groupby, starmap from typing import TYPE_CHECKING, Any, Literal, ParamSpec, TypedDict, TypeVar from requests_ratelimiter import LimiterMixin @@ -33,6 +34,9 @@ if TYPE_CHECKING: log = logging.getLogger(__name__) +LUCENE_SPECIAL_CHAR_PAT = re.compile(r'([-+&|!(){}[\]^"~*?:\\/])') + + class LimiterTimeoutSession(LimiterMixin, TimeoutAndRetrySession): """HTTP session that enforces rate limits.""" @@ -181,6 +185,21 @@ class MusicBrainzAPI(RequestHandler): def _browse(self, entity: Entity, **kwargs) -> list[JSONDict]: return self._get_resource(entity, **kwargs).get(f"{entity}s", []) + @staticmethod + def format_search_term(field: str, term: str) -> str: + """Format a search term for the MusicBrainz API. + + See https://lucene.apache.org/core/4_3_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html + """ + if not (term := term.lower().strip()): + return "" + + term = LUCENE_SPECIAL_CHAR_PAT.sub(r"\\\1", term) + if field: + term = f"{field}:({term})" + + return term + def search( self, entity: Entity, @@ -195,10 +214,8 @@ class MusicBrainzAPI(RequestHandler): - 'value' is empty, in which case the filter is ignored * Values are lowercased and stripped of whitespace. """ - query = " AND ".join( - ":".join(filter(None, (k, f'"{_v}"'))) - for k, v in filters.items() - if (_v := v.lower().strip()) + query = " ".join( + filter(None, starmap(self.format_search_term, filters.items())) ) log.debug("Searching for MusicBrainz {}s with: {!r}", entity, query) kwargs["query"] = query diff --git a/docs/changelog.rst b/docs/changelog.rst index 25a0c1365..bd2243d67 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -11,6 +11,8 @@ New features: Bug fixes: +- :doc:`plugins/musicbrainz`: Fix search terms escaping. :bug:`6347` + For packagers: Other changes: diff --git a/test/plugins/utils/test_musicbrainz.py b/test/plugins/utils/test_musicbrainz.py index 291f50eb5..c7363f516 100644 --- a/test/plugins/utils/test_musicbrainz.py +++ b/test/plugins/utils/test_musicbrainz.py @@ -1,3 +1,5 @@ +import pytest + from beetsplug._utils.musicbrainz import MusicBrainzAPI @@ -80,3 +82,15 @@ def test_group_relations(): }, ], } + + +@pytest.mark.parametrize( + "field, term, expected", + [ + ("artist", ' AC/DC + "[Live]" ', r"artist:(ac\/dc \+ \"\[live\]\")"), + ("", "Foo:Bar", r"foo\:bar"), + ("artist", " ", ""), + ], +) +def test_format_search_term(field, term, expected): + assert MusicBrainzAPI.format_search_term(field, term) == expected