Avoid Unidecoding Query Strings in Deezer Metadata Plugin (#5882)

Moved the `_construct_search_query` method that was duplicated in the
spotify and deezer plugins into the (new) shared helper baseclass (
`SearchApiMetadataSourcePlugin`).

By default deezer now also does not ascii encode a query when it is send
but the legacy behaviour can be restored using the
`deezer.search_query_ascii` option.

closes #5860
This commit is contained in:
Sebastian Mohr 2025-08-26 13:46:47 +02:00 committed by GitHub
commit 6c21482b7a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 75 additions and 70 deletions

View file

@ -13,6 +13,7 @@ import re
import warnings import warnings
from typing import TYPE_CHECKING, Generic, Literal, Sequence, TypedDict, TypeVar from typing import TYPE_CHECKING, Generic, Literal, Sequence, TypedDict, TypeVar
import unidecode
from typing_extensions import NotRequired from typing_extensions import NotRequired
from beets.util import cached_classproperty from beets.util import cached_classproperty
@ -334,18 +335,26 @@ class SearchApiMetadataSourcePlugin(
of identifiers for the requested type (album or track). of identifiers for the requested type (album or track).
""" """
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.config.add(
{
"search_query_ascii": False,
}
)
@abc.abstractmethod @abc.abstractmethod
def _search_api( def _search_api(
self, self,
query_type: Literal["album", "track"], query_type: Literal["album", "track"],
filters: SearchFilter, filters: SearchFilter,
keywords: str = "", query_string: str = "",
) -> Sequence[R]: ) -> Sequence[R]:
"""Perform a search on the API. """Perform a search on the API.
:param query_type: The type of query to perform. :param query_type: The type of query to perform.
:param filters: A dictionary of filters to apply to the search. :param filters: A dictionary of filters to apply to the search.
:param keywords: Additional keywords to include in the search. :param query_string: Additional query to include in the search.
Should return a list of identifiers for the requested type (album or track). Should return a list of identifiers for the requested type (album or track).
""" """
@ -373,7 +382,9 @@ class SearchApiMetadataSourcePlugin(
def item_candidates( def item_candidates(
self, item: Item, artist: str, title: str self, item: Item, artist: str, title: str
) -> Iterable[TrackInfo]: ) -> Iterable[TrackInfo]:
results = self._search_api("track", {"artist": artist}, keywords=title) results = self._search_api(
"track", {"artist": artist}, query_string=title
)
if not results: if not results:
return [] return []
@ -382,6 +393,30 @@ class SearchApiMetadataSourcePlugin(
self.tracks_for_ids([result["id"] for result in results if result]), self.tracks_for_ids([result["id"] for result in results if result]),
) )
def _construct_search_query(
self, filters: SearchFilter, query_string: str
) -> str:
"""Construct a query string with the specified filters and keywords to
be provided to the spotify (or similar) search API.
The returned format was initially designed for spotify's search API but
we found is also useful with other APIs that support similar query structures.
see `spotify <https://developer.spotify.com/documentation/web-api/reference/search>`_
and `deezer <https://developers.deezer.com/api/search>`_.
:param filters: Field filters to apply.
:param query_string: Query keywords to use.
:return: Query string to be provided to the search API.
"""
components = [query_string, *(f'{k}:"{v}"' for k, v in filters.items())]
query = " ".join(filter(None, components))
if self.config["search_query_ascii"].get():
query = unidecode.unidecode(query)
return query
# Dynamically copy methods to BeetsPlugin for legacy support # Dynamically copy methods to BeetsPlugin for legacy support
# TODO: Remove this in the future major release, v3.0.0 # TODO: Remove this in the future major release, v3.0.0

View file

@ -21,7 +21,6 @@ import time
from typing import TYPE_CHECKING, Literal, Sequence from typing import TYPE_CHECKING, Literal, Sequence
import requests import requests
import unidecode
from beets import ui from beets import ui
from beets.autotag import AlbumInfo, TrackInfo from beets.autotag import AlbumInfo, TrackInfo
@ -216,27 +215,6 @@ class DeezerPlugin(SearchApiMetadataSourcePlugin[IDResponse]):
deezer_updated=time.time(), deezer_updated=time.time(),
) )
@staticmethod
def _construct_search_query(
filters: SearchFilter, keywords: str = ""
) -> str:
"""Construct a query string with the specified filters and keywords to
be provided to the Deezer Search API
(https://developers.deezer.com/api/search).
:param filters: Field filters to apply.
:param keywords: (Optional) Query keywords to use.
:return: Query string to be provided to the Search API.
"""
query_components = [
keywords,
" ".join(f'{k}:"{v}"' for k, v in filters.items()),
]
query = " ".join([q for q in query_components if q])
if not isinstance(query, str):
query = query.decode("utf8")
return unidecode.unidecode(query)
def _search_api( def _search_api(
self, self,
query_type: Literal[ query_type: Literal[
@ -250,17 +228,19 @@ class DeezerPlugin(SearchApiMetadataSourcePlugin[IDResponse]):
"user", "user",
], ],
filters: SearchFilter, filters: SearchFilter,
keywords="", query_string: str = "",
) -> Sequence[IDResponse]: ) -> Sequence[IDResponse]:
"""Query the Deezer Search API for the specified ``keywords``, applying """Query the Deezer Search API for the specified ``query_string``, applying
the provided ``filters``. the provided ``filters``.
:param filters: Field filters to apply. :param filters: Field filters to apply.
:param keywords: Query keywords to use. :param query_string: Additional query to include in the search.
:return: JSON data for the class:`Response <Response>` object or None :return: JSON data for the class:`Response <Response>` object or None
if no search results are returned. if no search results are returned.
""" """
query = self._construct_search_query(keywords=keywords, filters=filters) query = self._construct_search_query(
query_string=query_string, filters=filters
)
self._log.debug(f"Searching {self.data_source} for '{query}'") self._log.debug(f"Searching {self.data_source} for '{query}'")
try: try:
response = requests.get( response = requests.get(

View file

@ -29,7 +29,6 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence, Union
import confuse import confuse
import requests import requests
import unidecode
from beets import ui from beets import ui
from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.autotag.hooks import AlbumInfo, TrackInfo
@ -139,7 +138,6 @@ class SpotifyPlugin(
"client_id": "4e414367a1d14c75a5c5129a627fcab8", "client_id": "4e414367a1d14c75a5c5129a627fcab8",
"client_secret": "f82bdc09b2254f1a8286815d02fd46dc", "client_secret": "f82bdc09b2254f1a8286815d02fd46dc",
"tokenfile": "spotify_token.json", "tokenfile": "spotify_token.json",
"search_query_ascii": False,
} }
) )
self.config["client_id"].redact = True self.config["client_id"].redact = True
@ -422,46 +420,23 @@ class SpotifyPlugin(
track.medium_total = medium_total track.medium_total = medium_total
return track return track
def _construct_search_query(
self, filters: SearchFilter, keywords: str = ""
) -> str:
"""Construct a query string with the specified filters and keywords to
be provided to the Spotify Search API
(https://developer.spotify.com/documentation/web-api/reference/search).
:param filters: (Optional) Field filters to apply.
:param keywords: (Optional) Query keywords to use.
:return: Query string to be provided to the Search API.
"""
query_components = [
keywords,
" ".join(f"{k}:{v}" for k, v in filters.items()),
]
query = " ".join([q for q in query_components if q])
if not isinstance(query, str):
query = query.decode("utf8")
if self.config["search_query_ascii"].get():
query = unidecode.unidecode(query)
return query
def _search_api( def _search_api(
self, self,
query_type: Literal["album", "track"], query_type: Literal["album", "track"],
filters: SearchFilter, filters: SearchFilter,
keywords: str = "", query_string: str = "",
) -> Sequence[SearchResponseAlbums | SearchResponseTracks]: ) -> Sequence[SearchResponseAlbums | SearchResponseTracks]:
"""Query the Spotify Search API for the specified ``keywords``, """Query the Spotify Search API for the specified ``query_string``,
applying the provided ``filters``. applying the provided ``filters``.
:param query_type: Item type to search across. Valid types are: :param query_type: Item type to search across. Valid types are:
'album', 'artist', 'playlist', and 'track'. 'album', 'artist', 'playlist', and 'track'.
:param filters: (Optional) Field filters to apply. :param filters: Field filters to apply.
:param keywords: (Optional) Query keywords to use. :param query_string: Additional query to include in the search.
""" """
query = self._construct_search_query(keywords=keywords, filters=filters) query = self._construct_search_query(
filters=filters, query_string=query_string
)
self._log.debug(f"Searching {self.data_source} for '{query}'") self._log.debug(f"Searching {self.data_source} for '{query}'")
try: try:
@ -588,16 +563,18 @@ class SpotifyPlugin(
# Custom values can be passed in the config (just in case) # Custom values can be passed in the config (just in case)
artist = item[self.config["artist_field"].get()] artist = item[self.config["artist_field"].get()]
album = item[self.config["album_field"].get()] album = item[self.config["album_field"].get()]
keywords = item[self.config["track_field"].get()] query_string = item[self.config["track_field"].get()]
# Query the Web API for each track, look for the items' JSON data # Query the Web API for each track, look for the items' JSON data
query_filters: SearchFilter = {"artist": artist, "album": album} query_filters: SearchFilter = {"artist": artist, "album": album}
response_data_tracks = self._search_api( response_data_tracks = self._search_api(
query_type="track", keywords=keywords, filters=query_filters query_type="track",
query_string=query_string,
filters=query_filters,
) )
if not response_data_tracks: if not response_data_tracks:
query = self._construct_search_query( query = self._construct_search_query(
keywords=keywords, filters=query_filters query_string=query_string, filters=query_filters
) )
failures.append(query) failures.append(query)

View file

@ -57,6 +57,10 @@ Bug fixes:
:bug:`5930` :bug:`5930`
- :doc:`plugins/chroma`: AcoustID lookup HTTP requests will now time out after - :doc:`plugins/chroma`: AcoustID lookup HTTP requests will now time out after
10 seconds, rather than hanging the entire import process. 10 seconds, rather than hanging the entire import process.
- :doc:`/plugins/deezer`: Fix the issue with that every query to deezer was
ascii encoded. This resulted in bad matches for queries that contained special
e.g. non latin characters as 盗作. If you want to keep the legacy behavior set
the config option ``deezer.search_query_ascii: yes``. :bug:`5860`
For packagers: For packagers:

View file

@ -29,6 +29,15 @@ Configuration
This plugin can be configured like other metadata source plugins as described in This plugin can be configured like other metadata source plugins as described in
:ref:`metadata-source-plugin-configuration`. :ref:`metadata-source-plugin-configuration`.
The default options should work as-is, but there are some options you can put in
config.yaml under the ``deezer:`` section:
- **search_query_ascii**: If set to ``yes``, the search query will be converted
to ASCII before being sent to Deezer. Converting searches to ASCII can enhance
search results in some cases, but in general, it is not recommended. For
instance ``artist:deadmau5 album:4×4`` will be converted to ``artist:deadmau5
album:4x4`` (notice ``×!=x``). Default: ``no``.
The ``deezer`` plugin provides an additional command ``deezerupdate`` to update The ``deezer`` plugin provides an additional command ``deezerupdate`` to update
the ``rank`` information from Deezer. The ``rank`` (ranges from 0 to 1M) is a the ``rank`` information from Deezer. The ``rank`` (ranges from 0 to 1M) is a
global indicator of a song's popularity on Deezer that is updated daily based on global indicator of a song's popularity on Deezer that is updated daily based on

View file

@ -82,8 +82,8 @@ class SpotifyPluginTest(PluginTestCase):
params = _params(responses.calls[0].request.url) params = _params(responses.calls[0].request.url)
query = params["q"][0] query = params["q"][0]
assert "duifhjslkef" in query assert "duifhjslkef" in query
assert "artist:ujydfsuihse" in query assert 'artist:"ujydfsuihse"' in query
assert "album:lkajsdflakjsd" in query assert 'album:"lkajsdflakjsd"' in query
assert params["type"] == ["track"] assert params["type"] == ["track"]
@responses.activate @responses.activate
@ -117,8 +117,8 @@ class SpotifyPluginTest(PluginTestCase):
params = _params(responses.calls[0].request.url) params = _params(responses.calls[0].request.url)
query = params["q"][0] query = params["q"][0]
assert "Happy" in query assert "Happy" in query
assert "artist:Pharrell Williams" in query assert 'artist:"Pharrell Williams"' in query
assert "album:Despicable Me 2" in query assert 'album:"Despicable Me 2"' in query
assert params["type"] == ["track"] assert params["type"] == ["track"]
@responses.activate @responses.activate
@ -233,8 +233,8 @@ class SpotifyPluginTest(PluginTestCase):
params = _params(responses.calls[0].request.url) params = _params(responses.calls[0].request.url)
query = params["q"][0] query = params["q"][0]
assert item.title in query assert item.title in query
assert f"artist:{item.albumartist}" in query assert f'artist:"{item.albumartist}"' in query
assert f"album:{item.album}" in query assert f'album:"{item.album}"' in query
assert not query.isascii() assert not query.isascii()
# Is not found in the library if ascii encoding is enabled # Is not found in the library if ascii encoding is enabled