mirror of
https://github.com/beetbox/beets.git
synced 2025-12-06 08:39:17 +01:00
Avoid Unidecoding Query Strings in Deezer Metadata Plugin (#5882)
Moved the `_construct_search_query` method that was duplicated in the spotify and deezer plugins into the (new) shared helper baseclass ( `SearchApiMetadataSourcePlugin`). By default deezer now also does not ascii encode a query when it is send but the legacy behaviour can be restored using the `deezer.search_query_ascii` option. closes #5860
This commit is contained in:
commit
6c21482b7a
6 changed files with 75 additions and 70 deletions
|
|
@ -13,6 +13,7 @@ import re
|
|||
import warnings
|
||||
from typing import TYPE_CHECKING, Generic, Literal, Sequence, TypedDict, TypeVar
|
||||
|
||||
import unidecode
|
||||
from typing_extensions import NotRequired
|
||||
|
||||
from beets.util import cached_classproperty
|
||||
|
|
@ -334,18 +335,26 @@ class SearchApiMetadataSourcePlugin(
|
|||
of identifiers for the requested type (album or track).
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
self.config.add(
|
||||
{
|
||||
"search_query_ascii": False,
|
||||
}
|
||||
)
|
||||
|
||||
@abc.abstractmethod
|
||||
def _search_api(
|
||||
self,
|
||||
query_type: Literal["album", "track"],
|
||||
filters: SearchFilter,
|
||||
keywords: str = "",
|
||||
query_string: str = "",
|
||||
) -> Sequence[R]:
|
||||
"""Perform a search on the API.
|
||||
|
||||
:param query_type: The type of query to perform.
|
||||
:param filters: A dictionary of filters to apply to the search.
|
||||
:param keywords: Additional keywords to include in the search.
|
||||
:param query_string: Additional query to include in the search.
|
||||
|
||||
Should return a list of identifiers for the requested type (album or track).
|
||||
"""
|
||||
|
|
@ -373,7 +382,9 @@ class SearchApiMetadataSourcePlugin(
|
|||
def item_candidates(
|
||||
self, item: Item, artist: str, title: str
|
||||
) -> Iterable[TrackInfo]:
|
||||
results = self._search_api("track", {"artist": artist}, keywords=title)
|
||||
results = self._search_api(
|
||||
"track", {"artist": artist}, query_string=title
|
||||
)
|
||||
if not results:
|
||||
return []
|
||||
|
||||
|
|
@ -382,6 +393,30 @@ class SearchApiMetadataSourcePlugin(
|
|||
self.tracks_for_ids([result["id"] for result in results if result]),
|
||||
)
|
||||
|
||||
def _construct_search_query(
|
||||
self, filters: SearchFilter, query_string: str
|
||||
) -> str:
|
||||
"""Construct a query string with the specified filters and keywords to
|
||||
be provided to the spotify (or similar) search API.
|
||||
|
||||
The returned format was initially designed for spotify's search API but
|
||||
we found is also useful with other APIs that support similar query structures.
|
||||
see `spotify <https://developer.spotify.com/documentation/web-api/reference/search>`_
|
||||
and `deezer <https://developers.deezer.com/api/search>`_.
|
||||
|
||||
:param filters: Field filters to apply.
|
||||
:param query_string: Query keywords to use.
|
||||
:return: Query string to be provided to the search API.
|
||||
"""
|
||||
|
||||
components = [query_string, *(f'{k}:"{v}"' for k, v in filters.items())]
|
||||
query = " ".join(filter(None, components))
|
||||
|
||||
if self.config["search_query_ascii"].get():
|
||||
query = unidecode.unidecode(query)
|
||||
|
||||
return query
|
||||
|
||||
|
||||
# Dynamically copy methods to BeetsPlugin for legacy support
|
||||
# TODO: Remove this in the future major release, v3.0.0
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ import time
|
|||
from typing import TYPE_CHECKING, Literal, Sequence
|
||||
|
||||
import requests
|
||||
import unidecode
|
||||
|
||||
from beets import ui
|
||||
from beets.autotag import AlbumInfo, TrackInfo
|
||||
|
|
@ -216,27 +215,6 @@ class DeezerPlugin(SearchApiMetadataSourcePlugin[IDResponse]):
|
|||
deezer_updated=time.time(),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _construct_search_query(
|
||||
filters: SearchFilter, keywords: str = ""
|
||||
) -> str:
|
||||
"""Construct a query string with the specified filters and keywords to
|
||||
be provided to the Deezer Search API
|
||||
(https://developers.deezer.com/api/search).
|
||||
|
||||
:param filters: Field filters to apply.
|
||||
:param keywords: (Optional) Query keywords to use.
|
||||
:return: Query string to be provided to the Search API.
|
||||
"""
|
||||
query_components = [
|
||||
keywords,
|
||||
" ".join(f'{k}:"{v}"' for k, v in filters.items()),
|
||||
]
|
||||
query = " ".join([q for q in query_components if q])
|
||||
if not isinstance(query, str):
|
||||
query = query.decode("utf8")
|
||||
return unidecode.unidecode(query)
|
||||
|
||||
def _search_api(
|
||||
self,
|
||||
query_type: Literal[
|
||||
|
|
@ -250,17 +228,19 @@ class DeezerPlugin(SearchApiMetadataSourcePlugin[IDResponse]):
|
|||
"user",
|
||||
],
|
||||
filters: SearchFilter,
|
||||
keywords="",
|
||||
query_string: str = "",
|
||||
) -> Sequence[IDResponse]:
|
||||
"""Query the Deezer Search API for the specified ``keywords``, applying
|
||||
"""Query the Deezer Search API for the specified ``query_string``, applying
|
||||
the provided ``filters``.
|
||||
|
||||
:param filters: Field filters to apply.
|
||||
:param keywords: Query keywords to use.
|
||||
:param query_string: Additional query to include in the search.
|
||||
:return: JSON data for the class:`Response <Response>` object or None
|
||||
if no search results are returned.
|
||||
"""
|
||||
query = self._construct_search_query(keywords=keywords, filters=filters)
|
||||
query = self._construct_search_query(
|
||||
query_string=query_string, filters=filters
|
||||
)
|
||||
self._log.debug(f"Searching {self.data_source} for '{query}'")
|
||||
try:
|
||||
response = requests.get(
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence, Union
|
|||
|
||||
import confuse
|
||||
import requests
|
||||
import unidecode
|
||||
|
||||
from beets import ui
|
||||
from beets.autotag.hooks import AlbumInfo, TrackInfo
|
||||
|
|
@ -139,7 +138,6 @@ class SpotifyPlugin(
|
|||
"client_id": "4e414367a1d14c75a5c5129a627fcab8",
|
||||
"client_secret": "f82bdc09b2254f1a8286815d02fd46dc",
|
||||
"tokenfile": "spotify_token.json",
|
||||
"search_query_ascii": False,
|
||||
}
|
||||
)
|
||||
self.config["client_id"].redact = True
|
||||
|
|
@ -422,46 +420,23 @@ class SpotifyPlugin(
|
|||
track.medium_total = medium_total
|
||||
return track
|
||||
|
||||
def _construct_search_query(
|
||||
self, filters: SearchFilter, keywords: str = ""
|
||||
) -> str:
|
||||
"""Construct a query string with the specified filters and keywords to
|
||||
be provided to the Spotify Search API
|
||||
(https://developer.spotify.com/documentation/web-api/reference/search).
|
||||
|
||||
:param filters: (Optional) Field filters to apply.
|
||||
:param keywords: (Optional) Query keywords to use.
|
||||
:return: Query string to be provided to the Search API.
|
||||
"""
|
||||
|
||||
query_components = [
|
||||
keywords,
|
||||
" ".join(f"{k}:{v}" for k, v in filters.items()),
|
||||
]
|
||||
query = " ".join([q for q in query_components if q])
|
||||
if not isinstance(query, str):
|
||||
query = query.decode("utf8")
|
||||
|
||||
if self.config["search_query_ascii"].get():
|
||||
query = unidecode.unidecode(query)
|
||||
|
||||
return query
|
||||
|
||||
def _search_api(
|
||||
self,
|
||||
query_type: Literal["album", "track"],
|
||||
filters: SearchFilter,
|
||||
keywords: str = "",
|
||||
query_string: str = "",
|
||||
) -> Sequence[SearchResponseAlbums | SearchResponseTracks]:
|
||||
"""Query the Spotify Search API for the specified ``keywords``,
|
||||
"""Query the Spotify Search API for the specified ``query_string``,
|
||||
applying the provided ``filters``.
|
||||
|
||||
:param query_type: Item type to search across. Valid types are:
|
||||
'album', 'artist', 'playlist', and 'track'.
|
||||
:param filters: (Optional) Field filters to apply.
|
||||
:param keywords: (Optional) Query keywords to use.
|
||||
:param filters: Field filters to apply.
|
||||
:param query_string: Additional query to include in the search.
|
||||
"""
|
||||
query = self._construct_search_query(keywords=keywords, filters=filters)
|
||||
query = self._construct_search_query(
|
||||
filters=filters, query_string=query_string
|
||||
)
|
||||
|
||||
self._log.debug(f"Searching {self.data_source} for '{query}'")
|
||||
try:
|
||||
|
|
@ -588,16 +563,18 @@ class SpotifyPlugin(
|
|||
# Custom values can be passed in the config (just in case)
|
||||
artist = item[self.config["artist_field"].get()]
|
||||
album = item[self.config["album_field"].get()]
|
||||
keywords = item[self.config["track_field"].get()]
|
||||
query_string = item[self.config["track_field"].get()]
|
||||
|
||||
# Query the Web API for each track, look for the items' JSON data
|
||||
query_filters: SearchFilter = {"artist": artist, "album": album}
|
||||
response_data_tracks = self._search_api(
|
||||
query_type="track", keywords=keywords, filters=query_filters
|
||||
query_type="track",
|
||||
query_string=query_string,
|
||||
filters=query_filters,
|
||||
)
|
||||
if not response_data_tracks:
|
||||
query = self._construct_search_query(
|
||||
keywords=keywords, filters=query_filters
|
||||
query_string=query_string, filters=query_filters
|
||||
)
|
||||
|
||||
failures.append(query)
|
||||
|
|
|
|||
|
|
@ -57,6 +57,10 @@ Bug fixes:
|
|||
:bug:`5930`
|
||||
- :doc:`plugins/chroma`: AcoustID lookup HTTP requests will now time out after
|
||||
10 seconds, rather than hanging the entire import process.
|
||||
- :doc:`/plugins/deezer`: Fix the issue with that every query to deezer was
|
||||
ascii encoded. This resulted in bad matches for queries that contained special
|
||||
e.g. non latin characters as 盗作. If you want to keep the legacy behavior set
|
||||
the config option ``deezer.search_query_ascii: yes``. :bug:`5860`
|
||||
|
||||
For packagers:
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,15 @@ Configuration
|
|||
This plugin can be configured like other metadata source plugins as described in
|
||||
:ref:`metadata-source-plugin-configuration`.
|
||||
|
||||
The default options should work as-is, but there are some options you can put in
|
||||
config.yaml under the ``deezer:`` section:
|
||||
|
||||
- **search_query_ascii**: If set to ``yes``, the search query will be converted
|
||||
to ASCII before being sent to Deezer. Converting searches to ASCII can enhance
|
||||
search results in some cases, but in general, it is not recommended. For
|
||||
instance ``artist:deadmau5 album:4×4`` will be converted to ``artist:deadmau5
|
||||
album:4x4`` (notice ``×!=x``). Default: ``no``.
|
||||
|
||||
The ``deezer`` plugin provides an additional command ``deezerupdate`` to update
|
||||
the ``rank`` information from Deezer. The ``rank`` (ranges from 0 to 1M) is a
|
||||
global indicator of a song's popularity on Deezer that is updated daily based on
|
||||
|
|
|
|||
|
|
@ -82,8 +82,8 @@ class SpotifyPluginTest(PluginTestCase):
|
|||
params = _params(responses.calls[0].request.url)
|
||||
query = params["q"][0]
|
||||
assert "duifhjslkef" in query
|
||||
assert "artist:ujydfsuihse" in query
|
||||
assert "album:lkajsdflakjsd" in query
|
||||
assert 'artist:"ujydfsuihse"' in query
|
||||
assert 'album:"lkajsdflakjsd"' in query
|
||||
assert params["type"] == ["track"]
|
||||
|
||||
@responses.activate
|
||||
|
|
@ -117,8 +117,8 @@ class SpotifyPluginTest(PluginTestCase):
|
|||
params = _params(responses.calls[0].request.url)
|
||||
query = params["q"][0]
|
||||
assert "Happy" in query
|
||||
assert "artist:Pharrell Williams" in query
|
||||
assert "album:Despicable Me 2" in query
|
||||
assert 'artist:"Pharrell Williams"' in query
|
||||
assert 'album:"Despicable Me 2"' in query
|
||||
assert params["type"] == ["track"]
|
||||
|
||||
@responses.activate
|
||||
|
|
@ -233,8 +233,8 @@ class SpotifyPluginTest(PluginTestCase):
|
|||
params = _params(responses.calls[0].request.url)
|
||||
query = params["q"][0]
|
||||
assert item.title in query
|
||||
assert f"artist:{item.albumartist}" in query
|
||||
assert f"album:{item.album}" in query
|
||||
assert f'artist:"{item.albumartist}"' in query
|
||||
assert f'album:"{item.album}"' in query
|
||||
assert not query.isascii()
|
||||
|
||||
# Is not found in the library if ascii encoding is enabled
|
||||
|
|
|
|||
Loading…
Reference in a new issue