From be74936134d136572ebfc6e0eb0e2b47275859d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 18 May 2025 09:25:05 +0100 Subject: [PATCH 01/49] Handle extra_tags the way they used to be handled --- beets/util/__init__.py | 2 +- beetsplug/musicbrainz.py | 65 +++++++++++++++++++++----------- docs/changelog.rst | 18 +++++---- test/plugins/test_musicbrainz.py | 44 ++++++++++++++++++++- 4 files changed, 98 insertions(+), 31 deletions(-) diff --git a/beets/util/__init__.py b/beets/util/__init__.py index a1ce55caa..6bc4d14ee 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -803,7 +803,7 @@ def as_string(value: Any) -> str: return str(value) -def plurality(objs: Sequence[T]) -> tuple[T, int]: +def plurality(objs: Iterable[T]) -> tuple[T, int]: """Given a sequence of hashble objects, returns the object that is most common in the set and the its number of appearance. The sequence must contain at least one object. diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index 34a46715d..bdfeb0968 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -18,6 +18,7 @@ from __future__ import annotations import traceback from collections import Counter +from functools import cached_property from itertools import product from typing import TYPE_CHECKING, Any from urllib.parse import urljoin @@ -383,7 +384,7 @@ class MusicBrainzPlugin(BeetsPlugin): "deezer": False, "tidal": False, }, - "extra_tags": {}, + "extra_tags": [], }, ) hostname = self.config["host"].as_str() @@ -747,6 +748,46 @@ class MusicBrainzPlugin(BeetsPlugin): return info + @cached_property + def extra_mb_field_by_tag(self) -> dict[str, str]: + """Map configured extra tags to their MusicBrainz API field names. + + Process user configuration to determine which additional MusicBrainz + fields should be included in search queries. 
+ """ + mb_field_by_tag = { + t: FIELDS_TO_MB_KEYS[t] + for t in self.config["extra_tags"].as_str_seq() + if t in FIELDS_TO_MB_KEYS + } + if mb_field_by_tag: + self._log.debug("Additional search terms: {}", mb_field_by_tag) + + return mb_field_by_tag + + def get_album_criteria( + self, items: list[Item], artist: str, album: str, va_likely: bool + ) -> dict[str, str]: + # Build search criteria. + criteria = {"release": album.lower().strip()} + if artist is not None: + criteria["artist"] = artist.lower().strip() + else: + # Various Artists search. + criteria["arid"] = VARIOUS_ARTISTS_ID + if track_count := len(items): + criteria["tracks"] = str(track_count) + + for tag, mb_field in self.extra_mb_field_by_tag.items(): + most_common, _ = util.plurality(i.get(tag) for i in items) + value = str(most_common).lower().strip() + if tag == "catalognum": + value = value.replace(" ", "") + if value: + criteria[mb_field] = value + + return criteria + def candidates( self, items: list[Item], @@ -762,27 +803,7 @@ class MusicBrainzPlugin(BeetsPlugin): The query consists of an artist name, an album name, and, optionally, a number of tracks on the album and any other extra tags. """ - # Build search criteria. - criteria = {"release": album.lower().strip()} - if artist is not None: - criteria["artist"] = artist.lower().strip() - else: - # Various Artists search. - criteria["arid"] = VARIOUS_ARTISTS_ID - if track_count := len(items): - criteria["tracks"] = str(track_count) - - if self.config["extra_tags"]: - tag_list = self.config["extra_tags"].get() - self._log.debug("Additional search terms: {0}", tag_list) - for tag, value in tag_list.items(): - if key := FIELDS_TO_MB_KEYS.get(tag): - value = str(value).lower().strip() - if key == "catno": - value = value.replace(" ", "") - if value: - criteria[key] = value - + criteria = self.get_album_criteria(items, artist, album, va_likely) # Abort if we have no search terms. 
if not any(criteria.values()): return diff --git a/docs/changelog.rst b/docs/changelog.rst index 31da975e2..a01ee8c97 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -8,11 +8,22 @@ Unreleased New features: +* :doc:`plugins/musicbrainz`: The MusicBrainz autotagger has been moved to + a separate plugin. The default :ref:`plugins-config` includes `musicbrainz`, + but if you've customized your `plugins` list in your configuration, you'll + need to explicitly add `musicbrainz` to continue using this functionality. + Configuration option `musicbrainz.enabled` has thus been deprecated. + :bug:`2686` + :bug:`4605` * :doc:`plugins/web`: Show notifications when a track plays. This uses the Media Session API to customize media notifications. Bug fixes: +* :doc:`plugins/musicbrainz`: fix regression where user configured + ``extra_tags`` have been read incorrectly. + :bug:`5788` + For packagers: Other changes: @@ -39,13 +50,6 @@ been dropped. New features: -* :doc:`plugins/musicbrainz`: The MusicBrainz autotagger has been moved to - a separate plugin. The default :ref:`plugins-config` includes `musicbrainz`, - but if you've customized your `plugins` list in your configuration, you'll - need to explicitly add `musicbrainz` to continue using this functionality. - Configuration option `musicbrainz.enabled` has thus been deprecated. - :bug:`2686` - :bug:`4605` * :doc:`plugins/lastgenre`: The new configuration option, ``keep_existing``, provides more fine-grained control over how pre-populated genre tags are handled. The ``force`` option now behaves in a more conventional manner. 
diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index 0f142a353..d4104a3ba 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -16,8 +16,11 @@ from unittest import mock +import pytest + from beets import config -from beets.test.helper import BeetsTestCase +from beets.library import Item +from beets.test.helper import BeetsTestCase, PluginMixin from beetsplug import musicbrainz @@ -1063,3 +1066,42 @@ class MBLibraryTest(MusicBrainzTestCase): gp.side_effect = side_effect album = self.mb.album_for_id("d2a6f856-b553-40a0-ac54-a321e8e2da02") assert album.country is None + + +class TestMusicBrainzPlugin(PluginMixin): + plugin = "musicbrainz" + + @pytest.fixture + def mb_plugin(self, plugin_config): + self.config[self.plugin].set(plugin_config) + + return musicbrainz.MusicBrainzPlugin() + + @pytest.mark.parametrize( + "plugin_config,va_likely,expected_additional_criteria", + [ + ({}, False, {"artist": "artist"}), + ({}, True, {"artist": "artist"}), + ( + {"extra_tags": ["label", "catalognum"]}, + False, + {"artist": "artist", "label": "abc", "catno": "abc123"}, + ), + ], + ) + def test_get_album_criteria( + self, mb_plugin, va_likely, expected_additional_criteria + ): + items = [ + Item(catalognum="ABC 123", label="abc"), + Item(catalognum="ABC 123", label="abc"), + Item(catalognum="ABC 123", label="def"), + ] + + assert mb_plugin.get_album_criteria( + items, "Artist ", " Album", va_likely + ) == { + "release": "album", + "tracks": str(len(items)), + **expected_additional_criteria, + } From 64878933153c9074ef6f4154184617ec7988a78e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 18 May 2025 09:46:40 +0100 Subject: [PATCH 02/49] Synchronise docs with the actual supported extra_tags --- beetsplug/musicbrainz.py | 2 +- docs/plugins/musicbrainz.rst | 2 +- test/plugins/test_musicbrainz.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index bdfeb0968..cc0b7adee 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -45,10 +45,10 @@ BASE_URL = "https://musicbrainz.org/" SKIPPED_TRACKS = ["[data track]"] FIELDS_TO_MB_KEYS = { + "barcode": "barcode", "catalognum": "catno", "country": "country", "label": "label", - "barcode": "barcode", "media": "format", "year": "date", } diff --git a/docs/plugins/musicbrainz.rst b/docs/plugins/musicbrainz.rst index ef10be66d..9068ec45d 100644 --- a/docs/plugins/musicbrainz.rst +++ b/docs/plugins/musicbrainz.rst @@ -102,7 +102,7 @@ MusicBrainz. Additional tags to be queried can be supplied with the .. code-block:: yaml musicbrainz: - extra_tags: [year, catalognum, country, media, label] + extra_tags: [barcode, catalognum, country, label, media, year] This setting should improve the autotagger results if the metadata with the given tags match the metadata returned by MusicBrainz. diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index d4104a3ba..d74e3ff40 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -1081,7 +1081,7 @@ class TestMusicBrainzPlugin(PluginMixin): "plugin_config,va_likely,expected_additional_criteria", [ ({}, False, {"artist": "artist"}), - ({}, True, {"artist": "artist"}), + ({}, True, {"arid": "89ad4ac3-39f7-470e-963a-56509c546377"}), ( {"extra_tags": ["label", "catalognum"]}, False, From 0102f3ce7d206372eae5573a1e0ecdd9d19a4511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 18 May 2025 12:15:45 +0100 Subject: [PATCH 03/49] Take into account `va_likely` param and remove redundant checks - Instead of checking for empty `artist` query, use `va_likely` parameter to determine whether we should query for Various Artists or not. 
- `album` / `title` is always a truthy string - no need to handle empty criteria case - `tracks` list always has at least one track - no need to check for `len(items)` --- beetsplug/musicbrainz.py | 23 ++++++++--------------- test/plugins/test_musicbrainz.py | 16 +++------------- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index cc0b7adee..d6960300e 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -768,15 +768,14 @@ class MusicBrainzPlugin(BeetsPlugin): def get_album_criteria( self, items: list[Item], artist: str, album: str, va_likely: bool ) -> dict[str, str]: - # Build search criteria. - criteria = {"release": album.lower().strip()} - if artist is not None: - criteria["artist"] = artist.lower().strip() - else: - # Various Artists search. - criteria["arid"] = VARIOUS_ARTISTS_ID - if track_count := len(items): - criteria["tracks"] = str(track_count) + criteria = { + "release": album.lower().strip(), + "tracks": str(len(items)), + } | ( + {"arid": VARIOUS_ARTISTS_ID} + if va_likely + else {"artist": artist.lower().strip()} + ) for tag, mb_field in self.extra_mb_field_by_tag.items(): most_common, _ = util.plurality(i.get(tag) for i in items) @@ -804,9 +803,6 @@ class MusicBrainzPlugin(BeetsPlugin): optionally, a number of tracks on the album and any other extra tags. """ criteria = self.get_album_criteria(items, artist, album, va_likely) - # Abort if we have no search terms. 
- if not any(criteria.values()): - return try: self._log.debug( @@ -837,9 +833,6 @@ class MusicBrainzPlugin(BeetsPlugin): "recording": title.lower().strip(), } - if not any(criteria.values()): - return - try: res = musicbrainzngs.search_recordings( limit=self.config["searchlimit"].get(int), **criteria diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index d74e3ff40..90befff9b 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -823,23 +823,13 @@ class MBLibraryTest(MusicBrainzTestCase): ai = list(self.mb.candidates([], "hello", "there", False))[0] - sp.assert_called_with(artist="hello", release="there", limit=5) + sp.assert_called_with( + artist="hello", release="there", tracks="0", limit=5 + ) gp.assert_called_with(mbid, mock.ANY) assert ai.tracks[0].title == "foo" assert ai.album == "hi" - def test_match_track_empty(self): - with mock.patch("musicbrainzngs.search_recordings") as p: - til = list(self.mb.item_candidates(None, " ", " ")) - assert not p.called - assert til == [] - - def test_candidates_empty(self): - with mock.patch("musicbrainzngs.search_releases") as p: - ail = list(self.mb.candidates([], " ", " ", False)) - assert not p.called - assert ail == [] - def test_follow_pseudo_releases(self): side_effect = [ { From 2ec65ed8ca5cf098d17ca0702dfae60dab0afeb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 18 May 2025 14:22:19 +0100 Subject: [PATCH 04/49] Deduplicate candidate methods using _search_api method --- beetsplug/musicbrainz.py | 88 ++++++++----------- test/plugins/test_musicbrainz.py | 143 ++++++++++++++----------------- 2 files changed, 100 insertions(+), 131 deletions(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index d6960300e..ceb931179 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -33,6 +33,7 @@ from beets.util.id_extractors import extract_release_id if TYPE_CHECKING: from collections.abc import 
Iterator, Sequence + from typing import Literal from beets.library import Item @@ -769,24 +770,46 @@ class MusicBrainzPlugin(BeetsPlugin): self, items: list[Item], artist: str, album: str, va_likely: bool ) -> dict[str, str]: criteria = { - "release": album.lower().strip(), + "release": album, "tracks": str(len(items)), - } | ( - {"arid": VARIOUS_ARTISTS_ID} - if va_likely - else {"artist": artist.lower().strip()} - ) + } | ({"arid": VARIOUS_ARTISTS_ID} if va_likely else {"artist": artist}) for tag, mb_field in self.extra_mb_field_by_tag.items(): most_common, _ = util.plurality(i.get(tag) for i in items) - value = str(most_common).lower().strip() + value = str(most_common) if tag == "catalognum": value = value.replace(" ", "") - if value: - criteria[mb_field] = value + + criteria[mb_field] = value return criteria + def _search_api( + self, + query_type: Literal["recording", "release"], + filters: dict[str, str], + ) -> list[JSONDict]: + """Perform MusicBrainz API search and return results. + + Execute a search against the MusicBrainz API for recordings or releases + using the provided criteria. Handles API errors by converting them into + MusicBrainzAPIError exceptions with contextual information. 
+ """ + filters = { + k: _v for k, v in filters.items() if (_v := v.lower().strip()) + } + self._log.debug( + "Searching for MusicBrainz {}s with: {!r}", query_type, filters + ) + try: + method = getattr(musicbrainzngs, f"search_{query_type}s") + res = method(limit=self.config["searchlimit"].get(int), **filters) + except musicbrainzngs.MusicBrainzError as exc: + raise MusicBrainzAPIError( + exc, f"{query_type} search", filters, traceback.format_exc() + ) + return res[f"{query_type}-list"] + def candidates( self, items: list[Item], @@ -795,54 +818,19 @@ class MusicBrainzPlugin(BeetsPlugin): va_likely: bool, extra_tags: dict[str, Any] | None = None, ) -> Iterator[beets.autotag.hooks.AlbumInfo]: - """Searches for a single album ("release" in MusicBrainz parlance) - and returns an iterator over AlbumInfo objects. May raise a - MusicBrainzAPIError. - - The query consists of an artist name, an album name, and, - optionally, a number of tracks on the album and any other extra tags. - """ criteria = self.get_album_criteria(items, artist, album, va_likely) + release_ids = (r["id"] for r in self._search_api("release", criteria)) - try: - self._log.debug( - "Searching for MusicBrainz releases with: {!r}", criteria - ) - res = musicbrainzngs.search_releases( - limit=self.config["searchlimit"].get(int), **criteria - ) - except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError( - exc, "release search", criteria, traceback.format_exc() - ) - for release in res["release-list"]: - # The search result is missing some data (namely, the tracks), - # so we just use the ID and fetch the rest of the information. - albuminfo = self.album_for_id(release["id"]) - if albuminfo is not None: - yield albuminfo + yield from filter(None, map(self.album_for_id, release_ids)) def item_candidates( self, item: Item, artist: str, title: str ) -> Iterator[beets.autotag.hooks.TrackInfo]: - """Searches for a single track and returns an iterable of TrackInfo - objects. 
May raise a MusicBrainzAPIError. - """ - criteria = { - "artist": artist.lower().strip(), - "recording": title.lower().strip(), - } + criteria = {"artist": artist, "recording": title} - try: - res = musicbrainzngs.search_recordings( - limit=self.config["searchlimit"].get(int), **criteria - ) - except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError( - exc, "recording search", criteria, traceback.format_exc() - ) - for recording in res["recording-list"]: - yield self.track_info(recording) + yield from filter( + None, map(self.track_info, self._search_api("recording", criteria)) + ) def album_for_id( self, album_id: str diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index 90befff9b..d9a06d00a 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -757,79 +757,6 @@ class ArtistFlatteningTest(BeetsTestCase): class MBLibraryTest(MusicBrainzTestCase): - def test_match_track(self): - with mock.patch("musicbrainzngs.search_recordings") as p: - p.return_value = { - "recording-list": [ - { - "title": "foo", - "id": "bar", - "length": 42, - } - ], - } - ti = list(self.mb.item_candidates(None, "hello", "there"))[0] - - p.assert_called_with(artist="hello", recording="there", limit=5) - assert ti.title == "foo" - assert ti.track_id == "bar" - - def test_candidates(self): - mbid = "d2a6f856-b553-40a0-ac54-a321e8e2da99" - with mock.patch("musicbrainzngs.search_releases") as sp: - sp.return_value = { - "release-list": [ - { - "id": mbid, - } - ], - } - with mock.patch("musicbrainzngs.get_release_by_id") as gp: - gp.return_value = { - "release": { - "title": "hi", - "id": mbid, - "status": "status", - "medium-list": [ - { - "track-list": [ - { - "id": "baz", - "recording": { - "title": "foo", - "id": "bar", - "length": 42, - }, - "position": 9, - "number": "A1", - } - ], - "position": 5, - } - ], - "artist-credit": [ - { - "artist": { - "name": "some-artist", - "id": "some-id", - }, - } - ], - 
"release-group": { - "id": "another-id", - }, - } - } - - ai = list(self.mb.candidates([], "hello", "there", False))[0] - - sp.assert_called_with( - artist="hello", release="there", tracks="0", limit=5 - ) - gp.assert_called_with(mbid, mock.ANY) - assert ai.tracks[0].title == "foo" - assert ai.album == "hi" - def test_follow_pseudo_releases(self): side_effect = [ { @@ -1061,8 +988,15 @@ class MBLibraryTest(MusicBrainzTestCase): class TestMusicBrainzPlugin(PluginMixin): plugin = "musicbrainz" + mbid = "d2a6f856-b553-40a0-ac54-a321e8e2da99" + RECORDING = {"title": "foo", "id": "bar", "length": 42} + @pytest.fixture - def mb_plugin(self, plugin_config): + def plugin_config(self): + return {} + + @pytest.fixture + def mb(self, plugin_config): self.config[self.plugin].set(plugin_config) return musicbrainz.MusicBrainzPlugin() @@ -1070,17 +1004,17 @@ class TestMusicBrainzPlugin(PluginMixin): @pytest.mark.parametrize( "plugin_config,va_likely,expected_additional_criteria", [ - ({}, False, {"artist": "artist"}), + ({}, False, {"artist": "Artist "}), ({}, True, {"arid": "89ad4ac3-39f7-470e-963a-56509c546377"}), ( {"extra_tags": ["label", "catalognum"]}, False, - {"artist": "artist", "label": "abc", "catno": "abc123"}, + {"artist": "Artist ", "label": "abc", "catno": "ABC123"}, ), ], ) def test_get_album_criteria( - self, mb_plugin, va_likely, expected_additional_criteria + self, mb, va_likely, expected_additional_criteria ): items = [ Item(catalognum="ABC 123", label="abc"), @@ -1088,10 +1022,57 @@ class TestMusicBrainzPlugin(PluginMixin): Item(catalognum="ABC 123", label="def"), ] - assert mb_plugin.get_album_criteria( - items, "Artist ", " Album", va_likely - ) == { - "release": "album", + assert mb.get_album_criteria(items, "Artist ", " Album", va_likely) == { + "release": " Album", "tracks": str(len(items)), **expected_additional_criteria, } + + def test_item_candidates(self, monkeypatch, mb): + monkeypatch.setattr( + "musicbrainzngs.search_recordings", + lambda *_, 
**__: {"recording-list": [self.RECORDING]}, + ) + + candidates = list(mb.item_candidates(Item(), "hello", "there")) + + assert len(candidates) == 1 + assert candidates[0].track_id == self.RECORDING["id"] + + def test_candidates(self, monkeypatch, mb): + monkeypatch.setattr( + "musicbrainzngs.search_releases", + lambda *_, **__: {"release-list": [{"id": self.mbid}]}, + ) + monkeypatch.setattr( + "musicbrainzngs.get_release_by_id", + lambda *_, **__: { + "release": { + "title": "hi", + "id": self.mbid, + "status": "status", + "medium-list": [ + { + "track-list": [ + { + "id": "baz", + "recording": self.RECORDING, + "position": 9, + "number": "A1", + } + ], + "position": 5, + } + ], + "artist-credit": [ + {"artist": {"name": "some-artist", "id": "some-id"}} + ], + "release-group": {"id": "another-id"}, + } + }, + ) + candidates = list(mb.candidates([], "hello", "there", False)) + + assert len(candidates) == 1 + assert candidates[0].tracks[0].track_id == self.RECORDING["id"] + assert candidates[0].album == "hi" From e8e9369bc7d2a4d408dfe6c61fdb3d5ef52eff2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 19 May 2025 09:18:06 +0100 Subject: [PATCH 05/49] Remove unused extra_tags parameter --- beets/plugins.py | 17 ++--------------- beets/test/helper.py | 2 +- beetsplug/beatport.py | 2 +- beetsplug/chroma.py | 2 +- beetsplug/discogs.py | 2 +- docs/changelog.rst | 4 ++++ 6 files changed, 10 insertions(+), 19 deletions(-) diff --git a/beets/plugins.py b/beets/plugins.py index 26e70ed72..25452560a 100644 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -239,12 +239,7 @@ class BeetsPlugin: return Distance() def candidates( - self, - items: list[Item], - artist: str, - album: str, - va_likely: bool, - extra_tags: dict[str, Any] | None = None, + self, items: list[Item], artist: str, album: str, va_likely: bool ) -> Iterator[AlbumInfo]: """Return :py:class:`AlbumInfo` candidates that match the given album. 
@@ -252,9 +247,6 @@ class BeetsPlugin: :param artist: Album artist :param album: Album name :param va_likely: Whether the album is likely to be by various artists - :param extra_tags: is a an optional dictionary of extra tags to search. - Only relevant to :py:class:`MusicBrainzPlugin` autotagger and can be - ignored by other plugins """ yield from () @@ -872,12 +864,7 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta): return extract_release_id(self.data_source.lower(), id_string) def candidates( - self, - items: list[Item], - artist: str, - album: str, - va_likely: bool, - extra_tags: dict[str, Any] | None = None, + self, items: list[Item], artist: str, album: str, va_likely: bool ) -> Iterator[AlbumInfo]: query_filters = {"album": album} if not va_likely: diff --git a/beets/test/helper.py b/beets/test/helper.py index 66b4ddb71..a24836e84 100644 --- a/beets/test/helper.py +++ b/beets/test/helper.py @@ -806,7 +806,7 @@ class AutotagStub: for p in self.patchers: p.stop() - def candidates(self, items, artist, album, va_likely, extra_tags=None): + def candidates(self, items, artist, album, va_likely): if self.matching == self.IDENT: yield self._make_album_match(artist, album, len(items)) diff --git a/beetsplug/beatport.py b/beetsplug/beatport.py index d98fab722..20147b5cc 100644 --- a/beetsplug/beatport.py +++ b/beetsplug/beatport.py @@ -361,7 +361,7 @@ class BeatportPlugin(BeetsPlugin): data_source=self.data_source, info=track_info, config=self.config ) - def candidates(self, items, artist, release, va_likely, extra_tags=None): + def candidates(self, items, artist, release, va_likely): """Returns a list of AlbumInfo objects for beatport search results matching release and artist (if not various). 
""" diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 08fb97f59..518a41776 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -200,7 +200,7 @@ class AcoustidPlugin(plugins.BeetsPlugin): dist.add_expr("track_id", info.track_id not in recording_ids) return dist - def candidates(self, items, artist, album, va_likely, extra_tags=None): + def candidates(self, items, artist, album, va_likely): albums = [] for relid in prefix(_all_releases(items), MAX_RELEASES): album = self.mb.album_for_id(relid) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index a8d08c1e9..1852f300f 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -156,7 +156,7 @@ class DiscogsPlugin(BeetsPlugin): data_source="Discogs", info=track_info, config=self.config ) - def candidates(self, items, artist, album, va_likely, extra_tags=None): + def candidates(self, items, artist, album, va_likely): """Returns a list of AlbumInfo objects for discogs search results matching an album and artist (if not various). """ diff --git a/docs/changelog.rst b/docs/changelog.rst index a01ee8c97..3370f5396 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -26,6 +26,10 @@ Bug fixes: For packagers: +* Optional ``extra_tags`` parameter has been removed from + ``BeetsPlugin.candidates`` method signature since it is never passed in. If + you override this method in your plugin, feel free to remove this parameter. 
+ Other changes: 2.3.1 (May 14, 2025) From 9242db04a556a587ac6097b00a62498331a2bc8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Fri, 9 May 2025 12:51:05 +0100 Subject: [PATCH 06/49] discogs: add configurable search_limit --- beetsplug/discogs.py | 14 +++++++------- docs/changelog.rst | 2 ++ docs/plugins/discogs.rst | 15 ++++++++++++--- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 1852f300f..187a30e4e 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -73,6 +73,7 @@ class DiscogsPlugin(BeetsPlugin): "separator": ", ", "index_tracks": False, "append_style_genre": False, + "search_limit": 5, } ) self.config["apikey"].redact = True @@ -257,8 +258,8 @@ class DiscogsPlugin(BeetsPlugin): ) if track_result: candidates.append(track_result) - # first 10 results, don't overwhelm with options - return candidates[:10] + + return candidates def album_for_id(self, album_id): """Fetches an album by its Discogs ID and returns an AlbumInfo object @@ -303,8 +304,9 @@ class DiscogsPlugin(BeetsPlugin): query = re.sub(r"(?i)\b(CD|disc|vinyl)\s*\d+", "", query) try: - releases = self.discogs_client.search(query, type="release").page(1) - + results = self.discogs_client.search(query, type="release") + results.per_page = self.config["search_limit"].as_number() + releases = results.page(1) except CONNECTION_ERRORS: self._log.debug( "Communication error while searching for {0!r}", @@ -312,9 +314,7 @@ class DiscogsPlugin(BeetsPlugin): exc_info=True, ) return [] - return [ - album for album in map(self.get_album_info, releases[:5]) if album - ] + return map(self.get_album_info, releases) def get_master_year(self, master_id): """Fetches a master release given its Discogs ID and returns its year diff --git a/docs/changelog.rst b/docs/changelog.rst index 3370f5396..ebb9880a9 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -17,6 +17,8 @@ New features: :bug:`4605` * 
:doc:`plugins/web`: Show notifications when a track plays. This uses the Media Session API to customize media notifications. +* :doc:`plugins/discogs`: Add configurable ``search_limit`` option to + limit the number of results returned by the Discogs metadata search queries. Bug fixes: diff --git a/docs/plugins/discogs.rst b/docs/plugins/discogs.rst index ac67f2d0a..c8df12a41 100644 --- a/docs/plugins/discogs.rst +++ b/docs/plugins/discogs.rst @@ -101,11 +101,20 @@ This option is useful when importing classical music. Other configurations available under ``discogs:`` are: -- **append_style_genre**: Appends the Discogs style (if found) to the genre tag. This can be useful if you want more granular genres to categorize your music. - For example, a release in Discogs might have a genre of "Electronic" and a style of "Techno": enabling this setting would set the genre to be "Electronic, Techno" (assuming default separator of ``", "``) instead of just "Electronic". +- **append_style_genre**: Appends the Discogs style (if found) to the genre + tag. This can be useful if you want more granular genres to categorize your + music. For example, a release in Discogs might have a genre of "Electronic" + and a style of "Techno": enabling this setting would set the genre to be + "Electronic, Techno" (assuming default separator of ``", "``) instead of just + "Electronic". Default: ``False`` -- **separator**: How to join multiple genre and style values from Discogs into a string. +- **separator**: How to join multiple genre and style values from Discogs into + a string. Default: ``", "`` +- **search_limit**: The maximum number of results to return from Discogs. This is + useful if you want to limit the number of results returned to speed up + searches. 
+ Default: ``5`` Troubleshooting From 09862aeaea523a6de688543500f1ddfb664fa3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Fri, 9 May 2025 13:00:06 +0100 Subject: [PATCH 07/49] discogs: Add types to public methods --- beetsplug/discogs.py | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 187a30e4e..cd6e0cfd1 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -26,6 +26,7 @@ import socket import time import traceback from string import ascii_lowercase +from typing import TYPE_CHECKING import confuse from discogs_client import Client, Master, Release @@ -40,6 +41,11 @@ from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance from beets.util.id_extractors import extract_release_id +if TYPE_CHECKING: + from collections.abc import Iterable + + from beets.library import Item + USER_AGENT = f"beets/{beets.__version__} +https://beets.io/" API_KEY = "rAzVUQYRaoFjeBjyWuWZ" API_SECRET = "plxtUTqoCzwxZpqdPysCwGuBSmZNdZVy" @@ -157,16 +163,9 @@ class DiscogsPlugin(BeetsPlugin): data_source="Discogs", info=track_info, config=self.config ) - def candidates(self, items, artist, album, va_likely): - """Returns a list of AlbumInfo objects for discogs search results - matching an album and artist (if not various). - """ - if not album and not artist: - self._log.debug( - "Skipping Discogs query. Files missing album and artist tags." - ) - return [] - + def candidates( + self, items: list[Item], artist: str, album: str, va_likely: bool + ) -> Iterable[AlbumInfo]: if va_likely: query = album else: @@ -220,24 +219,9 @@ class DiscogsPlugin(BeetsPlugin): return None - def item_candidates(self, item, artist, title): - """Returns a list of TrackInfo objects for Search API results - matching ``title`` and ``artist``. 
- :param item: Singleton item to be matched. - :type item: beets.library.Item - :param artist: The artist of the track to be matched. - :type artist: str - :param title: The title of the track to be matched. - :type title: str - :return: Candidate TrackInfo objects. - :rtype: list[beets.autotag.hooks.TrackInfo] - """ - if not artist and not title: - self._log.debug( - "Skipping Discogs query. File missing artist and title tags." - ) - return [] - + def item_candidates( + self, item: Item, artist: str, title: str + ) -> Iterable[TrackInfo]: query = f"{artist} {title}" try: albums = self.get_albums(query) @@ -261,7 +245,7 @@ class DiscogsPlugin(BeetsPlugin): return candidates - def album_for_id(self, album_id): + def album_for_id(self, album_id: str) -> AlbumInfo | None: """Fetches an album by its Discogs ID and returns an AlbumInfo object or None if the album is not found. """ @@ -292,7 +276,7 @@ class DiscogsPlugin(BeetsPlugin): return None return self.get_album_info(result) - def get_albums(self, query): + def get_albums(self, query: str) -> Iterable[AlbumInfo]: """Returns a list of AlbumInfo objects for a discogs search query.""" # Strip non-word characters from query. Things like "!" 
and "-" can # cause a query to return no results, even if they match the artist or From 12149b3e6d525075f0521da178bd29fd180882ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Fri, 9 May 2025 13:44:54 +0100 Subject: [PATCH 08/49] discogs: remove duplicate error handling --- beetsplug/discogs.py | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index cd6e0cfd1..93791ad5e 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -166,22 +166,7 @@ class DiscogsPlugin(BeetsPlugin): def candidates( self, items: list[Item], artist: str, album: str, va_likely: bool ) -> Iterable[AlbumInfo]: - if va_likely: - query = album - else: - query = f"{artist} {album}" - try: - return self.get_albums(query) - except DiscogsAPIError as e: - self._log.debug("API Error: {0} (query: {1})", e, query) - if e.status_code == 401: - self.reset_auth() - return self.candidates(items, artist, album, va_likely) - else: - return [] - except CONNECTION_ERRORS: - self._log.debug("Connection error in album search", exc_info=True) - return [] + return self.get_albums(f"{artist} {album}" if va_likely else album) def get_track_from_album_by_title( self, album_info, title, dist_threshold=0.3 @@ -222,18 +207,8 @@ class DiscogsPlugin(BeetsPlugin): def item_candidates( self, item: Item, artist: str, title: str ) -> Iterable[TrackInfo]: - query = f"{artist} {title}" - try: - albums = self.get_albums(query) - except DiscogsAPIError as e: - self._log.debug("API Error: {0} (query: {1})", e, query) - if e.status_code == 401: - self.reset_auth() - return self.item_candidates(item, artist, title) - else: - return [] - except CONNECTION_ERRORS: - self._log.debug("Connection error in track search", exc_info=True) + albums = self.candidates([item], artist, title, False) + candidates = [] for album_cur in albums: self._log.debug("searching within album {0}", album_cur.album) From 
8e5858254b056fd555c8afce12128f0a7c40d7e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Fri, 9 May 2025 14:27:41 +0100 Subject: [PATCH 09/49] discogs: cache master release lookups --- beetsplug/discogs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 93791ad5e..ad1310712 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -25,6 +25,7 @@ import re import socket import time import traceback +from functools import cache from string import ascii_lowercase from typing import TYPE_CHECKING @@ -275,16 +276,16 @@ class DiscogsPlugin(BeetsPlugin): return [] return map(self.get_album_info, releases) - def get_master_year(self, master_id): + @cache + def get_master_year(self, master_id: str) -> int | None: """Fetches a master release given its Discogs ID and returns its year or None if the master release is not found. """ - self._log.debug("Searching for master release {0}", master_id) + self._log.debug("Getting master release {0}", master_id) result = Master(self.discogs_client, {"id": master_id}) try: - year = result.fetch("year") - return year + return result.fetch("year") except DiscogsAPIError as e: if e.status_code != 404: self._log.debug( From 9cc7ecaceb39456e0f739be87c4eef096f9634c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Fri, 9 May 2025 14:53:05 +0100 Subject: [PATCH 10/49] discogs: cache TRACK_INDEX_RE --- beetsplug/discogs.py | 42 ++++++++++++++++++++---------------- test/plugins/test_discogs.py | 40 ++++++++++++++++------------------ 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index ad1310712..8fdd515bd 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -61,6 +61,22 @@ CONNECTION_ERRORS = ( ) +TRACK_INDEX_RE = re.compile( + r""" + (.*?) # medium: everything before medium_index. + (\d*?) 
# medium_index: a number at the end of + # `position`, except if followed by a subtrack index. + # subtrack_index: can only be matched if medium + # or medium_index have been matched, and can be + ( + (?<=\w)\.[\w]+ # a dot followed by a string (A.1, 2.A) + | (?<=\d)[A-Z]+ # a string that follows a number (1A, B2a) + )? + """, + re.VERBOSE, +) + + class ReleaseFormat(TypedDict): name: str qty: int @@ -655,33 +671,21 @@ class DiscogsPlugin(BeetsPlugin): medium_index=medium_index, ) - def get_track_index(self, position): + @staticmethod + def get_track_index( + position: str, + ) -> tuple[str | None, str | None, str | None]: """Returns the medium, medium index and subtrack index for a discogs track position.""" # Match the standard Discogs positions (12.2.9), which can have several # forms (1, 1-1, A1, A1.1, A1a, ...). - match = re.match( - r"^(.*?)" # medium: everything before medium_index. - r"(\d*?)" # medium_index: a number at the end of - # `position`, except if followed by a subtrack - # index. - # subtrack_index: can only be matched if medium - # or medium_index have been matched, and can be - r"((?<=\w)\.[\w]+" # - a dot followed by a string (A.1, 2.A) - r"|(?<=\d)[A-Z]+" # - a string that follows a number (1A, B2a) - r")?" 
- r"$", - position.upper(), - ) - - if match: + medium = index = subindex = None + if match := TRACK_INDEX_RE.fullmatch(position.upper()): medium, index, subindex = match.groups() if subindex and subindex.startswith("."): subindex = subindex[1:] - else: - self._log.debug("Invalid position: {0}", position) - medium = index = subindex = None + return medium or None, index or None, subindex or None def get_track_length(self, duration): diff --git a/test/plugins/test_discogs.py b/test/plugins/test_discogs.py index eb9a625b1..c31ac7511 100644 --- a/test/plugins/test_discogs.py +++ b/test/plugins/test_discogs.py @@ -171,27 +171,6 @@ class DGAlbumInfoTest(BeetsTestCase): assert t[3].index == 4 assert t[3].medium_total == 1 - def test_parse_position(self): - """Test the conversion of discogs `position` to medium, medium_index - and subtrack_index.""" - # List of tuples (discogs_position, (medium, medium_index, subindex) - positions = [ - ("1", (None, "1", None)), - ("A12", ("A", "12", None)), - ("12-34", ("12-", "34", None)), - ("CD1-1", ("CD1-", "1", None)), - ("1.12", (None, "1", "12")), - ("12.a", (None, "12", "A")), - ("12.34", (None, "12", "34")), - ("1ab", (None, "1", "AB")), - # Non-standard - ("IV", ("IV", None, None)), - ] - - d = DiscogsPlugin() - for position, expected in positions: - assert d.get_track_index(position) == expected - def test_parse_tracklist_without_sides(self): """Test standard Discogs position 12.2.9#1: "without sides".""" release = self._make_release_from_positions(["1", "2", "3"]) @@ -417,3 +396,22 @@ def test_get_media_and_albumtype(formats, expected_media, expected_albumtype): result = DiscogsPlugin.get_media_and_albumtype(formats) assert result == (expected_media, expected_albumtype) + + +@pytest.mark.parametrize( + "position, medium, index, subindex", + [ + ("1", None, "1", None), + ("A12", "A", "12", None), + ("12-34", "12-", "34", None), + ("CD1-1", "CD1-", "1", None), + ("1.12", None, "1", "12"), + ("12.a", None, "12", "A"), + 
("12.34", None, "12", "34"), + ("1ab", None, "1", "AB"), + # Non-standard + ("IV", "IV", None, None), + ], +) +def test_get_track_index(position, medium, index, subindex): + assert DiscogsPlugin.get_track_index(position) == (medium, index, subindex) From d9b67acff5478fc96bce61e80c7ff97f2f6d6545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Fri, 9 May 2025 14:55:25 +0100 Subject: [PATCH 11/49] discogs: simplify getting track from album --- beetsplug/discogs.py | 59 ++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 43 deletions(-) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 8fdd515bd..34d8f21d1 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -43,7 +43,7 @@ from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance from beets.util.id_extractors import extract_release_id if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Callable, Iterable from beets.library import Item @@ -185,57 +185,30 @@ class DiscogsPlugin(BeetsPlugin): ) -> Iterable[AlbumInfo]: return self.get_albums(f"{artist} {album}" if va_likely else album) - def get_track_from_album_by_title( - self, album_info, title, dist_threshold=0.3 - ): - def compare_func(track_info): - track_title = getattr(track_info, "title", None) - dist = string_dist(track_title, title) - return track_title and dist < dist_threshold - - return self.get_track_from_album(album_info, compare_func) - - def get_track_from_album(self, album_info, compare_func): - """Return the first track of the release where `compare_func` returns - true. - - :return: TrackInfo object. 
- :rtype: beets.autotag.hooks.TrackInfo - """ - if not album_info: + def get_track_from_album( + self, album_info: AlbumInfo, compare: Callable[[TrackInfo], float] + ) -> TrackInfo | None: + """Return the best matching track of the release.""" + scores_and_tracks = [(compare(t), t) for t in album_info.tracks] + score, track_info = min(scores_and_tracks, key=lambda x: x[0]) + if score > 0.3: return None - for track_info in album_info.tracks: - # check for matching position - if not compare_func(track_info): - continue - - # attach artist info if not provided - if not track_info["artist"]: - track_info["artist"] = album_info.artist - track_info["artist_id"] = album_info.artist_id - # attach album info - track_info["album"] = album_info.album - - return track_info - - return None + track_info["artist"] = album_info.artist + track_info["artist_id"] = album_info.artist_id + track_info["album"] = album_info.album + return track_info def item_candidates( self, item: Item, artist: str, title: str ) -> Iterable[TrackInfo]: albums = self.candidates([item], artist, title, False) - candidates = [] - for album_cur in albums: - self._log.debug("searching within album {0}", album_cur.album) - track_result = self.get_track_from_album_by_title( - album_cur, item["title"] - ) - if track_result: - candidates.append(track_result) + def compare_func(track_info: TrackInfo) -> float: + return string_dist(track_info.title, title) - return candidates + tracks = (self.get_track_from_album(a, compare_func) for a in albums) + return list(filter(None, tracks)) def album_for_id(self, album_id: str) -> AlbumInfo | None: """Fetches an album by its Discogs ID and returns an AlbumInfo object From d3ef627494ed6678b3189a633c3b82f8bf30011e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 18 May 2025 04:17:19 +0100 Subject: [PATCH 12/49] Expect plugins to return Iterables instead of Iterators --- beets/plugins.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 
deletions(-) diff --git a/beets/plugins.py b/beets/plugins.py index 25452560a..63e5d3bde 100644 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -46,7 +46,7 @@ else: if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Iterable from confuse import ConfigView @@ -70,7 +70,7 @@ if TYPE_CHECKING: P = ParamSpec("P") Ret = TypeVar("Ret", bound=Any) Listener = Callable[..., None] - IterF = Callable[P, Iterator[Ret]] + IterF = Callable[P, Iterable[Ret]] PLUGIN_NAMESPACE = "beetsplug" @@ -240,7 +240,7 @@ class BeetsPlugin: def candidates( self, items: list[Item], artist: str, album: str, va_likely: bool - ) -> Iterator[AlbumInfo]: + ) -> Iterable[AlbumInfo]: """Return :py:class:`AlbumInfo` candidates that match the given album. :param items: List of items in the album @@ -252,7 +252,7 @@ class BeetsPlugin: def item_candidates( self, item: Item, artist: str, title: str - ) -> Iterator[TrackInfo]: + ) -> Iterable[TrackInfo]: """Return :py:class:`TrackInfo` candidates that match the given track. 
:param item: Track item @@ -487,7 +487,7 @@ def notify_info_yielded(event: str) -> Callable[[IterF[P, Ret]], IterF[P, Ret]]: def decorator(func: IterF[P, Ret]) -> IterF[P, Ret]: @wraps(func) - def wrapper(*args: P.args, **kwargs: P.kwargs) -> Iterator[Ret]: + def wrapper(*args: P.args, **kwargs: P.kwargs) -> Iterable[Ret]: for v in func(*args, **kwargs): send(event, info=v) yield v @@ -498,14 +498,14 @@ def notify_info_yielded(event: str) -> Callable[[IterF[P, Ret]], IterF[P, Ret]]: @notify_info_yielded("albuminfo_received") -def candidates(*args, **kwargs) -> Iterator[AlbumInfo]: +def candidates(*args, **kwargs) -> Iterable[AlbumInfo]: """Return matching album candidates from all plugins.""" for plugin in find_plugins(): yield from plugin.candidates(*args, **kwargs) @notify_info_yielded("trackinfo_received") -def item_candidates(*args, **kwargs) -> Iterator[TrackInfo]: +def item_candidates(*args, **kwargs) -> Iterable[TrackInfo]: """Return matching track candidates from all plugins.""" for plugin in find_plugins(): yield from plugin.item_candidates(*args, **kwargs) @@ -865,7 +865,7 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta): def candidates( self, items: list[Item], artist: str, album: str, va_likely: bool - ) -> Iterator[AlbumInfo]: + ) -> Iterable[AlbumInfo]: query_filters = {"album": album} if not va_likely: query_filters["artist"] = artist @@ -875,7 +875,7 @@ class MetadataSourcePlugin(Generic[R], BeetsPlugin, metaclass=abc.ABCMeta): def item_candidates( self, item: Item, artist: str, title: str - ) -> Iterator[TrackInfo]: + ) -> Iterable[TrackInfo]: for result in self._search_api( "track", {"artist": artist}, keywords=title ): From e151b4b49bb85cdf8074201b56fadfbc6079cab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 19 May 2025 08:55:58 +0100 Subject: [PATCH 13/49] Implement track_for_id to allow fetching singletons by discogs id --- beetsplug/discogs.py | 8 ++++++++ docs/changelog.rst | 3 
+++ 2 files changed, 11 insertions(+) diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 34d8f21d1..696f1d1ac 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -241,6 +241,14 @@ class DiscogsPlugin(BeetsPlugin): return None return self.get_album_info(result) + def track_for_id(self, track_id: str) -> TrackInfo | None: + if album := self.album_for_id(track_id): + for track in album.tracks: + if track.track_id == track_id: + return track + + return None + def get_albums(self, query: str) -> Iterable[AlbumInfo]: """Returns a list of AlbumInfo objects for a discogs search query.""" # Strip non-word characters from query. Things like "!" and "-" can diff --git a/docs/changelog.rst b/docs/changelog.rst index ebb9880a9..825e287f6 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -19,6 +19,9 @@ New features: Media Session API to customize media notifications. * :doc:`plugins/discogs`: Add configurable ``search_limit`` option to limit the number of results returned by the Discogs metadata search queries. +* :doc:`plugins/discogs`: Implement ``track_for_id`` method to allow retrieving + singletons by their Discogs ID. 
+ :bug:`4661` Bug fixes: From 395aec96a36a440ab0ca9afc87f76471676bac98 Mon Sep 17 00:00:00 2001 From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com> Date: Wed, 9 Apr 2025 09:42:36 +0200 Subject: [PATCH 14/49] fetchart: fixup #5244 by restoring config validation and making things more Pythonic --- beetsplug/fetchart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py index 5451b4dbb..85bed6233 100644 --- a/beetsplug/fetchart.py +++ b/beetsplug/fetchart.py @@ -1295,7 +1295,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): @staticmethod def _is_source_file_removal_enabled(): - return config["import"]["delete"] or config["import"]["move"] + return config["import"]["delete"].get(bool) or config["import"]["move"].get(bool) # Asynchronous; after music is added to the library. def fetch_art(self, session, task): @@ -1339,7 +1339,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): """Place the discovered art in the filesystem.""" if task in self.art_candidates: candidate = self.art_candidates.pop(task) - removal_enabled = FetchArtPlugin._is_source_file_removal_enabled() + removal_enabled = self._is_source_file_removal_enabled() self._set_art(task.album, candidate, not removal_enabled) From a6f2389aed35474123e6f97bbdd22aa6b27f3236 Mon Sep 17 00:00:00 2001 From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com> Date: Wed, 9 Apr 2025 00:49:42 +0200 Subject: [PATCH 15/49] typing: fetchart + tests --- beets/test/helper.py | 31 ++- beetsplug/fetchart.py | 484 +++++++++++++++++++++++++-------------- test/plugins/test_art.py | 47 +++- 3 files changed, 381 insertions(+), 181 deletions(-) diff --git a/beets/test/helper.py b/beets/test/helper.py index a24836e84..b86db5b23 100644 --- a/beets/test/helper.py +++ b/beets/test/helper.py @@ -886,20 +886,43 @@ class FetchImageHelper: def run(self, *args, **kwargs): super().run(*args, **kwargs) - IMAGEHEADER = { + IMAGEHEADER: dict[str, 
bytes] = { "image/jpeg": b"\xff\xd8\xff" + b"\x00" * 3 + b"JFIF", "image/png": b"\211PNG\r\n\032\n", + "image/gif": b"GIF89a", + # dummy type that is definitely not a valid image content type + "image/watercolour": b"watercolour", + "text/html": ( + b"\n\n\n\n" + b"\n\n" + ), } - def mock_response(self, url, content_type="image/jpeg", file_type=None): + def mock_response( + self, + url: str, + content_type: str = "image/jpeg", + file_type: None | str = None, + ) -> None: + # Potentially return a file of a type that differs from the + # server-advertised content type to mimic misbehaving servers. if file_type is None: file_type = content_type + + try: + # imghdr reads 32 bytes + header = self.IMAGEHEADER[file_type].ljust(32, b"\x00") + except KeyError: + # If we can't return a file that looks like real file of the requested + # type, better fail the test than returning something else, which might + # violate assumption made when writing a test. + raise AssertionError(f"Mocking {file_type} responses not supported") + responses.add( responses.GET, url, content_type=content_type, - # imghdr reads 32 bytes - body=self.IMAGEHEADER.get(file_type, b"").ljust(32, b"\x00"), + body=header, ) diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py index 85bed6233..b04347f2c 100644 --- a/beetsplug/fetchart.py +++ b/beetsplug/fetchart.py @@ -14,10 +14,15 @@ """Fetches album art.""" +from __future__ import annotations + import os import re +from abc import ABC, abstractmethod from collections import OrderedDict from contextlib import closing +from enum import Enum +from typing import TYPE_CHECKING, AnyStr, ClassVar, Literal, Tuple, Type import confuse import requests @@ -27,8 +32,15 @@ from beets import config, importer, plugins, ui, util from beets.util import bytestring_path, get_temp_filename, sorted_walk, syspath from beets.util.artresizer import ArtResizer +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Sequence + from logging import Logger + + from 
beets.importer import ImportSession, ImportTask + from beets.library import Album, Library + try: - from bs4 import BeautifulSoup + from bs4 import BeautifulSoup, Tag HAS_BEAUTIFUL_SOUP = True except ImportError: @@ -39,33 +51,54 @@ CONTENT_TYPES = {"image/jpeg": [b"jpg", b"jpeg"], "image/png": [b"png"]} IMAGE_EXTENSIONS = [ext for exts in CONTENT_TYPES.values() for ext in exts] +class ImageAction(Enum): + """Indicates whether an image is useable or requires post-processing.""" + + BAD = 0 + EXACT = 1 + DOWNSCALE = 2 + DOWNSIZE = 3 + DEINTERLACE = 4 + REFORMAT = 5 + + +class MetadataMatch(Enum): + """Indicates whether a `Candidate` matches the search criteria exactly.""" + + EXACT = 0 + FALLBACK = 1 + + +SourceLocation = Literal["local", "remote"] + + class Candidate: """Holds information about a matching artwork, deals with validation of dimension restrictions and resizing. """ - CANDIDATE_BAD = 0 - CANDIDATE_EXACT = 1 - CANDIDATE_DOWNSCALE = 2 - CANDIDATE_DOWNSIZE = 3 - CANDIDATE_DEINTERLACE = 4 - CANDIDATE_REFORMAT = 5 - - MATCH_EXACT = 0 - MATCH_FALLBACK = 1 - def __init__( - self, log, path=None, url=None, source="", match=None, size=None + self, + log: Logger, + source_name: str, + path: None | bytes = None, + url: None | str = None, + match: None | MetadataMatch = None, + size: None | Tuple[int, int] = None, ): self._log = log self.path = path self.url = url - self.source = source - self.check = None + self.source_name = source_name + self._check: None | ImageAction = None self.match = match self.size = size - def _validate(self, plugin, skip_check_for=None): + def _validate( + self, + plugin: FetchArtPlugin, + skip_check_for: None | list[ImageAction] = None, + ) -> ImageAction: """Determine whether the candidate artwork is valid based on its dimensions (width and ratio). @@ -74,21 +107,16 @@ class Candidate: validated for a particular operation without changing plugin configuration. - Return `CANDIDATE_BAD` if the file is unusable. 
- Return `CANDIDATE_EXACT` if the file is usable as-is. - Return `CANDIDATE_DOWNSCALE` if the file must be rescaled. - Return `CANDIDATE_DOWNSIZE` if the file must be resized, and possibly + Return `ImageAction.BAD` if the file is unusable. + Return `ImageAction.EXACT` if the file is usable as-is. + Return `ImageAction.DOWNSCALE` if the file must be rescaled. + Return `ImageAction.DOWNSIZE` if the file must be resized, and possibly also rescaled. - Return `CANDIDATE_DEINTERLACE` if the file must be deinterlaced. - Return `CANDIDATE_REFORMAT` if the file has to be converted. + Return `ImageAction.DEINTERLACE` if the file must be deinterlaced. + Return `ImageAction.REFORMAT` if the file has to be converted. """ if not self.path: - return self.CANDIDATE_BAD - - if skip_check_for is None: - skip_check_for = [] - if isinstance(skip_check_for, int): - skip_check_for = [skip_check_for] + return ImageAction.BAD if not ( plugin.enforce_ratio @@ -98,7 +126,7 @@ class Candidate: or plugin.deinterlace or plugin.cover_format ): - return self.CANDIDATE_EXACT + return ImageAction.EXACT # get_size returns None if no local imaging backend is available if not self.size: @@ -113,7 +141,7 @@ class Candidate: "`enforce_ratio` and `max_filesize` " "may be violated." ) - return self.CANDIDATE_EXACT + return ImageAction.EXACT short_edge = min(self.size) long_edge = max(self.size) @@ -123,7 +151,7 @@ class Candidate: self._log.debug( "image too small ({} < {})", self.size[0], plugin.minwidth ) - return self.CANDIDATE_BAD + return ImageAction.BAD # Check aspect ratio. 
edge_diff = long_edge - short_edge @@ -137,7 +165,7 @@ class Candidate: short_edge, plugin.margin_px, ) - return self.CANDIDATE_BAD + return ImageAction.BAD elif plugin.margin_percent: margin_px = plugin.margin_percent * long_edge if edge_diff > margin_px: @@ -148,13 +176,13 @@ class Candidate: short_edge, margin_px, ) - return self.CANDIDATE_BAD + return ImageAction.BAD elif edge_diff: # also reached for margin_px == 0 and margin_percent == 0.0 self._log.debug( "image is not square ({} != {})", self.size[0], self.size[1] ) - return self.CANDIDATE_BAD + return ImageAction.BAD # Check maximum dimension. downscale = False @@ -188,23 +216,29 @@ class Candidate: plugin.cover_format, ) - if downscale and (self.CANDIDATE_DOWNSCALE not in skip_check_for): - return self.CANDIDATE_DOWNSCALE - if reformat and (self.CANDIDATE_REFORMAT not in skip_check_for): - return self.CANDIDATE_REFORMAT + skip_check_for = skip_check_for or [] + + if downscale and (ImageAction.DOWNSCALE not in skip_check_for): + return ImageAction.DOWNSCALE + if reformat and (ImageAction.REFORMAT not in skip_check_for): + return ImageAction.REFORMAT if plugin.deinterlace and ( - self.CANDIDATE_DEINTERLACE not in skip_check_for + ImageAction.DEINTERLACE not in skip_check_for ): - return self.CANDIDATE_DEINTERLACE - if downsize and (self.CANDIDATE_DOWNSIZE not in skip_check_for): - return self.CANDIDATE_DOWNSIZE - return self.CANDIDATE_EXACT + return ImageAction.DEINTERLACE + if downsize and (ImageAction.DOWNSIZE not in skip_check_for): + return ImageAction.DOWNSIZE + return ImageAction.EXACT - def validate(self, plugin, skip_check_for=None): - self.check = self._validate(plugin, skip_check_for) - return self.check + def validate( + self, + plugin: FetchArtPlugin, + skip_check_for: None | list[ImageAction] = None, + ) -> ImageAction: + self._check = self._validate(plugin, skip_check_for) + return self._check - def resize(self, plugin): + def resize(self, plugin: FetchArtPlugin) -> None: """Resize the 
candidate artwork according to the plugin's configuration until it is valid or no further resizing is possible. @@ -214,25 +248,30 @@ class Candidate: checks_performed = [] # we don't want to resize the image if it's valid or bad - while current_check not in [self.CANDIDATE_BAD, self.CANDIDATE_EXACT]: + while current_check not in [ImageAction.BAD, ImageAction.EXACT]: self._resize(plugin, current_check) checks_performed.append(current_check) current_check = self.validate( plugin, skip_check_for=checks_performed ) - def _resize(self, plugin, check=None): + def _resize( + self, plugin: FetchArtPlugin, check: None | ImageAction = None + ) -> None: """Resize the candidate artwork according to the plugin's configuration and the specified check. """ - if check == self.CANDIDATE_DOWNSCALE: + if check == ImageAction.DOWNSCALE: self.path = ArtResizer.shared.resize( plugin.maxwidth, self.path, quality=plugin.quality, max_filesize=plugin.max_filesize, ) - elif check == self.CANDIDATE_DOWNSIZE: + elif check == ImageAction.DOWNSIZE: + # This must only be called when _validate returned something other + # than ImageAction.Bad or ImageAction.EXACT; then the size is known. + assert self.size is not None # dimensions are correct, so maxwidth is set to maximum dimension self.path = ArtResizer.shared.resize( max(self.size), @@ -240,9 +279,9 @@ class Candidate: quality=plugin.quality, max_filesize=plugin.max_filesize, ) - elif check == self.CANDIDATE_DEINTERLACE: + elif check == ImageAction.DEINTERLACE: self.path = ArtResizer.shared.deinterlace(self.path) - elif check == self.CANDIDATE_REFORMAT: + elif check == ImageAction.REFORMAT: self.path = ArtResizer.shared.reformat( self.path, plugin.cover_format, @@ -250,7 +289,7 @@ class Candidate: ) -def _logged_get(log, *args, **kwargs): +def _logged_get(log: Logger, *args, **kwargs) -> requests.Response: """Like `requests.get`, but logs the effective URL to the specified `log` at the `DEBUG` level. 
@@ -295,7 +334,9 @@ class RequestMixin: must be named `self._log`. """ - def request(self, *args, **kwargs): + _log: Logger + + def request(self, *args, **kwargs) -> requests.Response: """Like `requests.get`, but uses the logger `self._log`. See also `_logged_get`. @@ -306,55 +347,83 @@ class RequestMixin: # ART SOURCES ################################################################ -class ArtSource(RequestMixin): - VALID_MATCHING_CRITERIA = ["default"] +class ArtSource(RequestMixin, ABC): + # Specify whether this source fetches local or remote images + LOC: ClassVar[SourceLocation] + # A list of methods to match metadata, sorted by descending accuracy + VALID_MATCHING_CRITERIA: list[str] = ["default"] + # A human-readable name for the art source + NAME: ClassVar[str] + # The key to select the art source in the config. This value will also be + # stored in the database. + ID: ClassVar[str] - def __init__(self, log, config, match_by=None): + def __init__( + self, + log: Logger, + config: confuse.ConfigView, + match_by: None | list[str] = None, + ) -> None: self._log = log self._config = config self.match_by = match_by or self.VALID_MATCHING_CRITERIA + def describe(self) -> str: + return f"{self.ID}[{', '.join(self.match_by)}]" + @staticmethod - def add_default_config(config): + def add_default_config(config: confuse.ConfigView) -> None: pass @classmethod - def available(cls, log, config): + def available(cls, log: Logger, config: confuse.ConfigView) -> bool: """Return whether or not all dependencies are met and the art source is in fact usable. 
""" return True - def get(self, album, plugin, paths): - raise NotImplementedError() + @abstractmethod + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: + pass - def _candidate(self, **kwargs): - return Candidate(source=self, log=self._log, **kwargs) + def _candidate(self, **kwargs) -> Candidate: + return Candidate(source_name=self.ID, log=self._log, **kwargs) - def fetch_image(self, candidate, plugin): - raise NotImplementedError() + @abstractmethod + def fetch_image(self, candidate: Candidate, plugin: FetchArtPlugin) -> None: + """Fetch the image to a temporary file if it is not already available + as a local file. - def cleanup(self, candidate): + After calling this, `Candidate.path` is set to the image path if + successful, or to `None` otherwise. + """ + pass + + def cleanup(self, candidate: Candidate) -> None: pass class LocalArtSource(ArtSource): - IS_LOCAL = True - LOC_STR = "local" + LOC = "local" - def fetch_image(self, candidate, plugin): + def fetch_image(self, candidate: Candidate, plugin: FetchArtPlugin) -> None: pass class RemoteArtSource(ArtSource): - IS_LOCAL = False - LOC_STR = "remote" + LOC = "remote" - def fetch_image(self, candidate, plugin): + def fetch_image(self, candidate: Candidate, plugin: FetchArtPlugin) -> None: """Downloads an image from a URL and checks whether it seems to - actually be an image. If so, returns a path to the downloaded image. - Otherwise, returns None. + actually be an image. 
""" + assert candidate.path is None + if plugin.maxwidth: candidate.url = ArtResizer.shared.proxy_url( plugin.maxwidth, candidate.url @@ -429,7 +498,7 @@ class RemoteArtSource(ArtSource): self._log.debug("error fetching art: {}", exc) return - def cleanup(self, candidate): + def cleanup(self, candidate: Candidate) -> None: if candidate.path: try: util.remove(path=candidate.path) @@ -439,19 +508,28 @@ class RemoteArtSource(ArtSource): class CoverArtArchive(RemoteArtSource): NAME = "Cover Art Archive" + ID = "coverart" VALID_MATCHING_CRITERIA = ["release", "releasegroup"] VALID_THUMBNAIL_SIZES = [250, 500, 1200] URL = "https://coverartarchive.org/release/{mbid}" GROUP_URL = "https://coverartarchive.org/release-group/{mbid}" - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: """Return the Cover Art Archive and Cover Art Archive release group URLs using album MusicBrainz release ID and release group ID. 
""" - def get_image_urls(url, preferred_width=None): + def get_image_urls( + url: str, + preferred_width: None | str = None, + ) -> Iterator[str]: try: response = self.request(url) except requests.RequestException: @@ -500,34 +578,46 @@ class CoverArtArchive(RemoteArtSource): if "release" in self.match_by and album.mb_albumid: for url in get_image_urls(release_url, preferred_width): - yield self._candidate(url=url, match=Candidate.MATCH_EXACT) + yield self._candidate(url=url, match=MetadataMatch.EXACT) if "releasegroup" in self.match_by and album.mb_releasegroupid: for url in get_image_urls(release_group_url, preferred_width): - yield self._candidate(url=url, match=Candidate.MATCH_FALLBACK) + yield self._candidate(url=url, match=MetadataMatch.FALLBACK) class Amazon(RemoteArtSource): NAME = "Amazon" + ID = "amazon" URL = "https://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg" INDICES = (1, 2) - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: """Generate URLs using Amazon ID (ASIN) string.""" if album.asin: for index in self.INDICES: yield self._candidate( url=self.URL % (album.asin, index), - match=Candidate.MATCH_EXACT, + match=MetadataMatch.EXACT, ) class AlbumArtOrg(RemoteArtSource): NAME = "AlbumArt.org scraper" + ID = "albumart" URL = "https://www.albumart.org/index_detail.php" PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"' - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ): """Return art URL from AlbumArt.org using album ASIN.""" if not album.asin: return @@ -543,13 +633,14 @@ class AlbumArtOrg(RemoteArtSource): m = re.search(self.PAT, resp.text) if m: image_url = m.group(1) - yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT) + yield self._candidate(url=image_url, match=MetadataMatch.EXACT) else: self._log.debug("no image found 
on page") class GoogleImages(RemoteArtSource): NAME = "Google Images" + ID = "google" URL = "https://www.googleapis.com/customsearch/v1" def __init__(self, *args, **kwargs): @@ -558,7 +649,7 @@ class GoogleImages(RemoteArtSource): self.cx = (self._config["google_engine"].get(),) @staticmethod - def add_default_config(config): + def add_default_config(config: confuse.ConfigView): config.add( { "google_key": None, @@ -569,13 +660,18 @@ class GoogleImages(RemoteArtSource): config["google_engine"].redact = True @classmethod - def available(cls, log, config): + def available(cls, log: Logger, config: confuse.ConfigView) -> bool: has_key = bool(config["google_key"].get()) if not has_key: log.debug("google: Disabling art source due to missing key") return has_key - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: """Return art URL from google custom search engine given an album title and interpreter. 
""" @@ -614,7 +710,7 @@ class GoogleImages(RemoteArtSource): if "items" in data.keys(): for item in data["items"]: yield self._candidate( - url=item["link"], match=Candidate.MATCH_EXACT + url=item["link"], match=MetadataMatch.EXACT ) @@ -622,6 +718,7 @@ class FanartTV(RemoteArtSource): """Art from fanart.tv requested using their API""" NAME = "fanart.tv" + ID = "fanarttv" API_URL = "https://webservice.fanart.tv/v3/" API_ALBUMS = API_URL + "music/albums/" PROJECT_KEY = "61a7d0ab4e67162b7a0c7c35915cd48e" @@ -631,7 +728,7 @@ class FanartTV(RemoteArtSource): self.client_key = self._config["fanarttv_key"].get() @staticmethod - def add_default_config(config): + def add_default_config(config: confuse.ConfigView): config.add( { "fanarttv_key": None, @@ -639,7 +736,12 @@ class FanartTV(RemoteArtSource): ) config["fanarttv_key"].redact = True - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: if not album.mb_releasegroupid: return @@ -695,15 +797,21 @@ class FanartTV(RemoteArtSource): # fanart.tv has a strict size requirement for album art to be # uploaded yield self._candidate( - url=item["url"], match=Candidate.MATCH_EXACT, size=(1000, 1000) + url=item["url"], match=MetadataMatch.EXACT, size=(1000, 1000) ) class ITunesStore(RemoteArtSource): NAME = "iTunes Store" + ID = "itunes" API_URL = "https://itunes.apple.com/search" - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: """Return art URL from iTunes Store given an album title.""" if not (album.albumartist and album.album): return @@ -752,7 +860,7 @@ class ITunesStore(RemoteArtSource): art_url = c["artworkUrl100"] art_url = art_url.replace("100x100bb", image_suffix) yield self._candidate( - url=art_url, match=Candidate.MATCH_EXACT + url=art_url, match=MetadataMatch.EXACT ) except KeyError as e: 
self._log.debug( @@ -767,7 +875,7 @@ class ITunesStore(RemoteArtSource): "100x100bb", image_suffix ) yield self._candidate( - url=fallback_art_url, match=Candidate.MATCH_FALLBACK + url=fallback_art_url, match=MetadataMatch.FALLBACK ) except KeyError as e: self._log.debug( @@ -779,6 +887,7 @@ class ITunesStore(RemoteArtSource): class Wikipedia(RemoteArtSource): NAME = "Wikipedia (queried through DBpedia)" + ID = "wikipedia" DBPEDIA_URL = "https://dbpedia.org/sparql" WIKIPEDIA_URL = "https://en.wikipedia.org/w/api.php" SPARQL_QUERY = """PREFIX rdf: @@ -803,7 +912,12 @@ class Wikipedia(RemoteArtSource): }} Limit 1""" - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: if not (album.albumartist and album.album): return @@ -913,9 +1027,7 @@ class Wikipedia(RemoteArtSource): results = data["query"]["pages"] for _, result in results.items(): image_url = result["imageinfo"][0]["url"] - yield self._candidate( - url=image_url, match=Candidate.MATCH_EXACT - ) + yield self._candidate(url=image_url, match=MetadataMatch.EXACT) except (ValueError, KeyError, IndexError): self._log.debug("wikipedia: error scraping imageinfo") return @@ -923,9 +1035,12 @@ class Wikipedia(RemoteArtSource): class FileSystem(LocalArtSource): NAME = "Filesystem" + ID = "filesystem" @staticmethod - def filename_priority(filename, cover_names): + def filename_priority( + filename: AnyStr, cover_names: Sequence[AnyStr] + ) -> list[int]: """Sort order for image names. Return indexes of cover names found in the image filename. 
This @@ -934,7 +1049,12 @@ class FileSystem(LocalArtSource): """ return [idx for (idx, x) in enumerate(cover_names) if x in filename] - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: """Look for album art files in the specified directories.""" if not paths: return @@ -973,7 +1093,7 @@ class FileSystem(LocalArtSource): util.displayable_path(fn), ) yield self._candidate( - path=os.path.join(path, fn), match=Candidate.MATCH_EXACT + path=os.path.join(path, fn), match=MetadataMatch.EXACT ) else: remaining.append(fn) @@ -986,12 +1106,13 @@ class FileSystem(LocalArtSource): ) yield self._candidate( path=os.path.join(path, remaining[0]), - match=Candidate.MATCH_FALLBACK, + match=MetadataMatch.FALLBACK, ) class LastFM(RemoteArtSource): NAME = "Last.fm" + ID = "lastfm" # Sizes in priority order. SIZES = OrderedDict( @@ -1006,12 +1127,12 @@ class LastFM(RemoteArtSource): API_URL = "https://ws.audioscrobbler.com/2.0" - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.key = (self._config["lastfm_key"].get(),) @staticmethod - def add_default_config(config): + def add_default_config(config: confuse.ConfigView) -> None: config.add( { "lastfm_key": None, @@ -1020,13 +1141,18 @@ class LastFM(RemoteArtSource): config["lastfm_key"].redact = True @classmethod - def available(cls, log, config): + def available(cls, log: Logger, config: confuse.ConfigView) -> bool: has_key = bool(config["lastfm_key"].get()) if not has_key: log.debug("lastfm: Disabling art source due to missing key") return has_key - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: if not album.mb_albumid: return @@ -1079,11 +1205,12 @@ class LastFM(RemoteArtSource): class Spotify(RemoteArtSource): NAME = "Spotify" + ID = "spotify" 
SPOTIFY_ALBUM_URL = "https://open.spotify.com/album/" @classmethod - def available(cls, log, config): + def available(cls, log: Logger, config: confuse.ConfigView) -> bool: if not HAS_BEAUTIFUL_SOUP: log.debug( "To use Spotify as an album art source, " @@ -1092,31 +1219,42 @@ class Spotify(RemoteArtSource): ) return HAS_BEAUTIFUL_SOUP - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: try: url = self.SPOTIFY_ALBUM_URL + album.items().get().spotify_album_id except AttributeError: self._log.debug("Fetchart: no Spotify album ID found") return + try: response = requests.get(url, timeout=10) response.raise_for_status() except requests.RequestException as e: self._log.debug("Error: " + str(e)) return + try: html = response.text soup = BeautifulSoup(html, "html.parser") - image_url = soup.find("meta", attrs={"property": "og:image"})[ - "content" - ] - yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT) except ValueError: + self._log.debug(f"Spotify: error loading response: {response.text}") + return + + tag = soup.find("meta", attrs={"property": "og:image"}) + if tag is None or not isinstance(tag, Tag): self._log.debug( - "Spotify: error loading response: {}".format(response.text) + "Spotify: Unexpected response, og:image tag missing" ) return + image_url = tag["content"] + yield self._candidate(url=image_url, match=MetadataMatch.EXACT) + class CoverArtUrl(RemoteArtSource): # This source is intended to be used with a plugin that sets the @@ -1125,8 +1263,14 @@ class CoverArtUrl(RemoteArtSource): # use that URL to fetch the image. 
NAME = "Cover Art URL" + ID = "cover_art_url" - def get(self, album, plugin, paths): + def get( + self, + album: Album, + plugin: FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[Candidate]: image_url = None try: # look for cover_art_url on album or first track @@ -1139,44 +1283,27 @@ class CoverArtUrl(RemoteArtSource): self._log.debug(f"Cover art URL not found for {album}") return if image_url: - yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT) + yield self._candidate(url=image_url, match=MetadataMatch.EXACT) else: self._log.debug(f"Cover art URL not found for {album}") return -# Try each source in turn. - -# Note that SOURCES_ALL is redundant (and presently unused). However, we keep -# it around nn order not break plugins that "register" (a.k.a. monkey-patch) -# their own fetchart sources. -SOURCES_ALL = [ - "filesystem", - "coverart", - "itunes", - "amazon", - "albumart", - "wikipedia", - "google", - "fanarttv", - "lastfm", - "spotify", -] - -ART_SOURCES = { - "filesystem": FileSystem, - "coverart": CoverArtArchive, - "itunes": ITunesStore, - "albumart": AlbumArtOrg, - "amazon": Amazon, - "wikipedia": Wikipedia, - "google": GoogleImages, - "fanarttv": FanartTV, - "lastfm": LastFM, - "spotify": Spotify, - "cover_art_url": CoverArtUrl, +# All art sources. The order they will be tried in is specified by the config. 
+ART_SOURCES: set[Type[ArtSource]] = { + FileSystem, + CoverArtArchive, + ITunesStore, + AlbumArtOrg, + Amazon, + Wikipedia, + GoogleImages, + FanartTV, + LastFM, + Spotify, + CoverArtUrl, } -SOURCE_NAMES = {v: k for k, v in ART_SOURCES.items()} + # PLUGIN LOGIC ############################################################### @@ -1185,12 +1312,12 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): PAT_PX = r"(0|[1-9][0-9]*)px" PAT_PERCENT = r"(100(\.00?)?|[1-9]?[0-9](\.[0-9]{1,2})?)%" - def __init__(self): + def __init__(self) -> None: super().__init__() # Holds candidates corresponding to downloaded images between # fetching them and placing them in the filesystem. - self.art_candidates = {} + self.art_candidates: dict[ImportTask, Candidate] = {} self.config.add( { @@ -1216,7 +1343,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): "cover_format": None, } ) - for source in ART_SOURCES.values(): + for source in ART_SOURCES: source.add_default_config(self.config) self.minwidth = self.config["minwidth"].get(int) @@ -1237,7 +1364,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): self.margin_px = None self.margin_percent = None self.deinterlace = self.config["deinterlace"].get(bool) - if type(self.enforce_ratio) is str: + if isinstance(self.enforce_ratio, str): if self.enforce_ratio[-1] == "%": self.margin_percent = float(self.enforce_ratio[:-1]) / 100 elif self.enforce_ratio[-2:] == "px": @@ -1262,8 +1389,8 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): self.register_listener("import_task_files", self.assign_art) available_sources = [ - (s_name, c) - for (s_name, s_cls) in ART_SOURCES.items() + (s_cls.ID, c) + for s_cls in ART_SOURCES if s_cls.available(self._log, self.config) for c in s_cls.VALID_MATCHING_CRITERIA ] @@ -1288,17 +1415,21 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): others.append((s, c)) sources = others + fs + sources_by_name = {s_cls.ID: s_cls for s_cls in ART_SOURCES} + self.sources = [ - 
ART_SOURCES[s](self._log, self.config, match_by=[c]) + sources_by_name[s](self._log, self.config, match_by=[c]) for s, c in sources ] @staticmethod - def _is_source_file_removal_enabled(): - return config["import"]["delete"].get(bool) or config["import"]["move"].get(bool) + def _is_source_file_removal_enabled() -> bool: + return config["import"]["delete"].get(bool) or config["import"][ + "move" + ].get(bool) # Asynchronous; after music is added to the library. - def fetch_art(self, session, task): + def fetch_art(self, session: ImportSession, task: ImportTask) -> None: """Find art for the album being imported.""" if task.is_album: # Only fetch art for full albums. if task.album.artpath and os.path.isfile( @@ -1324,18 +1455,20 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): if candidate: self.art_candidates[task] = candidate - def _set_art(self, album, candidate, delete=False): + def _set_art( + self, album: Album, candidate: Candidate, delete: bool = False + ) -> None: album.set_art(candidate.path, delete) if self.store_source: # store the source of the chosen artwork in a flexible field self._log.debug( "Storing art_source for {0.albumartist} - {0.album}", album ) - album.art_source = SOURCE_NAMES[type(candidate.source)] + album.art_source = candidate.source_name album.store() # Synchronous; after music files are put in place. - def assign_art(self, session, task): + def assign_art(self, session: ImportSession, task: ImportTask): """Place the discovered art in the filesystem.""" if task in self.art_candidates: candidate = self.art_candidates.pop(task) @@ -1347,7 +1480,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): task.prune(candidate.path) # Manual album art fetching. 
- def commands(self): + def commands(self) -> list[ui.Subcommand]: cmd = ui.Subcommand("fetchart", help="download album art") cmd.parser.add_option( "-f", @@ -1366,7 +1499,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): help="quiet mode: do not output albums that already have artwork", ) - def func(lib, opts, args): + def func(lib: Library, opts, args) -> None: self.batch_fetch_art( lib, lib.albums(ui.decargs(args)), opts.force, opts.quiet ) @@ -1376,7 +1509,12 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): # Utilities converted from functions to methods on logging overhaul - def art_for_album(self, album, paths, local_only=False): + def art_for_album( + self, + album: Album, + paths: None | Sequence[bytes], + local_only: bool = False, + ) -> None | Candidate: """Given an Album object, returns a path to downloaded art for the album (or None if no art is found). If `maxwidth`, then images are resized to this maximum pixel size. If `quality` then resized images @@ -1387,22 +1525,22 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): out = None for source in self.sources: - if source.IS_LOCAL or not local_only: + if source.LOC == "local" or not local_only: self._log.debug( - "trying source {0} for album {1.albumartist} - {1.album}", - SOURCE_NAMES[type(source)], - album, + f"trying source {source.describe()}" + f" for album {album.albumartist} - {album.album}" ) # URLs might be invalid at this point, or the image may not # fulfill the requirements for candidate in source.get(album, self, paths): source.fetch_image(candidate, self) - if candidate.validate(self): + if candidate.validate(self) != ImageAction.BAD: out = candidate + assert out.path is not None # help mypy self._log.debug( - "using {0.LOC_STR} image {1}".format( - source, util.displayable_path(out.path) - ) + "using {0.LOC} image {1}", + source, + util.displayable_path(out.path), ) break # Remove temporary files for invalid candidates. 
@@ -1415,7 +1553,13 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): return out - def batch_fetch_art(self, lib, albums, force, quiet): + def batch_fetch_art( + self, + lib: Library, + albums: Iterable[Album], + force: bool, + quiet: bool, + ) -> None: """Fetch album art for each of the albums. This implements the manual fetchart CLI command. """ diff --git a/test/plugins/test_art.py b/test/plugins/test_art.py index acb712354..b8a9a3bda 100644 --- a/test/plugins/test_art.py +++ b/test/plugins/test_art.py @@ -14,8 +14,11 @@ """Tests for the album art fetchers.""" +from __future__ import annotations + import os import shutil +from typing import TYPE_CHECKING from unittest.mock import patch import confuse @@ -37,6 +40,11 @@ from beetsplug import fetchart logger = logging.getLogger("beets.test_art") +if TYPE_CHECKING: + from collections.abc import Iterator, Sequence + + from beets.library import Album + class Settings: """Used to pass settings to the ArtSources when the plugin isn't fully @@ -48,6 +56,19 @@ class Settings: setattr(self, k, v) +class DummyRemoteArtSource(fetchart.RemoteArtSource): + NAME = "Dummy Art Source" + ID = "dummy" + + def get( + self, + album: Album, + plugin: fetchart.FetchArtPlugin, + paths: None | Sequence[bytes], + ) -> Iterator[fetchart.Candidate]: + return iter(()) + + class UseThePlugin(CleanupModulesMixin, BeetsTestCase): modules = (fetchart.__name__, ArtResizer.__module__) @@ -202,9 +223,11 @@ class FetchImageTest(FetchImageTestCase): def setUp(self): super().setUp() self.dpath = os.path.join(self.temp_dir, b"arttest") - self.source = fetchart.RemoteArtSource(logger, self.plugin.config) + self.source = DummyRemoteArtSource(logger, self.plugin.config) self.settings = Settings(maxwidth=0) - self.candidate = fetchart.Candidate(logger, url=self.URL) + self.candidate = fetchart.Candidate( + logger, self.source.ID, url=self.URL + ) def test_invalid_type_returns_none(self): self.mock_response(self.URL, "image/watercolour") @@ 
-432,7 +455,7 @@ class ITunesStoreTest(UseThePlugin): self.mock_response(fetchart.ITunesStore.API_URL, json) candidate = next(self.source.get(self.album, self.settings, [])) assert candidate.url == "url_to_the_image" - assert candidate.match == fetchart.Candidate.MATCH_EXACT + assert candidate.match == fetchart.MetadataMatch.EXACT def test_itunesstore_no_result(self): json = '{"results": []}' @@ -471,7 +494,7 @@ class ITunesStoreTest(UseThePlugin): self.mock_response(fetchart.ITunesStore.API_URL, json) candidate = next(self.source.get(self.album, self.settings, [])) assert candidate.url == "url_to_the_image" - assert candidate.match == fetchart.Candidate.MATCH_FALLBACK + assert candidate.match == fetchart.MetadataMatch.FALLBACK def test_itunesstore_returns_result_without_artwork(self): json = """{ @@ -727,7 +750,11 @@ class ArtImporterTest(UseThePlugin): self.art_file = os.path.join(self.temp_dir, b"tmpcover.jpg") _common.touch(self.art_file) self.old_afa = self.plugin.art_for_album - self.afa_response = fetchart.Candidate(logger, path=self.art_file) + self.afa_response = fetchart.Candidate( + logger, + source_name="test", + path=self.art_file, + ) def art_for_album(i, p, local_only=False): return self.afa_response @@ -814,7 +841,11 @@ class ArtImporterTest(UseThePlugin): def test_do_not_delete_original_if_already_in_place(self): artdest = os.path.join(os.path.dirname(self.i.path), b"cover.jpg") shutil.copyfile(syspath(self.art_file), syspath(artdest)) - self.afa_response = fetchart.Candidate(logger, path=artdest) + self.afa_response = fetchart.Candidate( + logger, + source_name="test", + path=artdest, + ) self._fetch_art(True) def test_fetch_art_if_imported_file_deleted(self): @@ -855,7 +886,9 @@ class ArtForAlbumTest(UseThePlugin): def fs_source_get(_self, album, settings, paths): if paths: - yield fetchart.Candidate(logger, path=self.image_file) + yield fetchart.Candidate( + logger, source_name=_self.ID, path=self.image_file + ) fetchart.FileSystem.get = 
fs_source_get From 4fdfb393db0263c6001902d6b33ec34d2d552170 Mon Sep 17 00:00:00 2001 From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com> Date: Wed, 9 Apr 2025 12:43:15 +0200 Subject: [PATCH 16/49] update changelog --- docs/changelog.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 825e287f6..44f2d305d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -35,6 +35,12 @@ For packagers: ``BeetsPlugin.candidates`` method signature since it is never passed in. If you override this method in your plugin, feel free to remove this parameter. +For plugin developers: + +* The `fetchart` plugins has seen a few changes to function signatures and + source registration in the process of introducing typings to the code. + Custom art sources might need to be adapted. + Other changes: 2.3.1 (May 14, 2025) From ff22da0691ffe87049235f1e1e81cd9d3613c1a8 Mon Sep 17 00:00:00 2001 From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com> Date: Wed, 9 Apr 2025 17:38:56 +0200 Subject: [PATCH 17/49] install imagemagick in CI environment --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 748cf24d1..2bad34d8b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -33,7 +33,7 @@ jobs: if: matrix.platform == 'ubuntu-latest' run: | sudo apt update - sudo apt install ffmpeg gobject-introspection libcairo2-dev libgirepository-2.0-dev pandoc + sudo apt install ffmpeg gobject-introspection libcairo2-dev libgirepository-2.0-dev pandoc imagemagick - name: Get changed lyrics files id: lyrics-update From abbabcf92eecf382b7806b086af24c0202d3d820 Mon Sep 17 00:00:00 2001 From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com> Date: Mon, 14 Apr 2025 12:04:26 +0200 Subject: [PATCH 18/49] tests: skip tests that require ArtResizer.compare if it is broken This is just a quick workaround to allow CI 
to pass. This didn't show up in CI before because we didn't install ImageMagick. --- test/plugins/test_embedart.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/test/plugins/test_embedart.py b/test/plugins/test_embedart.py index cb4d1a421..f2f02137b 100644 --- a/test/plugins/test_embedart.py +++ b/test/plugins/test_embedart.py @@ -34,8 +34,35 @@ def require_artresizer_compare(test): def wrapper(*args, **kwargs): if not ArtResizer.shared.can_compare: raise unittest.SkipTest("compare not available") - else: - return test(*args, **kwargs) + + # PHASH computation in ImageMagick changed at some point in an + # undocumented way. Check at a low level that comparisons of our + # fixtures give the expected results. Only then, plugin logic tests + # below are meaningful. + # cf. https://github.com/ImageMagick/ImageMagick/discussions/5191 + # It would be better to investigate what exactly change in IM and + # handle that in ArtResizer.IMBackend.{can_compare,compare}. + # Skipping the tests as below is a quick fix to CI, but users may + # still see unexpected behaviour. 
+ abbey_artpath = os.path.join(_common.RSRC, b"abbey.jpg") + abbey_similarpath = os.path.join(_common.RSRC, b"abbey-similar.jpg") + abbey_differentpath = os.path.join(_common.RSRC, b"abbey-different.jpg") + compare_threshold = 20 + + similar_compares_ok = ArtResizer.shared.compare( + abbey_artpath, + abbey_similarpath, + compare_threshold, + ) + different_compares_ok = ArtResizer.shared.compare( + abbey_artpath, + abbey_differentpath, + compare_threshold, + ) + if not similar_compares_ok or different_compares_ok: + raise unittest.SkipTest("IM version with broken compare") + + return test(*args, **kwargs) wrapper.__name__ = test.__name__ return wrapper From fd383ead514ec4102e373258a9343aab750373ac Mon Sep 17 00:00:00 2001 From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com> Date: Thu, 17 Apr 2025 21:49:42 +0200 Subject: [PATCH 19/49] ci: run with fetchart dependencies (Pillow) at least in the with-coverage tests --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2bad34d8b..333706dc7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -60,7 +60,7 @@ jobs: env: LYRICS_UPDATED: ${{ steps.lyrics-update.outputs.any_changed }} run: | - poetry install --extras=autobpm --extras=lyrics --extras=docs --extras=replaygain --extras=reflink + poetry install --extras=autobpm --extras=lyrics --extras=docs --extras=replaygain --extras=reflink --extras=fetchart poe docs poe test-with-coverage From 10d1c51a1d5051c2409c6bd4f25ef13a4f654f19 Mon Sep 17 00:00:00 2001 From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com> Date: Tue, 20 May 2025 09:19:10 +0200 Subject: [PATCH 20/49] fetchart: consistently use lazy string formatting for debug logging --- beetsplug/fetchart.py | 52 +++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py index 
b04347f2c..25e649e68 100644 --- a/beetsplug/fetchart.py +++ b/beetsplug/fetchart.py @@ -22,6 +22,7 @@ from abc import ABC, abstractmethod from collections import OrderedDict from contextlib import closing from enum import Enum +from functools import cached_property from typing import TYPE_CHECKING, AnyStr, ClassVar, Literal, Tuple, Type import confuse @@ -368,7 +369,8 @@ class ArtSource(RequestMixin, ABC): self._config = config self.match_by = match_by or self.VALID_MATCHING_CRITERIA - def describe(self) -> str: + @cached_property + def description(self) -> str: return f"{self.ID}[{', '.join(self.match_by)}]" @staticmethod @@ -487,7 +489,7 @@ class RemoteArtSource(ArtSource): for chunk in data: fh.write(chunk) self._log.debug( - "downloaded art to: {0}", util.displayable_path(filename) + "downloaded art to: {}", util.displayable_path(filename) ) candidate.path = util.bytestring_path(filename) return @@ -533,18 +535,14 @@ class CoverArtArchive(RemoteArtSource): try: response = self.request(url) except requests.RequestException: - self._log.debug( - "{}: error receiving response".format(self.NAME) - ) + self._log.debug("{}: error receiving response", self.NAME) return try: data = response.json() except ValueError: self._log.debug( - "{}: error loading response: {}".format( - self.NAME, response.text - ) + "{}: error loading response: {}", self.NAME, response.text ) return @@ -624,7 +622,7 @@ class AlbumArtOrg(RemoteArtSource): # Get the page from albumart.org. 
try: resp = self.request(self.URL, params={"asin": album.asin}) - self._log.debug("scraped art URL: {0}", resp.url) + self._log.debug("scraped art URL: {}", resp.url) except requests.RequestException: self._log.debug("error scraping art page") return @@ -697,14 +695,12 @@ class GoogleImages(RemoteArtSource): try: data = response.json() except ValueError: - self._log.debug( - "google: error loading response: {}".format(response.text) - ) + self._log.debug("google: error loading response: {}", response.text) return if "error" in data: reason = data["error"]["errors"][0]["reason"] - self._log.debug("google fetchart error: {0}", reason) + self._log.debug("google fetchart error: {}", reason) return if "items" in data.keys(): @@ -826,13 +822,13 @@ class ITunesStore(RemoteArtSource): r = self.request(self.API_URL, params=payload) r.raise_for_status() except requests.RequestException as e: - self._log.debug("iTunes search failed: {0}", e) + self._log.debug("iTunes search failed: {}", e) return try: candidates = r.json()["results"] except ValueError as e: - self._log.debug("Could not decode json response: {0}", e) + self._log.debug("Could not decode json response: {}", e) return except KeyError as e: self._log.debug( @@ -1089,7 +1085,7 @@ class FileSystem(LocalArtSource): for fn in images: if re.search(cover_pat, os.path.splitext(fn)[0], re.I): self._log.debug( - "using well-named art file {0}", + "using well-named art file {}", util.displayable_path(fn), ) yield self._candidate( @@ -1101,7 +1097,7 @@ class FileSystem(LocalArtSource): # Fall back to any image in the folder. 
if remaining and not plugin.cautious: self._log.debug( - "using fallback art file {0}", + "using fallback art file {}", util.displayable_path(remaining[0]), ) yield self._candidate( @@ -1197,9 +1193,7 @@ class LastFM(RemoteArtSource): url=images[size], size=self.SIZES[size] ) except ValueError: - self._log.debug( - "lastfm: error loading response: {}".format(response.text) - ) + self._log.debug("lastfm: error loading response: {}", response.text) return @@ -1235,14 +1229,16 @@ class Spotify(RemoteArtSource): response = requests.get(url, timeout=10) response.raise_for_status() except requests.RequestException as e: - self._log.debug("Error: " + str(e)) + self._log.debug("Error: {!s}", e) return try: html = response.text soup = BeautifulSoup(html, "html.parser") except ValueError: - self._log.debug(f"Spotify: error loading response: {response.text}") + self._log.debug( + "Spotify: error loading response: {}", response.text + ) return tag = soup.find("meta", attrs={"property": "og:image"}) @@ -1278,14 +1274,14 @@ class CoverArtUrl(RemoteArtSource): image_url = album.cover_art_url else: image_url = album.items().get().cover_art_url - self._log.debug(f"Cover art URL {image_url} found for {album}") + self._log.debug("Cover art URL {} found for {}", image_url, album) except (AttributeError, TypeError): - self._log.debug(f"Cover art URL not found for {album}") + self._log.debug("Cover art URL not found for {}", album) return if image_url: yield self._candidate(url=image_url, match=MetadataMatch.EXACT) else: - self._log.debug(f"Cover art URL not found for {album}") + self._log.debug("Cover art URL not found for {}", album) return @@ -1527,8 +1523,10 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): for source in self.sources: if source.LOC == "local" or not local_only: self._log.debug( - f"trying source {source.describe()}" - f" for album {album.albumartist} - {album.album}" + "trying source {0.description}" + " for album {1.albumartist} - {1.album}", + source, + 
album, ) # URLs might be invalid at this point, or the image may not # fulfill the requirements From 728076e97df3b803c26b6d149d69a7ae3b3fa072 Mon Sep 17 00:00:00 2001 From: wisp3rwind <17089248+wisp3rwind@users.noreply.github.com> Date: Tue, 20 May 2025 10:04:07 +0200 Subject: [PATCH 21/49] fetchart: assert some invariants to satisfy mypy Eventually, it would be nice to avoid this by more expressive typings. For now, avoid such larger refactoring. --- beetsplug/fetchart.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py index 25e649e68..3473fe08b 100644 --- a/beetsplug/fetchart.py +++ b/beetsplug/fetchart.py @@ -262,6 +262,11 @@ class Candidate: """Resize the candidate artwork according to the plugin's configuration and the specified check. """ + # This must only be called when _validate returned something other than + # ImageAction.Bad or ImageAction.EXACT; then path and size are known. + assert self.path is not None + assert self.size is not None + if check == ImageAction.DOWNSCALE: self.path = ArtResizer.shared.resize( plugin.maxwidth, @@ -270,9 +275,6 @@ class Candidate: max_filesize=plugin.max_filesize, ) elif check == ImageAction.DOWNSIZE: - # This must only be called when _validate returned something other - # than ImageAction.Bad or ImageAction.EXACT; then the size is known. - assert self.size is not None # dimensions are correct, so maxwidth is set to maximum dimension self.path = ArtResizer.shared.resize( max(self.size), @@ -424,7 +426,11 @@ class RemoteArtSource(ArtSource): """Downloads an image from a URL and checks whether it seems to actually be an image. """ + # This must only be called for candidates that were returned by + # self.get, which are expected to have a url and no path (because they + # haven't been downloaded yet). 
assert candidate.path is None + assert candidate.url is not None if plugin.maxwidth: candidate.url = ArtResizer.shared.proxy_url( From 8e84268e06c6f5eb297614f294abc678c569c719 Mon Sep 17 00:00:00 2001 From: Manu Date: Sun, 18 May 2025 21:37:01 +0200 Subject: [PATCH 22/49] Replace outdated bitesized label link --- CONTRIBUTING.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 5fccb8e80..2a6006b36 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -154,7 +154,7 @@ Code Contribution Ideas ^^^^^^^^^^^^^^^^^^^^^^^ - We maintain a set of `issues marked as - “bite-sized” `__. + “good first issue” `__. These are issues that would serve as a good introduction to the codebase. Claim one and start exploring! - Like testing? Our `test From 9584216209c3a6bd80ebe46c2113c41c8352ee37 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr <39738318+semohr@users.noreply.github.com> Date: Thu, 22 May 2025 11:35:40 +0200 Subject: [PATCH 23/49] Streamlined auto api referencing for documentation (#5795) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description The current developer documentation feels somewhat cluttered due to inline auto-generated API references for certain classes. To improve readability and maintainability, this PR introduces a more streamlined approach that aligns better with best practices observed in other PyData ecosystem documentation. Specifically, this PR: - Adds a dedicated `api/` folder to the documentation structure. - Moves all auto-generated references (classes, methods, etc.) to this folder. - Enables clean, concise linking to API elements from the narrative documentation—without interrupting human-written content with large autogenerated blocks. This separation makes the documentation easier to navigate and maintain, while still providing full API reference coverage where needed. 
- [x] Documentation - [x] Changelog --- docs/.gitignore | 2 + docs/Makefile | 3 +- docs/_templates/autosummary/base.rst | 3 + docs/_templates/autosummary/class.rst | 28 +++++ docs/_templates/autosummary/module.rst | 11 ++ docs/api/database.rst | 47 ++++++++ docs/api/plugins.rst | 11 ++ docs/changelog.rst | 5 + docs/conf.py | 46 ++++++-- docs/dev/index.rst | 10 ++ docs/dev/library.rst | 156 ++++++------------------- docs/dev/plugins.rst | 11 +- 12 files changed, 204 insertions(+), 129 deletions(-) create mode 100644 docs/.gitignore create mode 100644 docs/_templates/autosummary/base.rst create mode 100644 docs/_templates/autosummary/class.rst create mode 100644 docs/_templates/autosummary/module.rst create mode 100644 docs/api/database.rst create mode 100644 docs/api/plugins.rst diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 000000000..1f041cc9d --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,2 @@ +_build +generated/ \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile index f940dd931..d642530f1 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -6,6 +6,7 @@ SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build +SOURCEDIR = . # When both are available, use Sphinx 2.x for autodoc compatibility. ifeq ($(shell which sphinx-build2 >/dev/null 2>&1 ; echo $$?),0) @@ -39,7 +40,7 @@ help: @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: - -rm -rf $(BUILDDIR)/* + -rm -rf $(BUILDDIR)/* $(SOURCEDIR)/api/generated/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html diff --git a/docs/_templates/autosummary/base.rst b/docs/_templates/autosummary/base.rst new file mode 100644 index 000000000..822f55dc2 --- /dev/null +++ b/docs/_templates/autosummary/base.rst @@ -0,0 +1,3 @@ +{{ fullname | escape | underline}} +.. currentmodule:: {{ module }} +.. 
auto{{ objtype }}:: {{ objname }} \ No newline at end of file diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst new file mode 100644 index 000000000..6927f8360 --- /dev/null +++ b/docs/_templates/autosummary/class.rst @@ -0,0 +1,28 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :members: <-- add at least this line + :private-members: + :show-inheritance: <-- plus I want to show inheritance... + :inherited-members: <-- ...and inherited members too + + {% block methods %} + .. automethod:: __init__ + + {% if methods %} + .. rubric:: {{ _('Public methods summary') }} + + .. autosummary:: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% for item in _methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + .. rubric:: {{ _('Methods definition') }} + diff --git a/docs/_templates/autosummary/module.rst b/docs/_templates/autosummary/module.rst new file mode 100644 index 000000000..9383a2307 --- /dev/null +++ b/docs/_templates/autosummary/module.rst @@ -0,0 +1,11 @@ +{{ fullname | escape | underline}} +{% block modules %} +{% if modules %} +.. rubric:: Modules + +{% for item in modules %} +{{ item }} + +{%- endfor %} +{% endif %} +{% endblock %} \ No newline at end of file diff --git a/docs/api/database.rst b/docs/api/database.rst new file mode 100644 index 000000000..627b5dc39 --- /dev/null +++ b/docs/api/database.rst @@ -0,0 +1,47 @@ +Database +-------- + +.. currentmodule:: beets.library + + +Library +''''''' + +.. autosummary:: + :toctree: generated/ + + Library + + +Models +'''''' + +.. autosummary:: + :toctree: generated/ + + LibModel + Album + Item + + +Transactions +'''''''''''' + +.. currentmodule:: beets.dbcore.db + +.. autosummary:: + :toctree: generated/ + + Transaction + +Queries +''''''' + +.. currentmodule:: beets.dbcore.query + +.. 
autosummary:: + :toctree: generated/ + + Query + FieldQuery + AndQuery \ No newline at end of file diff --git a/docs/api/plugins.rst b/docs/api/plugins.rst new file mode 100644 index 000000000..0d6c13718 --- /dev/null +++ b/docs/api/plugins.rst @@ -0,0 +1,11 @@ +Plugins +------- + +.. currentmodule:: beets.plugins + + + +.. autosummary:: + :toctree: generated/ + + BeetsPlugin diff --git a/docs/changelog.rst b/docs/changelog.rst index 44f2d305d..09259e1fa 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -43,16 +43,21 @@ For plugin developers: Other changes: +* Documentation structure for auto generated API references changed slightly. + Autogenerated API references are now located in the `docs/api` subdirectory. + 2.3.1 (May 14, 2025) -------------------- Bug fixes: + * :doc:`/reference/pathformat`: Fixed a regression where path legalization incorrectly removed parts of user-configured path formats that followed a dot (**.**). :bug:`5771` For packagers: + * Force ``poetry`` version below 2 to avoid it mangling file modification times in ``sdist`` package. :bug:`5770` diff --git a/docs/conf.py b/docs/conf.py index 497c5e71e..d0f8cdffe 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,19 +1,35 @@ -AUTHOR = "Adrian Sampson" +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html -# General configuration +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -extensions = ["sphinx.ext.autodoc", "sphinx.ext.extlinks"] - -exclude_patterns = ["_build"] -source_suffix = {".rst": "restructuredtext"} -master_doc = "index" project = "beets" +AUTHOR = "Adrian Sampson" copyright = "2016, Adrian Sampson" +master_doc = "index" +language = "en" version = "2.3" release = "2.3.1" +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.extlinks", +] +autosummary_generate = True +exclude_patterns = ["_build"] +templates_path = ["_templates"] +source_suffix = {".rst": "restructuredtext", ".md": "markdown"} + + pygments_style = "sphinx" # External links to the bug tracker and other sites. 
@@ -59,10 +75,24 @@ man_pages = [ ), ] -# Options for pydata theme + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + + html_theme = "pydata_sphinx_theme" html_theme_options = {"collapse_navigation": True, "logo": {"text": "beets"}} html_title = "beets" html_logo = "_static/beets_logo_nobg.png" html_static_path = ["_static"] html_css_files = ["beets.css"] + + +def skip_member(app, what, name, obj, skip, options): + if name.startswith("_"): + return True + return skip + + +def setup(app): + app.connect("autodoc-skip-member", skip_member) diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 63335160c..10b3566c2 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -10,8 +10,18 @@ and write metadata tags in media files. .. _MediaFile: https://mediafile.readthedocs.io/en/latest/ .. toctree:: + :maxdepth: 1 plugins library importer cli + + +.. toctree:: + :maxdepth: 1 + :caption: API Reference + + ../api/plugins + ../api/database + diff --git a/docs/dev/library.rst b/docs/dev/library.rst index 9740c8b90..8c47e4dc3 100644 --- a/docs/dev/library.rst +++ b/docs/dev/library.rst @@ -20,30 +20,18 @@ invocation of beets usually has only one :class:`Library`. It's powered by abstraction, something like a very minimal `ORM`_. The library is also responsible for handling queries to retrieve stored objects. -.. autoclass:: Library(path, directory[, path_formats[, replacements]]) +Overview +'''''''' - .. automethod:: __init__ +You can add new items or albums to the library via the +:py:meth:`Library.add` and :py:meth:`Library.add_album` methods. - You can add new items or albums to the library: +You may also query the library for items and albums using the +:py:meth:`Library.items`, :py:meth:`Library.albums`, :py:meth:`Library.get_item` and :py:meth:`Library.get_album` methods. - .. automethod:: add - - .. 
automethod:: add_album - - And there are methods for querying the database: - - .. automethod:: items - - .. automethod:: albums - - .. automethod:: get_item - - .. automethod:: get_album - - Any modifications must go through a :class:`Transaction` which you get can - using this method: - - .. automethod:: transaction +Any modifications to the library must go through a +:class:`Transaction` object, which you can get using the +:py:meth:`Library.transaction` context manager. .. _SQLite: https://sqlite.org/index.html .. _ORM: https://en.wikipedia.org/wiki/Object-relational_mapping @@ -54,7 +42,7 @@ Model Classes The two model entities in beets libraries, :class:`Item` and :class:`Album`, share a base class, :class:`LibModel`, that provides common functionality. That -class itself specialises :class:`dbcore.Model` which provides an ORM-like +class itself specialises :class:`beets.dbcore.Model` which provides an ORM-like abstraction. To get or change the metadata of a model (an item or album), either access its @@ -68,42 +56,25 @@ Model base Models use dirty-flags to track when the object's metadata goes out of sync with the database. The dirty dictionary maps field names to booleans indicating whether the field has been written since the object was last -synchronized (via load or store) with the database. +synchronized (via load or store) with the database. This logic is implemented +in the model base class :class:`LibModel` and is inherited by both +:class:`Item` and :class:`Album`. -.. autoclass:: LibModel +We provide CRUD-like methods for interacting with the database: - .. automethod:: all_keys +* :py:meth:`LibModel.store` +* :py:meth:`LibModel.load` +* :py:meth:`LibModel.remove` +* :py:meth:`LibModel.add` - .. automethod:: __init__ +The base class :class:`beets.dbcore.Model` has a ``dict``-like interface, so +the normal mapping API is supported: - .. 
autoattribute:: _types +* :py:meth:`LibModel.keys` +* :py:meth:`LibModel.update` +* :py:meth:`LibModel.items` +* :py:meth:`LibModel.get` - .. autoattribute:: _fields - - There are CRUD-like methods for interacting with the database: - - .. automethod:: store - - .. automethod:: load - - .. automethod:: remove - - .. automethod:: add - - The base class :class:`dbcore.Model` has a ``dict``-like interface, so - normal the normal mapping API is supported: - - .. automethod:: keys - - .. automethod:: update - - .. automethod:: items - - .. note:: - The :py:meth:`Album.items` method is not inherited from - :py:meth:`LibModel.items` for historical reasons. - - .. automethod:: get Item '''' @@ -155,38 +126,6 @@ This leads to the following implementation policy: * On every modification to DB metadata (``item.field = ...``), the DB mtime is reset to zero. - -.. autoclass:: Item - - .. automethod:: __init__ - - .. automethod:: from_path - - .. automethod:: get_album - - .. automethod:: destination - - .. automethod:: current_mtime - - The methods ``read()`` and ``write()`` are complementary: one reads a - file's tags and updates the item's metadata fields accordingly while the - other takes the item's fields and writes them to the file's tags. - - .. automethod:: read - - .. automethod:: write - - .. automethod:: try_write - - .. automethod:: try_sync - - The :class:`Item` class supplements the normal model interface so that they - interacting with the filesystem as well: - - .. automethod:: move - - .. automethod:: remove - Album ''''' @@ -205,35 +144,10 @@ For those fields that are both item-level and album-level (e.g., ``year`` or use an SQLite table called ``albums``, in which each column is an album metadata field. -.. autoclass:: Album - .. automethod:: __init__ - - .. automethod:: item_dir - - .. automethod:: items - - Albums extend the normal model interface to also forward changes to their - items: - - .. autoattribute:: item_keys - - .. automethod:: store - - .. 
automethod:: try_sync - - .. automethod:: move - - .. automethod:: remove - - Albums also manage album art, image files that are associated with each - album: - - .. automethod:: set_art - - .. automethod:: move_art - - .. automethod:: art_destination +.. note:: + The :py:meth:`Album.items` method is not inherited from + :py:meth:`LibModel.items` for historical reasons. Transactions '''''''''''' @@ -241,24 +155,30 @@ Transactions The :class:`Library` class provides the basic methods necessary to access and manipulate its contents. To perform more complicated operations atomically, or to interact directly with the underlying SQLite database, you must use a -*transaction* (see this `blog post`_ for motivation). For example:: +*transaction* (see this `blog post`_ for motivation). For example + +.. code-block:: python lib = Library() with lib.transaction() as tx: items = lib.items(query) lib.add_album(list(items)) -.. _blog post: https://beets.io/blog/sqlite-nightmare.html - .. currentmodule:: beets.dbcore.db -.. autoclass:: Transaction - :members: +The :class:`Transaction` class is a context manager that provides a +transactional interface to the underlying SQLite database. It is +responsible for managing the transaction's lifecycle, including +beginning, committing, and rolling back the transaction if +an error occurs. +.. _blog post: https://beets.io/blog/sqlite-nightmare.html Queries ------- +.. currentmodule:: beets.dbcore.query + To access albums and items in a library, we use :doc:`/reference/query`. In beets, the :class:`Query` abstract base class represents a criterion that matches items or albums in the database. diff --git a/docs/dev/plugins.rst b/docs/dev/plugins.rst index 2d30f86c9..c24a94093 100644 --- a/docs/dev/plugins.rst +++ b/docs/dev/plugins.rst @@ -1,3 +1,11 @@ +Plugin Development Guide +======================== + +Beets plugins are Python modules or packages that extend the core functionality +of beets. 
The plugin system is designed to be flexible, allowing developers to +add virtually any type of feature. + + .. _writing-plugins: Writing Plugins @@ -413,9 +421,8 @@ to extend the kinds of metadata that they can easily manage. The ``MediaFile`` class uses ``MediaField`` descriptors to provide access to file tags. If you have created a descriptor you can add it through -your plugins ``add_media_field()`` method. +your plugin's :py:meth:`beets.plugins.BeetsPlugin.add_media_field()` method. -.. automethod:: beets.plugins.BeetsPlugin.add_media_field .. _MediaFile: https://mediafile.readthedocs.io/en/latest/ From 43b20f285029227745bc9c36c58c118e4cfdd686 Mon Sep 17 00:00:00 2001 From: David Logie Date: Thu, 22 May 2025 02:37:41 +0100 Subject: [PATCH 24/49] Make `mb_albumartistids` available at the album level. --- beets/library.py | 2 ++ test/test_importer.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/beets/library.py b/beets/library.py index 8fd1c8022..9be3a1602 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1179,6 +1179,7 @@ class Album(LibModel): "comp": types.BOOLEAN, "mb_albumid": types.STRING, "mb_albumartistid": types.STRING, + "mb_albumartistids": types.MULTI_VALUE_DSV, "albumtype": types.STRING, "albumtypes": types.SEMICOLON_SPACE_DSV, "label": types.STRING, @@ -1235,6 +1236,7 @@ class Album(LibModel): "comp", "mb_albumid", "mb_albumartistid", + "mb_albumartistids", "albumtype", "albumtypes", "label", diff --git a/test/test_importer.py b/test/test_importer.py index 993362254..5070a058d 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -129,6 +129,11 @@ class NonAutotaggedImportTest(AsIsImporterMixin, ImportTestCase): self.run_asis_importer(delete=True) self.assertNotExists(os.path.join(self.import_dir, b"album")) + def test_album_mb_albumartistids(self): + self.run_asis_importer() + album = self.lib.albums()[0] + assert album.mb_albumartistids == album.items()[0].mb_albumartistids + 
@unittest.skipUnless(_common.HAVE_SYMLINK, "need symlinks") def test_import_link_arrives(self): self.run_asis_importer(link=True) From c9f98fca5550add0942cbaf10d16b0e753002fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 26 May 2025 12:40:38 +0100 Subject: [PATCH 25/49] Use unittest.TestCase for tests that don't require the dir setup --- test/plugins/test_art.py | 5 +++-- test/plugins/test_beatport.py | 3 ++- test/plugins/test_lastgenre.py | 4 ++-- test/plugins/test_musicbrainz.py | 3 ++- test/plugins/test_subsonicupdate.py | 4 ++-- test/plugins/test_the.py | 5 +++-- test/test_files.py | 2 +- test/test_importer.py | 4 ++-- test/test_logging.py | 10 +++------- test/test_query.py | 4 ++-- test/test_sort.py | 2 +- test/test_ui.py | 8 ++++---- test/test_util.py | 3 +-- 13 files changed, 28 insertions(+), 29 deletions(-) diff --git a/test/plugins/test_art.py b/test/plugins/test_art.py index b8a9a3bda..6577b54fc 100644 --- a/test/plugins/test_art.py +++ b/test/plugins/test_art.py @@ -18,6 +18,7 @@ from __future__ import annotations import os import shutil +import unittest from typing import TYPE_CHECKING from unittest.mock import patch @@ -1012,7 +1013,7 @@ class ArtForAlbumTest(UseThePlugin): self._assert_image_operated(self.IMG_348x348, self.RESIZE_OP, True) -class DeprecatedConfigTest(BeetsTestCase): +class DeprecatedConfigTest(unittest.TestCase): """While refactoring the plugin, the remote_priority option was deprecated, and a new codepath should translate its effect. Check that it actually does so. 
@@ -1030,7 +1031,7 @@ class DeprecatedConfigTest(BeetsTestCase): assert isinstance(self.plugin.sources[-1], fetchart.FileSystem) -class EnforceRatioConfigTest(BeetsTestCase): +class EnforceRatioConfigTest(unittest.TestCase): """Throw some data at the regexes.""" def _load_with_config(self, values, should_raise): diff --git a/test/plugins/test_beatport.py b/test/plugins/test_beatport.py index d072340b5..b92a3bf15 100644 --- a/test/plugins/test_beatport.py +++ b/test/plugins/test_beatport.py @@ -14,6 +14,7 @@ """Tests for the 'beatport' plugin.""" +import unittest from datetime import timedelta from beets.test import _common @@ -585,7 +586,7 @@ class BeatportTest(BeetsTestCase): assert track.genre == test_track.genre -class BeatportResponseEmptyTest(BeetsTestCase): +class BeatportResponseEmptyTest(unittest.TestCase): def _make_tracks_response(self): results = [ { diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index 49d219de9..d34860774 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -14,17 +14,17 @@ """Tests for the 'lastgenre' plugin.""" +import unittest from unittest.mock import Mock import pytest from beets import config from beets.test import _common -from beets.test.helper import BeetsTestCase from beetsplug import lastgenre -class LastGenrePluginTest(BeetsTestCase): +class LastGenrePluginTest(unittest.TestCase): def setUp(self): super().setUp() self.plugin = lastgenre.LastGenrePlugin() diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index d9a06d00a..100cfc498 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -14,6 +14,7 @@ """Tests for MusicBrainz API wrapper.""" +import unittest from unittest import mock import pytest @@ -665,7 +666,7 @@ class MBAlbumInfoTest(MusicBrainzTestCase): assert t[1].trackdisambig == "SECOND TRACK" -class ArtistFlatteningTest(BeetsTestCase): +class ArtistFlatteningTest(unittest.TestCase): def 
_credit_dict(self, suffix=""): return { "artist": { diff --git a/test/plugins/test_subsonicupdate.py b/test/plugins/test_subsonicupdate.py index 891f75cb7..183c2bd67 100644 --- a/test/plugins/test_subsonicupdate.py +++ b/test/plugins/test_subsonicupdate.py @@ -1,11 +1,11 @@ """Tests for the 'subsonic' plugin.""" +import unittest from urllib.parse import parse_qs, urlparse import responses from beets import config -from beets.test.helper import BeetsTestCase from beetsplug import subsonicupdate @@ -24,7 +24,7 @@ def _params(url): return parse_qs(urlparse(url).query) -class SubsonicPluginTest(BeetsTestCase): +class SubsonicPluginTest(unittest.TestCase): """Test class for subsonicupdate.""" @responses.activate diff --git a/test/plugins/test_the.py b/test/plugins/test_the.py index bf073301b..c8f919de2 100644 --- a/test/plugins/test_the.py +++ b/test/plugins/test_the.py @@ -1,11 +1,12 @@ """Tests for the 'the' plugin""" +import unittest + from beets import config -from beets.test.helper import BeetsTestCase from beetsplug.the import FORMAT, PATTERN_A, PATTERN_THE, ThePlugin -class ThePluginTest(BeetsTestCase): +class ThePluginTest(unittest.TestCase): def test_unthe_with_default_patterns(self): assert ThePlugin().unthe("", PATTERN_THE) == "" assert ( diff --git a/test/test_files.py b/test/test_files.py index 72b1610c0..8be94f328 100644 --- a/test/test_files.py +++ b/test/test_files.py @@ -200,7 +200,7 @@ class MoveTest(BeetsTestCase): assert self.i.path == util.normpath(self.dest) -class HelperTest(BeetsTestCase): +class HelperTest(unittest.TestCase): def test_ancestry_works_on_file(self): p = "/a/b/c" a = ["/", "/a", "/a/b"] diff --git a/test/test_importer.py b/test/test_importer.py index 5070a058d..9bb0e8a63 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -924,7 +924,7 @@ class ChooseCandidateTest(AutotagImportTestCase): assert self.lib.albums().get().album == "Applied Album MM" -class InferAlbumDataTest(BeetsTestCase): +class 
InferAlbumDataTest(unittest.TestCase): def setUp(self): super().setUp() @@ -1220,7 +1220,7 @@ class ImportDuplicateSingletonTest(ImportTestCase): return item -class TagLogTest(BeetsTestCase): +class TagLogTest(unittest.TestCase): def test_tag_log_line(self): sio = StringIO() handler = logging.StreamHandler(sio) diff --git a/test/test_logging.py b/test/test_logging.py index d95a54387..1859ea2dd 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -3,21 +3,17 @@ import logging as log import sys import threading +import unittest from io import StringIO import beets.logging as blog import beetsplug from beets import plugins, ui from beets.test import _common, helper -from beets.test.helper import ( - AsIsImporterMixin, - BeetsTestCase, - ImportTestCase, - PluginMixin, -) +from beets.test.helper import AsIsImporterMixin, ImportTestCase, PluginMixin -class LoggingTest(BeetsTestCase): +class LoggingTest(unittest.TestCase): def test_logging_management(self): l1 = log.getLogger("foo123") l2 = blog.getLogger("foo123") diff --git a/test/test_query.py b/test/test_query.py index f85e5c637..22c2710de 100644 --- a/test/test_query.py +++ b/test/test_query.py @@ -373,7 +373,7 @@ class GetTest(DummyDataTestCase): dbcore.query.RegexpQuery("year", "199(") -class MatchTest(BeetsTestCase): +class MatchTest(unittest.TestCase): def setUp(self): super().setUp() self.item = _common.item() @@ -811,7 +811,7 @@ class NoneQueryTest(BeetsTestCase, AssertsMixin): self.assertInResult(item, matched) -class NotQueryMatchTest(BeetsTestCase): +class NotQueryMatchTest(unittest.TestCase): """Test `query.NotQuery` matching against a single item, using the same cases and assertions as on `MatchTest`, plus assertion on the negated queries (ie. assert q -> assert not NotQuery(q)). 
diff --git a/test/test_sort.py b/test/test_sort.py index d6aa5c518..25d993e30 100644 --- a/test/test_sort.py +++ b/test/test_sort.py @@ -378,7 +378,7 @@ class ConfigSortTest(DummyDataTestCase): assert results[0].albumartist > results[1].albumartist -class CaseSensitivityTest(DummyDataTestCase, BeetsTestCase): +class CaseSensitivityTest(DummyDataTestCase): """If case_insensitive is false, lower-case values should be placed after all upper-case values. E.g., `Foo Qux bar` """ diff --git a/test/test_ui.py b/test/test_ui.py index afa16e171..8bb0218d5 100644 --- a/test/test_ui.py +++ b/test/test_ui.py @@ -1337,7 +1337,7 @@ class ShowChangeTest(BeetsTestCase): @patch("beets.library.Item.try_filesize", Mock(return_value=987)) -class SummarizeItemsTest(BeetsTestCase): +class SummarizeItemsTest(unittest.TestCase): def setUp(self): super().setUp() item = library.Item() @@ -1374,7 +1374,7 @@ class SummarizeItemsTest(BeetsTestCase): assert summary == "3 items, G 2, F 1, 4kbps, 32:42, 2.9 KiB" -class PathFormatTest(BeetsTestCase): +class PathFormatTest(unittest.TestCase): def test_custom_paths_prepend(self): default_formats = ui.get_path_formats() @@ -1521,7 +1521,7 @@ class CommonOptionsParserCliTest(BeetsTestCase): # assert 'plugins: ' in output -class CommonOptionsParserTest(BeetsTestCase): +class CommonOptionsParserTest(unittest.TestCase): def test_album_option(self): parser = ui.CommonOptionsParser() assert not parser._album_flags @@ -1614,7 +1614,7 @@ class CommonOptionsParserTest(BeetsTestCase): ) -class EncodingTest(BeetsTestCase): +class EncodingTest(unittest.TestCase): """Tests for the `terminal_encoding` config option and our `_in_encoding` and `_out_encoding` utility functions. 
""" diff --git a/test/test_util.py b/test/test_util.py index 6b795b957..d08868619 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -25,7 +25,6 @@ import pytest from beets import util from beets.test import _common -from beets.test.helper import BeetsTestCase class UtilTest(unittest.TestCase): @@ -132,7 +131,7 @@ class UtilTest(unittest.TestCase): pass -class PathConversionTest(BeetsTestCase): +class PathConversionTest(unittest.TestCase): def test_syspath_windows_format(self): with _common.platform_windows(): path = os.path.join("a", "b", "c") From 5900282093c484d964377fa5d40151bd18a2bd56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 26 May 2025 13:05:40 +0100 Subject: [PATCH 26/49] Use BeetsTestCase for lastgenre tests, re-use the defined config --- test/plugins/test_lastgenre.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index d34860774..be145d811 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -14,17 +14,16 @@ """Tests for the 'lastgenre' plugin.""" -import unittest from unittest.mock import Mock import pytest -from beets import config from beets.test import _common +from beets.test.helper import BeetsTestCase from beetsplug import lastgenre -class LastGenrePluginTest(unittest.TestCase): +class LastGenrePluginTest(BeetsTestCase): def setUp(self): super().setUp() self.plugin = lastgenre.LastGenrePlugin() @@ -32,12 +31,12 @@ class LastGenrePluginTest(unittest.TestCase): def _setup_config( self, whitelist=False, canonical=False, count=1, prefer_specific=False ): - config["lastgenre"]["canonical"] = canonical - config["lastgenre"]["count"] = count - config["lastgenre"]["prefer_specific"] = prefer_specific + self.config["lastgenre"]["canonical"] = canonical + self.config["lastgenre"]["count"] = count + self.config["lastgenre"]["prefer_specific"] = prefer_specific if 
isinstance(whitelist, (bool, (str,))): # Filename, default, or disabled. - config["lastgenre"]["whitelist"] = whitelist + self.config["lastgenre"]["whitelist"] = whitelist self.plugin.setup() if not isinstance(whitelist, (bool, (str,))): # Explicit list of genres. @@ -463,11 +462,10 @@ def test_get_genre(config_values, item_genre, mock_genres, expected_result): lastgenre.LastGenrePlugin.fetch_album_genre = mock_fetch_album_genre lastgenre.LastGenrePlugin.fetch_artist_genre = mock_fetch_artist_genre - # Configure - config["lastgenre"] = config_values - # Initialize plugin instance and item plugin = lastgenre.LastGenrePlugin() + # Configure + plugin.config.set(config_values) item = _common.item() item.genre = item_genre From da5ec00aaff0f05737048f6c6e267d8e8eb80aba Mon Sep 17 00:00:00 2001 From: Ben Stolovitz Date: Mon, 26 May 2025 13:24:57 -0400 Subject: [PATCH 27/49] [Test] Fix path tests on windows (#5803) ## Description Fixes #5802. Today, tests fail on most Windows machines because we hard-code `D:` as the root drive, but most machines use `C:`. This change uses the same normalization function in the test assertion to ensure the drives match. ## To Do - [ ] ~~Documentation.~~ - [x] Changelog. - [x] Tests. (this is a tests change) ## What changed? * Updated tests to generate the drive name via normalization, instead of hard-coding `D:`. * Updated the `Item::destination()` method to document the `relative_to_libdir` param. ## How tested? * [x] Tests pass locally. --- beets/library.py | 4 +++- docs/changelog.rst | 2 ++ test/test_library.py | 10 ++++++++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/beets/library.py b/beets/library.py index 9be3a1602..b813d2ba4 100644 --- a/beets/library.py +++ b/beets/library.py @@ -1084,7 +1084,9 @@ class Item(LibModel): (i.e., where the file ought to be). The path is returned as a bytestring. ``basedir`` can override the - library's base directory for the destination. 
+ library's base directory for the destination. If ``relative_to_libdir`` + is true, returns just the fragment of the path underneath the library + base directory. """ db = self._check_db() basedir = basedir or db.directory diff --git a/docs/changelog.rst b/docs/changelog.rst index 09259e1fa..9bc065419 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -28,6 +28,8 @@ Bug fixes: * :doc:`plugins/musicbrainz`: fix regression where user configured ``extra_tags`` have been read incorrectly. :bug:`5788` +* tests: Fix library tests failing on Windows when run from outside ``D:/``. + :bug:`5802` For packagers: diff --git a/test/test_library.py b/test/test_library.py index 39f1d0b9e..bb014d76d 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -34,7 +34,7 @@ from beets.library import Album from beets.test import _common from beets.test._common import item from beets.test.helper import BeetsTestCase, ItemInDBTestCase -from beets.util import as_string, bytestring_path, syspath +from beets.util import as_string, bytestring_path, normpath, syspath # Shortcut to path normalization. np = util.normpath @@ -553,6 +553,9 @@ class ItemFormattedMappingTest(ItemInDBTestCase): class PathFormattingMixin: """Utilities for testing path formatting.""" + i: beets.library.Item + lib: beets.library.Library + def _setf(self, fmt): self.lib.path_formats.insert(0, ("default", fmt)) @@ -560,9 +563,12 @@ class PathFormattingMixin: if i is None: i = self.i + # Handle paths on Windows. if os.path.sep != "/": dest = dest.replace(b"/", os.path.sep.encode()) - dest = b"D:" + dest + + # Paths are normalized based on the CWD. + dest = normpath(dest) actual = i.destination() From dd2f203090af6f570b0c5158661d718607758c96 Mon Sep 17 00:00:00 2001 From: Uncorrupt3318 <25012611+Uncorrupt3318@users.noreply.github.com> Date: Mon, 26 May 2025 23:17:52 +0100 Subject: [PATCH 28/49] Feat: Add replace plugin (#5644) Adds replace plugin. 
The plugin allows the user to replace the audio file of a song, while keeping the tags and file name. Some music servers keep track of favourite songs via paths and tags. Now there won't be a need to 'refavourite'. Plus, this skips the import/merge steps. --- beetsplug/replace.py | 122 +++++++++++++++++++++++++++++++++++ docs/changelog.rst | 1 + docs/plugins/index.rst | 1 + docs/plugins/replace.rst | 17 +++++ test/plugins/test_replace.py | 115 +++++++++++++++++++++++++++++++++ 5 files changed, 256 insertions(+) create mode 100644 beetsplug/replace.py create mode 100644 docs/plugins/replace.rst create mode 100644 test/plugins/test_replace.py diff --git a/beetsplug/replace.py b/beetsplug/replace.py new file mode 100644 index 000000000..0c570877b --- /dev/null +++ b/beetsplug/replace.py @@ -0,0 +1,122 @@ +import shutil +from pathlib import Path + +import mediafile + +from beets import ui, util +from beets.library import Item, Library +from beets.plugins import BeetsPlugin + + +class ReplacePlugin(BeetsPlugin): + def commands(self): + cmd = ui.Subcommand( + "replace", help="replace audio file while keeping tags" + ) + cmd.func = self.run + return [cmd] + + def run(self, lib: Library, args: list[str]) -> None: + if len(args) < 2: + raise ui.UserError("Usage: beet replace ") + + new_file_path: Path = Path(args[-1]) + item_query: list[str] = args[:-1] + + self.file_check(new_file_path) + + item_list = list(lib.items(item_query)) + + if not item_list: + raise ui.UserError("No matching songs found.") + + song = self.select_song(item_list) + + if not song: + ui.print_("Operation cancelled.") + return + + if not self.confirm_replacement(new_file_path, song): + ui.print_("Aborting replacement.") + return + + self.replace_file(new_file_path, song) + + def file_check(self, filepath: Path) -> None: + """Check if the file exists and is supported""" + if not filepath.is_file(): + raise ui.UserError( + f"'{util.displayable_path(filepath)}' is not a valid file." 
+ ) + + try: + mediafile.MediaFile(util.syspath(filepath)) + except mediafile.FileTypeError as fte: + raise ui.UserError(fte) + + def select_song(self, items: list[Item]): + """Present a menu of matching songs and get user selection.""" + ui.print_("\nMatching songs:") + for i, item in enumerate(items, 1): + ui.print_(f"{i}. {util.displayable_path(item)}") + + while True: + try: + index = int( + input( + f"Which song would you like to replace? " + f"[1-{len(items)}] (0 to cancel): " + ) + ) + if index == 0: + return None + if 1 <= index <= len(items): + return items[index - 1] + ui.print_( + f"Invalid choice. Please enter a number " + f"between 1 and {len(items)}." + ) + except ValueError: + ui.print_("Invalid input. Please type in a number.") + + def confirm_replacement(self, new_file_path: Path, song: Item): + """Get user confirmation for the replacement.""" + original_file_path: Path = Path(song.path.decode()) + + if not original_file_path.exists(): + raise ui.UserError("The original song file was not found.") + + ui.print_( + f"\nReplacing: {util.displayable_path(new_file_path)} " + f"-> {util.displayable_path(original_file_path)}" + ) + decision: str = ( + input("Are you sure you want to replace this track? 
(y/N): ") + .strip() + .casefold() + ) + return decision in {"yes", "y"} + + def replace_file(self, new_file_path: Path, song: Item) -> None: + """Replace the existing file with the new one.""" + original_file_path = Path(song.path.decode()) + dest = original_file_path.with_suffix(new_file_path.suffix) + + try: + shutil.move(util.syspath(new_file_path), util.syspath(dest)) + except Exception as e: + raise ui.UserError(f"Error replacing file: {e}") + + if ( + new_file_path.suffix != original_file_path.suffix + and original_file_path.exists() + ): + try: + original_file_path.unlink() + except Exception as e: + raise ui.UserError(f"Could not delete original file: {e}") + + song.path = str(dest).encode() + song.store() + + ui.print_("Replacement successful.") diff --git a/docs/changelog.rst b/docs/changelog.rst index 9bc065419..688717351 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -22,6 +22,7 @@ New features: * :doc:`plugins/discogs`: Implement ``track_for_id`` method to allow retrieving singletons by their Discogs ID. :bug:`4661` +* :doc:`plugins/replace`: Add new plugin. Bug fixes: diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst index 82fa94281..5fbe42d9f 100644 --- a/docs/plugins/index.rst +++ b/docs/plugins/index.rst @@ -125,6 +125,7 @@ following to your configuration: playlist plexupdate random + replace replaygain rewrite scrub diff --git a/docs/plugins/replace.rst b/docs/plugins/replace.rst new file mode 100644 index 000000000..8695d492c --- /dev/null +++ b/docs/plugins/replace.rst @@ -0,0 +1,17 @@ +Replace Plugin +============== + +The ``replace`` plugin provides a command that replaces the audio file +of a track, while keeping the name and tags intact. It should save +some time when you get the wrong version of a song. + +Enable the ``replace`` plugin in your configuration (see :ref:`using-plugins`) +and then type:: + + $ beet replace + +The plugin will show you a list of files for you to pick from, and then +ask for confirmation. 
+ +Consider using the `replaygain` command from the +:doc:`/plugins/replaygain` plugin, if you usually use it during imports. diff --git a/test/plugins/test_replace.py b/test/plugins/test_replace.py new file mode 100644 index 000000000..a247e317a --- /dev/null +++ b/test/plugins/test_replace.py @@ -0,0 +1,115 @@ +import shutil +from pathlib import Path + +import pytest +from mediafile import MediaFile + +from beets import ui +from beets.test import _common +from beetsplug.replace import ReplacePlugin + +replace = ReplacePlugin() + + +class TestReplace: + @pytest.fixture(autouse=True) + def _fake_dir(self, tmp_path): + self.fake_dir = tmp_path + + @pytest.fixture(autouse=True) + def _fake_file(self, tmp_path): + self.fake_file = tmp_path + + def test_path_is_dir(self): + fake_directory = self.fake_dir / "fakeDir" + fake_directory.mkdir() + with pytest.raises(ui.UserError): + replace.file_check(fake_directory) + + def test_path_is_unsupported_file(self): + fake_file = self.fake_file / "fakefile.txt" + fake_file.write_text("test", encoding="utf-8") + with pytest.raises(ui.UserError): + replace.file_check(fake_file) + + def test_path_is_supported_file(self): + dest = self.fake_file / "full.mp3" + src = Path(_common.RSRC.decode()) / "full.mp3" + shutil.copyfile(src, dest) + + mediafile = MediaFile(dest) + mediafile.albumartist = "AAA" + mediafile.disctitle = "DDD" + mediafile.genres = ["a", "b", "c"] + mediafile.composer = None + mediafile.save() + + replace.file_check(Path(str(dest))) + + def test_select_song_valid_choice(self, monkeypatch, capfd): + songs = ["Song A", "Song B", "Song C"] + monkeypatch.setattr("builtins.input", lambda _: "2") + + selected_song = replace.select_song(songs) + + captured = capfd.readouterr() + + assert "1. Song A" in captured.out + assert "2. Song B" in captured.out + assert "3. 
Song C" in captured.out + assert selected_song == "Song B" + + def test_select_song_cancel(self, monkeypatch): + songs = ["Song A", "Song B", "Song C"] + monkeypatch.setattr("builtins.input", lambda _: "0") + + selected_song = replace.select_song(songs) + + assert selected_song is None + + def test_select_song_invalid_then_valid(self, monkeypatch, capfd): + songs = ["Song A", "Song B", "Song C"] + inputs = iter(["invalid", "4", "3"]) + monkeypatch.setattr("builtins.input", lambda _: next(inputs)) + + selected_song = replace.select_song(songs) + + captured = capfd.readouterr() + + assert "Invalid input. Please type in a number." in captured.out + assert ( + "Invalid choice. Please enter a number between 1 and 3." + in captured.out + ) + assert selected_song == "Song C" + + def test_confirm_replacement_file_not_exist(self): + class Song: + path = b"test123321.txt" + + song = Song() + + with pytest.raises(ui.UserError): + replace.confirm_replacement("test", song) + + def test_confirm_replacement_yes(self, monkeypatch): + src = Path(_common.RSRC.decode()) / "full.mp3" + monkeypatch.setattr("builtins.input", lambda _: "YES ") + + class Song: + path = str(src).encode() + + song = Song() + + assert replace.confirm_replacement("test", song) is True + + def test_confirm_replacement_no(self, monkeypatch): + src = Path(_common.RSRC.decode()) / "full.mp3" + monkeypatch.setattr("builtins.input", lambda _: "test123") + + class Song: + path = str(src).encode() + + song = Song() + + assert replace.confirm_replacement("test", song) is False From 0f76312f3120af4f2bd3b309928603ac61018976 Mon Sep 17 00:00:00 2001 From: Ian McCowan Date: Fri, 30 May 2025 06:41:29 -0700 Subject: [PATCH 29/49] Fix duplicate database change event send on Library.add (#5561) ## Description Fixes #5560. Also a couple other incidental changes / improvements: * Add `EventType` that holds the actual string literals used for event sending. 
With type checking, this can prevent subtle bugs resulting from misspelled event names. * Fix `HiddenFileTest` by using `bytestring_path()` ## To Do - [x] ~Documentation.~ - [x] Changelog. - [x] Tests. --------- Co-authored-by: J0J0 Todos Co-authored-by: J0J0 Todos <2733783+JOJ0@users.noreply.github.com> --- beets/event_types.py | 33 +++++++++++++++++++++++++++++++++ beets/library.py | 3 ++- beets/plugins.py | 5 ++++- docs/changelog.rst | 4 ++++ test/test_hidden.py | 4 ++-- test/test_library.py | 22 +++++++++++++++++++++- 6 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 beets/event_types.py diff --git a/beets/event_types.py b/beets/event_types.py new file mode 100644 index 000000000..d5fc01eec --- /dev/null +++ b/beets/event_types.py @@ -0,0 +1,33 @@ +from typing import Literal + +EventType = Literal[ + "pluginload", + "import", + "album_imported", + "album_removed", + "item_copied", + "item_imported", + "before_item_moved", + "item_moved", + "item_linked", + "item_hardlinked", + "item_reflinked", + "item_removed", + "write", + "after_write", + "import_task_created", + "import_task_start", + "import_task_apply", + "import_task_before_choice", + "import_task_choice", + "import_task_files", + "library_opened", + "database_change", + "cli_exit", + "import_begin", + "trackinfo_received", + "albuminfo_received", + "before_choose_candidate", + "mb_track_extract", + "mb_album_extract", +] diff --git a/beets/library.py b/beets/library.py index b813d2ba4..271059c69 100644 --- a/beets/library.py +++ b/beets/library.py @@ -369,8 +369,9 @@ class LibModel(dbcore.Model["Library"]): plugins.send("database_change", lib=self._db, model=self) def add(self, lib=None): + # super().add() calls self.store(), which sends `database_change`, + # so don't do it here super().add(lib) - plugins.send("database_change", lib=self._db, model=self) def __format__(self, spec): if not spec: diff --git a/beets/plugins.py b/beets/plugins.py index 63e5d3bde..d87dd5d1e 100644 --- 
a/beets/plugins.py +++ b/beets/plugins.py @@ -39,6 +39,9 @@ import beets from beets import logging from beets.util.id_extractors import extract_release_id +if TYPE_CHECKING: + from beets.event_types import EventType + if sys.version_info >= (3, 10): from typing import ParamSpec else: @@ -292,7 +295,7 @@ class BeetsPlugin: _raw_listeners: dict[str, list[Listener]] | None = None listeners: dict[str, list[Listener]] | None = None - def register_listener(self, event: str, func: Listener) -> None: + def register_listener(self, event: "EventType", func: Listener): """Add a function as a listener for the specified event.""" wrapped_func = self._set_log_level_and_params(logging.WARNING, func) diff --git a/docs/changelog.rst b/docs/changelog.rst index 688717351..b78cae78f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -31,6 +31,10 @@ Bug fixes: :bug:`5788` * tests: Fix library tests failing on Windows when run from outside ``D:/``. :bug:`5802` +* Fix an issue where calling `Library.add` would cause the `database_change` + event to be sent twice, not once. + :bug:`5560` +* Fix ``HiddenFileTest`` by using ``bytestring_path()``. 
For packagers: diff --git a/test/test_hidden.py b/test/test_hidden.py index a7e6a1a10..bd974b1cb 100644 --- a/test/test_hidden.py +++ b/test/test_hidden.py @@ -22,7 +22,7 @@ import tempfile import unittest from beets import util -from beets.util import hidden +from beets.util import bytestring_path, hidden class HiddenFileTest(unittest.TestCase): @@ -44,7 +44,7 @@ class HiddenFileTest(unittest.TestCase): else: raise e - assert hidden.is_hidden(f.name) + assert hidden.is_hidden(bytestring_path(f.name)) def test_windows_hidden(self): if not sys.platform == "win32": diff --git a/test/test_library.py b/test/test_library.py index bb014d76d..36322cfec 100644 --- a/test/test_library.py +++ b/test/test_library.py @@ -29,11 +29,12 @@ from mediafile import MediaFile, UnreadableFileError import beets.dbcore.query import beets.library +import beets.logging as blog from beets import config, plugins, util from beets.library import Album from beets.test import _common from beets.test._common import item -from beets.test.helper import BeetsTestCase, ItemInDBTestCase +from beets.test.helper import BeetsTestCase, ItemInDBTestCase, capture_log from beets.util import as_string, bytestring_path, normpath, syspath # Shortcut to path normalization. 
@@ -126,6 +127,25 @@ class AddTest(BeetsTestCase): ) assert new_grouping == self.i.grouping + def test_library_add_one_database_change_event(self): + """Test library.add emits only one database_change event.""" + self.item = _common.item() + self.item.path = beets.util.normpath( + os.path.join( + self.temp_dir, + b"a", + b"b.mp3", + ) + ) + self.item.album = "a" + self.item.title = "b" + + blog.getLogger("beets").set_global_level(blog.DEBUG) + with capture_log() as logs: + self.lib.add(self.item) + + assert logs.count("Sending event: database_change") == 1 + class RemoveTest(ItemInDBTestCase): def test_remove_deletes_from_db(self): From 509cbdcbe472aa3cf6559f6fba3eb2e6c9dcf47d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sat, 24 May 2025 15:16:02 +0100 Subject: [PATCH 30/49] Move sanitize_pairs/choices from plugins to util module --- beets/plugins.py | 60 ------------------------------------ beets/util/config.py | 66 ++++++++++++++++++++++++++++++++++++++++ beetsplug/fetchart.py | 3 +- beetsplug/lyrics.py | 3 +- test/test_plugins.py | 10 ------ test/util/test_config.py | 15 +++++++++ 6 files changed, 85 insertions(+), 72 deletions(-) create mode 100644 beets/util/config.py create mode 100644 test/util/test_config.py diff --git a/beets/plugins.py b/beets/plugins.py index d87dd5d1e..6d3a8447e 100644 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -654,66 +654,6 @@ def feat_tokens(for_artist: bool = True) -> str: ) -def sanitize_choices( - choices: Sequence[str], choices_all: Sequence[str] -) -> list[str]: - """Clean up a stringlist configuration attribute: keep only choices - elements present in choices_all, remove duplicate elements, expand '*' - wildcard while keeping original stringlist order. 
- """ - seen: set[str] = set() - others = [x for x in choices_all if x not in choices] - res: list[str] = [] - for s in choices: - if s not in seen: - if s in list(choices_all): - res.append(s) - elif s == "*": - res.extend(others) - seen.add(s) - return res - - -def sanitize_pairs( - pairs: Sequence[tuple[str, str]], pairs_all: Sequence[tuple[str, str]] -) -> list[tuple[str, str]]: - """Clean up a single-element mapping configuration attribute as returned - by Confuse's `Pairs` template: keep only two-element tuples present in - pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*') - wildcards while keeping the original order. Note that ('*', '*') and - ('*', 'whatever') have the same effect. - - For example, - - >>> sanitize_pairs( - ... [('foo', 'baz bar'), ('key', '*'), ('*', '*')], - ... [('foo', 'bar'), ('foo', 'baz'), ('foo', 'foobar'), - ... ('key', 'value')] - ... ) - [('foo', 'baz'), ('foo', 'bar'), ('key', 'value'), ('foo', 'foobar')] - """ - pairs_all = list(pairs_all) - seen: set[tuple[str, str]] = set() - others = [x for x in pairs_all if x not in pairs] - res: list[tuple[str, str]] = [] - for k, values in pairs: - for v in values.split(): - x = (k, v) - if x in pairs_all: - if x not in seen: - seen.add(x) - res.append(x) - elif k == "*": - new = [o for o in others if o not in seen] - seen.update(new) - res.extend(new) - elif v == "*": - new = [o for o in others if o not in seen and o[0] == k] - seen.update(new) - res.extend(new) - return res - - def get_distance( config: ConfigView, data_source: str, info: AlbumInfo | TrackInfo ) -> Distance: diff --git a/beets/util/config.py b/beets/util/config.py new file mode 100644 index 000000000..218a9d133 --- /dev/null +++ b/beets/util/config.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Collection, Sequence + + +def sanitize_choices( + choices: Sequence[str], choices_all: Collection[str] +) -> 
list[str]: + """Clean up a stringlist configuration attribute: keep only choices + elements present in choices_all, remove duplicate elements, expand '*' + wildcard while keeping original stringlist order. + """ + seen: set[str] = set() + others = [x for x in choices_all if x not in choices] + res: list[str] = [] + for s in choices: + if s not in seen: + if s in list(choices_all): + res.append(s) + elif s == "*": + res.extend(others) + seen.add(s) + return res + + +def sanitize_pairs( + pairs: Sequence[tuple[str, str]], pairs_all: Sequence[tuple[str, str]] +) -> list[tuple[str, str]]: + """Clean up a single-element mapping configuration attribute as returned + by Confuse's `Pairs` template: keep only two-element tuples present in + pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*') + wildcards while keeping the original order. Note that ('*', '*') and + ('*', 'whatever') have the same effect. + + For example, + + >>> sanitize_pairs( + ... [('foo', 'baz bar'), ('key', '*'), ('*', '*')], + ... [('foo', 'bar'), ('foo', 'baz'), ('foo', 'foobar'), + ... ('key', 'value')] + ... 
) + [('foo', 'baz'), ('foo', 'bar'), ('key', 'value'), ('foo', 'foobar')] + """ + pairs_all = list(pairs_all) + seen: set[tuple[str, str]] = set() + others = [x for x in pairs_all if x not in pairs] + res: list[tuple[str, str]] = [] + for k, values in pairs: + for v in values.split(): + x = (k, v) + if x in pairs_all: + if x not in seen: + seen.add(x) + res.append(x) + elif k == "*": + new = [o for o in others if o not in seen] + seen.update(new) + res.extend(new) + elif v == "*": + new = [o for o in others if o not in seen and o[0] == k] + seen.update(new) + res.extend(new) + return res diff --git a/beetsplug/fetchart.py b/beetsplug/fetchart.py index 3473fe08b..b442633da 100644 --- a/beetsplug/fetchart.py +++ b/beetsplug/fetchart.py @@ -32,6 +32,7 @@ from mediafile import image_mime_type from beets import config, importer, plugins, ui, util from beets.util import bytestring_path, get_temp_filename, sorted_walk, syspath from beets.util.artresizer import ArtResizer +from beets.util.config import sanitize_pairs if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Sequence @@ -1396,7 +1397,7 @@ class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): if s_cls.available(self._log, self.config) for c in s_cls.VALID_MATCHING_CRITERIA ] - sources = plugins.sanitize_pairs( + sources = sanitize_pairs( self.config["sources"].as_pairs(default_value="*"), available_sources, ) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 3e979221c..e2c0c7fd2 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -39,6 +39,7 @@ from unidecode import unidecode import beets from beets import plugins, ui from beets.autotag.hooks import string_dist +from beets.util.config import sanitize_choices if TYPE_CHECKING: from logging import Logger @@ -957,7 +958,7 @@ class LyricsPlugin(RequestHandler, plugins.BeetsPlugin): def backends(self) -> list[Backend]: user_sources = self.config["sources"].get() - chosen = plugins.sanitize_choices(user_sources, 
self.BACKEND_BY_NAME) + chosen = sanitize_choices(user_sources, self.BACKEND_BY_NAME) if "google" in chosen and not self.config["google_API_key"].get(): self.warn("Disabling Google source: no API key configured.") chosen.remove("google") diff --git a/test/test_plugins.py b/test/test_plugins.py index 3e809e492..207522430 100644 --- a/test/test_plugins.py +++ b/test/test_plugins.py @@ -15,7 +15,6 @@ import itertools import os -import unittest from unittest.mock import ANY, Mock, patch import pytest @@ -215,15 +214,6 @@ class EventsTest(PluginImportTestCase): ] -class HelpersTest(unittest.TestCase): - def test_sanitize_choices(self): - assert plugins.sanitize_choices(["A", "Z"], ("A", "B")) == ["A"] - assert plugins.sanitize_choices(["A", "A"], ("A")) == ["A"] - assert plugins.sanitize_choices( - ["D", "*", "A"], ("A", "B", "C", "D") - ) == ["D", "B", "C", "A"] - - class ListenersTest(PluginLoaderTestCase): def test_register(self): class DummyPlugin(plugins.BeetsPlugin): diff --git a/test/util/test_config.py b/test/util/test_config.py new file mode 100644 index 000000000..0c49f85b1 --- /dev/null +++ b/test/util/test_config.py @@ -0,0 +1,15 @@ +import unittest + +from beets.util.config import sanitize_choices + + +class HelpersTest(unittest.TestCase): + def test_sanitize_choices(self): + assert sanitize_choices(["A", "Z"], ("A", "B")) == ["A"] + assert sanitize_choices(["A", "A"], ("A")) == ["A"] + assert sanitize_choices(["D", "*", "A"], ("A", "B", "C", "D")) == [ + "D", + "B", + "C", + "A", + ] From 1c9aebd36c4ebd9a0a08ac6a131ee06927f2bad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 25 May 2025 10:52:39 +0100 Subject: [PATCH 31/49] match.current_metadata -> util.get_most_common_tags --- beets/autotag/__init__.py | 9 +---- beets/autotag/match.py | 44 ++------------------- beets/importer/tasks.py | 2 +- beets/util/__init__.py | 40 +++++++++++++++++++ test/test_autotag.py | 80 -------------------------------------- test/test_util.py | 
82 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 127 insertions(+), 130 deletions(-) diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py index 42f957b0d..5b6a11195 100644 --- a/beets/autotag/__init__.py +++ b/beets/autotag/__init__.py @@ -24,13 +24,7 @@ from beets.library import Album, Item, LibModel from beets.util import unique_list from .hooks import AlbumInfo, AlbumMatch, Distance, TrackInfo, TrackMatch -from .match import ( - Proposal, - Recommendation, - current_metadata, - tag_album, - tag_item, -) +from .match import Proposal, Recommendation, tag_album, tag_item __all__ = [ "AlbumInfo", @@ -43,7 +37,6 @@ __all__ = [ "apply_album_metadata", "apply_item_metadata", "apply_metadata", - "current_metadata", "tag_album", "tag_item", ] diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 91a315de0..4dc4c1052 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -36,7 +36,7 @@ from beets.autotag import ( TrackMatch, hooks, ) -from beets.util import plurality +from beets.util import get_most_common_tags if TYPE_CHECKING: from collections.abc import Iterable, Sequence @@ -80,44 +80,6 @@ class Proposal(NamedTuple): # Primary matching functionality. -def current_metadata( - items: Iterable[Item], -) -> tuple[dict[str, Any], dict[str, Any]]: - """Extract the likely current metadata for an album given a list of its - items. Return two dictionaries: - - The most common value for each field. - - Whether each field's value was unanimous (values are booleans). - """ - assert items # Must be nonempty. 
- - likelies = {} - consensus = {} - fields = [ - "artist", - "album", - "albumartist", - "year", - "disctotal", - "mb_albumid", - "label", - "barcode", - "catalognum", - "country", - "media", - "albumdisambig", - ] - for field in fields: - values = [item[field] for item in items if item] - likelies[field], freq = plurality(values) - consensus[field] = freq == len(values) - - # If there's an album artist consensus, use this for the artist. - if consensus["albumartist"] and likelies["albumartist"]: - likelies["artist"] = likelies["albumartist"] - - return likelies, consensus - - def assign_items( items: Sequence[Item], tracks: Sequence[TrackInfo], @@ -231,7 +193,7 @@ def distance( keys are a subset of `items` and the values are a subset of `album_info.tracks`. """ - likelies, _ = current_metadata(items) + likelies, _ = get_most_common_tags(items) dist = hooks.Distance() @@ -499,7 +461,7 @@ def tag_album( candidates. """ # Get current metadata. - likelies, consensus = current_metadata(items) + likelies, consensus = get_most_common_tags(items) cur_artist: str = likelies["artist"] cur_album: str = likelies["album"] log.debug("Tagging {0} - {1}", cur_artist, cur_album) diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index d2f638c55..75f04cf5a 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -228,7 +228,7 @@ class ImportTask(BaseImportTask): or APPLY (in which case the data comes from the choice). 
""" if self.choice_flag in (Action.ASIS, Action.RETAG): - likelies, consensus = autotag.current_metadata(self.items) + likelies, consensus = util.get_most_common_tags(self.items) return likelies elif self.choice_flag is Action.APPLY and self.match: return self.match.info.copy() diff --git a/beets/util/__init__.py b/beets/util/__init__.py index 6bc4d14ee..9bd7451f8 100644 --- a/beets/util/__init__.py +++ b/beets/util/__init__.py @@ -56,6 +56,8 @@ if TYPE_CHECKING: from collections.abc import Iterator, Sequence from logging import Logger + from beets.library import Item + if sys.version_info >= (3, 10): from typing import TypeAlias else: @@ -814,6 +816,44 @@ def plurality(objs: Iterable[T]) -> tuple[T, int]: return c.most_common(1)[0] +def get_most_common_tags( + items: Sequence[Item], +) -> tuple[dict[str, Any], dict[str, Any]]: + """Extract the likely current metadata for an album given a list of its + items. Return two dictionaries: + - The most common value for each field. + - Whether each field's value was unanimous (values are booleans). + """ + assert items # Must be nonempty. + + likelies = {} + consensus = {} + fields = [ + "artist", + "album", + "albumartist", + "year", + "disctotal", + "mb_albumid", + "label", + "barcode", + "catalognum", + "country", + "media", + "albumdisambig", + ] + for field in fields: + values = [item[field] for item in items if item] + likelies[field], freq = plurality(values) + consensus[field] = freq == len(values) + + # If there's an album artist consensus, use this for the artist. 
+ if consensus["albumartist"] and likelies["albumartist"]: + likelies["artist"] = likelies["albumartist"] + + return likelies, consensus + + # stdout and stderr as bytes class CommandOutput(NamedTuple): stdout: bytes diff --git a/test/test_autotag.py b/test/test_autotag.py index 7f8ed3d2e..bd4205806 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -24,86 +24,6 @@ from beets.autotag import AlbumInfo, TrackInfo, correct_list_fields, match from beets.autotag.hooks import Distance, string_dist from beets.library import Item from beets.test.helper import BeetsTestCase, ConfigMixin -from beets.util import plurality - - -class PluralityTest(BeetsTestCase): - def test_plurality_consensus(self): - objs = [1, 1, 1, 1] - obj, freq = plurality(objs) - assert obj == 1 - assert freq == 4 - - def test_plurality_near_consensus(self): - objs = [1, 1, 2, 1] - obj, freq = plurality(objs) - assert obj == 1 - assert freq == 3 - - def test_plurality_conflict(self): - objs = [1, 1, 2, 2, 3] - obj, freq = plurality(objs) - assert obj in (1, 2) - assert freq == 2 - - def test_plurality_empty_sequence_raises_error(self): - with pytest.raises(ValueError, match="must be non-empty"): - plurality([]) - - def test_current_metadata_finds_pluralities(self): - items = [ - Item(artist="The Beetles", album="The White Album"), - Item(artist="The Beatles", album="The White Album"), - Item(artist="The Beatles", album="Teh White Album"), - ] - likelies, consensus = match.current_metadata(items) - assert likelies["artist"] == "The Beatles" - assert likelies["album"] == "The White Album" - assert not consensus["artist"] - - def test_current_metadata_artist_consensus(self): - items = [ - Item(artist="The Beatles", album="The White Album"), - Item(artist="The Beatles", album="The White Album"), - Item(artist="The Beatles", album="Teh White Album"), - ] - likelies, consensus = match.current_metadata(items) - assert likelies["artist"] == "The Beatles" - assert likelies["album"] == "The White 
Album" - assert consensus["artist"] - - def test_albumartist_consensus(self): - items = [ - Item(artist="tartist1", album="album", albumartist="aartist"), - Item(artist="tartist2", album="album", albumartist="aartist"), - Item(artist="tartist3", album="album", albumartist="aartist"), - ] - likelies, consensus = match.current_metadata(items) - assert likelies["artist"] == "aartist" - assert not consensus["artist"] - - def test_current_metadata_likelies(self): - fields = [ - "artist", - "album", - "albumartist", - "year", - "disctotal", - "mb_albumid", - "label", - "barcode", - "catalognum", - "country", - "media", - "albumdisambig", - ] - items = [Item(**{f: f"{f}_{i or 1}" for f in fields}) for i in range(5)] - likelies, _ = match.current_metadata(items) - for f in fields: - if isinstance(likelies[f], int): - assert likelies[f] == 0 - else: - assert likelies[f] == f"{f}_1" def _make_item(title, track, artist="some artist"): diff --git a/test/test_util.py b/test/test_util.py index d08868619..5aa6c5dc7 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -24,7 +24,10 @@ from unittest.mock import Mock, patch import pytest from beets import util +from beets.library import Item from beets.test import _common +from beets.test.helper import BeetsTestCase +from beets.util import plurality class UtilTest(unittest.TestCase): @@ -217,3 +220,82 @@ class TestPathLegalization: expected_path, expected_truncated, ) + + +class PluralityTest(BeetsTestCase): + def test_plurality_consensus(self): + objs = [1, 1, 1, 1] + obj, freq = plurality(objs) + assert obj == 1 + assert freq == 4 + + def test_plurality_near_consensus(self): + objs = [1, 1, 2, 1] + obj, freq = plurality(objs) + assert obj == 1 + assert freq == 3 + + def test_plurality_conflict(self): + objs = [1, 1, 2, 2, 3] + obj, freq = plurality(objs) + assert obj in (1, 2) + assert freq == 2 + + def test_plurality_empty_sequence_raises_error(self): + with pytest.raises(ValueError, match="must be non-empty"): + 
plurality([]) + + def test_current_metadata_finds_pluralities(self): + items = [ + Item(artist="The Beetles", album="The White Album"), + Item(artist="The Beatles", album="The White Album"), + Item(artist="The Beatles", album="Teh White Album"), + ] + likelies, consensus = util.get_most_common_tags(items) + assert likelies["artist"] == "The Beatles" + assert likelies["album"] == "The White Album" + assert not consensus["artist"] + + def test_current_metadata_artist_consensus(self): + items = [ + Item(artist="The Beatles", album="The White Album"), + Item(artist="The Beatles", album="The White Album"), + Item(artist="The Beatles", album="Teh White Album"), + ] + likelies, consensus = util.get_most_common_tags(items) + assert likelies["artist"] == "The Beatles" + assert likelies["album"] == "The White Album" + assert consensus["artist"] + + def test_albumartist_consensus(self): + items = [ + Item(artist="tartist1", album="album", albumartist="aartist"), + Item(artist="tartist2", album="album", albumartist="aartist"), + Item(artist="tartist3", album="album", albumartist="aartist"), + ] + likelies, consensus = util.get_most_common_tags(items) + assert likelies["artist"] == "aartist" + assert not consensus["artist"] + + def test_current_metadata_likelies(self): + fields = [ + "artist", + "album", + "albumartist", + "year", + "disctotal", + "mb_albumid", + "label", + "barcode", + "catalognum", + "country", + "media", + "albumdisambig", + ] + items = [Item(**{f: f"{f}_{i or 1}" for f in fields}) for i in range(5)] + likelies, _ = util.get_most_common_tags(items) + for f in fields: + if isinstance(likelies[f], int): + assert likelies[f] == 0 + else: + assert likelies[f] == f"{f}_1" From 01b6ea78987931804d08f7ae377ab54f46b95f0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 25 May 2025 02:50:57 +0100 Subject: [PATCH 32/49] Simplify and speed up plurality/album tags retrieval tests --- test/test_util.py | 95 
+++++++++++++---------------------------------- 1 file changed, 26 insertions(+), 69 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 5aa6c5dc7..d8a4ca0db 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -26,8 +26,6 @@ import pytest from beets import util from beets.library import Item from beets.test import _common -from beets.test.helper import BeetsTestCase -from beets.util import plurality class UtilTest(unittest.TestCase): @@ -222,80 +220,39 @@ class TestPathLegalization: ) -class PluralityTest(BeetsTestCase): - def test_plurality_consensus(self): - objs = [1, 1, 1, 1] - obj, freq = plurality(objs) - assert obj == 1 - assert freq == 4 +class TestPlurality: + @pytest.mark.parametrize( + "objs, expected_obj, expected_freq", + [ + pytest.param([1, 1, 1, 1], 1, 4, id="consensus"), + pytest.param([1, 1, 2, 1], 1, 3, id="near consensus"), + pytest.param([1, 1, 2, 2, 3], 1, 2, id="conflict-first-wins"), + ], + ) + def test_plurality(self, objs, expected_obj, expected_freq): + assert (expected_obj, expected_freq) == util.plurality(objs) - def test_plurality_near_consensus(self): - objs = [1, 1, 2, 1] - obj, freq = plurality(objs) - assert obj == 1 - assert freq == 3 - - def test_plurality_conflict(self): - objs = [1, 1, 2, 2, 3] - obj, freq = plurality(objs) - assert obj in (1, 2) - assert freq == 2 - - def test_plurality_empty_sequence_raises_error(self): + def test_empty_sequence_raises_error(self): with pytest.raises(ValueError, match="must be non-empty"): - plurality([]) + util.plurality([]) - def test_current_metadata_finds_pluralities(self): + def test_get_most_common_tags(self): items = [ - Item(artist="The Beetles", album="The White Album"), - Item(artist="The Beatles", album="The White Album"), - Item(artist="The Beatles", album="Teh White Album"), + Item(albumartist="aartist", label="label 1", album="album"), + Item(albumartist="aartist", label="label 2", album="album"), + Item(albumartist="aartist", label="label 3", 
album="another album"), ] - likelies, consensus = util.get_most_common_tags(items) - assert likelies["artist"] == "The Beatles" - assert likelies["album"] == "The White Album" - assert not consensus["artist"] - def test_current_metadata_artist_consensus(self): - items = [ - Item(artist="The Beatles", album="The White Album"), - Item(artist="The Beatles", album="The White Album"), - Item(artist="The Beatles", album="Teh White Album"), - ] likelies, consensus = util.get_most_common_tags(items) - assert likelies["artist"] == "The Beatles" - assert likelies["album"] == "The White Album" - assert consensus["artist"] - def test_albumartist_consensus(self): - items = [ - Item(artist="tartist1", album="album", albumartist="aartist"), - Item(artist="tartist2", album="album", albumartist="aartist"), - Item(artist="tartist3", album="album", albumartist="aartist"), - ] - likelies, consensus = util.get_most_common_tags(items) + assert likelies["albumartist"] == "aartist" + assert likelies["album"] == "album" + # albumartist consensus overrides artist assert likelies["artist"] == "aartist" - assert not consensus["artist"] + assert likelies["label"] == "label 1" + assert likelies["year"] == 0 - def test_current_metadata_likelies(self): - fields = [ - "artist", - "album", - "albumartist", - "year", - "disctotal", - "mb_albumid", - "label", - "barcode", - "catalognum", - "country", - "media", - "albumdisambig", - ] - items = [Item(**{f: f"{f}_{i or 1}" for f in fields}) for i in range(5)] - likelies, _ = util.get_most_common_tags(items) - for f in fields: - if isinstance(likelies[f], int): - assert likelies[f] == 0 - else: - assert likelies[f] == f"{f}_1" + assert consensus["year"] + assert consensus["albumartist"] + assert not consensus["album"] + assert not consensus["label"] From adbd50b2374edb4fe9f9455da9401dc1225a4818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 25 May 2025 02:59:27 +0100 Subject: [PATCH 33/49] Move distance to a separate 
module --- beets/autotag/__init__.py | 14 +- beets/autotag/distance.py | 531 ++++++++++++++++++++++++++++++++++ beets/autotag/hooks.py | 334 +-------------------- beets/autotag/match.py | 205 +------------ beets/plugins.py | 13 +- beetsplug/chroma.py | 2 +- beetsplug/discogs.py | 3 +- beetsplug/lyrics.py | 2 +- test/autotag/test_distance.py | 476 ++++++++++++++++++++++++++++++ test/test_autotag.py | 472 ------------------------------ 10 files changed, 1028 insertions(+), 1024 deletions(-) create mode 100644 beets/autotag/distance.py create mode 100644 test/autotag/test_distance.py diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py index 5b6a11195..5b16b012e 100644 --- a/beets/autotag/__init__.py +++ b/beets/autotag/__init__.py @@ -14,22 +14,26 @@ """Facilities for automatically determining files' correct metadata.""" -from collections.abc import Mapping, Sequence -from typing import Union +from __future__ import annotations + +from typing import TYPE_CHECKING, Union from beets import config, logging -from beets.library import Album, Item, LibModel # Parts of external interface. 
from beets.util import unique_list -from .hooks import AlbumInfo, AlbumMatch, Distance, TrackInfo, TrackMatch +from .hooks import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch from .match import Proposal, Recommendation, tag_album, tag_item +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from beets.library import Album, Item, LibModel + __all__ = [ "AlbumInfo", "AlbumMatch", - "Distance", "TrackInfo", "TrackMatch", "Proposal", diff --git a/beets/autotag/distance.py b/beets/autotag/distance.py new file mode 100644 index 000000000..d146c27f0 --- /dev/null +++ b/beets/autotag/distance.py @@ -0,0 +1,531 @@ +from __future__ import annotations + +import datetime +import re +from functools import cache, total_ordering +from typing import TYPE_CHECKING, Any + +from jellyfish import levenshtein_distance +from unidecode import unidecode + +from beets import config, plugins +from beets.util import as_string, cached_classproperty, get_most_common_tags + +if TYPE_CHECKING: + from collections.abc import Iterator, Sequence + + from beets.library import Item + + from .hooks import AlbumInfo, TrackInfo + +# Candidate distance scoring. + +# Artist signals that indicate "various artists". These are used at the +# album level to determine whether a given release is likely a VA +# release and also on the track level to to remove the penalty for +# differing artists. +VA_ARTISTS = ("", "various artists", "various", "va", "unknown") + +# Parameters for string distance function. +# Words that can be moved to the end of a string using a comma. +SD_END_WORDS = ["the", "a", "an"] +# Reduced weights for certain portions of the string. +SD_PATTERNS = [ + (r"^the ", 0.1), + (r"[\[\(]?(ep|single)[\]\)]?", 0.0), + (r"[\[\(]?(featuring|feat|ft)[\. :].+", 0.1), + (r"\(.*?\)", 0.3), + (r"\[.*?\]", 0.3), + (r"(, )?(pt\.|part) .+", 0.2), +] +# Replacements to use before testing distance. 
+SD_REPLACE = [ + (r"&", "and"), +] + + +def _string_dist_basic(str1: str, str2: str) -> float: + """Basic edit distance between two strings, ignoring + non-alphanumeric characters and case. Comparisons are based on a + transliteration/lowering to ASCII characters. Normalized by string + length. + """ + assert isinstance(str1, str) + assert isinstance(str2, str) + str1 = as_string(unidecode(str1)) + str2 = as_string(unidecode(str2)) + str1 = re.sub(r"[^a-z0-9]", "", str1.lower()) + str2 = re.sub(r"[^a-z0-9]", "", str2.lower()) + if not str1 and not str2: + return 0.0 + return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2))) + + +def string_dist(str1: str | None, str2: str | None) -> float: + """Gives an "intuitive" edit distance between two strings. This is + an edit distance, normalized by the string length, with a number of + tweaks that reflect intuition about text. + """ + if str1 is None and str2 is None: + return 0.0 + if str1 is None or str2 is None: + return 1.0 + + str1 = str1.lower() + str2 = str2.lower() + + # Don't penalize strings that move certain words to the end. For + # example, "the something" should be considered equal to + # "something, the". + for word in SD_END_WORDS: + if str1.endswith(", %s" % word): + str1 = "{} {}".format(word, str1[: -len(word) - 2]) + if str2.endswith(", %s" % word): + str2 = "{} {}".format(word, str2[: -len(word) - 2]) + + # Perform a couple of basic normalizing substitutions. + for pat, repl in SD_REPLACE: + str1 = re.sub(pat, repl, str1) + str2 = re.sub(pat, repl, str2) + + # Change the weight for certain string portions matched by a set + # of regular expressions. We gradually change the strings and build + # up penalties associated with parts of the string that were + # deleted. + base_dist = _string_dist_basic(str1, str2) + penalty = 0.0 + for pat, weight in SD_PATTERNS: + # Get strings that drop the pattern. 
+ case_str1 = re.sub(pat, "", str1) + case_str2 = re.sub(pat, "", str2) + + if case_str1 != str1 or case_str2 != str2: + # If the pattern was present (i.e., it is deleted in the + # the current case), recalculate the distances for the + # modified strings. + case_dist = _string_dist_basic(case_str1, case_str2) + case_delta = max(0.0, base_dist - case_dist) + if case_delta == 0.0: + continue + + # Shift our baseline strings down (to avoid rematching the + # same part of the string) and add a scaled distance + # amount to the penalties. + str1 = case_str1 + str2 = case_str2 + base_dist = case_dist + penalty += weight * case_delta + + return base_dist + penalty + + +@total_ordering +class Distance: + """Keeps track of multiple distance penalties. Provides a single + weighted distance for all penalties as well as a weighted distance + for each individual penalty. + """ + + def __init__(self) -> None: + self._penalties: dict[str, list[float]] = {} + self.tracks: dict[TrackInfo, Distance] = {} + + @cached_classproperty + def _weights(cls) -> dict[str, float]: + """A dictionary from keys to floating-point weights.""" + weights_view = config["match"]["distance_weights"] + weights = {} + for key in weights_view.keys(): + weights[key] = weights_view[key].as_number() + return weights + + # Access the components and their aggregates. + + @property + def distance(self) -> float: + """Return a weighted and normalized distance across all + penalties. 
+ """ + dist_max = self.max_distance + if dist_max: + return self.raw_distance / self.max_distance + return 0.0 + + @property + def max_distance(self) -> float: + """Return the maximum distance penalty (normalization factor).""" + dist_max = 0.0 + for key, penalty in self._penalties.items(): + dist_max += len(penalty) * self._weights[key] + return dist_max + + @property + def raw_distance(self) -> float: + """Return the raw (denormalized) distance.""" + dist_raw = 0.0 + for key, penalty in self._penalties.items(): + dist_raw += sum(penalty) * self._weights[key] + return dist_raw + + def items(self) -> list[tuple[str, float]]: + """Return a list of (key, dist) pairs, with `dist` being the + weighted distance, sorted from highest to lowest. Does not + include penalties with a zero value. + """ + list_ = [] + for key in self._penalties: + dist = self[key] + if dist: + list_.append((key, dist)) + # Convert distance into a negative float we can sort items in + # ascending order (for keys, when the penalty is equal) and + # still get the items with the biggest distance first. + return sorted( + list_, key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0]) + ) + + def __hash__(self) -> int: + return id(self) + + def __eq__(self, other) -> bool: + return self.distance == other + + # Behave like a float. + + def __lt__(self, other) -> bool: + return self.distance < other + + def __float__(self) -> float: + return self.distance + + def __sub__(self, other) -> float: + return self.distance - other + + def __rsub__(self, other) -> float: + return other - self.distance + + def __str__(self) -> str: + return f"{self.distance:.2f}" + + # Behave like a dict. 
+ + def __getitem__(self, key) -> float: + """Returns the weighted distance for a named penalty.""" + dist = sum(self._penalties[key]) * self._weights[key] + dist_max = self.max_distance + if dist_max: + return dist / dist_max + return 0.0 + + def __iter__(self) -> Iterator[tuple[str, float]]: + return iter(self.items()) + + def __len__(self) -> int: + return len(self.items()) + + def keys(self) -> list[str]: + return [key for key, _ in self.items()] + + def update(self, dist: Distance): + """Adds all the distance penalties from `dist`.""" + if not isinstance(dist, Distance): + raise ValueError( + "`dist` must be a Distance object, not {}".format(type(dist)) + ) + for key, penalties in dist._penalties.items(): + self._penalties.setdefault(key, []).extend(penalties) + + # Adding components. + + def _eq(self, value1: re.Pattern[str] | Any, value2: Any) -> bool: + """Returns True if `value1` is equal to `value2`. `value1` may + be a compiled regular expression, in which case it will be + matched against `value2`. + """ + if isinstance(value1, re.Pattern): + return bool(value1.match(value2)) + return value1 == value2 + + def add(self, key: str, dist: float): + """Adds a distance penalty. `key` must correspond with a + configured weight setting. `dist` must be a float between 0.0 + and 1.0, and will be added to any existing distance penalties + for the same key. + """ + if not 0.0 <= dist <= 1.0: + raise ValueError(f"`dist` must be between 0.0 and 1.0, not {dist}") + self._penalties.setdefault(key, []).append(dist) + + def add_equality( + self, + key: str, + value: Any, + options: list[Any] | tuple[Any, ...] | Any, + ): + """Adds a distance penalty of 1.0 if `value` doesn't match any + of the values in `options`. If an option is a compiled regular + expression, it will be considered equal if it matches against + `value`. 
+ """ + if not isinstance(options, (list, tuple)): + options = [options] + for opt in options: + if self._eq(opt, value): + dist = 0.0 + break + else: + dist = 1.0 + self.add(key, dist) + + def add_expr(self, key: str, expr: bool): + """Adds a distance penalty of 1.0 if `expr` evaluates to True, + or 0.0. + """ + if expr: + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_number(self, key: str, number1: int, number2: int): + """Adds a distance penalty of 1.0 for each number of difference + between `number1` and `number2`, or 0.0 when there is no + difference. Use this when there is no upper limit on the + difference between the two numbers. + """ + diff = abs(number1 - number2) + if diff: + for i in range(diff): + self.add(key, 1.0) + else: + self.add(key, 0.0) + + def add_priority( + self, + key: str, + value: Any, + options: list[Any] | tuple[Any, ...] | Any, + ): + """Adds a distance penalty that corresponds to the position at + which `value` appears in `options`. A distance penalty of 0.0 + for the first option, or 1.0 if there is no matching option. If + an option is a compiled regular expression, it will be + considered equal if it matches against `value`. + """ + if not isinstance(options, (list, tuple)): + options = [options] + unit = 1.0 / (len(options) or 1) + for i, opt in enumerate(options): + if self._eq(opt, value): + dist = i * unit + break + else: + dist = 1.0 + self.add(key, dist) + + def add_ratio( + self, + key: str, + number1: int | float, + number2: int | float, + ): + """Adds a distance penalty for `number1` as a ratio of `number2`. + `number1` is bound at 0 and `number2`. + """ + number = float(max(min(number1, number2), 0)) + if number2: + dist = number / number2 + else: + dist = 0.0 + self.add(key, dist) + + def add_string(self, key: str, str1: str | None, str2: str | None): + """Adds a distance penalty based on the edit distance between + `str1` and `str2`. 
+ """ + dist = string_dist(str1, str2) + self.add(key, dist) + + +@cache +def get_track_length_grace() -> float: + """Get cached grace period for track length matching.""" + return config["match"]["track_length_grace"].as_number() + + +@cache +def get_track_length_max() -> float: + """Get cached maximum track length for track length matching.""" + return config["match"]["track_length_max"].as_number() + + +def track_index_changed(item: Item, track_info: TrackInfo) -> bool: + """Returns True if the item and track info index is different. Tolerates + per disc and per release numbering. + """ + return item.track not in (track_info.medium_index, track_info.index) + + +def track_distance( + item: Item, + track_info: TrackInfo, + incl_artist: bool = False, +) -> Distance: + """Determines the significance of a track metadata change. Returns a + Distance object. `incl_artist` indicates that a distance component should + be included for the track artist (i.e., for various-artist releases). + + ``track_length_grace`` and ``track_length_max`` configuration options are + cached because this function is called many times during the matching + process and their access comes with a performance overhead. + """ + dist = Distance() + + # Length. + if info_length := track_info.length: + diff = abs(item.length - info_length) - get_track_length_grace() + dist.add_ratio("track_length", diff, get_track_length_max()) + + # Title. + dist.add_string("track_title", item.title, track_info.title) + + # Artist. Only check if there is actually an artist in the track data. + if ( + incl_artist + and track_info.artist + and item.artist.lower() not in VA_ARTISTS + ): + dist.add_string("track_artist", item.artist, track_info.artist) + + # Track index. + if track_info.index and item.track: + dist.add_expr("track_index", track_index_changed(item, track_info)) + + # Track ID. 
+ if item.mb_trackid: + dist.add_expr("track_id", item.mb_trackid != track_info.track_id) + + # Penalize mismatching disc numbers. + if track_info.medium and item.disc: + dist.add_expr("medium", item.disc != track_info.medium) + + # Plugins. + dist.update(plugins.track_distance(item, track_info)) + + return dist + + +def distance( + items: Sequence[Item], + album_info: AlbumInfo, + mapping: dict[Item, TrackInfo], +) -> Distance: + """Determines how "significant" an album metadata change would be. + Returns a Distance object. `album_info` is an AlbumInfo object + reflecting the album to be compared. `items` is a sequence of all + Item objects that will be matched (order is not important). + `mapping` is a dictionary mapping Items to TrackInfo objects; the + keys are a subset of `items` and the values are a subset of + `album_info.tracks`. + """ + likelies, _ = get_most_common_tags(items) + + dist = Distance() + + # Artist, if not various. + if not album_info.va: + dist.add_string("artist", likelies["artist"], album_info.artist) + + # Album. + dist.add_string("album", likelies["album"], album_info.album) + + preferred_config = config["match"]["preferred"] + # Current or preferred media. + if album_info.media: + # Preferred media options. + media_patterns: Sequence[str] = preferred_config["media"].as_str_seq() + options = [ + re.compile(r"(\d+x)?(%s)" % pat, re.I) for pat in media_patterns + ] + if options: + dist.add_priority("media", album_info.media, options) + # Current media. + elif likelies["media"]: + dist.add_equality("media", album_info.media, likelies["media"]) + + # Mediums. + if likelies["disctotal"] and album_info.mediums: + dist.add_number("mediums", likelies["disctotal"], album_info.mediums) + + # Prefer earliest release. + if album_info.year and preferred_config["original_year"]: + # Assume 1889 (earliest first gramophone discs) if we don't know the + # original year. 
+ original = album_info.original_year or 1889 + diff = abs(album_info.year - original) + diff_max = abs(datetime.date.today().year - original) + dist.add_ratio("year", diff, diff_max) + # Year. + elif likelies["year"] and album_info.year: + if likelies["year"] in (album_info.year, album_info.original_year): + # No penalty for matching release or original year. + dist.add("year", 0.0) + elif album_info.original_year: + # Prefer matchest closest to the release year. + diff = abs(likelies["year"] - album_info.year) + diff_max = abs( + datetime.date.today().year - album_info.original_year + ) + dist.add_ratio("year", diff, diff_max) + else: + # Full penalty when there is no original year. + dist.add("year", 1.0) + + # Preferred countries. + country_patterns: Sequence[str] = preferred_config["countries"].as_str_seq() + options = [re.compile(pat, re.I) for pat in country_patterns] + if album_info.country and options: + dist.add_priority("country", album_info.country, options) + # Country. + elif likelies["country"] and album_info.country: + dist.add_string("country", likelies["country"], album_info.country) + + # Label. + if likelies["label"] and album_info.label: + dist.add_string("label", likelies["label"], album_info.label) + + # Catalog number. + if likelies["catalognum"] and album_info.catalognum: + dist.add_string( + "catalognum", likelies["catalognum"], album_info.catalognum + ) + + # Disambiguation. + if likelies["albumdisambig"] and album_info.albumdisambig: + dist.add_string( + "albumdisambig", likelies["albumdisambig"], album_info.albumdisambig + ) + + # Album ID. + if likelies["mb_albumid"]: + dist.add_equality( + "album_id", likelies["mb_albumid"], album_info.album_id + ) + + # Tracks. + dist.tracks = {} + for item, track in mapping.items(): + dist.tracks[track] = track_distance(item, track, album_info.va) + dist.add("tracks", dist.tracks[track].distance) + + # Missing tracks. 
+ for _ in range(len(album_info.tracks) - len(mapping)): + dist.add("missing_tracks", 1.0) + + # Unmatched tracks. + for _ in range(len(items) - len(mapping)): + dist.add("unmatched_tracks", 1.0) + + # Plugins. + dist.update(plugins.album_distance(items, album_info, mapping)) + + return dist diff --git a/beets/autotag/hooks.py b/beets/autotag/hooks.py index 641a6cb4f..7cd215fc4 100644 --- a/beets/autotag/hooks.py +++ b/beets/autotag/hooks.py @@ -16,21 +16,15 @@ from __future__ import annotations -import re -from functools import total_ordering from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar -from jellyfish import levenshtein_distance -from unidecode import unidecode - -from beets import config, logging -from beets.util import as_string, cached_classproperty +from beets import logging if TYPE_CHECKING: - from collections.abc import Iterator - from beets.library import Item + from .distance import Distance + log = logging.getLogger("beets") V = TypeVar("V") @@ -254,328 +248,6 @@ class TrackInfo(AttrDict[Any]): return dupe -# Candidate distance scoring. - -# Parameters for string distance function. -# Words that can be moved to the end of a string using a comma. -SD_END_WORDS = ["the", "a", "an"] -# Reduced weights for certain portions of the string. -SD_PATTERNS = [ - (r"^the ", 0.1), - (r"[\[\(]?(ep|single)[\]\)]?", 0.0), - (r"[\[\(]?(featuring|feat|ft)[\. :].+", 0.1), - (r"\(.*?\)", 0.3), - (r"\[.*?\]", 0.3), - (r"(, )?(pt\.|part) .+", 0.2), -] -# Replacements to use before testing distance. -SD_REPLACE = [ - (r"&", "and"), -] - - -def _string_dist_basic(str1: str, str2: str) -> float: - """Basic edit distance between two strings, ignoring - non-alphanumeric characters and case. Comparisons are based on a - transliteration/lowering to ASCII characters. Normalized by string - length. 
- """ - assert isinstance(str1, str) - assert isinstance(str2, str) - str1 = as_string(unidecode(str1)) - str2 = as_string(unidecode(str2)) - str1 = re.sub(r"[^a-z0-9]", "", str1.lower()) - str2 = re.sub(r"[^a-z0-9]", "", str2.lower()) - if not str1 and not str2: - return 0.0 - return levenshtein_distance(str1, str2) / float(max(len(str1), len(str2))) - - -def string_dist(str1: str | None, str2: str | None) -> float: - """Gives an "intuitive" edit distance between two strings. This is - an edit distance, normalized by the string length, with a number of - tweaks that reflect intuition about text. - """ - if str1 is None and str2 is None: - return 0.0 - if str1 is None or str2 is None: - return 1.0 - - str1 = str1.lower() - str2 = str2.lower() - - # Don't penalize strings that move certain words to the end. For - # example, "the something" should be considered equal to - # "something, the". - for word in SD_END_WORDS: - if str1.endswith(", %s" % word): - str1 = "{} {}".format(word, str1[: -len(word) - 2]) - if str2.endswith(", %s" % word): - str2 = "{} {}".format(word, str2[: -len(word) - 2]) - - # Perform a couple of basic normalizing substitutions. - for pat, repl in SD_REPLACE: - str1 = re.sub(pat, repl, str1) - str2 = re.sub(pat, repl, str2) - - # Change the weight for certain string portions matched by a set - # of regular expressions. We gradually change the strings and build - # up penalties associated with parts of the string that were - # deleted. - base_dist = _string_dist_basic(str1, str2) - penalty = 0.0 - for pat, weight in SD_PATTERNS: - # Get strings that drop the pattern. - case_str1 = re.sub(pat, "", str1) - case_str2 = re.sub(pat, "", str2) - - if case_str1 != str1 or case_str2 != str2: - # If the pattern was present (i.e., it is deleted in the - # the current case), recalculate the distances for the - # modified strings. 
- case_dist = _string_dist_basic(case_str1, case_str2) - case_delta = max(0.0, base_dist - case_dist) - if case_delta == 0.0: - continue - - # Shift our baseline strings down (to avoid rematching the - # same part of the string) and add a scaled distance - # amount to the penalties. - str1 = case_str1 - str2 = case_str2 - base_dist = case_dist - penalty += weight * case_delta - - return base_dist + penalty - - -@total_ordering -class Distance: - """Keeps track of multiple distance penalties. Provides a single - weighted distance for all penalties as well as a weighted distance - for each individual penalty. - """ - - def __init__(self) -> None: - self._penalties: dict[str, list[float]] = {} - self.tracks: dict[TrackInfo, Distance] = {} - - @cached_classproperty - def _weights(cls) -> dict[str, float]: - """A dictionary from keys to floating-point weights.""" - weights_view = config["match"]["distance_weights"] - weights = {} - for key in weights_view.keys(): - weights[key] = weights_view[key].as_number() - return weights - - # Access the components and their aggregates. - - @property - def distance(self) -> float: - """Return a weighted and normalized distance across all - penalties. - """ - dist_max = self.max_distance - if dist_max: - return self.raw_distance / self.max_distance - return 0.0 - - @property - def max_distance(self) -> float: - """Return the maximum distance penalty (normalization factor).""" - dist_max = 0.0 - for key, penalty in self._penalties.items(): - dist_max += len(penalty) * self._weights[key] - return dist_max - - @property - def raw_distance(self) -> float: - """Return the raw (denormalized) distance.""" - dist_raw = 0.0 - for key, penalty in self._penalties.items(): - dist_raw += sum(penalty) * self._weights[key] - return dist_raw - - def items(self) -> list[tuple[str, float]]: - """Return a list of (key, dist) pairs, with `dist` being the - weighted distance, sorted from highest to lowest. Does not - include penalties with a zero value. 
- """ - list_ = [] - for key in self._penalties: - dist = self[key] - if dist: - list_.append((key, dist)) - # Convert distance into a negative float we can sort items in - # ascending order (for keys, when the penalty is equal) and - # still get the items with the biggest distance first. - return sorted( - list_, key=lambda key_and_dist: (-key_and_dist[1], key_and_dist[0]) - ) - - def __hash__(self) -> int: - return id(self) - - def __eq__(self, other) -> bool: - return self.distance == other - - # Behave like a float. - - def __lt__(self, other) -> bool: - return self.distance < other - - def __float__(self) -> float: - return self.distance - - def __sub__(self, other) -> float: - return self.distance - other - - def __rsub__(self, other) -> float: - return other - self.distance - - def __str__(self) -> str: - return f"{self.distance:.2f}" - - # Behave like a dict. - - def __getitem__(self, key) -> float: - """Returns the weighted distance for a named penalty.""" - dist = sum(self._penalties[key]) * self._weights[key] - dist_max = self.max_distance - if dist_max: - return dist / dist_max - return 0.0 - - def __iter__(self) -> Iterator[tuple[str, float]]: - return iter(self.items()) - - def __len__(self) -> int: - return len(self.items()) - - def keys(self) -> list[str]: - return [key for key, _ in self.items()] - - def update(self, dist: Distance): - """Adds all the distance penalties from `dist`.""" - if not isinstance(dist, Distance): - raise ValueError( - "`dist` must be a Distance object, not {}".format(type(dist)) - ) - for key, penalties in dist._penalties.items(): - self._penalties.setdefault(key, []).extend(penalties) - - # Adding components. - - def _eq(self, value1: re.Pattern[str] | Any, value2: Any) -> bool: - """Returns True if `value1` is equal to `value2`. `value1` may - be a compiled regular expression, in which case it will be - matched against `value2`. 
- """ - if isinstance(value1, re.Pattern): - return bool(value1.match(value2)) - return value1 == value2 - - def add(self, key: str, dist: float): - """Adds a distance penalty. `key` must correspond with a - configured weight setting. `dist` must be a float between 0.0 - and 1.0, and will be added to any existing distance penalties - for the same key. - """ - if not 0.0 <= dist <= 1.0: - raise ValueError(f"`dist` must be between 0.0 and 1.0, not {dist}") - self._penalties.setdefault(key, []).append(dist) - - def add_equality( - self, - key: str, - value: Any, - options: list[Any] | tuple[Any, ...] | Any, - ): - """Adds a distance penalty of 1.0 if `value` doesn't match any - of the values in `options`. If an option is a compiled regular - expression, it will be considered equal if it matches against - `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - for opt in options: - if self._eq(opt, value): - dist = 0.0 - break - else: - dist = 1.0 - self.add(key, dist) - - def add_expr(self, key: str, expr: bool): - """Adds a distance penalty of 1.0 if `expr` evaluates to True, - or 0.0. - """ - if expr: - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_number(self, key: str, number1: int, number2: int): - """Adds a distance penalty of 1.0 for each number of difference - between `number1` and `number2`, or 0.0 when there is no - difference. Use this when there is no upper limit on the - difference between the two numbers. - """ - diff = abs(number1 - number2) - if diff: - for i in range(diff): - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_priority( - self, - key: str, - value: Any, - options: list[Any] | tuple[Any, ...] | Any, - ): - """Adds a distance penalty that corresponds to the position at - which `value` appears in `options`. A distance penalty of 0.0 - for the first option, or 1.0 if there is no matching option. 
If - an option is a compiled regular expression, it will be - considered equal if it matches against `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - unit = 1.0 / (len(options) or 1) - for i, opt in enumerate(options): - if self._eq(opt, value): - dist = i * unit - break - else: - dist = 1.0 - self.add(key, dist) - - def add_ratio( - self, - key: str, - number1: int | float, - number2: int | float, - ): - """Adds a distance penalty for `number1` as a ratio of `number2`. - `number1` is bound at 0 and `number2`. - """ - number = float(max(min(number1, number2), 0)) - if number2: - dist = number / number2 - else: - dist = 0.0 - self.add(key, dist) - - def add_string(self, key: str, str1: str | None, str2: str | None): - """Adds a distance penalty based on the edit distance between - `str1` and `str2`. - """ - dist = string_dist(str1, str2) - self.add(key, dist) - - # Structures that compose all the information for a candidate match. diff --git a/beets/autotag/match.py b/beets/autotag/match.py index 4dc4c1052..64572cf3b 100644 --- a/beets/autotag/match.py +++ b/beets/autotag/match.py @@ -18,37 +18,23 @@ releases and tracks. from __future__ import annotations -import datetime -import re from enum import IntEnum -from functools import cache from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar import lap import numpy as np from beets import config, logging, plugins -from beets.autotag import ( - AlbumInfo, - AlbumMatch, - Distance, - TrackInfo, - TrackMatch, - hooks, -) +from beets.autotag import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch, hooks from beets.util import get_most_common_tags +from .distance import VA_ARTISTS, distance, track_distance + if TYPE_CHECKING: from collections.abc import Iterable, Sequence from beets.library import Item -# Artist signals that indicate "various artists". 
These are used at the -# album level to determine whether a given release is likely a VA -# release and also on the track level to to remove the penalty for -# differing artists. -VA_ARTISTS = ("", "various artists", "various", "va", "unknown") - # Global logger. log = logging.getLogger("beets") @@ -112,191 +98,6 @@ def assign_items( return mapping, extra_items, extra_tracks -def track_index_changed(item: Item, track_info: TrackInfo) -> bool: - """Returns True if the item and track info index is different. Tolerates - per disc and per release numbering. - """ - return item.track not in (track_info.medium_index, track_info.index) - - -@cache -def get_track_length_grace() -> float: - """Get cached grace period for track length matching.""" - return config["match"]["track_length_grace"].as_number() - - -@cache -def get_track_length_max() -> float: - """Get cached maximum track length for track length matching.""" - return config["match"]["track_length_max"].as_number() - - -def track_distance( - item: Item, - track_info: TrackInfo, - incl_artist: bool = False, -) -> Distance: - """Determines the significance of a track metadata change. Returns a - Distance object. `incl_artist` indicates that a distance component should - be included for the track artist (i.e., for various-artist releases). - - ``track_length_grace`` and ``track_length_max`` configuration options are - cached because this function is called many times during the matching - process and their access comes with a performance overhead. - """ - dist = hooks.Distance() - - # Length. - if info_length := track_info.length: - diff = abs(item.length - info_length) - get_track_length_grace() - dist.add_ratio("track_length", diff, get_track_length_max()) - - # Title. - dist.add_string("track_title", item.title, track_info.title) - - # Artist. Only check if there is actually an artist in the track data. 
- if ( - incl_artist - and track_info.artist - and item.artist.lower() not in VA_ARTISTS - ): - dist.add_string("track_artist", item.artist, track_info.artist) - - # Track index. - if track_info.index and item.track: - dist.add_expr("track_index", track_index_changed(item, track_info)) - - # Track ID. - if item.mb_trackid: - dist.add_expr("track_id", item.mb_trackid != track_info.track_id) - - # Penalize mismatching disc numbers. - if track_info.medium and item.disc: - dist.add_expr("medium", item.disc != track_info.medium) - - # Plugins. - dist.update(plugins.track_distance(item, track_info)) - - return dist - - -def distance( - items: Sequence[Item], - album_info: AlbumInfo, - mapping: dict[Item, TrackInfo], -) -> Distance: - """Determines how "significant" an album metadata change would be. - Returns a Distance object. `album_info` is an AlbumInfo object - reflecting the album to be compared. `items` is a sequence of all - Item objects that will be matched (order is not important). - `mapping` is a dictionary mapping Items to TrackInfo objects; the - keys are a subset of `items` and the values are a subset of - `album_info.tracks`. - """ - likelies, _ = get_most_common_tags(items) - - dist = hooks.Distance() - - # Artist, if not various. - if not album_info.va: - dist.add_string("artist", likelies["artist"], album_info.artist) - - # Album. - dist.add_string("album", likelies["album"], album_info.album) - - preferred_config = config["match"]["preferred"] - # Current or preferred media. - if album_info.media: - # Preferred media options. - media_patterns: Sequence[str] = preferred_config["media"].as_str_seq() - options = [ - re.compile(r"(\d+x)?(%s)" % pat, re.I) for pat in media_patterns - ] - if options: - dist.add_priority("media", album_info.media, options) - # Current media. - elif likelies["media"]: - dist.add_equality("media", album_info.media, likelies["media"]) - - # Mediums. 
- if likelies["disctotal"] and album_info.mediums: - dist.add_number("mediums", likelies["disctotal"], album_info.mediums) - - # Prefer earliest release. - if album_info.year and preferred_config["original_year"]: - # Assume 1889 (earliest first gramophone discs) if we don't know the - # original year. - original = album_info.original_year or 1889 - diff = abs(album_info.year - original) - diff_max = abs(datetime.date.today().year - original) - dist.add_ratio("year", diff, diff_max) - # Year. - elif likelies["year"] and album_info.year: - if likelies["year"] in (album_info.year, album_info.original_year): - # No penalty for matching release or original year. - dist.add("year", 0.0) - elif album_info.original_year: - # Prefer matchest closest to the release year. - diff = abs(likelies["year"] - album_info.year) - diff_max = abs( - datetime.date.today().year - album_info.original_year - ) - dist.add_ratio("year", diff, diff_max) - else: - # Full penalty when there is no original year. - dist.add("year", 1.0) - - # Preferred countries. - country_patterns: Sequence[str] = preferred_config["countries"].as_str_seq() - options = [re.compile(pat, re.I) for pat in country_patterns] - if album_info.country and options: - dist.add_priority("country", album_info.country, options) - # Country. - elif likelies["country"] and album_info.country: - dist.add_string("country", likelies["country"], album_info.country) - - # Label. - if likelies["label"] and album_info.label: - dist.add_string("label", likelies["label"], album_info.label) - - # Catalog number. - if likelies["catalognum"] and album_info.catalognum: - dist.add_string( - "catalognum", likelies["catalognum"], album_info.catalognum - ) - - # Disambiguation. - if likelies["albumdisambig"] and album_info.albumdisambig: - dist.add_string( - "albumdisambig", likelies["albumdisambig"], album_info.albumdisambig - ) - - # Album ID. 
- if likelies["mb_albumid"]: - dist.add_equality( - "album_id", likelies["mb_albumid"], album_info.album_id - ) - - # Tracks. - dist.tracks = {} - for item, track in mapping.items(): - dist.tracks[track] = track_distance(item, track, album_info.va) - dist.add("tracks", dist.tracks[track].distance) - - # Missing tracks. - for _ in range(len(album_info.tracks) - len(mapping)): - dist.add("missing_tracks", 1.0) - - # Unmatched tracks. - for _ in range(len(items) - len(mapping)): - dist.add("unmatched_tracks", 1.0) - - # Plugins. - dist.update(plugins.album_distance(items, album_info, mapping)) - - return dist - - def match_by_id(items: Iterable[Item]) -> AlbumInfo | None: """If the items are tagged with an external source ID, return an AlbumInfo object for the corresponding album. Otherwise, returns diff --git a/beets/plugins.py b/beets/plugins.py index 6d3a8447e..cd66435b5 100644 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -37,6 +37,7 @@ import mediafile import beets from beets import logging +from beets.autotag.distance import Distance from beets.util.id_extractors import extract_release_id if TYPE_CHECKING: @@ -53,7 +54,7 @@ if TYPE_CHECKING: from confuse import ConfigView - from beets.autotag import AlbumInfo, Distance, TrackInfo + from beets.autotag import AlbumInfo, TrackInfo from beets.dbcore import Query from beets.dbcore.db import FieldQueryType from beets.dbcore.types import Type @@ -224,8 +225,6 @@ class BeetsPlugin: """Should return a Distance object to be added to the distance for every track comparison. """ - from beets.autotag.hooks import Distance - return Distance() def album_distance( @@ -237,8 +236,6 @@ class BeetsPlugin: """Should return a Distance object to be added to the distance for every album-level comparison. """ - from beets.autotag.hooks import Distance - return Distance() def candidates( @@ -458,8 +455,6 @@ def track_distance(item: Item, info: TrackInfo) -> Distance: """Gets the track distance calculated by all loaded plugins. 
Returns a Distance object. """ - from beets.autotag.hooks import Distance - dist = Distance() for plugin in find_plugins(): dist.update(plugin.track_distance(item, info)) @@ -472,8 +467,6 @@ def album_distance( mapping: dict[Item, TrackInfo], ) -> Distance: """Returns the album distance calculated by plugins.""" - from beets.autotag.hooks import Distance - dist = Distance() for plugin in find_plugins(): dist.update(plugin.album_distance(items, album_info, mapping)) @@ -660,8 +653,6 @@ def get_distance( """Returns the ``data_source`` weight and the maximum source weight for albums or individual tracks. """ - from beets.autotag.hooks import Distance - dist = Distance() if info.data_source == data_source: dist.add("source", config["source_weight"].as_number()) diff --git a/beetsplug/chroma.py b/beetsplug/chroma.py index 518a41776..5c718154b 100644 --- a/beetsplug/chroma.py +++ b/beetsplug/chroma.py @@ -24,7 +24,7 @@ import acoustid import confuse from beets import config, plugins, ui, util -from beets.autotag.hooks import Distance +from beets.autotag.distance import Distance from beetsplug.musicbrainz import MusicBrainzPlugin API_KEY = "1vOwZtEn" diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py index 696f1d1ac..2408f3498 100644 --- a/beetsplug/discogs.py +++ b/beetsplug/discogs.py @@ -38,7 +38,8 @@ from typing_extensions import TypedDict import beets import beets.ui from beets import config -from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist +from beets.autotag.distance import string_dist +from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance from beets.util.id_extractors import extract_release_id diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index e2c0c7fd2..f1c40ab24 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -38,7 +38,7 @@ from unidecode import unidecode import beets from beets import plugins, ui -from beets.autotag.hooks import string_dist 
+from beets.autotag.distance import string_dist from beets.util.config import sanitize_choices if TYPE_CHECKING: diff --git a/test/autotag/test_distance.py b/test/autotag/test_distance.py new file mode 100644 index 000000000..ec00ebcdf --- /dev/null +++ b/test/autotag/test_distance.py @@ -0,0 +1,476 @@ +import re +import unittest + +from beets import config +from beets.autotag import AlbumInfo, TrackInfo, match +from beets.autotag.distance import Distance, string_dist +from beets.library import Item +from beets.test.helper import BeetsTestCase + + +def _make_item(title, track, artist="some artist"): + return Item( + title=title, + track=track, + artist=artist, + album="some album", + length=1, + mb_trackid="", + mb_albumid="", + mb_artistid="", + ) + + +def _make_trackinfo(): + return [ + TrackInfo( + title="one", track_id=None, artist="some artist", length=1, index=1 + ), + TrackInfo( + title="two", track_id=None, artist="some artist", length=1, index=2 + ), + TrackInfo( + title="three", + track_id=None, + artist="some artist", + length=1, + index=3, + ), + ] + + +def _clear_weights(): + """Hack around the lazy descriptor used to cache weights for + Distance calculations. 
+ """ + Distance.__dict__["_weights"].cache = {} + + +class DistanceTest(BeetsTestCase): + def tearDown(self): + super().tearDown() + _clear_weights() + + def test_add(self): + dist = Distance() + dist.add("add", 1.0) + assert dist._penalties == {"add": [1.0]} + + def test_add_equality(self): + dist = Distance() + dist.add_equality("equality", "ghi", ["abc", "def", "ghi"]) + assert dist._penalties["equality"] == [0.0] + + dist.add_equality("equality", "xyz", ["abc", "def", "ghi"]) + assert dist._penalties["equality"] == [0.0, 1.0] + + dist.add_equality("equality", "abc", re.compile(r"ABC", re.I)) + assert dist._penalties["equality"] == [0.0, 1.0, 0.0] + + def test_add_expr(self): + dist = Distance() + dist.add_expr("expr", True) + assert dist._penalties["expr"] == [1.0] + + dist.add_expr("expr", False) + assert dist._penalties["expr"] == [1.0, 0.0] + + def test_add_number(self): + dist = Distance() + # Add a full penalty for each number of difference between two numbers. + + dist.add_number("number", 1, 1) + assert dist._penalties["number"] == [0.0] + + dist.add_number("number", 1, 2) + assert dist._penalties["number"] == [0.0, 1.0] + + dist.add_number("number", 2, 1) + assert dist._penalties["number"] == [0.0, 1.0, 1.0] + + dist.add_number("number", -1, 2) + assert dist._penalties["number"] == [0.0, 1.0, 1.0, 1.0, 1.0, 1.0] + + def test_add_priority(self): + dist = Distance() + dist.add_priority("priority", "abc", "abc") + assert dist._penalties["priority"] == [0.0] + + dist.add_priority("priority", "def", ["abc", "def"]) + assert dist._penalties["priority"] == [0.0, 0.5] + + dist.add_priority( + "priority", "gh", ["ab", "cd", "ef", re.compile("GH", re.I)] + ) + assert dist._penalties["priority"] == [0.0, 0.5, 0.75] + + dist.add_priority("priority", "xyz", ["abc", "def"]) + assert dist._penalties["priority"] == [0.0, 0.5, 0.75, 1.0] + + def test_add_ratio(self): + dist = Distance() + dist.add_ratio("ratio", 25, 100) + assert dist._penalties["ratio"] == [0.25] + + 
dist.add_ratio("ratio", 10, 5) + assert dist._penalties["ratio"] == [0.25, 1.0] + + dist.add_ratio("ratio", -5, 5) + assert dist._penalties["ratio"] == [0.25, 1.0, 0.0] + + dist.add_ratio("ratio", 5, 0) + assert dist._penalties["ratio"] == [0.25, 1.0, 0.0, 0.0] + + def test_add_string(self): + dist = Distance() + sdist = string_dist("abc", "bcd") + dist.add_string("string", "abc", "bcd") + assert dist._penalties["string"] == [sdist] + assert dist._penalties["string"] != [0] + + def test_add_string_none(self): + dist = Distance() + dist.add_string("string", None, "string") + assert dist._penalties["string"] == [1] + + def test_add_string_both_none(self): + dist = Distance() + dist.add_string("string", None, None) + assert dist._penalties["string"] == [0] + + def test_distance(self): + config["match"]["distance_weights"]["album"] = 2.0 + config["match"]["distance_weights"]["medium"] = 1.0 + _clear_weights() + + dist = Distance() + dist.add("album", 0.5) + dist.add("media", 0.25) + dist.add("media", 0.75) + assert dist.distance == 0.5 + + # __getitem__() + assert dist["album"] == 0.25 + assert dist["media"] == 0.25 + + def test_max_distance(self): + config["match"]["distance_weights"]["album"] = 3.0 + config["match"]["distance_weights"]["medium"] = 1.0 + _clear_weights() + + dist = Distance() + dist.add("album", 0.5) + dist.add("medium", 0.0) + dist.add("medium", 0.0) + assert dist.max_distance == 5.0 + + def test_operators(self): + config["match"]["distance_weights"]["source"] = 1.0 + config["match"]["distance_weights"]["album"] = 2.0 + config["match"]["distance_weights"]["medium"] = 1.0 + _clear_weights() + + dist = Distance() + dist.add("source", 0.0) + dist.add("album", 0.5) + dist.add("medium", 0.25) + dist.add("medium", 0.75) + assert len(dist) == 2 + assert list(dist) == [("album", 0.2), ("medium", 0.2)] + assert dist == 0.4 + assert dist < 1.0 + assert dist > 0.0 + assert dist - 0.4 == 0.0 + assert 0.4 - dist == 0.0 + assert float(dist) == 0.4 + + def 
test_raw_distance(self): + config["match"]["distance_weights"]["album"] = 3.0 + config["match"]["distance_weights"]["medium"] = 1.0 + _clear_weights() + + dist = Distance() + dist.add("album", 0.5) + dist.add("medium", 0.25) + dist.add("medium", 0.5) + assert dist.raw_distance == 2.25 + + def test_items(self): + config["match"]["distance_weights"]["album"] = 4.0 + config["match"]["distance_weights"]["medium"] = 2.0 + _clear_weights() + + dist = Distance() + dist.add("album", 0.1875) + dist.add("medium", 0.75) + assert dist.items() == [("medium", 0.25), ("album", 0.125)] + + # Sort by key if distance is equal. + dist = Distance() + dist.add("album", 0.375) + dist.add("medium", 0.75) + assert dist.items() == [("album", 0.25), ("medium", 0.25)] + + def test_update(self): + dist1 = Distance() + dist1.add("album", 0.5) + dist1.add("media", 1.0) + + dist2 = Distance() + dist2.add("album", 0.75) + dist2.add("album", 0.25) + dist2.add("media", 0.05) + + dist1.update(dist2) + + assert dist1._penalties == { + "album": [0.5, 0.75, 0.25], + "media": [1.0, 0.05], + } + + +class TrackDistanceTest(BeetsTestCase): + def test_identical_tracks(self): + item = _make_item("one", 1) + info = _make_trackinfo()[0] + dist = match.track_distance(item, info, incl_artist=True) + assert dist == 0.0 + + def test_different_title(self): + item = _make_item("foo", 1) + info = _make_trackinfo()[0] + dist = match.track_distance(item, info, incl_artist=True) + assert dist != 0.0 + + def test_different_artist(self): + item = _make_item("one", 1) + item.artist = "foo" + info = _make_trackinfo()[0] + dist = match.track_distance(item, info, incl_artist=True) + assert dist != 0.0 + + def test_various_artists_tolerated(self): + item = _make_item("one", 1) + item.artist = "Various Artists" + info = _make_trackinfo()[0] + dist = match.track_distance(item, info, incl_artist=True) + assert dist == 0.0 + + +class AlbumDistanceTest(BeetsTestCase): + def _mapping(self, items, info): + out = {} + for i, t in 
zip(items, info.tracks): + out[i] = t + return out + + def _dist(self, items, info): + return match.distance(items, info, self._mapping(items, info)) + + def test_identical_albums(self): + items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("two", 2)) + items.append(_make_item("three", 3)) + info = AlbumInfo( + artist="some artist", + album="some album", + tracks=_make_trackinfo(), + va=False, + ) + assert self._dist(items, info) == 0 + + def test_incomplete_album(self): + items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("three", 3)) + info = AlbumInfo( + artist="some artist", + album="some album", + tracks=_make_trackinfo(), + va=False, + ) + dist = self._dist(items, info) + assert dist != 0 + # Make sure the distance is not too great + assert dist < 0.2 + + def test_global_artists_differ(self): + items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("two", 2)) + items.append(_make_item("three", 3)) + info = AlbumInfo( + artist="someone else", + album="some album", + tracks=_make_trackinfo(), + va=False, + ) + assert self._dist(items, info) != 0 + + def test_comp_track_artists_match(self): + items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("two", 2)) + items.append(_make_item("three", 3)) + info = AlbumInfo( + artist="should be ignored", + album="some album", + tracks=_make_trackinfo(), + va=True, + ) + assert self._dist(items, info) == 0 + + def test_comp_no_track_artists(self): + # Some VA releases don't have track artists (incomplete metadata). 
+ items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("two", 2)) + items.append(_make_item("three", 3)) + info = AlbumInfo( + artist="should be ignored", + album="some album", + tracks=_make_trackinfo(), + va=True, + ) + info.tracks[0].artist = None + info.tracks[1].artist = None + info.tracks[2].artist = None + assert self._dist(items, info) == 0 + + def test_comp_track_artists_do_not_match(self): + items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("two", 2, "someone else")) + items.append(_make_item("three", 3)) + info = AlbumInfo( + artist="some artist", + album="some album", + tracks=_make_trackinfo(), + va=True, + ) + assert self._dist(items, info) != 0 + + def test_tracks_out_of_order(self): + items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("three", 2)) + items.append(_make_item("two", 3)) + info = AlbumInfo( + artist="some artist", + album="some album", + tracks=_make_trackinfo(), + va=False, + ) + dist = self._dist(items, info) + assert 0 < dist < 0.2 + + def test_two_medium_release(self): + items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("two", 2)) + items.append(_make_item("three", 3)) + info = AlbumInfo( + artist="some artist", + album="some album", + tracks=_make_trackinfo(), + va=False, + ) + info.tracks[0].medium_index = 1 + info.tracks[1].medium_index = 2 + info.tracks[2].medium_index = 1 + dist = self._dist(items, info) + assert dist == 0 + + def test_per_medium_track_numbers(self): + items = [] + items.append(_make_item("one", 1)) + items.append(_make_item("two", 2)) + items.append(_make_item("three", 1)) + info = AlbumInfo( + artist="some artist", + album="some album", + tracks=_make_trackinfo(), + va=False, + ) + info.tracks[0].medium_index = 1 + info.tracks[1].medium_index = 2 + info.tracks[2].medium_index = 1 + dist = self._dist(items, info) + assert dist == 0 + + +class StringDistanceTest(unittest.TestCase): + def test_equal_strings(self): + dist 
= string_dist("Some String", "Some String") + assert dist == 0.0 + + def test_different_strings(self): + dist = string_dist("Some String", "Totally Different") + assert dist != 0.0 + + def test_punctuation_ignored(self): + dist = string_dist("Some String", "Some.String!") + assert dist == 0.0 + + def test_case_ignored(self): + dist = string_dist("Some String", "sOME sTring") + assert dist == 0.0 + + def test_leading_the_has_lower_weight(self): + dist1 = string_dist("XXX Band Name", "Band Name") + dist2 = string_dist("The Band Name", "Band Name") + assert dist2 < dist1 + + def test_parens_have_lower_weight(self): + dist1 = string_dist("One .Two.", "One") + dist2 = string_dist("One (Two)", "One") + assert dist2 < dist1 + + def test_brackets_have_lower_weight(self): + dist1 = string_dist("One .Two.", "One") + dist2 = string_dist("One [Two]", "One") + assert dist2 < dist1 + + def test_ep_label_has_zero_weight(self): + dist = string_dist("My Song (EP)", "My Song") + assert dist == 0.0 + + def test_featured_has_lower_weight(self): + dist1 = string_dist("My Song blah Someone", "My Song") + dist2 = string_dist("My Song feat Someone", "My Song") + assert dist2 < dist1 + + def test_postfix_the(self): + dist = string_dist("The Song Title", "Song Title, The") + assert dist == 0.0 + + def test_postfix_a(self): + dist = string_dist("A Song Title", "Song Title, A") + assert dist == 0.0 + + def test_postfix_an(self): + dist = string_dist("An Album Title", "Album Title, An") + assert dist == 0.0 + + def test_empty_strings(self): + dist = string_dist("", "") + assert dist == 0.0 + + def test_solo_pattern(self): + # Just make sure these don't crash. 
+ string_dist("The ", "") + string_dist("(EP)", "(EP)") + string_dist(", An", "") + + def test_heuristic_does_not_harm_distance(self): + dist = string_dist("Untitled", "[Untitled]") + assert dist == 0.0 + + def test_ampersand_expansion(self): + dist = string_dist("And", "&") + assert dist == 0.0 + + def test_accented_characters(self): + dist = string_dist("\xe9\xe1\xf1", "ean") + assert dist == 0.0 diff --git a/test/test_autotag.py b/test/test_autotag.py index bd4205806..8d467e5ed 100644 --- a/test/test_autotag.py +++ b/test/test_autotag.py @@ -14,410 +14,14 @@ """Tests for autotagging functionality.""" -import re -import unittest - import pytest from beets import autotag, config from beets.autotag import AlbumInfo, TrackInfo, correct_list_fields, match -from beets.autotag.hooks import Distance, string_dist from beets.library import Item from beets.test.helper import BeetsTestCase, ConfigMixin -def _make_item(title, track, artist="some artist"): - return Item( - title=title, - track=track, - artist=artist, - album="some album", - length=1, - mb_trackid="", - mb_albumid="", - mb_artistid="", - ) - - -def _make_trackinfo(): - return [ - TrackInfo( - title="one", track_id=None, artist="some artist", length=1, index=1 - ), - TrackInfo( - title="two", track_id=None, artist="some artist", length=1, index=2 - ), - TrackInfo( - title="three", - track_id=None, - artist="some artist", - length=1, - index=3, - ), - ] - - -def _clear_weights(): - """Hack around the lazy descriptor used to cache weights for - Distance calculations. 
- """ - Distance.__dict__["_weights"].cache = {} - - -class DistanceTest(BeetsTestCase): - def tearDown(self): - super().tearDown() - _clear_weights() - - def test_add(self): - dist = Distance() - dist.add("add", 1.0) - assert dist._penalties == {"add": [1.0]} - - def test_add_equality(self): - dist = Distance() - dist.add_equality("equality", "ghi", ["abc", "def", "ghi"]) - assert dist._penalties["equality"] == [0.0] - - dist.add_equality("equality", "xyz", ["abc", "def", "ghi"]) - assert dist._penalties["equality"] == [0.0, 1.0] - - dist.add_equality("equality", "abc", re.compile(r"ABC", re.I)) - assert dist._penalties["equality"] == [0.0, 1.0, 0.0] - - def test_add_expr(self): - dist = Distance() - dist.add_expr("expr", True) - assert dist._penalties["expr"] == [1.0] - - dist.add_expr("expr", False) - assert dist._penalties["expr"] == [1.0, 0.0] - - def test_add_number(self): - dist = Distance() - # Add a full penalty for each number of difference between two numbers. - - dist.add_number("number", 1, 1) - assert dist._penalties["number"] == [0.0] - - dist.add_number("number", 1, 2) - assert dist._penalties["number"] == [0.0, 1.0] - - dist.add_number("number", 2, 1) - assert dist._penalties["number"] == [0.0, 1.0, 1.0] - - dist.add_number("number", -1, 2) - assert dist._penalties["number"] == [0.0, 1.0, 1.0, 1.0, 1.0, 1.0] - - def test_add_priority(self): - dist = Distance() - dist.add_priority("priority", "abc", "abc") - assert dist._penalties["priority"] == [0.0] - - dist.add_priority("priority", "def", ["abc", "def"]) - assert dist._penalties["priority"] == [0.0, 0.5] - - dist.add_priority( - "priority", "gh", ["ab", "cd", "ef", re.compile("GH", re.I)] - ) - assert dist._penalties["priority"] == [0.0, 0.5, 0.75] - - dist.add_priority("priority", "xyz", ["abc", "def"]) - assert dist._penalties["priority"] == [0.0, 0.5, 0.75, 1.0] - - def test_add_ratio(self): - dist = Distance() - dist.add_ratio("ratio", 25, 100) - assert dist._penalties["ratio"] == [0.25] - - 
dist.add_ratio("ratio", 10, 5) - assert dist._penalties["ratio"] == [0.25, 1.0] - - dist.add_ratio("ratio", -5, 5) - assert dist._penalties["ratio"] == [0.25, 1.0, 0.0] - - dist.add_ratio("ratio", 5, 0) - assert dist._penalties["ratio"] == [0.25, 1.0, 0.0, 0.0] - - def test_add_string(self): - dist = Distance() - sdist = string_dist("abc", "bcd") - dist.add_string("string", "abc", "bcd") - assert dist._penalties["string"] == [sdist] - assert dist._penalties["string"] != [0] - - def test_add_string_none(self): - dist = Distance() - dist.add_string("string", None, "string") - assert dist._penalties["string"] == [1] - - def test_add_string_both_none(self): - dist = Distance() - dist.add_string("string", None, None) - assert dist._penalties["string"] == [0] - - def test_distance(self): - config["match"]["distance_weights"]["album"] = 2.0 - config["match"]["distance_weights"]["medium"] = 1.0 - _clear_weights() - - dist = Distance() - dist.add("album", 0.5) - dist.add("media", 0.25) - dist.add("media", 0.75) - assert dist.distance == 0.5 - - # __getitem__() - assert dist["album"] == 0.25 - assert dist["media"] == 0.25 - - def test_max_distance(self): - config["match"]["distance_weights"]["album"] = 3.0 - config["match"]["distance_weights"]["medium"] = 1.0 - _clear_weights() - - dist = Distance() - dist.add("album", 0.5) - dist.add("medium", 0.0) - dist.add("medium", 0.0) - assert dist.max_distance == 5.0 - - def test_operators(self): - config["match"]["distance_weights"]["source"] = 1.0 - config["match"]["distance_weights"]["album"] = 2.0 - config["match"]["distance_weights"]["medium"] = 1.0 - _clear_weights() - - dist = Distance() - dist.add("source", 0.0) - dist.add("album", 0.5) - dist.add("medium", 0.25) - dist.add("medium", 0.75) - assert len(dist) == 2 - assert list(dist) == [("album", 0.2), ("medium", 0.2)] - assert dist == 0.4 - assert dist < 1.0 - assert dist > 0.0 - assert dist - 0.4 == 0.0 - assert 0.4 - dist == 0.0 - assert float(dist) == 0.4 - - def 
test_raw_distance(self): - config["match"]["distance_weights"]["album"] = 3.0 - config["match"]["distance_weights"]["medium"] = 1.0 - _clear_weights() - - dist = Distance() - dist.add("album", 0.5) - dist.add("medium", 0.25) - dist.add("medium", 0.5) - assert dist.raw_distance == 2.25 - - def test_items(self): - config["match"]["distance_weights"]["album"] = 4.0 - config["match"]["distance_weights"]["medium"] = 2.0 - _clear_weights() - - dist = Distance() - dist.add("album", 0.1875) - dist.add("medium", 0.75) - assert dist.items() == [("medium", 0.25), ("album", 0.125)] - - # Sort by key if distance is equal. - dist = Distance() - dist.add("album", 0.375) - dist.add("medium", 0.75) - assert dist.items() == [("album", 0.25), ("medium", 0.25)] - - def test_update(self): - dist1 = Distance() - dist1.add("album", 0.5) - dist1.add("media", 1.0) - - dist2 = Distance() - dist2.add("album", 0.75) - dist2.add("album", 0.25) - dist2.add("media", 0.05) - - dist1.update(dist2) - - assert dist1._penalties == { - "album": [0.5, 0.75, 0.25], - "media": [1.0, 0.05], - } - - -class TrackDistanceTest(BeetsTestCase): - def test_identical_tracks(self): - item = _make_item("one", 1) - info = _make_trackinfo()[0] - dist = match.track_distance(item, info, incl_artist=True) - assert dist == 0.0 - - def test_different_title(self): - item = _make_item("foo", 1) - info = _make_trackinfo()[0] - dist = match.track_distance(item, info, incl_artist=True) - assert dist != 0.0 - - def test_different_artist(self): - item = _make_item("one", 1) - item.artist = "foo" - info = _make_trackinfo()[0] - dist = match.track_distance(item, info, incl_artist=True) - assert dist != 0.0 - - def test_various_artists_tolerated(self): - item = _make_item("one", 1) - item.artist = "Various Artists" - info = _make_trackinfo()[0] - dist = match.track_distance(item, info, incl_artist=True) - assert dist == 0.0 - - -class AlbumDistanceTest(BeetsTestCase): - def _mapping(self, items, info): - out = {} - for i, t in 
zip(items, info.tracks): - out[i] = t - return out - - def _dist(self, items, info): - return match.distance(items, info, self._mapping(items, info)) - - def test_identical_albums(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - assert self._dist(items, info) == 0 - - def test_incomplete_album(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - dist = self._dist(items, info) - assert dist != 0 - # Make sure the distance is not too great - assert dist < 0.2 - - def test_global_artists_differ(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="someone else", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - assert self._dist(items, info) != 0 - - def test_comp_track_artists_match(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="should be ignored", - album="some album", - tracks=_make_trackinfo(), - va=True, - ) - assert self._dist(items, info) == 0 - - def test_comp_no_track_artists(self): - # Some VA releases don't have track artists (incomplete metadata). 
- items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="should be ignored", - album="some album", - tracks=_make_trackinfo(), - va=True, - ) - info.tracks[0].artist = None - info.tracks[1].artist = None - info.tracks[2].artist = None - assert self._dist(items, info) == 0 - - def test_comp_track_artists_do_not_match(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2, "someone else")) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=True, - ) - assert self._dist(items, info) != 0 - - def test_tracks_out_of_order(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("three", 2)) - items.append(_make_item("two", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - dist = self._dist(items, info) - assert 0 < dist < 0.2 - - def test_two_medium_release(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - info.tracks[0].medium_index = 1 - info.tracks[1].medium_index = 2 - info.tracks[2].medium_index = 1 - dist = self._dist(items, info) - assert dist == 0 - - def test_per_medium_track_numbers(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 1)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - info.tracks[0].medium_index = 1 - info.tracks[1].medium_index = 2 - info.tracks[2].medium_index = 1 - dist = self._dist(items, info) - assert dist == 0 - - class TestAssignment(ConfigMixin): A = "one" B = "two" @@ -840,82 +444,6 @@ class 
ApplyCompilationTest(BeetsTestCase, ApplyTestUtil): assert self.items[1].comp -class StringDistanceTest(unittest.TestCase): - def test_equal_strings(self): - dist = string_dist("Some String", "Some String") - assert dist == 0.0 - - def test_different_strings(self): - dist = string_dist("Some String", "Totally Different") - assert dist != 0.0 - - def test_punctuation_ignored(self): - dist = string_dist("Some String", "Some.String!") - assert dist == 0.0 - - def test_case_ignored(self): - dist = string_dist("Some String", "sOME sTring") - assert dist == 0.0 - - def test_leading_the_has_lower_weight(self): - dist1 = string_dist("XXX Band Name", "Band Name") - dist2 = string_dist("The Band Name", "Band Name") - assert dist2 < dist1 - - def test_parens_have_lower_weight(self): - dist1 = string_dist("One .Two.", "One") - dist2 = string_dist("One (Two)", "One") - assert dist2 < dist1 - - def test_brackets_have_lower_weight(self): - dist1 = string_dist("One .Two.", "One") - dist2 = string_dist("One [Two]", "One") - assert dist2 < dist1 - - def test_ep_label_has_zero_weight(self): - dist = string_dist("My Song (EP)", "My Song") - assert dist == 0.0 - - def test_featured_has_lower_weight(self): - dist1 = string_dist("My Song blah Someone", "My Song") - dist2 = string_dist("My Song feat Someone", "My Song") - assert dist2 < dist1 - - def test_postfix_the(self): - dist = string_dist("The Song Title", "Song Title, The") - assert dist == 0.0 - - def test_postfix_a(self): - dist = string_dist("A Song Title", "Song Title, A") - assert dist == 0.0 - - def test_postfix_an(self): - dist = string_dist("An Album Title", "Album Title, An") - assert dist == 0.0 - - def test_empty_strings(self): - dist = string_dist("", "") - assert dist == 0.0 - - def test_solo_pattern(self): - # Just make sure these don't crash. 
- string_dist("The ", "") - string_dist("(EP)", "(EP)") - string_dist(", An", "") - - def test_heuristic_does_not_harm_distance(self): - dist = string_dist("Untitled", "[Untitled]") - assert dist == 0.0 - - def test_ampersand_expansion(self): - dist = string_dist("And", "&") - assert dist == 0.0 - - def test_accented_characters(self): - dist = string_dist("\xe9\xe1\xf1", "ean") - assert dist == 0.0 - - @pytest.mark.parametrize( "single_field,list_field", [ From 318a840af2e4e80f5e8c6465a0d0d44d31dad4a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 25 May 2025 13:43:20 +0100 Subject: [PATCH 34/49] Rewrite distance tests --- test/autotag/test_distance.py | 597 ++++++++++++---------------------- 1 file changed, 210 insertions(+), 387 deletions(-) diff --git a/test/autotag/test_distance.py b/test/autotag/test_distance.py index ec00ebcdf..e3ce9f891 100644 --- a/test/autotag/test_distance.py +++ b/test/autotag/test_distance.py @@ -1,176 +1,108 @@ import re -import unittest -from beets import config -from beets.autotag import AlbumInfo, TrackInfo, match -from beets.autotag.distance import Distance, string_dist +import pytest + +from beets.autotag import AlbumInfo, TrackInfo +from beets.autotag.distance import ( + Distance, + distance, + string_dist, + track_distance, +) from beets.library import Item -from beets.test.helper import BeetsTestCase +from beets.test.helper import ConfigMixin + +_p = pytest.param -def _make_item(title, track, artist="some artist"): - return Item( - title=title, - track=track, - artist=artist, - album="some album", - length=1, - mb_trackid="", - mb_albumid="", - mb_artistid="", - ) +class TestDistance: + @pytest.fixture(scope="class") + def config(self): + return ConfigMixin().config + @pytest.fixture + def dist(self, config): + config["match"]["distance_weights"]["source"] = 2.0 + config["match"]["distance_weights"]["album"] = 4.0 + config["match"]["distance_weights"]["medium"] = 2.0 -def _make_trackinfo(): - 
return [ - TrackInfo( - title="one", track_id=None, artist="some artist", length=1, index=1 - ), - TrackInfo( - title="two", track_id=None, artist="some artist", length=1, index=2 - ), - TrackInfo( - title="three", - track_id=None, - artist="some artist", - length=1, - index=3, - ), - ] + Distance.__dict__["_weights"].cache = {} + return Distance() -def _clear_weights(): - """Hack around the lazy descriptor used to cache weights for - Distance calculations. - """ - Distance.__dict__["_weights"].cache = {} - - -class DistanceTest(BeetsTestCase): - def tearDown(self): - super().tearDown() - _clear_weights() - - def test_add(self): - dist = Distance() + def test_add(self, dist): dist.add("add", 1.0) + assert dist._penalties == {"add": [1.0]} - def test_add_equality(self): - dist = Distance() - dist.add_equality("equality", "ghi", ["abc", "def", "ghi"]) - assert dist._penalties["equality"] == [0.0] + @pytest.mark.parametrize( + "key, args_with_expected", + [ + ( + "equality", + [ + (("ghi", ["abc", "def", "ghi"]), [0.0]), + (("xyz", ["abc", "def", "ghi"]), [0.0, 1.0]), + (("abc", re.compile(r"ABC", re.I)), [0.0, 1.0, 0.0]), + ], + ), + ("expr", [((True,), [1.0]), ((False,), [1.0, 0.0])]), + ( + "number", + [ + ((1, 1), [0.0]), + ((1, 2), [0.0, 1.0]), + ((2, 1), [0.0, 1.0, 1.0]), + ((-1, 2), [0.0, 1.0, 1.0, 1.0, 1.0, 1.0]), + ], + ), + ( + "priority", + [ + (("abc", "abc"), [0.0]), + (("def", ["abc", "def"]), [0.0, 0.5]), + (("gh", ["ab", "cd", "ef", re.compile("GH", re.I)]), [0.0, 0.5, 0.75]), # noqa: E501 + (("xyz", ["abc", "def"]), [0.0, 0.5, 0.75, 1.0]), + ], + ), + ( + "ratio", + [ + ((25, 100), [0.25]), + ((10, 5), [0.25, 1.0]), + ((-5, 5), [0.25, 1.0, 0.0]), + ((5, 0), [0.25, 1.0, 0.0, 0.0]), + ], + ), + ( + "string", + [ + (("abc", "bcd"), [2 / 3]), + (("abc", None), [2 / 3, 1]), + ((None, None), [2 / 3, 1, 0]), + ], + ), + ], + ) # fmt: skip + def test_add_methods(self, dist, key, args_with_expected): + method = getattr(dist, f"add_{key}") + for arg_set, 
expected in args_with_expected: + method(key, *arg_set) + assert dist._penalties[key] == expected - dist.add_equality("equality", "xyz", ["abc", "def", "ghi"]) - assert dist._penalties["equality"] == [0.0, 1.0] - - dist.add_equality("equality", "abc", re.compile(r"ABC", re.I)) - assert dist._penalties["equality"] == [0.0, 1.0, 0.0] - - def test_add_expr(self): - dist = Distance() - dist.add_expr("expr", True) - assert dist._penalties["expr"] == [1.0] - - dist.add_expr("expr", False) - assert dist._penalties["expr"] == [1.0, 0.0] - - def test_add_number(self): - dist = Distance() - # Add a full penalty for each number of difference between two numbers. - - dist.add_number("number", 1, 1) - assert dist._penalties["number"] == [0.0] - - dist.add_number("number", 1, 2) - assert dist._penalties["number"] == [0.0, 1.0] - - dist.add_number("number", 2, 1) - assert dist._penalties["number"] == [0.0, 1.0, 1.0] - - dist.add_number("number", -1, 2) - assert dist._penalties["number"] == [0.0, 1.0, 1.0, 1.0, 1.0, 1.0] - - def test_add_priority(self): - dist = Distance() - dist.add_priority("priority", "abc", "abc") - assert dist._penalties["priority"] == [0.0] - - dist.add_priority("priority", "def", ["abc", "def"]) - assert dist._penalties["priority"] == [0.0, 0.5] - - dist.add_priority( - "priority", "gh", ["ab", "cd", "ef", re.compile("GH", re.I)] - ) - assert dist._penalties["priority"] == [0.0, 0.5, 0.75] - - dist.add_priority("priority", "xyz", ["abc", "def"]) - assert dist._penalties["priority"] == [0.0, 0.5, 0.75, 1.0] - - def test_add_ratio(self): - dist = Distance() - dist.add_ratio("ratio", 25, 100) - assert dist._penalties["ratio"] == [0.25] - - dist.add_ratio("ratio", 10, 5) - assert dist._penalties["ratio"] == [0.25, 1.0] - - dist.add_ratio("ratio", -5, 5) - assert dist._penalties["ratio"] == [0.25, 1.0, 0.0] - - dist.add_ratio("ratio", 5, 0) - assert dist._penalties["ratio"] == [0.25, 1.0, 0.0, 0.0] - - def test_add_string(self): - dist = Distance() - sdist = 
string_dist("abc", "bcd") - dist.add_string("string", "abc", "bcd") - assert dist._penalties["string"] == [sdist] - assert dist._penalties["string"] != [0] - - def test_add_string_none(self): - dist = Distance() - dist.add_string("string", None, "string") - assert dist._penalties["string"] == [1] - - def test_add_string_both_none(self): - dist = Distance() - dist.add_string("string", None, None) - assert dist._penalties["string"] == [0] - - def test_distance(self): - config["match"]["distance_weights"]["album"] = 2.0 - config["match"]["distance_weights"]["medium"] = 1.0 - _clear_weights() - - dist = Distance() + def test_distance(self, dist): dist.add("album", 0.5) dist.add("media", 0.25) dist.add("media", 0.75) + assert dist.distance == 0.5 + assert dist.max_distance == 6.0 + assert dist.raw_distance == 3.0 - # __getitem__() - assert dist["album"] == 0.25 - assert dist["media"] == 0.25 + assert dist["album"] == 1 / 3 + assert dist["media"] == 1 / 6 - def test_max_distance(self): - config["match"]["distance_weights"]["album"] = 3.0 - config["match"]["distance_weights"]["medium"] = 1.0 - _clear_weights() - - dist = Distance() - dist.add("album", 0.5) - dist.add("medium", 0.0) - dist.add("medium", 0.0) - assert dist.max_distance == 5.0 - - def test_operators(self): - config["match"]["distance_weights"]["source"] = 1.0 - config["match"]["distance_weights"]["album"] = 2.0 - config["match"]["distance_weights"]["medium"] = 1.0 - _clear_weights() - - dist = Distance() + def test_operators(self, dist): dist.add("source", 0.0) dist.add("album", 0.5) dist.add("medium", 0.25) @@ -184,23 +116,7 @@ class DistanceTest(BeetsTestCase): assert 0.4 - dist == 0.0 assert float(dist) == 0.4 - def test_raw_distance(self): - config["match"]["distance_weights"]["album"] = 3.0 - config["match"]["distance_weights"]["medium"] = 1.0 - _clear_weights() - - dist = Distance() - dist.add("album", 0.5) - dist.add("medium", 0.25) - dist.add("medium", 0.5) - assert dist.raw_distance == 2.25 - - def 
test_items(self): - config["match"]["distance_weights"]["album"] = 4.0 - config["match"]["distance_weights"]["medium"] = 2.0 - _clear_weights() - - dist = Distance() + def test_penalties_sort(self, dist): dist.add("album", 0.1875) dist.add("medium", 0.75) assert dist.items() == [("medium", 0.25), ("album", 0.125)] @@ -211,8 +127,8 @@ class DistanceTest(BeetsTestCase): dist.add("medium", 0.75) assert dist.items() == [("album", 0.25), ("medium", 0.25)] - def test_update(self): - dist1 = Distance() + def test_update(self, dist): + dist1 = dist dist1.add("album", 0.5) dist1.add("media", 1.0) @@ -229,248 +145,155 @@ class DistanceTest(BeetsTestCase): } -class TrackDistanceTest(BeetsTestCase): - def test_identical_tracks(self): - item = _make_item("one", 1) - info = _make_trackinfo()[0] - dist = match.track_distance(item, info, incl_artist=True) - assert dist == 0.0 +class TestTrackDistance: + @pytest.fixture(scope="class") + def info(self): + return TrackInfo(title="title", artist="artist") - def test_different_title(self): - item = _make_item("foo", 1) - info = _make_trackinfo()[0] - dist = match.track_distance(item, info, incl_artist=True) - assert dist != 0.0 + @pytest.mark.parametrize( + "title, artist, expected_penalty", + [ + _p("title", "artist", False, id="identical"), + _p("title", "Various Artists", False, id="tolerate-va"), + _p("title", "different artist", True, id="different-artist"), + _p("different title", "artist", True, id="different-title"), + ], + ) + def test_track_distance(self, info, title, artist, expected_penalty): + item = Item(artist=artist, title=title) - def test_different_artist(self): - item = _make_item("one", 1) - item.artist = "foo" - info = _make_trackinfo()[0] - dist = match.track_distance(item, info, incl_artist=True) - assert dist != 0.0 - - def test_various_artists_tolerated(self): - item = _make_item("one", 1) - item.artist = "Various Artists" - info = _make_trackinfo()[0] - dist = match.track_distance(item, info, incl_artist=True) 
- assert dist == 0.0 + assert ( + bool(track_distance(item, info, incl_artist=True)) + == expected_penalty + ) -class AlbumDistanceTest(BeetsTestCase): - def _mapping(self, items, info): - out = {} - for i, t in zip(items, info.tracks): - out[i] = t - return out +class TestAlbumDistance: + @pytest.fixture(scope="class") + def items(self): + return [ + Item( + title=title, + track=track, + artist="artist", + album="album", + length=1, + ) + for title, track in [("one", 1), ("two", 2), ("three", 3)] + ] - def _dist(self, items, info): - return match.distance(items, info, self._mapping(items, info)) + @pytest.fixture + def get_dist(self, items): + def inner(info: AlbumInfo): + return distance(items, info, dict(zip(items, info.tracks))) - def test_identical_albums(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), + return inner + + @pytest.fixture + def info(self, items): + return AlbumInfo( + artist="artist", + album="album", + tracks=[ + TrackInfo( + title=i.title, + artist=i.artist, + index=i.track, + length=i.length, + ) + for i in items + ], va=False, ) - assert self._dist(items, info) == 0 - def test_incomplete_album(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, + def test_identical_albums(self, get_dist, info): + assert get_dist(info) == 0 + + def test_incomplete_album(self, get_dist, info): + info.tracks.pop(2) + + assert 0 < float(get_dist(info)) < 0.2 + + def test_overly_complete_album(self, get_dist, info): + info.tracks.append( + Item(index=4, title="four", artist="artist", length=1) ) - dist = self._dist(items, info) - assert dist != 0 - # Make sure the distance is not too great - assert dist < 0.2 - def 
test_global_artists_differ(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="someone else", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - assert self._dist(items, info) != 0 + assert 0 < float(get_dist(info)) < 0.2 - def test_comp_track_artists_match(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="should be ignored", - album="some album", - tracks=_make_trackinfo(), - va=True, - ) - assert self._dist(items, info) == 0 + @pytest.mark.parametrize("va", [True, False]) + def test_albumartist(self, get_dist, info, va): + info.artist = "another artist" + info.va = va - def test_comp_no_track_artists(self): + assert bool(get_dist(info)) is not va + + def test_comp_no_track_artists(self, get_dist, info): # Some VA releases don't have track artists (incomplete metadata). 
- items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="should be ignored", - album="some album", - tracks=_make_trackinfo(), - va=True, - ) - info.tracks[0].artist = None - info.tracks[1].artist = None - info.tracks[2].artist = None - assert self._dist(items, info) == 0 + info.artist = "another artist" + info.va = True + for track in info.tracks: + track.artist = None - def test_comp_track_artists_do_not_match(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2, "someone else")) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=True, - ) - assert self._dist(items, info) != 0 + assert get_dist(info) == 0 - def test_tracks_out_of_order(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("three", 2)) - items.append(_make_item("two", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - dist = self._dist(items, info) - assert 0 < dist < 0.2 + def test_comp_track_artists_do_not_match(self, get_dist, info): + info.va = True + info.tracks[0].artist = "another artist" - def test_two_medium_release(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 3)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) + assert get_dist(info) != 0 + + def test_tracks_out_of_order(self, get_dist, info): + tracks = info.tracks + tracks[1].title, tracks[2].title = tracks[2].title, tracks[1].title + + assert 0 < float(get_dist(info)) < 0.2 + + def test_two_medium_release(self, get_dist, info): info.tracks[0].medium_index = 1 info.tracks[1].medium_index = 2 info.tracks[2].medium_index = 1 - dist = self._dist(items, info) - assert dist == 0 - def 
test_per_medium_track_numbers(self): - items = [] - items.append(_make_item("one", 1)) - items.append(_make_item("two", 2)) - items.append(_make_item("three", 1)) - info = AlbumInfo( - artist="some artist", - album="some album", - tracks=_make_trackinfo(), - va=False, - ) - info.tracks[0].medium_index = 1 - info.tracks[1].medium_index = 2 - info.tracks[2].medium_index = 1 - dist = self._dist(items, info) - assert dist == 0 + assert get_dist(info) == 0 -class StringDistanceTest(unittest.TestCase): - def test_equal_strings(self): - dist = string_dist("Some String", "Some String") - assert dist == 0.0 +class TestStringDistance: + @pytest.mark.parametrize( + "string1, string2", + [ + ("Some String", "Some String"), + ("Some String", "Some.String!"), + ("Some String", "sOME sTring"), + ("My Song (EP)", "My Song"), + ("The Song Title", "Song Title, The"), + ("A Song Title", "Song Title, A"), + ("An Album Title", "Album Title, An"), + ("", ""), + ("Untitled", "[Untitled]"), + ("And", "&"), + ("\xe9\xe1\xf1", "ean"), + ], + ) + def test_matching_distance(self, string1, string2): + assert string_dist(string1, string2) == 0.0 - def test_different_strings(self): - dist = string_dist("Some String", "Totally Different") - assert dist != 0.0 + def test_different_distance(self): + assert string_dist("Some String", "Totally Different") != 0.0 - def test_punctuation_ignored(self): - dist = string_dist("Some String", "Some.String!") - assert dist == 0.0 - - def test_case_ignored(self): - dist = string_dist("Some String", "sOME sTring") - assert dist == 0.0 - - def test_leading_the_has_lower_weight(self): - dist1 = string_dist("XXX Band Name", "Band Name") - dist2 = string_dist("The Band Name", "Band Name") - assert dist2 < dist1 - - def test_parens_have_lower_weight(self): - dist1 = string_dist("One .Two.", "One") - dist2 = string_dist("One (Two)", "One") - assert dist2 < dist1 - - def test_brackets_have_lower_weight(self): - dist1 = string_dist("One .Two.", "One") - dist2 = 
string_dist("One [Two]", "One") - assert dist2 < dist1 - - def test_ep_label_has_zero_weight(self): - dist = string_dist("My Song (EP)", "My Song") - assert dist == 0.0 - - def test_featured_has_lower_weight(self): - dist1 = string_dist("My Song blah Someone", "My Song") - dist2 = string_dist("My Song feat Someone", "My Song") - assert dist2 < dist1 - - def test_postfix_the(self): - dist = string_dist("The Song Title", "Song Title, The") - assert dist == 0.0 - - def test_postfix_a(self): - dist = string_dist("A Song Title", "Song Title, A") - assert dist == 0.0 - - def test_postfix_an(self): - dist = string_dist("An Album Title", "Album Title, An") - assert dist == 0.0 - - def test_empty_strings(self): - dist = string_dist("", "") - assert dist == 0.0 + @pytest.mark.parametrize( + "string1, string2, reference", + [ + ("XXX Band Name", "The Band Name", "Band Name"), + ("One .Two.", "One (Two)", "One"), + ("One .Two.", "One [Two]", "One"), + ("My Song blah Someone", "My Song feat Someone", "My Song"), + ], + ) + def test_relative_weights(self, string1, string2, reference): + assert string_dist(string2, reference) < string_dist(string1, reference) def test_solo_pattern(self): # Just make sure these don't crash. 
string_dist("The ", "") string_dist("(EP)", "(EP)") string_dist(", An", "") - - def test_heuristic_does_not_harm_distance(self): - dist = string_dist("Untitled", "[Untitled]") - assert dist == 0.0 - - def test_ampersand_expansion(self): - dist = string_dist("And", "&") - assert dist == 0.0 - - def test_accented_characters(self): - dist = string_dist("\xe9\xe1\xf1", "ean") - assert dist == 0.0 From 0da6192a4ab799196ff49e403e8cd91fbaaef4bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 25 May 2025 14:01:29 +0100 Subject: [PATCH 35/49] Test sanitize_pairs --- test/util/test_config.py | 47 ++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/test/util/test_config.py b/test/util/test_config.py index 0c49f85b1..7105844dd 100644 --- a/test/util/test_config.py +++ b/test/util/test_config.py @@ -1,15 +1,38 @@ -import unittest +import pytest -from beets.util.config import sanitize_choices +from beets.util.config import sanitize_choices, sanitize_pairs -class HelpersTest(unittest.TestCase): - def test_sanitize_choices(self): - assert sanitize_choices(["A", "Z"], ("A", "B")) == ["A"] - assert sanitize_choices(["A", "A"], ("A")) == ["A"] - assert sanitize_choices(["D", "*", "A"], ("A", "B", "C", "D")) == [ - "D", - "B", - "C", - "A", - ] +@pytest.mark.parametrize( + "input_choices, valid_choices, expected", + [ + (["A", "Z"], ("A", "B"), ["A"]), + (["A", "A"], ("A"), ["A"]), + (["D", "*", "A"], ("A", "B", "C", "D"), ["D", "B", "C", "A"]), + ], +) +def test_sanitize_choices(input_choices, valid_choices, expected): + assert sanitize_choices(input_choices, valid_choices) == expected + + +def test_sanitize_pairs(): + assert sanitize_pairs( + [ + ("foo", "baz bar"), + ("foo", "baz bar"), + ("key", "*"), + ("*", "*"), + ("discard", "bye"), + ], + [ + ("foo", "bar"), + ("foo", "baz"), + ("foo", "foobar"), + ("key", "value"), + ], + ) == [ + ("foo", "baz"), + ("foo", "bar"), + ("key", "value"), + 
("foo", "foobar"), + ] From cb246c28bc69986a702fa683f1f7c3c1d94425c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 26 May 2025 11:49:20 +0100 Subject: [PATCH 36/49] Remove dead chartlyrics This integration test failed because `chartlyrics.com` website is no longer available, so I'm removing it. --- test/plugins/lyrics_pages.py | 39 ------------------------------------ 1 file changed, 39 deletions(-) diff --git a/test/plugins/lyrics_pages.py b/test/plugins/lyrics_pages.py index ef2eeb1a2..e1806b167 100644 --- a/test/plugins/lyrics_pages.py +++ b/test/plugins/lyrics_pages.py @@ -108,45 +108,6 @@ lyrics_pages = [ url_title="The Beatles - Lady Madonna Lyrics | AZLyrics.com", marks=[xfail_on_ci("AZLyrics is blocked by Cloudflare")], ), - LyricsPage.make( - "http://www.chartlyrics.com/_LsLsZ7P4EK-F-LD4dJgDQ/Lady+Madonna.aspx", - """ - Lady Madonna, - Children at your feet - Wonder how you manage to make ends meet. - - Who finds the money - When you pay the rent? - Did you think that money was heaven-sent? - - Friday night arrives without a suitcase. - Sunday morning creeping like a nun. - Monday's child has learned to tie his bootlace. - - See how they run. - - Lady Madonna, - Baby at your breast - Wonders how you manage to feed the rest. - - See how they run. - - Lady Madonna, - Lying on the bed. - Listen to the music playing in your head. - - Tuesday afternoon is never ending. - Wednesday morning papers didn't come. - Thursday night your stockings needed mending. - - See how they run. - - Lady Madonna, - Children at your feet - Wonder how you manage to make ends meet. 
- """, - url_title="The Beatles Lady Madonna lyrics", - ), LyricsPage.make( "https://www.dainuzodziai.lt/m/mergaites-nori-mylet-atlanta/", """ From 99f7e94b594314c39709125ba5e6f898c50592f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Mon, 26 May 2025 13:37:23 +0100 Subject: [PATCH 37/49] Add Distance and current_metadata to autotag.__init__ for backward compat --- beets/autotag/__init__.py | 8 ++++++-- beets/plugins.py | 12 +++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/beets/autotag/__init__.py b/beets/autotag/__init__.py index 5b16b012e..8cfe534ab 100644 --- a/beets/autotag/__init__.py +++ b/beets/autotag/__init__.py @@ -19,10 +19,12 @@ from __future__ import annotations from typing import TYPE_CHECKING, Union from beets import config, logging +from beets.util import get_most_common_tags as current_metadata # Parts of external interface. from beets.util import unique_list +from .distance import Distance from .hooks import AlbumInfo, AlbumMatch, TrackInfo, TrackMatch from .match import Proposal, Recommendation, tag_album, tag_item @@ -34,13 +36,15 @@ if TYPE_CHECKING: __all__ = [ "AlbumInfo", "AlbumMatch", - "TrackInfo", - "TrackMatch", + "Distance", # for backwards compatibility "Proposal", "Recommendation", + "TrackInfo", + "TrackMatch", "apply_album_metadata", "apply_item_metadata", "apply_metadata", + "current_metadata", # for backwards compatibility "tag_album", "tag_item", ] diff --git a/beets/plugins.py b/beets/plugins.py index cd66435b5..1ae672e20 100644 --- a/beets/plugins.py +++ b/beets/plugins.py @@ -37,7 +37,6 @@ import mediafile import beets from beets import logging -from beets.autotag.distance import Distance from beets.util.id_extractors import extract_release_id if TYPE_CHECKING: @@ -55,6 +54,7 @@ if TYPE_CHECKING: from confuse import ConfigView from beets.autotag import AlbumInfo, TrackInfo + from beets.autotag.distance import Distance from beets.dbcore import Query from beets.dbcore.db 
import FieldQueryType from beets.dbcore.types import Type @@ -225,6 +225,8 @@ class BeetsPlugin: """Should return a Distance object to be added to the distance for every track comparison. """ + from beets.autotag.distance import Distance + return Distance() def album_distance( @@ -236,6 +238,8 @@ class BeetsPlugin: """Should return a Distance object to be added to the distance for every album-level comparison. """ + from beets.autotag.distance import Distance + return Distance() def candidates( @@ -455,6 +459,8 @@ def track_distance(item: Item, info: TrackInfo) -> Distance: """Gets the track distance calculated by all loaded plugins. Returns a Distance object. """ + from beets.autotag.distance import Distance + dist = Distance() for plugin in find_plugins(): dist.update(plugin.track_distance(item, info)) @@ -467,6 +473,8 @@ def album_distance( mapping: dict[Item, TrackInfo], ) -> Distance: """Returns the album distance calculated by plugins.""" + from beets.autotag.distance import Distance + dist = Distance() for plugin in find_plugins(): dist.update(plugin.album_distance(items, album_info, mapping)) @@ -653,6 +661,8 @@ def get_distance( """Returns the ``data_source`` weight and the maximum source weight for albums or individual tracks. """ + from beets.autotag.distance import Distance + dist = Distance() if info.data_source == data_source: dist.add("source", config["source_weight"].as_number()) From 2f98f11d576601a87431ce1941893249f2ffe4d2 Mon Sep 17 00:00:00 2001 From: Ben Stolovitz Date: Sun, 25 May 2025 14:38:44 -0400 Subject: [PATCH 38/49] fix local langdetect test failures avoid linter error avoid other linter error fix format changing deps (no lock!) poetry lock? lint & format attempt 2 at poetry lock crlf -> lf line endings changelog! 
--- docs/changelog.rst | 2 ++ poetry.lock | 7 +++++-- pyproject.toml | 1 + test/plugins/test_lyrics.py | 7 ------- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index b78cae78f..03a2a72ea 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -35,6 +35,8 @@ Bug fixes: event to be sent twice, not once. :bug:`5560` * Fix ``HiddenFileTest`` by using ``bytestring_path()``. +* tests: Fix tests failing without ``langdetect`` (by making it required). + :bug:`5797` For packagers: diff --git a/poetry.lock b/poetry.lock index bdd0ee0ca..752953e1d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1030,7 +1030,7 @@ files = [ name = "langdetect" version = "1.0.9" description = "Language detection library ported from Google's language-detection." -optional = true +optional = false python-versions = "*" files = [ {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"}, @@ -1271,8 +1271,11 @@ files = [ {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"}, {file = 
"lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"}, {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, @@ -3276,4 +3279,4 @@ web = ["flask", "flask-cors"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "d609e83f7ffeefc12e28d627e5646aa5c1a6f5a56d7013bb649a468069550dba" +content-hash = "b3f2746a43227fe639d17eb22d7924e30c9d83eef53dce2c10388c602f0c6665" diff --git a/pyproject.toml b/pyproject.toml index 8b817a078..ea69240d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,7 @@ sphinx = { version = "*", optional = true } beautifulsoup4 = "*" codecov = ">=2.1.13" flask = "*" +langdetect = "*" mock = "*" pylast = "*" pytest = "*" diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 74e727099..945a7158c 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -14,8 +14,6 @@ """Tests for the 'lyrics' plugin.""" -import importlib.util -import os import re import textwrap from functools import partial @@ -30,11 +28,6 @@ from beetsplug import lyrics from .lyrics_pages import LyricsPage, lyrics_pages -github_ci = os.environ.get("GITHUB_ACTIONS") == "true" -if not github_ci and not importlib.util.find_spec("langdetect"): - pytest.skip("langdetect isn't available", allow_module_level=True) - - PHRASE_BY_TITLE = { "Lady Madonna": "friday night arrives without a suitcase", "Jazz'n'blues": "as i check my balance i kiss the screen", 
From 66864fcc27756c43566ffa998c137e241744f422 Mon Sep 17 00:00:00 2001 From: Sebastian Mohr <39738318+semohr@users.noreply.github.com> Date: Wed, 11 Jun 2025 15:19:46 +0200 Subject: [PATCH 39/49] Minor improvements to spotify plugin typing. (#5815) ## Description Added some more typehints to the spotify plugin. Also added a method to get the tokenfile and changed to logic for the handle_response to use `requests.request`. This is done mainly to prepare for https://github.com/beetbox/beets/pull/5787, see also https://github.com/beetbox/beets/pull/5814 --- beetsplug/spotify.py | 161 +++++++++++++++++++++---------------------- 1 file changed, 79 insertions(+), 82 deletions(-) diff --git a/beetsplug/spotify.py b/beetsplug/spotify.py index c0d212971..9d285928a 100644 --- a/beetsplug/spotify.py +++ b/beetsplug/spotify.py @@ -25,6 +25,7 @@ import json import re import time import webbrowser +from typing import TYPE_CHECKING, Any, Literal, Sequence import confuse import requests @@ -33,8 +34,11 @@ import unidecode from beets import ui from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.dbcore import types -from beets.library import DateType -from beets.plugins import BeetsPlugin, MetadataSourcePlugin +from beets.library import DateType, Library +from beets.plugins import BeetsPlugin, MetadataSourcePlugin, Response + +if TYPE_CHECKING: + from beetsplug._typing import JSONDict DEFAULT_WAITING_TIME = 5 @@ -107,33 +111,33 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): self.config["client_id"].redact = True self.config["client_secret"].redact = True - self.tokenfile = self.config["tokenfile"].get( - confuse.Filename(in_app_dir=True) - ) # Path to the JSON file for storing the OAuth access token. 
self.setup() def setup(self): """Retrieve previously saved OAuth token or generate a new one.""" + try: - with open(self.tokenfile) as f: + with open(self._tokenfile()) as f: token_data = json.load(f) except OSError: self._authenticate() else: self.access_token = token_data["access_token"] + def _tokenfile(self) -> str: + """Get the path to the JSON file for storing the OAuth token.""" + return self.config["tokenfile"].get(confuse.Filename(in_app_dir=True)) + def _authenticate(self): """Request an access token via the Client Credentials Flow: https://developer.spotify.com/documentation/general/guides/authorization-guide/#client-credentials-flow """ + c_id: str = self.config["client_id"].as_str() + c_secret: str = self.config["client_secret"].as_str() + headers = { "Authorization": "Basic {}".format( - base64.b64encode( - ":".join( - self.config[k].as_str() - for k in ("client_id", "client_secret") - ).encode() - ).decode() + base64.b64encode(f"{c_id}:{c_secret}".encode()).decode() ) } response = requests.post( @@ -154,27 +158,32 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): self._log.debug( "{} access token: {}", self.data_source, self.access_token ) - with open(self.tokenfile, "w") as f: + with open(self._tokenfile(), "w") as f: json.dump({"access_token": self.access_token}, f) def _handle_response( - self, request_type, url, params=None, retry_count=0, max_retries=3 - ): + self, + method: Literal["get", "post", "put", "delete"], + url: str, + params: Any = None, + retry_count: int = 0, + max_retries: int = 3, + ) -> JSONDict: """Send a request, reauthenticating if necessary. - :param request_type: Type of :class:`Request` constructor, - e.g. ``requests.get``, ``requests.post``, etc. - :type request_type: function + :param method: HTTP method to use for the request. :param url: URL for the new :class:`Request` object. - :type url: str :param params: (optional) list of tuples or bytes to send in the query string for the :class:`Request`. 
:type params: dict - :return: JSON data for the class:`Response ` object. - :rtype: dict """ + + if retry_count > max_retries: + raise SpotifyAPIError("Maximum retries reached.") + try: - response = request_type( + response = requests.request( + method, url, headers={"Authorization": f"Bearer {self.access_token}"}, params=params, @@ -189,22 +198,28 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): self._log.error(f"Network error: {e}") raise SpotifyAPIError("Network error.") except requests.exceptions.RequestException as e: + if e.response is None: + self._log.error(f"Request failed: {e}") + raise SpotifyAPIError("Request failed.") if e.response.status_code == 401: self._log.debug( f"{self.data_source} access token has expired. " f"Reauthenticating." ) self._authenticate() - return self._handle_response(request_type, url, params=params) + return self._handle_response( + method, + url, + params=params, + retry_count=retry_count + 1, + ) elif e.response.status_code == 404: raise SpotifyAPIError( f"API Error: {e.response.status_code}\n" f"URL: {url}\nparams: {params}" ) elif e.response.status_code == 429: - if retry_count >= max_retries: - raise SpotifyAPIError("Maximum retries reached.") - seconds = response.headers.get( + seconds = e.response.headers.get( "Retry-After", DEFAULT_WAITING_TIME ) self._log.debug( @@ -212,7 +227,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): ) time.sleep(int(seconds) + 1) return self._handle_response( - request_type, + method, url, params=params, retry_count=retry_count + 1, @@ -244,9 +259,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): if not (spotify_id := self._get_id(album_id)): return None - album_data = self._handle_response( - requests.get, self.album_url + spotify_id - ) + album_data = self._handle_response("get", self.album_url + spotify_id) if album_data["name"] == "": self._log.debug("Album removed from Spotify: {}", album_id) return None @@ -277,9 +290,7 @@ class 
SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): tracks_data = album_data["tracks"] tracks_items = tracks_data["items"] while tracks_data["next"]: - tracks_data = self._handle_response( - requests.get, tracks_data["next"] - ) + tracks_data = self._handle_response("get", tracks_data["next"]) tracks_items.extend(tracks_data["items"]) tracks = [] @@ -312,14 +323,12 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): data_url=album_data["external_urls"]["spotify"], ) - def _get_track(self, track_data): + def _get_track(self, track_data: JSONDict) -> TrackInfo: """Convert a Spotify track object dict to a TrackInfo object. :param track_data: Simplified track object (https://developer.spotify.com/documentation/web-api/reference/object-model/#track-object-simplified) - :type track_data: dict :return: TrackInfo object for track - :rtype: beets.autotag.hooks.TrackInfo """ artist, artist_id = self.get_artist(track_data["artists"]) @@ -344,26 +353,23 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): data_url=track_data["external_urls"]["spotify"], ) - def track_for_id(self, track_id=None, track_data=None): - """Fetch a track by its Spotify ID or URL and return a - TrackInfo object or None if the track is not found. + def track_for_id(self, track_id: str) -> None | TrackInfo: + """Fetch a track by its Spotify ID or URL. - :param track_id: (Optional) Spotify ID or URL for the track. Either - ``track_id`` or ``track_data`` must be provided. - :type track_id: str - :param track_data: (Optional) Simplified track object dict. May be - provided instead of ``track_id`` to avoid unnecessary API calls. - :type track_data: dict - :return: TrackInfo object for track - :rtype: beets.autotag.hooks.TrackInfo or None + Returns a TrackInfo object or None if the track is not found. 
""" - if not track_data: - if not (spotify_id := self._get_id(track_id)) or not ( - track_data := self._handle_response( - requests.get, f"{self.track_url}{spotify_id}" - ) - ): - return None + + if not (spotify_id := self._get_id(track_id)): + self._log.debug("Invalid Spotify ID: {}", track_id) + return None + + if not ( + track_data := self._handle_response( + "get", f"{self.track_url}{spotify_id}" + ) + ): + self._log.debug("Track not found: {}", track_id) + return None track = self._get_track(track_data) @@ -371,7 +377,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): # release) and `track.medium_total` (total number of tracks on # the track's disc). album_data = self._handle_response( - requests.get, self.album_url + track_data["album"]["id"] + "get", self.album_url + track_data["album"]["id"] ) medium_total = 0 for i, track_data in enumerate(album_data["tracks"]["items"], start=1): @@ -383,17 +389,16 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): return track @staticmethod - def _construct_search_query(filters=None, keywords=""): + def _construct_search_query( + filters: dict[str, str], keywords: str = "" + ) -> str: """Construct a query string with the specified filters and keywords to be provided to the Spotify Search API - (https://developer.spotify.com/documentation/web-api/reference/search/search/#writing-a-query---guidelines). + (https://developer.spotify.com/documentation/web-api/reference/search). :param filters: (Optional) Field filters to apply. - :type filters: dict :param keywords: (Optional) Query keywords to use. - :type keywords: str :return: Query string to be provided to the Search API. 
- :rtype: str """ query_components = [ keywords, @@ -404,34 +409,31 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): query = query.decode("utf8") return unidecode.unidecode(query) - def _search_api(self, query_type, filters=None, keywords=""): + def _search_api( + self, + query_type: Literal["album", "track"], + filters: dict[str, str], + keywords: str = "", + ) -> Sequence[Response]: """Query the Spotify Search API for the specified ``keywords``, applying the provided ``filters``. :param query_type: Item type to search across. Valid types are: 'album', 'artist', 'playlist', and 'track'. - :type query_type: str :param filters: (Optional) Field filters to apply. - :type filters: dict :param keywords: (Optional) Query keywords to use. - :type keywords: str - :return: JSON data for the class:`Response ` object or None - if no search results are returned. - :rtype: dict or None """ query = self._construct_search_query(keywords=keywords, filters=filters) - if not query: - return None self._log.debug(f"Searching {self.data_source} for '{query}'") try: response = self._handle_response( - requests.get, + "get", self.search_url, params={"q": query, "type": query_type}, ) except SpotifyAPIError as e: self._log.debug("Spotify API error: {}", e) - return [] + return () response_data = response.get(query_type + "s", {}).get("items", []) self._log.debug( "Found {} result(s) from {} for '{}'", @@ -441,7 +443,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): ) return response_data - def commands(self): + def commands(self) -> list[ui.Subcommand]: # autotagger import command def queries(lib, opts, args): success = self._parse_opts(opts) @@ -506,17 +508,14 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): self.opts = opts return True - def _match_library_tracks(self, library, keywords): + def _match_library_tracks(self, library: Library, keywords: str): """Get a list of simplified track object dicts for library tracks matching the specified 
``keywords``. :param library: beets library object to query. - :type library: beets.library.Library :param keywords: Query to match library items against. - :type keywords: str :return: List of simplified track object dicts for library items matching the specified query. - :rtype: list[dict] """ results = [] failures = [] @@ -683,11 +682,9 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): if write: item.try_write() - def track_info(self, track_id=None): + def track_info(self, track_id: str): """Fetch a track's popularity and external IDs using its Spotify ID.""" - track_data = self._handle_response( - requests.get, self.track_url + track_id - ) + track_data = self._handle_response("get", self.track_url + track_id) self._log.debug( "track_popularity: {} and track_isrc: {}", track_data.get("popularity"), @@ -700,11 +697,11 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): track_data.get("external_ids").get("upc"), ) - def track_audio_features(self, track_id=None): + def track_audio_features(self, track_id: str): """Fetch track audio features by its Spotify ID.""" try: return self._handle_response( - requests.get, self.audio_features_url + track_id + "get", self.audio_features_url + track_id ) except SpotifyAPIError as e: self._log.debug("Spotify API error: {}", e) From 4893cee5e5149c4de05bcd200d167ecda10f7503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20M=C3=A9meint?= Date: Fri, 13 Jun 2025 20:12:03 +0000 Subject: [PATCH 40/49] Fix the MusicBrainz search not taking into account the album/recording aliases --- beetsplug/musicbrainz.py | 3 ++- docs/changelog.rst | 2 ++ test/plugins/test_musicbrainz.py | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/beetsplug/musicbrainz.py b/beetsplug/musicbrainz.py index ceb931179..e33cc4fce 100644 --- a/beetsplug/musicbrainz.py +++ b/beetsplug/musicbrainz.py @@ -771,6 +771,7 @@ class MusicBrainzPlugin(BeetsPlugin): ) -> dict[str, str]: criteria = { "release": album, + "alias": album, 
"tracks": str(len(items)), } | ({"arid": VARIOUS_ARTISTS_ID} if va_likely else {"artist": artist}) @@ -826,7 +827,7 @@ class MusicBrainzPlugin(BeetsPlugin): def item_candidates( self, item: Item, artist: str, title: str ) -> Iterator[beets.autotag.hooks.TrackInfo]: - criteria = {"artist": artist, "recording": title} + criteria = {"artist": artist, "recording": title, "alias": title} yield from filter( None, map(self.track_info, self._search_api("recording", criteria)) diff --git a/docs/changelog.rst b/docs/changelog.rst index 03a2a72ea..88b82e4da 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -37,6 +37,8 @@ Bug fixes: * Fix ``HiddenFileTest`` by using ``bytestring_path()``. * tests: Fix tests failing without ``langdetect`` (by making it required). :bug:`5797` +* :doc:`plugins/musicbrainz`: Fix the MusicBrainz search not taking into + account the album/recording aliases For packagers: diff --git a/test/plugins/test_musicbrainz.py b/test/plugins/test_musicbrainz.py index 100cfc498..aea05bc20 100644 --- a/test/plugins/test_musicbrainz.py +++ b/test/plugins/test_musicbrainz.py @@ -1025,6 +1025,7 @@ class TestMusicBrainzPlugin(PluginMixin): assert mb.get_album_criteria(items, "Artist ", " Album", va_likely) == { "release": " Album", + "alias": " Album", "tracks": str(len(items)), **expected_additional_criteria, } From d5bd24bb648dea3880f32d40b4a7c73422a8b6ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 22 Jun 2025 17:47:30 +0100 Subject: [PATCH 41/49] Update pipx-install-action to fix caching errors --- .github/workflows/ci.yaml | 2 +- .github/workflows/integration_test.yaml | 2 +- .github/workflows/lint.yml | 8 ++++---- .github/workflows/make_release.yaml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 333706dc7..ac3263bcd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,7 +21,7 @@ jobs: steps: - uses: 
actions/checkout@v4 - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.1 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - name: Setup Python with poetry caching # poetry cache requires poetry to already be installed, weirdly uses: actions/setup-python@v5 diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index eae04d1d4..f88864c48 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -9,7 +9,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.1 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - uses: actions/setup-python@v5 with: python-version: 3.9 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 16757da27..c9b66f402 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -53,7 +53,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.1 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - uses: actions/setup-python@v5 with: python-version: ${{ env.PYTHON_VERSION }} @@ -74,7 +74,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.1 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - uses: actions/setup-python@v5 with: python-version: ${{ env.PYTHON_VERSION }} @@ -94,7 +94,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.1 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - uses: actions/setup-python@v5 with: python-version: ${{ env.PYTHON_VERSION }} @@ -118,7 +118,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.1 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - uses: actions/setup-python@v5 with: python-version: ${{ env.PYTHON_VERSION }} diff --git 
a/.github/workflows/make_release.yaml b/.github/workflows/make_release.yaml index 7ea2d631c..b18dded8d 100644 --- a/.github/workflows/make_release.yaml +++ b/.github/workflows/make_release.yaml @@ -19,7 +19,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.1 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - uses: actions/setup-python@v5 with: python-version: ${{ env.PYTHON_VERSION }} @@ -50,7 +50,7 @@ jobs: ref: ${{ env.NEW_TAG }} - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.1 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - uses: actions/setup-python@v5 with: python-version: ${{ env.PYTHON_VERSION }} From ab9b2e0b69210d733b1bf80d63380a279233c7d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 22 Jun 2025 18:07:22 +0100 Subject: [PATCH 42/49] Try using threeal/pipx-install-action@v1.0.0 for CI tests I have reported the issue with BrandonLWhite/pipx-install-action@v1.0.3 failing on Windows here: https://github.com/BrandonLWhite/pipx-install-action/issues/62 --- .github/workflows/ci.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ac3263bcd..9d0e67d5f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,7 +21,10 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python tools - uses: BrandonLWhite/pipx-install-action@v1.0.3 + # BrandonLWhite/pipx-install-action@v1.0.3 fails on Windows, thus we're using an alternative action here + uses: threeal/pipx-install-action@v1.0.0 + with: + packages: poethepoet>=0.26 poetry<2 - name: Setup Python with poetry caching # poetry cache requires poetry to already be installed, weirdly uses: actions/setup-python@v5 From 9926a1ac3cb8d4877061df2fae94ddb8b2aab7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 29 Jun 2025 13:43:48 +0100 Subject: [PATCH 
43/49] Revert "Try using threeal/pipx-install-action@v1.0.0 for CI tests" This reverts commit ab9b2e0b69210d733b1bf80d63380a279233c7d9. --- .github/workflows/ci.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9d0e67d5f..ac3263bcd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,10 +21,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python tools - # BrandonLWhite/pipx-install-action@v1.0.3 fails on Windows, thus we're using an alternative action here - uses: threeal/pipx-install-action@v1.0.0 - with: - packages: poethepoet>=0.26 poetry<2 + uses: BrandonLWhite/pipx-install-action@v1.0.3 - name: Setup Python with poetry caching # poetry cache requires poetry to already be installed, weirdly uses: actions/setup-python@v5 From dd6cb538ac343c837348a0e4b404bc994ed97db1 Mon Sep 17 00:00:00 2001 From: dhruvravii <122979040+dhruvravii@users.noreply.github.com> Date: Tue, 1 Jul 2025 14:38:54 +0530 Subject: [PATCH 44/49] Fix: Spotify plugin unable to recognize Chinese and Japanese albums. (#5705) Fixes an issue where each spotify query was converted to ascii before sending. Adds a new config option to enable legacy behaviour. A file called japanese_track_request.json was made to mimic the Spotify API response since I don't have the credentials. Entries in that will need to be modified with the actual entries. 
Co-authored-by: Sebastian Mohr Co-authored-by: Sebastian Mohr <39738318+semohr@users.noreply.github.com> Co-authored-by: J0J0 Todos <2733783+JOJ0@users.noreply.github.com> --- beetsplug/spotify.py | 12 ++- docs/changelog.rst | 7 +- docs/plugins/spotify.rst | 8 ++ test/plugins/test_spotify.py | 79 +++++++++++++++- test/rsrc/spotify/japanese_track_request.json | 89 +++++++++++++++++++ 5 files changed, 188 insertions(+), 7 deletions(-) create mode 100644 test/rsrc/spotify/japanese_track_request.json diff --git a/beetsplug/spotify.py b/beetsplug/spotify.py index 9d285928a..76ceeed68 100644 --- a/beetsplug/spotify.py +++ b/beetsplug/spotify.py @@ -106,6 +106,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): "client_id": "4e414367a1d14c75a5c5129a627fcab8", "client_secret": "f82bdc09b2254f1a8286815d02fd46dc", "tokenfile": "spotify_token.json", + "search_query_ascii": False, } ) self.config["client_id"].redact = True @@ -388,9 +389,8 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): track.medium_total = medium_total return track - @staticmethod def _construct_search_query( - filters: dict[str, str], keywords: str = "" + self, filters: dict[str, str], keywords: str = "" ) -> str: """Construct a query string with the specified filters and keywords to be provided to the Spotify Search API @@ -407,7 +407,11 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): query = " ".join([q for q in query_components if q]) if not isinstance(query, str): query = query.decode("utf8") - return unidecode.unidecode(query) + + if self.config["search_query_ascii"].get(): + query = unidecode.unidecode(query) + + return query def _search_api( self, @@ -424,6 +428,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): :param keywords: (Optional) Query keywords to use. 
""" query = self._construct_search_query(keywords=keywords, filters=filters) + self._log.debug(f"Searching {self.data_source} for '{query}'") try: response = self._handle_response( @@ -560,6 +565,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): query = self._construct_search_query( keywords=keywords, filters=query_filters ) + failures.append(query) continue diff --git a/docs/changelog.rst b/docs/changelog.rst index 88b82e4da..1baa54011 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -39,7 +39,12 @@ Bug fixes: :bug:`5797` * :doc:`plugins/musicbrainz`: Fix the MusicBrainz search not taking into account the album/recording aliases - +* :doc:`/plugins/spotify`: Fix the issue with that every query to spotify was + ascii encoded. This resulted in bad matches for queries that contained special + e.g. non latin characters as 盗作. If you want to keep the legacy behavior + set the config option ``spotify.search_query_ascii: yes``. + :bug:`5699` + For packagers: * Optional ``extra_tags`` parameter has been removed from diff --git a/docs/plugins/spotify.rst b/docs/plugins/spotify.rst index 233d00726..c5aff8ef3 100644 --- a/docs/plugins/spotify.rst +++ b/docs/plugins/spotify.rst @@ -83,6 +83,13 @@ in config.yaml under the ``spotify:`` section: track/album/artist fields before sending them to Spotify. Can be useful for changing certain abbreviations, like ft. -> feat. See the examples below. Default: None. +- **search_query_ascii**: If set to ``yes``, the search query will be converted to + ASCII before being sent to Spotify. Converting searches to ASCII can + enhance search results in some cases, but in general, it is not recommended. + For instance `artist:deadmau5 album:4×4` will be converted to + `artist:deadmau5 album:4x4` (notice `×!=x`). + Default: ``no``. 
+ Here's an example:: @@ -92,6 +99,7 @@ Here's an example:: region_filter: US show_failures: on tiebreak: first + search_query_ascii: no regex: [ { diff --git a/test/plugins/test_spotify.py b/test/plugins/test_spotify.py index a2336df10..a2fb26f4b 100644 --- a/test/plugins/test_spotify.py +++ b/test/plugins/test_spotify.py @@ -7,7 +7,7 @@ import responses from beets.library import Item from beets.test import _common -from beets.test.helper import BeetsTestCase +from beets.test.helper import PluginTestCase from beetsplug import spotify @@ -23,10 +23,11 @@ def _params(url): return parse_qs(urlparse(url).query) -class SpotifyPluginTest(BeetsTestCase): +class SpotifyPluginTest(PluginTestCase): + plugin = "spotify" + @responses.activate def setUp(self): - super().setUp() responses.add( responses.POST, spotify.SpotifyPlugin.oauth_token_url, @@ -39,6 +40,7 @@ class SpotifyPluginTest(BeetsTestCase): "scope": "", }, ) + super().setUp() self.spotify = spotify.SpotifyPlugin() opts = ArgumentsMock("list", False) self.spotify._parse_opts(opts) @@ -176,3 +178,74 @@ class SpotifyPluginTest(BeetsTestCase): results = self.spotify._match_library_tracks(self.lib, "Happy") assert 1 == len(results) assert "6NPVjNh8Jhru9xOmyQigds" == results[0]["id"] + + @responses.activate + def test_japanese_track(self): + """Ensure non-ASCII characters remain unchanged in search queries""" + + # Path to the mock JSON file for the Japanese track + json_file = os.path.join( + _common.RSRC, b"spotify", b"japanese_track_request.json" + ) + + # Load the mock JSON response + with open(json_file, "rb") as f: + response_body = f.read() + + # Mock Spotify Search API response + responses.add( + responses.GET, + spotify.SpotifyPlugin.search_url, + body=response_body, + status=200, + content_type="application/json", + ) + + # Create a mock item with Japanese metadata + item = Item( + mb_trackid="56789", + album="盗作", + albumartist="ヨルシカ", + title="思想犯", + length=10, + ) + item.add(self.lib) + + # Search without 
ascii encoding + + with self.configure_plugin( + { + "search_query_ascii": False, + } + ): + assert self.spotify.config["search_query_ascii"].get() is False + # Call the method to match library tracks + results = self.spotify._match_library_tracks(self.lib, item.title) + + # Assertions to verify results + assert results is not None + assert 1 == len(results) + assert results[0]["name"] == item.title + assert results[0]["artists"][0]["name"] == item.albumartist + assert results[0]["album"]["name"] == item.album + + # Verify search query parameters + params = _params(responses.calls[0].request.url) + query = params["q"][0] + assert item.title in query + assert f"artist:{item.albumartist}" in query + assert f"album:{item.album}" in query + assert not query.isascii() + + # Is not found in the library if ascii encoding is enabled + with self.configure_plugin( + { + "search_query_ascii": True, + } + ): + assert self.spotify.config["search_query_ascii"].get() is True + results = self.spotify._match_library_tracks(self.lib, item.title) + params = _params(responses.calls[1].request.url) + query = params["q"][0] + + assert query.isascii() diff --git a/test/rsrc/spotify/japanese_track_request.json b/test/rsrc/spotify/japanese_track_request.json new file mode 100644 index 000000000..04559588e --- /dev/null +++ b/test/rsrc/spotify/japanese_track_request.json @@ -0,0 +1,89 @@ +{ + "tracks":{ + "href":"https://api.spotify.com/v1/search?query=Happy+album%3ADespicable+Me+2+artist%3APharrell+Williams&offset=0&limit=20&type=track", + "items":[ + { + "album":{ + "album_type":"compilation", + "available_markets":[ + "AD", "AR", "AT", "AU", "BE", "BG", "BO", "BR", "CA", + "CH", "CL", "CO", "CR", "CY", "CZ", "DE", "DK", "DO", + "EC", "EE", "ES", "FI", "FR", "GB", "GR", "GT", "HK", + "HN", "HU", "IE", "IS", "IT", "LI", "LT", "LU", "LV", + "MC", "MT", "MX", "MY", "NI", "NL", "NO", "NZ", "PA", + "PE", "PH", "PL", "PT", "PY", "RO", "SE", "SG", "SI", + "SK", "SV", "TR", "TW", "US", "UY" + ], 
+ "external_urls":{ + "spotify":"https://open.spotify.com/album/5l3zEmMrOhOzG8d8s83GOL" + }, + "href":"https://api.spotify.com/v1/albums/5l3zEmMrOhOzG8d8s83GOL", + "id":"5l3zEmMrOhOzG8d8s83GOL", + "images":[ + { + "height":640, + "width":640, + "url":"https://i.scdn.co/image/cb7905340c132365bbaee3f17498f062858382e8" + }, + { + "height":300, + "width":300, + "url":"https://i.scdn.co/image/af369120f0b20099d6784ab31c88256113f10ffb" + }, + { + "height":64, + "width":64, + "url":"https://i.scdn.co/image/9dad385ddf2e7db0bef20cec1fcbdb08689d9ae8" + } + ], + "name":"盗作", + "type":"album", + "uri":"spotify:album:5l3zEmMrOhOzG8d8s83GOL" + }, + "artists":[ + { + "external_urls":{ + "spotify":"https://open.spotify.com/artist/2RdwBSPQiwcmiDo9kixcl8" + }, + "href":"https://api.spotify.com/v1/artists/2RdwBSPQiwcmiDo9kixcl8", + "id":"2RdwBSPQiwcmiDo9kixcl8", + "name":"ヨルシカ", + "type":"artist", + "uri":"spotify:artist:2RdwBSPQiwcmiDo9kixcl8" + } + ], + "available_markets":[ + "AD", "AR", "AT", "AU", "BE", "BG", "BO", "BR", "CA", + "CH", "CL", "CO", "CR", "CY", "CZ", "DE", "DK", "DO", + "EC", "EE", "ES", "FI", "FR", "GB", "GR", "GT", "HK", + "HN", "HU", "IE", "IS", "IT", "LI", "LT", "LU", "LV", + "MC", "MT", "MX", "MY", "NI", "NL", "NO", "NZ", "PA", + "PE", "PH", "PL", "PT", "PY", "RO", "SE", "SG", "SI", + "SK", "SV", "TR", "TW", "US", "UY" + ], + "disc_number":1, + "duration_ms":233305, + "explicit":false, + "external_ids":{ + "isrc":"USQ4E1300686" + }, + "external_urls":{ + "spotify":"https://open.spotify.com/track/6NPVjNh8Jhru9xOmyQigds" + }, + "href":"https://api.spotify.com/v1/tracks/6NPVjNh8Jhru9xOmyQigds", + "id":"6NPVjNh8Jhru9xOmyQigds", + "name":"思想犯", + "popularity":89, + "preview_url":"https://p.scdn.co/mp3-preview/6b00000be293e6b25f61c33e206a0c522b5cbc87", + "track_number":4, + "type":"track", + "uri":"spotify:track:6NPVjNh8Jhru9xOmyQigds" + } + ], + "limit":20, + "next":null, + "offset":0, + "previous":null, + "total":1 + } +} From 
ac96b9b64e97cdca0be6305e939e902e8742f147 Mon Sep 17 00:00:00 2001 From: Noor Date: Wed, 2 Jul 2025 20:40:37 +0200 Subject: [PATCH 45/49] Preserve line breaks for example cases in substitution plugin docs (#5846) ## Description Adds line block markup to example substitutions in the plugin documentation, so that each case is shown on a separate line: > The replacement can be an expression utilising the matched regex, allowing us to create more general rules. Say for example, we want to sort all albums by multiple artists into the directory of the first artist. We can thus capture everything before the first ,, `` &`` or `` and``, and use this capture group in the output, discarding the rest of the string. > > ```yaml > substitute: > ^(.*?)(,| &| and).*: \1 > ``` > > This would handle all the below cases in a single rule: > >> Bob Dylan and The Band -> Bob Dylan >> Neil Young & Crazy Horse -> Neil Young >> James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston --- docs/changelog.rst | 2 ++ docs/plugins/substitute.rst | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 1baa54011..0e5799846 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -61,6 +61,8 @@ Other changes: * Documentation structure for auto generated API references changed slightly. Autogenerated API references are now located in the `docs/api` subdirectory. +* :doc:`/plugins/substitute`: Fix rST formatting for example cases so that each + case is shown on separate lines. 2.3.1 (May 14, 2025) -------------------- diff --git a/docs/plugins/substitute.rst b/docs/plugins/substitute.rst index 87ee2ad45..c6fec8054 100644 --- a/docs/plugins/substitute.rst +++ b/docs/plugins/substitute.rst @@ -31,9 +31,9 @@ group in the output, discarding the rest of the string. 
This would handle all the below cases in a single rule: - Bob Dylan and The Band -> Bob Dylan - Neil Young & Crazy Horse -> Neil Young - James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston + | Bob Dylan and The Band -> Bob Dylan + | Neil Young & Crazy Horse -> Neil Young + | James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston To apply the substitution, you have to call the function ``%substitute{}`` in the paths section. For example: From 537a71ff8286461440ee883ff2c077636bf7e120 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Mon, 6 Nov 2023 14:47:34 +0100 Subject: [PATCH 46/49] duplicates: Add --remove option to duplicates plugin Removes from library but keeps files. --- beetsplug/duplicates.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/beetsplug/duplicates.py b/beetsplug/duplicates.py index fadb29845..76441133f 100644 --- a/beetsplug/duplicates.py +++ b/beetsplug/duplicates.py @@ -53,6 +53,7 @@ class DuplicatesPlugin(BeetsPlugin): "tiebreak": {}, "strict": False, "tag": "", + "remove": False, } ) @@ -131,6 +132,13 @@ class DuplicatesPlugin(BeetsPlugin): action="store", help="tag matched items with 'k=v' attribute", ) + self._command.parser.add_option( + "-r", + "--remove", + dest="remove", + action="store_true", + help="remove items from library", + ) self._command.parser.add_all_common_options() def commands(self): @@ -141,6 +149,7 @@ class DuplicatesPlugin(BeetsPlugin): copy = bytestring_path(self.config["copy"].as_str()) count = self.config["count"].get(bool) delete = self.config["delete"].get(bool) + remove = self.config["remove"].get(bool) fmt = self.config["format"].get(str) full = self.config["full"].get(bool) keys = self.config["keys"].as_str_seq() @@ -196,6 +205,7 @@ class DuplicatesPlugin(BeetsPlugin): copy=copy, move=move, delete=delete, + remove=remove, tag=tag, fmt=fmt.format(obj_count), ) @@ -204,7 +214,14 @@ class DuplicatesPlugin(BeetsPlugin): return 
[self._command] def _process_item( - self, item, copy=False, move=False, delete=False, tag=False, fmt="" + self, + item, + copy=False, + move=False, + delete=False, + tag=False, + fmt="", + remove=False, ): """Process Item `item`.""" print_(format(item, fmt)) @@ -216,6 +233,8 @@ class DuplicatesPlugin(BeetsPlugin): item.store() if delete: item.remove(delete=True) + if remove: + item.remove(delete=False) if tag: try: k, v = tag.split("=") From 549847bfd8c64ef68c65b36f652a5d01031e4294 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sun, 22 Jun 2025 08:48:10 +0200 Subject: [PATCH 47/49] duplicates: Add docs for --remove option --- docs/plugins/duplicates.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/plugins/duplicates.rst b/docs/plugins/duplicates.rst index 8b11b6661..8ce0e4578 100644 --- a/docs/plugins/duplicates.rst +++ b/docs/plugins/duplicates.rst @@ -34,6 +34,7 @@ duplicates themselves via command-line switches :: -o DEST, --copy=DEST copy items to dest -p, --path print paths for matched items or albums -t TAG, --tag=TAG tag matched items with 'k=v' attribute + -r, --remove remove items from library Configuration ------------- @@ -57,7 +58,7 @@ file. The available options mirror the command-line options: ``$albumartist - $album - $title: $count`` (for tracks) or ``$albumartist - $album: $count`` (for albums). Default: ``no``. -- **delete**: Removes matched items from the library and from the disk. +- **delete**: Remove matched items from the library and from the disk. Default: ``no`` - **format**: A specific format with which to print every track or album. This uses the same template syntax as beets' @@ -92,6 +93,8 @@ file. The available options mirror the command-line options: set. If you would like to consider the lower bitrates as duplicates, for example, set ``tiebreak: items: [bitrate]``. Default: ``{}``. +- **remove**: Remove matched items from the library, but not from the disk. + Default: ``no``. 
Examples -------- From 47eee070ba426fc61bfd3e72d166917c60076b8a Mon Sep 17 00:00:00 2001 From: J0J0 Todos <2733783+JOJ0@users.noreply.github.com> Date: Wed, 2 Jul 2025 07:10:46 +0200 Subject: [PATCH 48/49] duplicates: remove or delete options mutually exclusive Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- beetsplug/duplicates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beetsplug/duplicates.py b/beetsplug/duplicates.py index 76441133f..5a2be0cd2 100644 --- a/beetsplug/duplicates.py +++ b/beetsplug/duplicates.py @@ -233,7 +233,7 @@ class DuplicatesPlugin(BeetsPlugin): item.store() if delete: item.remove(delete=True) - if remove: + elif remove: item.remove(delete=False) if tag: try: From 7c22cd635c502e2f3ccb71803c3034bc2bb2ec11 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sat, 5 Jul 2025 07:24:26 +0200 Subject: [PATCH 49/49] duplicates: Add changelog for --remove option --- docs/changelog.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 0e5799846..d1a477cb5 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -23,6 +23,9 @@ New features: singletons by their Discogs ID. :bug:`4661` * :doc:`plugins/replace`: Add new plugin. +* :doc:`plugins/duplicates`: Add ``--remove`` option, allowing to remove from + the library without deleting media files. + :bug:`5832` Bug fixes: