Disambiguation fix implemented & tested

This commit is contained in:
Henry 2025-09-19 20:46:07 -07:00
parent 24fbc566f6
commit dda265dc77
5 changed files with 86 additions and 21 deletions

View file

@ -271,10 +271,9 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
"""Returns an artist string (all artists) and an artist_id (the main """Returns an artist string (all artists) and an artist_id (the main
artist) for a list of artist object dicts. artist) for a list of artist object dicts.
For each artist, this function moves articles (such as 'a', 'an', For each artist, this function moves articles (such as 'a', 'an', and 'the')
and 'the') to the front and strips trailing disambiguation numbers. It to the front. It returns a tuple containing the comma-separated string
returns a tuple containing the comma-separated string of all of all normalized artists and the ``id`` of the main/first artist.
normalized artists and the ``id`` of the main/first artist.
Alternatively a keyword can be used to combine artists together into a Alternatively a keyword can be used to combine artists together into a
single string by passing the join_key argument. single string by passing the join_key argument.

View file

@ -76,7 +76,7 @@ TRACK_INDEX_RE = re.compile(
re.VERBOSE, re.VERBOSE,
) )
DISAMBIGUATION_RE = re.compile(r" \(\d+\)$") DISAMBIGUATION_RE = re.compile(r" \(\d+\)")
class ReleaseFormat(TypedDict): class ReleaseFormat(TypedDict):
@ -365,23 +365,22 @@ class DiscogsPlugin(MetadataSourcePlugin):
label = catalogno = labelid = None label = catalogno = labelid = None
if result.data.get("labels"): if result.data.get("labels"):
label = result.data["labels"][0].get("name") label = self.strip_disambiguation(
result.data["labels"][0].get("name")
)
catalogno = result.data["labels"][0].get("catno") catalogno = result.data["labels"][0].get("catno")
labelid = result.data["labels"][0].get("id") labelid = result.data["labels"][0].get("id")
cover_art_url = self.select_cover_art(result) cover_art_url = self.select_cover_art(result)
# Additional cleanups (various artists name, catalog number, media). # Additional cleanups
# (various artists name, catalog number, media, disambiguation).
if va: if va:
artist = config["va_name"].as_str() artist = config["va_name"].as_str()
else:
artist = self.strip_disambiguation(artist)
if catalogno == "none": if catalogno == "none":
catalogno = None catalogno = None
# Remove Discogs specific artist disambiguation 'Artist (2)' or 'Label (3)'
if self.config["strip_disambiguation"]:
artist = DISAMBIGUATION_RE.sub("", artist)
if label is not None:
label = DISAMBIGUATION_RE.sub("", label)
# Explicitly set the `media` for the tracks, since it is expected by # Explicitly set the `media` for the tracks, since it is expected by
# `autotag.apply_metadata`, and set `medium_total`. # `autotag.apply_metadata`, and set `medium_total`.
for track in tracks: for track in tracks:
@ -631,6 +630,14 @@ class DiscogsPlugin(MetadataSourcePlugin):
return tracklist return tracklist
def strip_disambiguation(self, text) -> str:
"""Removes discogs specific disambiguations from a string.
Turns 'Label Name (5)' to 'Label Name' or 'Artist (1) & Another Artist (2)'
to 'Artist & Another Artist'. Does nothing if strip_disambiguation is False."""
if not self.config["strip_disambiguation"]:
return text
return DISAMBIGUATION_RE.sub("", text)
def get_track_info(self, track, index, divisions): def get_track_info(self, track, index, divisions):
"""Returns a TrackInfo object for a discogs track.""" """Returns a TrackInfo object for a discogs track."""
title = track["title"] title = track["title"]
@ -643,6 +650,7 @@ class DiscogsPlugin(MetadataSourcePlugin):
artist, artist_id = self.get_artist( artist, artist_id = self.get_artist(
track.get("artists", []), join_key="join" track.get("artists", []), join_key="join"
) )
artist = self.strip_disambiguation(artist)
length = self.get_track_length(track["duration"]) length = self.get_track_length(track["duration"])
return TrackInfo( return TrackInfo(
title=title, title=title,

View file

@ -18,8 +18,12 @@ Bug fixes:
- :doc:`plugins/spotify` Removed old and undocumented config options - :doc:`plugins/spotify` Removed old and undocumented config options
`artist_field`, `album_field` and `track` that were causing issues with track `artist_field`, `album_field` and `track` that were causing issues with track
matching. :bug:`5189` matching. :bug:`5189`
- :doc:`plugins/discogs` Added config option `strip_disambiguation` to allow choice of removing discogs numeric disambiguation :bug:`5366` - :doc:`plugins/discogs` Added config option `strip_disambiguation` to allow
- :doc:`plugins/discogs` Fixed inconsistency in stripping disambiguation from artists but not labels :bug:`5366` choice of removing discogs numeric disambiguation :bug:`5366`
- :doc:`plugins/discogs` Fixed inconsistency in stripping disambiguation from
artists but not labels. :bug:`5366`
- :doc:`plugins/discogs` Wrote test coverage for removing disambiguation.
:bug:`5366`
For packagers: For packagers:

View file

@ -109,6 +109,9 @@ Other configurations available under ``discogs:`` are:
- **search_limit**: The maximum number of results to return from Discogs. This - **search_limit**: The maximum number of results to return from Discogs. This
is useful if you want to limit the number of results returned to speed up is useful if you want to limit the number of results returned to speed up
searches. Default: ``5`` searches. Default: ``5``
- **strip_disambiguation**: Discogs uses strings like ``"(4)"`` to mark distinct
artists and labels with the same name. If you'd like to use the discogs
disambiguation in your tags, you can disable it. Default: ``True``
.. _discogs guidelines: https://support.discogs.com/hc/en-us/articles/360005055373-Database-Guidelines-12-Tracklisting#Index_Tracks_And_Headings .. _discogs guidelines: https://support.discogs.com/hc/en-us/articles/360005055373-Database-Guidelines-12-Tracklisting#Index_Tracks_And_Headings

View file

@ -375,7 +375,7 @@ class DGAlbumInfoTest(BeetsTestCase):
assert d.style is None assert d.style is None
def test_strip_disambiguation_label_artist(self): def test_strip_disambiguation_label_artist(self):
"""Test removing discogs disambiguation""" """Test removing discogs disambiguation from artist and label"""
data = { data = {
"id": 123, "id": 123,
"uri": "https://www.discogs.com/release/123456-something", "uri": "https://www.discogs.com/release/123456-something",
@ -399,21 +399,21 @@ class DGAlbumInfoTest(BeetsTestCase):
assert d.label == "LABEL NAME" assert d.label == "LABEL NAME"
def test_strip_disambiguation_off_label_artist(self): def test_strip_disambiguation_off_label_artist(self):
"""Test not removing discogs disambiguation""" """Test not removing discogs disambiguation from artist and label"""
config["discogs"]["strip_disambiguation"] = False
data = { data = {
"id": 123, "id": 123,
"uri": "https://www.discogs.com/release/123456-something", "uri": "https://www.discogs.com/release/123456-something",
"tracklist": [self._make_track("A", "1", "01:01")], "tracklist": [self._make_track("a", "1", "01:01")],
"artists": [{"name": "ARTIST NAME (2)", "id": 321, "join": ""}], "artists": [{"name": "ARTIST NAME (2)", "id": 321, "join": ""}],
"title": "TITLE", "title": "title",
"labels": [ "labels": [
{ {
"name": "LABEL NAME (5)", "name": "LABEL NAME (5)",
"catno": "CATALOG NUMBER", "catno": "catalog number",
} }
], ],
} }
config["discogs"]["strip_disambiguation"] = False
release = Bag( release = Bag(
data=data, data=data,
title=data["title"], title=data["title"],
@ -423,6 +423,57 @@ class DGAlbumInfoTest(BeetsTestCase):
assert d.artist == "ARTIST NAME (2)" assert d.artist == "ARTIST NAME (2)"
assert d.label == "LABEL NAME (5)" assert d.label == "LABEL NAME (5)"
def test_strip_disambiguation_multiple_artists(self):
"""Test removing disambiguation if there are multiple artists on the release"""
data = {
"id": 123,
"uri": "https://www.discogs.com/release/123456-something",
"tracklist": [self._make_track("a", "1", "01:01")],
"artists": [
{"name": "ARTIST NAME (2)", "id": 321, "join": "&"},
{"name": "OTHER ARTIST (5)", "id": 321, "join": ""},
],
"title": "title",
}
release = Bag(
data=data,
title=data["title"],
artists=[Bag(data=d) for d in data["artists"]],
)
d = DiscogsPlugin().get_album_info(release)
assert d.artist == "ARTIST NAME & OTHER ARTIST"
def test_strip_disambiguation_artist_tracks(self):
data = {
"id": 123,
"uri": "https://www.discogs.com/release/123456-something",
"tracklist": [
{
"title": "track",
"position": "A",
"type_": "track",
"duration": "5:44",
"artists": [
{
"name": "TEST ARTIST (5)",
"tracks": "",
"id": 11146,
}
],
}
],
"artists": [{"name": "OTHER ARTIST (5)", "id": 321, "join": ""}],
"title": "title",
}
release = Bag(
data=data,
title=data["title"],
artists=[Bag(data=d) for d in data["artists"]],
)
d = DiscogsPlugin().get_album_info(release)
assert d.tracks[0].artist == "TEST ARTIST"
assert d.artist == "OTHER ARTIST"
@pytest.mark.parametrize( @pytest.mark.parametrize(
"formats, expected_media, expected_albumtype", "formats, expected_media, expected_albumtype",