Disambiguation fix implemented & tested

This commit is contained in:
Henry 2025-09-19 20:46:07 -07:00
parent 24fbc566f6
commit dda265dc77
5 changed files with 86 additions and 21 deletions

View file

@ -271,10 +271,9 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
"""Returns an artist string (all artists) and an artist_id (the main
artist) for a list of artist object dicts.
For each artist, this function moves articles (such as 'a', 'an',
and 'the') to the front and strips trailing disambiguation numbers. It
returns a tuple containing the comma-separated string of all
normalized artists and the ``id`` of the main/first artist.
For each artist, this function moves articles (such as 'a', 'an', and 'the')
to the front. It returns a tuple containing the comma-separated string
of all normalized artists and the ``id`` of the main/first artist.
Alternatively a keyword can be used to combine artists together into a
single string by passing the join_key argument.

View file

@ -76,7 +76,7 @@ TRACK_INDEX_RE = re.compile(
re.VERBOSE,
)
DISAMBIGUATION_RE = re.compile(r" \(\d+\)$")
DISAMBIGUATION_RE = re.compile(r" \(\d+\)")
class ReleaseFormat(TypedDict):
@ -365,23 +365,22 @@ class DiscogsPlugin(MetadataSourcePlugin):
label = catalogno = labelid = None
if result.data.get("labels"):
label = result.data["labels"][0].get("name")
label = self.strip_disambiguation(
result.data["labels"][0].get("name")
)
catalogno = result.data["labels"][0].get("catno")
labelid = result.data["labels"][0].get("id")
cover_art_url = self.select_cover_art(result)
# Additional cleanups (various artists name, catalog number, media).
# Additional cleanups
# (various artists name, catalog number, media, disambiguation).
if va:
artist = config["va_name"].as_str()
else:
artist = self.strip_disambiguation(artist)
if catalogno == "none":
catalogno = None
# Remove Discogs specific artist disambiguation 'Artist (2)' or 'Label (3)'
if self.config["strip_disambiguation"]:
artist = DISAMBIGUATION_RE.sub("", artist)
if label is not None:
label = DISAMBIGUATION_RE.sub("", label)
# Explicitly set the `media` for the tracks, since it is expected by
# `autotag.apply_metadata`, and set `medium_total`.
for track in tracks:
@ -631,6 +630,14 @@ class DiscogsPlugin(MetadataSourcePlugin):
return tracklist
def strip_disambiguation(self, text) -> str:
"""Removes discogs specific disambiguations from a string.
Turns 'Label Name (5)' to 'Label Name' or 'Artist (1) & Another Artist (2)'
to 'Artist & Another Artist'. Does nothing if strip_disambiguation is False."""
if not self.config["strip_disambiguation"]:
return text
return DISAMBIGUATION_RE.sub("", text)
def get_track_info(self, track, index, divisions):
"""Returns a TrackInfo object for a discogs track."""
title = track["title"]
@ -643,6 +650,7 @@ class DiscogsPlugin(MetadataSourcePlugin):
artist, artist_id = self.get_artist(
track.get("artists", []), join_key="join"
)
artist = self.strip_disambiguation(artist)
length = self.get_track_length(track["duration"])
return TrackInfo(
title=title,

View file

@ -18,8 +18,12 @@ Bug fixes:
- :doc:`plugins/spotify` Removed old and undocumented config options
`artist_field`, `album_field` and `track` that were causing issues with track
matching. :bug:`5189`
- :doc:`plugins/discogs` Added config option `strip_disambiguation` to allow choice of removing discogs numeric disambiguation :bug:`5366`
- :doc:`plugins/discogs` Fixed inconsistency in stripping disambiguation from artists but not labels :bug:`5366`
- :doc:`plugins/discogs` Added config option `strip_disambiguation` to allow
choice of removing discogs numeric disambiguation :bug:`5366`
- :doc:`plugins/discogs` Fixed inconsistency in stripping disambiguation from
artists but not labels. :bug:`5366`
- :doc:`plugins/discogs` Wrote test coverage for removing disambiguation.
:bug:`5366`
For packagers:

View file

@ -109,6 +109,9 @@ Other configurations available under ``discogs:`` are:
- **search_limit**: The maximum number of results to return from Discogs. This
is useful if you want to limit the number of results returned to speed up
searches. Default: ``5``
- **strip_disambiguation**: Discogs uses strings like ``"(4)"`` to mark distinct
artists and labels with the same name. If you'd like to use the discogs
disambiguation in your tags, you can disable it. Default: ``True``
.. _discogs guidelines: https://support.discogs.com/hc/en-us/articles/360005055373-Database-Guidelines-12-Tracklisting#Index_Tracks_And_Headings

View file

@ -375,7 +375,7 @@ class DGAlbumInfoTest(BeetsTestCase):
assert d.style is None
def test_strip_disambiguation_label_artist(self):
"""Test removing discogs disambiguation"""
"""Test removing discogs disambiguation from artist and label"""
data = {
"id": 123,
"uri": "https://www.discogs.com/release/123456-something",
@ -399,21 +399,21 @@ class DGAlbumInfoTest(BeetsTestCase):
assert d.label == "LABEL NAME"
def test_strip_disambiguation_off_label_artist(self):
"""Test not removing discogs disambiguation"""
"""Test not removing discogs disambiguation from artist and label"""
config["discogs"]["strip_disambiguation"] = False
data = {
"id": 123,
"uri": "https://www.discogs.com/release/123456-something",
"tracklist": [self._make_track("A", "1", "01:01")],
"tracklist": [self._make_track("a", "1", "01:01")],
"artists": [{"name": "ARTIST NAME (2)", "id": 321, "join": ""}],
"title": "TITLE",
"title": "title",
"labels": [
{
"name": "LABEL NAME (5)",
"catno": "CATALOG NUMBER",
"catno": "catalog number",
}
],
}
config["discogs"]["strip_disambiguation"] = False
release = Bag(
data=data,
title=data["title"],
@ -423,6 +423,57 @@ class DGAlbumInfoTest(BeetsTestCase):
assert d.artist == "ARTIST NAME (2)"
assert d.label == "LABEL NAME (5)"
def test_strip_disambiguation_multiple_artists(self):
"""Test removing disambiguation if there are multiple artists on the release"""
data = {
"id": 123,
"uri": "https://www.discogs.com/release/123456-something",
"tracklist": [self._make_track("a", "1", "01:01")],
"artists": [
{"name": "ARTIST NAME (2)", "id": 321, "join": "&"},
{"name": "OTHER ARTIST (5)", "id": 321, "join": ""},
],
"title": "title",
}
release = Bag(
data=data,
title=data["title"],
artists=[Bag(data=d) for d in data["artists"]],
)
d = DiscogsPlugin().get_album_info(release)
assert d.artist == "ARTIST NAME & OTHER ARTIST"
def test_strip_disambiguation_artist_tracks(self):
data = {
"id": 123,
"uri": "https://www.discogs.com/release/123456-something",
"tracklist": [
{
"title": "track",
"position": "A",
"type_": "track",
"duration": "5:44",
"artists": [
{
"name": "TEST ARTIST (5)",
"tracks": "",
"id": 11146,
}
],
}
],
"artists": [{"name": "OTHER ARTIST (5)", "id": 321, "join": ""}],
"title": "title",
}
release = Bag(
data=data,
title=data["title"],
artists=[Bag(data=d) for d in data["artists"]],
)
d = DiscogsPlugin().get_album_info(release)
assert d.tracks[0].artist == "TEST ARTIST"
assert d.artist == "OTHER ARTIST"
@pytest.mark.parametrize(
"formats, expected_media, expected_albumtype",