Discogs Label Disambiguation Fix + config option. (#6035)

Fixes #5366 . 

Adds removal of disambiguation from label names. In addition, this PR
moves the Discogs disambiguation function out of the
MetadataSourcePlugin and puts it in the Discogs plugin, keeping the
parent class more generic.

A config option has been added to allow disabling Discogs disambiguation
removal. Tests and docs have been written for the feature, and the tests
show no side effects in other plugins that rely on the MetadataSourcePlugin.
This commit is contained in:
Šarūnas Nejus 2025-09-22 21:55:47 +01:00 committed by GitHub
commit 5e0e898429
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 108 additions and 8 deletions

View file

@ -271,10 +271,9 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
"""Returns an artist string (all artists) and an artist_id (the main """Returns an artist string (all artists) and an artist_id (the main
artist) for a list of artist object dicts. artist) for a list of artist object dicts.
For each artist, this function moves articles (such as 'a', 'an', For each artist, this function moves articles (such as 'a', 'an', and 'the')
and 'the') to the front and strips trailing disambiguation numbers. It to the front. It returns a tuple containing the comma-separated string
returns a tuple containing the comma-separated string of all of all normalized artists and the ``id`` of the main/first artist.
normalized artists and the ``id`` of the main/first artist.
Alternatively a keyword can be used to combine artists together into a Alternatively a keyword can be used to combine artists together into a
single string by passing the join_key argument. single string by passing the join_key argument.
@ -298,8 +297,6 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
if not artist_id: if not artist_id:
artist_id = artist[id_key] artist_id = artist[id_key]
name = artist[name_key] name = artist[name_key]
# Strip disambiguation number.
name = re.sub(r" \(\d+\)$", "", name)
# Move articles to the front. # Move articles to the front.
name = re.sub(r"^(.*?), (a|an|the)$", r"\2 \1", name, flags=re.I) name = re.sub(r"^(.*?), (a|an|the)$", r"\2 \1", name, flags=re.I)
# Use a join keyword if requested and available. # Use a join keyword if requested and available.

View file

@ -76,6 +76,8 @@ TRACK_INDEX_RE = re.compile(
re.VERBOSE, re.VERBOSE,
) )
DISAMBIGUATION_RE = re.compile(r" \(\d+\)")
class ReleaseFormat(TypedDict): class ReleaseFormat(TypedDict):
name: str name: str
@ -96,6 +98,7 @@ class DiscogsPlugin(MetadataSourcePlugin):
"separator": ", ", "separator": ", ",
"index_tracks": False, "index_tracks": False,
"append_style_genre": False, "append_style_genre": False,
"strip_disambiguation": True,
} }
) )
self.config["apikey"].redact = True self.config["apikey"].redact = True
@ -362,15 +365,20 @@ class DiscogsPlugin(MetadataSourcePlugin):
label = catalogno = labelid = None label = catalogno = labelid = None
if result.data.get("labels"): if result.data.get("labels"):
label = result.data["labels"][0].get("name") label = self.strip_disambiguation(
result.data["labels"][0].get("name")
)
catalogno = result.data["labels"][0].get("catno") catalogno = result.data["labels"][0].get("catno")
labelid = result.data["labels"][0].get("id") labelid = result.data["labels"][0].get("id")
cover_art_url = self.select_cover_art(result) cover_art_url = self.select_cover_art(result)
# Additional cleanups (various artists name, catalog number, media). # Additional cleanups
# (various artists name, catalog number, media, disambiguation).
if va: if va:
artist = config["va_name"].as_str() artist = config["va_name"].as_str()
else:
artist = self.strip_disambiguation(artist)
if catalogno == "none": if catalogno == "none":
catalogno = None catalogno = None
# Explicitly set the `media` for the tracks, since it is expected by # Explicitly set the `media` for the tracks, since it is expected by
@ -622,6 +630,14 @@ class DiscogsPlugin(MetadataSourcePlugin):
return tracklist return tracklist
def strip_disambiguation(self, text: str) -> str:
    """Remove Discogs-specific numeric disambiguation from a string.

    Turns 'Label Name (5)' into 'Label Name' and
    'Artist (1) & Another Artist (2)' into 'Artist & Another Artist'.

    The text is returned unchanged when the ``strip_disambiguation``
    config option is disabled, or when it is empty or ``None`` (callers
    pass the result of ``dict.get("name")``, which may be missing).
    """
    # Check for a missing value first: the regex substitution would raise
    # TypeError on None.
    if not text or not self.config["strip_disambiguation"]:
        return text
    return DISAMBIGUATION_RE.sub("", text)
def get_track_info(self, track, index, divisions): def get_track_info(self, track, index, divisions):
"""Returns a TrackInfo object for a discogs track.""" """Returns a TrackInfo object for a discogs track."""
title = track["title"] title = track["title"]
@ -634,6 +650,7 @@ class DiscogsPlugin(MetadataSourcePlugin):
artist, artist_id = self.get_artist( artist, artist_id = self.get_artist(
track.get("artists", []), join_key="join" track.get("artists", []), join_key="join"
) )
artist = self.strip_disambiguation(artist)
length = self.get_track_length(track["duration"]) length = self.get_track_length(track["duration"])
return TrackInfo( return TrackInfo(
title=title, title=title,

View file

@ -11,6 +11,8 @@ New features:
- :doc:`plugins/lastgenre`: Add a ``--pretend`` option to preview genre changes - :doc:`plugins/lastgenre`: Add a ``--pretend`` option to preview genre changes
without storing or writing them. without storing or writing them.
- :doc:`plugins/discogs`: New config option `strip_disambiguation` to toggle
stripping discogs numeric disambiguation on artist and label fields.
Bug fixes: Bug fixes:
@ -21,6 +23,8 @@ Bug fixes:
- :doc:`plugins/spotify` Removed old and undocumented config options - :doc:`plugins/spotify` Removed old and undocumented config options
`artist_field`, `album_field` and `track` that were causing issues with track `artist_field`, `album_field` and `track` that were causing issues with track
matching. :bug:`5189` matching. :bug:`5189`
- :doc:`plugins/discogs` Fixed inconsistency in stripping disambiguation from
artists but not labels. :bug:`5366`
For packagers: For packagers:
@ -35,6 +39,8 @@ Other changes:
- Moved ``art.py`` utility module from ``beets`` into ``beetsplug`` namespace as - Moved ``art.py`` utility module from ``beets`` into ``beetsplug`` namespace as
it is not used in the core beets codebase. It can now be found in it is not used in the core beets codebase. It can now be found in
``beetsplug._utils``. ``beetsplug._utils``.
- :class:`beets.metadata_plugin.MetadataSourcePlugin`: Remove discogs specific
disambiguation stripping.
2.4.0 (September 13, 2025) 2.4.0 (September 13, 2025)
-------------------------- --------------------------

View file

@ -109,6 +109,9 @@ Other configurations available under ``discogs:`` are:
- **search_limit**: The maximum number of results to return from Discogs. This - **search_limit**: The maximum number of results to return from Discogs. This
is useful if you want to limit the number of results returned to speed up is useful if you want to limit the number of results returned to speed up
searches. Default: ``5`` searches. Default: ``5``
- **strip_disambiguation**: Discogs uses strings like ``"(4)"`` to mark distinct
artists and labels with the same name. These markers are stripped by default;
if you'd like to keep them in your tags, set this option to ``False``. Default: ``True``
.. _discogs guidelines: https://support.discogs.com/hc/en-us/articles/360005055373-Database-Guidelines-12-Tracklisting#Index_Tracks_And_Headings .. _discogs guidelines: https://support.discogs.com/hc/en-us/articles/360005055373-Database-Guidelines-12-Tracklisting#Index_Tracks_And_Headings

View file

@ -374,6 +374,83 @@ class DGAlbumInfoTest(BeetsTestCase):
assert d.genre == "GENRE1, GENRE2" assert d.genre == "GENRE1, GENRE2"
assert d.style is None assert d.style is None
def test_strip_disambiguation(self):
    """Disambiguation is dropped from album artist, track artist and label."""
    track = {
        "title": "track",
        "position": "A",
        "type_": "track",
        "duration": "5:44",
        "artists": [{"name": "TEST ARTIST (5)", "tracks": "", "id": 11146}],
    }
    album_artists = [
        {"name": "ARTIST NAME (2)", "id": 321, "join": "&"},
        {"name": "OTHER ARTIST (5)", "id": 321, "join": ""},
    ]
    label = {
        "name": "LABEL NAME (5)",
        "catno": "catalog number",
    }
    release_data = {
        "id": 123,
        "uri": "https://www.discogs.com/release/123456-something",
        "tracklist": [track],
        "artists": album_artists,
        "title": "title",
        "labels": [label],
    }
    artist_bags = [Bag(data=entry) for entry in release_data["artists"]]
    release = Bag(
        data=release_data,
        title=release_data["title"],
        artists=artist_bags,
    )

    album = DiscogsPlugin().get_album_info(release)

    # With the default config every "(N)" suffix must be gone.
    assert album.artist == "ARTIST NAME & OTHER ARTIST"
    assert album.tracks[0].artist == "TEST ARTIST"
    assert album.label == "LABEL NAME"
def test_strip_disambiguation_false(self):
    """Disambiguation is kept everywhere when the option is turned off."""
    # Must be set before the plugin handles the release.
    config["discogs"]["strip_disambiguation"] = False

    track = {
        "title": "track",
        "position": "A",
        "type_": "track",
        "duration": "5:44",
        "artists": [{"name": "TEST ARTIST (5)", "tracks": "", "id": 11146}],
    }
    album_artists = [
        {"name": "ARTIST NAME (2)", "id": 321, "join": "&"},
        {"name": "OTHER ARTIST (5)", "id": 321, "join": ""},
    ]
    label = {
        "name": "LABEL NAME (5)",
        "catno": "catalog number",
    }
    release_data = {
        "id": 123,
        "uri": "https://www.discogs.com/release/123456-something",
        "tracklist": [track],
        "artists": album_artists,
        "title": "title",
        "labels": [label],
    }
    artist_bags = [Bag(data=entry) for entry in release_data["artists"]]
    release = Bag(
        data=release_data,
        title=release_data["title"],
        artists=artist_bags,
    )

    album = DiscogsPlugin().get_album_info(release)

    # Every "(N)" suffix must survive untouched.
    assert album.artist == "ARTIST NAME (2) & OTHER ARTIST (5)"
    assert album.tracks[0].artist == "TEST ARTIST (5)"
    assert album.label == "LABEL NAME (5)"
@pytest.mark.parametrize( @pytest.mark.parametrize(
"formats, expected_media, expected_albumtype", "formats, expected_media, expected_albumtype",