Discogs Label Disambiguation Fix + config option. (#6035)

Fixes #5366 . 

Removes the Discogs disambiguation from label names. In addition, this PR
moves the Discogs disambiguation function out of the
MetadataSourcePlugin and into the Discogs plugin, keeping the
parent class more generic.

A config option has been added to allow disabling Discogs disambiguation
removal. Tests and docs have been written for the feature, and the change
shows no side effects in other plugins that rely on the MetadataSourcePlugin.
This commit is contained in:
Šarūnas Nejus 2025-09-22 21:55:47 +01:00 committed by GitHub
commit 5e0e898429
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 108 additions and 8 deletions

View file

@ -271,10 +271,9 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
"""Returns an artist string (all artists) and an artist_id (the main
artist) for a list of artist object dicts.
For each artist, this function moves articles (such as 'a', 'an',
and 'the') to the front and strips trailing disambiguation numbers. It
returns a tuple containing the comma-separated string of all
normalized artists and the ``id`` of the main/first artist.
For each artist, this function moves articles (such as 'a', 'an', and 'the')
to the front. It returns a tuple containing the comma-separated string
of all normalized artists and the ``id`` of the main/first artist.
Alternatively a keyword can be used to combine artists together into a
single string by passing the join_key argument.
@ -298,8 +297,6 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
if not artist_id:
artist_id = artist[id_key]
name = artist[name_key]
# Strip disambiguation number.
name = re.sub(r" \(\d+\)$", "", name)
# Move articles to the front.
name = re.sub(r"^(.*?), (a|an|the)$", r"\2 \1", name, flags=re.I)
# Use a join keyword if requested and available.

View file

@ -76,6 +76,8 @@ TRACK_INDEX_RE = re.compile(
re.VERBOSE,
)
# Discogs disambiguation marker: a space followed by a parenthesised number,
# e.g. the " (5)" in "Label Name (5)". The pattern is unanchored, so it
# matches anywhere in the string, not only at the end.
DISAMBIGUATION_RE = re.compile(r" \(\d+\)")
class ReleaseFormat(TypedDict):
name: str
@ -96,6 +98,7 @@ class DiscogsPlugin(MetadataSourcePlugin):
"separator": ", ",
"index_tracks": False,
"append_style_genre": False,
"strip_disambiguation": True,
}
)
self.config["apikey"].redact = True
@ -362,15 +365,20 @@ class DiscogsPlugin(MetadataSourcePlugin):
label = catalogno = labelid = None
if result.data.get("labels"):
label = result.data["labels"][0].get("name")
label = self.strip_disambiguation(
result.data["labels"][0].get("name")
)
catalogno = result.data["labels"][0].get("catno")
labelid = result.data["labels"][0].get("id")
cover_art_url = self.select_cover_art(result)
# Additional cleanups (various artists name, catalog number, media).
# Additional cleanups
# (various artists name, catalog number, media, disambiguation).
if va:
artist = config["va_name"].as_str()
else:
artist = self.strip_disambiguation(artist)
if catalogno == "none":
catalogno = None
# Explicitly set the `media` for the tracks, since it is expected by
@ -622,6 +630,14 @@ class DiscogsPlugin(MetadataSourcePlugin):
return tracklist
def strip_disambiguation(self, text: str) -> str:
    """Remove Discogs-specific disambiguation markers from a string.

    Turns 'Label Name (5)' into 'Label Name' and
    'Artist (1) & Another Artist (2)' into 'Artist & Another Artist'.

    The input is returned unchanged when it is empty or ``None`` (the
    label name is fetched with ``dict.get`` and may be missing), or when
    the ``strip_disambiguation`` config option is disabled.
    """
    # Guard first: the regex substitution raises TypeError on None.
    if not text or not self.config["strip_disambiguation"]:
        return text
    return DISAMBIGUATION_RE.sub("", text)
def get_track_info(self, track, index, divisions):
"""Returns a TrackInfo object for a discogs track."""
title = track["title"]
@ -634,6 +650,7 @@ class DiscogsPlugin(MetadataSourcePlugin):
artist, artist_id = self.get_artist(
track.get("artists", []), join_key="join"
)
artist = self.strip_disambiguation(artist)
length = self.get_track_length(track["duration"])
return TrackInfo(
title=title,

View file

@ -11,6 +11,8 @@ New features:
- :doc:`plugins/lastgenre`: Add a ``--pretend`` option to preview genre changes
without storing or writing them.
- :doc:`plugins/discogs`: New config option ``strip_disambiguation`` to toggle
stripping Discogs numeric disambiguation from artist and label fields.
Bug fixes:
@ -21,6 +23,8 @@ Bug fixes:
- :doc:`plugins/spotify` Removed old and undocumented config options
`artist_field`, `album_field` and `track` that were causing issues with track
matching. :bug:`5189`
- :doc:`plugins/discogs` Fixed inconsistency in stripping disambiguation from
artists but not labels. :bug:`5366`
For packagers:
@ -35,6 +39,8 @@ Other changes:
- Moved ``art.py`` utility module from ``beets`` into ``beetsplug`` namespace as
it is not used in the core beets codebase. It can now be found in
``beetsplug._utils``.
- :class:`beets.metadata_plugin.MetadataSourcePlugin`: Remove discogs specific
disambiguation stripping.
2.4.0 (September 13, 2025)
--------------------------

View file

@ -109,6 +109,9 @@ Other configurations available under ``discogs:`` are:
- **search_limit**: The maximum number of results to return from Discogs. This
is useful if you want to limit the number of results returned to speed up
searches. Default: ``5``
- **strip_disambiguation**: Discogs uses strings like ``"(4)"`` to mark distinct
artists and labels with the same name. These are stripped by default; set this
option to ``False`` to keep the Discogs disambiguation in your tags. Default: ``True``
.. _discogs guidelines: https://support.discogs.com/hc/en-us/articles/360005055373-Database-Guidelines-12-Tracklisting#Index_Tracks_And_Headings

View file

@ -374,6 +374,83 @@ class DGAlbumInfoTest(BeetsTestCase):
assert d.genre == "GENRE1, GENRE2"
assert d.style is None
def test_strip_disambiguation(self):
    """Markers are stripped from album artist, track artist and label."""
    track = {
        "title": "track",
        "position": "A",
        "type_": "track",
        "duration": "5:44",
        "artists": [{"name": "TEST ARTIST (5)", "tracks": "", "id": 11146}],
    }
    album_artists = [
        {"name": "ARTIST NAME (2)", "id": 321, "join": "&"},
        {"name": "OTHER ARTIST (5)", "id": 321, "join": ""},
    ]
    label = {"name": "LABEL NAME (5)", "catno": "catalog number"}
    data = {
        "id": 123,
        "uri": "https://www.discogs.com/release/123456-something",
        "tracklist": [track],
        "artists": album_artists,
        "title": "title",
        "labels": [label],
    }
    release = Bag(
        data=data,
        title=data["title"],
        artists=[Bag(data=entry) for entry in album_artists],
    )

    album = DiscogsPlugin().get_album_info(release)

    # Every disambiguated field should come back without its " (N)" marker.
    assert album.artist == "ARTIST NAME & OTHER ARTIST"
    assert album.tracks[0].artist == "TEST ARTIST"
    assert album.label == "LABEL NAME"
def test_strip_disambiguation_false(self):
    """Markers are kept on all fields when the option is turned off."""
    # Disable stripping before the plugin is instantiated.
    config["discogs"]["strip_disambiguation"] = False

    track = {
        "title": "track",
        "position": "A",
        "type_": "track",
        "duration": "5:44",
        "artists": [{"name": "TEST ARTIST (5)", "tracks": "", "id": 11146}],
    }
    album_artists = [
        {"name": "ARTIST NAME (2)", "id": 321, "join": "&"},
        {"name": "OTHER ARTIST (5)", "id": 321, "join": ""},
    ]
    label = {"name": "LABEL NAME (5)", "catno": "catalog number"}
    data = {
        "id": 123,
        "uri": "https://www.discogs.com/release/123456-something",
        "tracklist": [track],
        "artists": album_artists,
        "title": "title",
        "labels": [label],
    }
    release = Bag(
        data=data,
        title=data["title"],
        artists=[Bag(data=entry) for entry in album_artists],
    )

    album = DiscogsPlugin().get_album_info(release)

    # Every disambiguated field should keep its " (N)" marker.
    assert album.artist == "ARTIST NAME (2) & OTHER ARTIST (5)"
    assert album.tracks[0].artist == "TEST ARTIST (5)"
    assert album.label == "LABEL NAME (5)"
@pytest.mark.parametrize(
"formats, expected_media, expected_albumtype",