From 364111d68878c83239c9b221b1765ac89d2f54e1 Mon Sep 17 00:00:00 2001 From: soergeld Date: Wed, 30 Dec 2020 16:02:19 +0100 Subject: [PATCH 01/14] use browse for big releases --- beets/autotag/mb.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 7952c5566..0db305a50 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -71,6 +71,9 @@ RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', 'labels', 'artist-credits', 'aliases', 'recording-level-rels', 'work-rels', 'work-level-rels', 'artist-rels'] +BROWSE_INCLUDES = ['artist-credits', 'work-rels', + 'artist-rels', 'recording-rels', 'release-rels'] + TRACK_INCLUDES = ['artists', 'aliases'] if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] @@ -285,6 +288,24 @@ def album_info(release): artist_name, artist_sort_name, artist_credit_name = \ _flatten_artist_credit(release['artist-credit']) + ntracks = 0 + for medium in release['medium-list']: + ntracks += len(medium['track-list']) + + # for albums with more than 500 tracks + if ntracks > 500: + recording_list = [] + for i in range((ntracks//100)+1): + recording_list.extend(musicbrainzngs.browse_recordings( + release=release['id'], limit=100, includes=BROWSE_INCLUDES, + offset=100*i)['recording-list']) + for medium in release['medium-list']: + for recording in medium['track-list']: + recording_info = list(filter(lambda track: track['id'] == + recording['recording']['id'], + recording_list))[0] + recording['recording'] = recording_info + # Basic info. track_infos = [] index = 0 From 67587850c95645e3ab91145a4c3ae57c8fc0ebb8 Mon Sep 17 00:00:00 2001 From: soergeld Date: Wed, 30 Dec 2020 16:04:40 +0100 Subject: [PATCH 02/14] changelog --- docs/changelog.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index b73320756..fbe449ca4 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -290,6 +290,8 @@ Fixes: :bug:`2242` * :doc:`plugins/replaygain`: Disable parallel analysis on import by default. :bug:`3819` +* Fix :bug:`3308` by using browsing for big releases to retrieve additional + information. Thanks to :user:`dosoe`. For plugin developers: From 66379d542cf8e999e896a5cd4fd7eadd36822905 Mon Sep 17 00:00:00 2001 From: soergeld Date: Wed, 30 Dec 2020 16:05:05 +0100 Subject: [PATCH 03/14] style --- beets/autotag/mb.py | 1 - 1 file changed, 1 deletion(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 0db305a50..be62f840d 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -73,7 +73,6 @@ RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', 'work-level-rels', 'artist-rels'] BROWSE_INCLUDES = ['artist-credits', 'work-rels', 'artist-rels', 'recording-rels', 'release-rels'] - TRACK_INCLUDES = ['artists', 'aliases'] if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] From 7da5c374ccdfae13b0a16eb7bf53b140b5d22c57 Mon Sep 17 00:00:00 2001 From: soergeld Date: Wed, 30 Dec 2020 16:18:59 +0100 Subject: [PATCH 04/14] style --- beets/autotag/mb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index be62f840d..6292ae77d 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -294,10 +294,10 @@ def album_info(release): # for albums with more than 500 tracks if ntracks > 500: recording_list = [] - for i in range((ntracks//100)+1): + for i in range((ntracks // 100) + 1): recording_list.extend(musicbrainzngs.browse_recordings( release=release['id'], limit=100, includes=BROWSE_INCLUDES, - offset=100*i)['recording-list']) + offset=100 * i)['recording-list']) for medium in release['medium-list']: for recording in medium['track-list']: recording_info = list(filter(lambda track: track['id'] == From 52a85cdf18aed8883aed3a51899643483ac31afe Mon Sep 17 00:00:00 2001 From: soergeld Date: Fri, 1 Jan 2021 14:55:14 +0100 Subject: [PATCH 05/14] style and legibility --- beets/autotag/mb.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 6292ae77d..2facb004b 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -73,6 +73,7 @@ RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', 'work-level-rels', 'artist-rels'] BROWSE_INCLUDES = ['artist-credits', 'work-rels', 'artist-rels', 'recording-rels', 'release-rels'] +BROWSE_CHUNKSIZE = 100 TRACK_INCLUDES = ['artists', 'aliases'] if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] @@ -291,13 +292,16 @@ def album_info(release): for medium in release['medium-list']: ntracks += len(medium['track-list']) - # for albums with more than 500 tracks + # The MusicBrainz API omits 'artist-relation-list' and 'work-relation-list' + # when the release has more than 500 tracks. So we use browse_recordings + # on chunks of tracks to recover the same information in this case. if ntracks > 500: recording_list = [] - for i in range((ntracks // 100) + 1): + for i in range(0, ntracks, BROWSE_CHUNKSIZE): recording_list.extend(musicbrainzngs.browse_recordings( - release=release['id'], limit=100, includes=BROWSE_INCLUDES, - offset=100 * i)['recording-list']) + release=release['id'], limit=BROWSE_CHUNKSIZE, + includes=BROWSE_INCLUDES, + offset=BROWSE_CHUNKSIZE * i)['recording-list']) for medium in release['medium-list']: for recording in medium['track-list']: recording_info = list(filter(lambda track: track['id'] == From b691a71745feafa084d242e5e25c9a14abcd4d22 Mon Sep 17 00:00:00 2001 From: soergeld Date: Fri, 1 Jan 2021 14:56:46 +0100 Subject: [PATCH 06/14] style and legibility --- beets/autotag/mb.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 2facb004b..6d8d1198f 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -288,9 +288,7 @@ def album_info(release): artist_name, artist_sort_name, artist_credit_name = \ _flatten_artist_credit(release['artist-credit']) - ntracks = 0 - for medium in release['medium-list']: - ntracks += len(medium['track-list']) + ntracks = sum(len(m['track-list']) for m in release['medium-list']) # The MusicBrainz API omits 'artist-relation-list' and 'work-relation-list' # when the release has more than 500 tracks. So we use browse_recordings From c87dc08c4abef2aa67aff42b70ffd6f0b55110e6 Mon Sep 17 00:00:00 2001 From: soergeld Date: Thu, 7 Jan 2021 12:37:40 +0100 Subject: [PATCH 07/14] move 500 to global constant --- beets/autotag/mb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 6d8d1198f..d7535207c 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -74,6 +74,7 @@ RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', BROWSE_INCLUDES = ['artist-credits', 'work-rels', 'artist-rels', 'recording-rels', 'release-rels'] BROWSE_CHUNKSIZE = 100 +v = 500 TRACK_INCLUDES = ['artists', 'aliases'] if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] @@ -293,7 +294,7 @@ def album_info(release): # The MusicBrainz API omits 'artist-relation-list' and 'work-relation-list' # when the release has more than 500 tracks. So we use browse_recordings # on chunks of tracks to recover the same information in this case. - if ntracks > 500: + if ntracks > BROWSE_MAXTRACKS: recording_list = [] for i in range(0, ntracks, BROWSE_CHUNKSIZE): recording_list.extend(musicbrainzngs.browse_recordings( From 7afdbd49b39d251243a4dce96b941a897c6f1fde Mon Sep 17 00:00:00 2001 From: soergeld Date: Thu, 7 Jan 2021 12:41:42 +0100 Subject: [PATCH 08/14] TYPO --- beets/autotag/mb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index d7535207c..44d2160dc 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -74,7 +74,7 @@ RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', BROWSE_INCLUDES = ['artist-credits', 'work-rels', 'artist-rels', 'recording-rels', 'release-rels'] BROWSE_CHUNKSIZE = 100 -v = 500 +BROWSE_MAXTRACKS = 500 TRACK_INCLUDES = ['artists', 'aliases'] if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] From 4d86fd8a7c8b48c0dacc30541cb7b91a2fc5553f Mon Sep 17 00:00:00 2001 From: soergeld Date: Thu, 7 Jan 2021 14:28:45 +0100 Subject: [PATCH 09/14] logging --- beets/autotag/mb.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 44d2160dc..df8e12a86 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -295,6 +295,8 @@ def album_info(release): # when the release has more than 500 tracks. So we use browse_recordings # on chunks of tracks to recover the same information in this case. if ntracks > BROWSE_MAXTRACKS: + log.info(u'Album '+str(trackid)+u' has too many tracks') + log.info(u'Fetching recordings in batches of '+str(BROWSE_CHUNKSIZE)) recording_list = [] for i in range(0, ntracks, BROWSE_CHUNKSIZE): recording_list.extend(musicbrainzngs.browse_recordings( @@ -540,6 +542,7 @@ def album_for_id(releaseid): try: res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES) + log.info(u'Album '+str(trackid)+u' fetched from MusicBrainz') except musicbrainzngs.ResponseError: log.debug(u'Album ID match failed.') return None @@ -559,6 +562,7 @@ def track_for_id(releaseid): return try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) + log.info(u'Track '+str(trackid)+u' fetched from MusicBrainz') except musicbrainzngs.ResponseError: log.debug(u'Track ID match failed.') return None From e54bf275461f7fdf2e200f0dd93033cd6751a1ed Mon Sep 17 00:00:00 2001 From: soergeld Date: Thu, 7 Jan 2021 14:32:31 +0100 Subject: [PATCH 10/14] style --- beets/autotag/mb.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index df8e12a86..470b20776 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -295,8 +295,8 @@ def album_info(release): # when the release has more than 500 tracks. So we use browse_recordings # on chunks of tracks to recover the same information in this case. if ntracks > BROWSE_MAXTRACKS: - log.info(u'Album '+str(trackid)+u' has too many tracks') - log.info(u'Fetching recordings in batches of '+str(BROWSE_CHUNKSIZE)) + log.info(u'Album ' + str(release['id']) + u' has too many tracks') + log.info(u'Fetching recordings in batches of ' + str(BROWSE_CHUNKSIZE)) recording_list = [] for i in range(0, ntracks, BROWSE_CHUNKSIZE): recording_list.extend(musicbrainzngs.browse_recordings( @@ -542,7 +542,7 @@ def album_for_id(releaseid): try: res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES) - log.info(u'Album '+str(trackid)+u' fetched from MusicBrainz') + log.info(u'Album ' + str(releaseid) + u' fetched from MusicBrainz') except musicbrainzngs.ResponseError: log.debug(u'Album ID match failed.') return None @@ -562,7 +562,7 @@ def track_for_id(releaseid): return try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) - log.info(u'Track '+str(trackid)+u' fetched from MusicBrainz') + log.info(u'Track ' + str(releaseid) + u' fetched from MusicBrainz') except musicbrainzngs.ResponseError: log.debug(u'Track ID match failed.') return None From 9d34d0f793eef90893cc89b2932ecd82a1afc11f Mon Sep 17 00:00:00 2001 From: Dorian Soergel Date: Thu, 7 Jan 2021 18:36:38 +0100 Subject: [PATCH 11/14] debug Co-authored-by: Adrian Sampson --- beets/autotag/mb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 470b20776..a024fb1d4 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -295,7 +295,7 @@ def album_info(release): # when the release has more than 500 tracks. So we use browse_recordings # on chunks of tracks to recover the same information in this case. if ntracks > BROWSE_MAXTRACKS: - log.info(u'Album ' + str(release['id']) + u' has too many tracks') + log.debug(u'Album {} has too many tracks', release['id']) log.info(u'Fetching recordings in batches of ' + str(BROWSE_CHUNKSIZE)) recording_list = [] for i in range(0, ntracks, BROWSE_CHUNKSIZE): From 2e4a873f57792e56095885eccc9402527c3e144f Mon Sep 17 00:00:00 2001 From: soergeld Date: Thu, 7 Jan 2021 19:05:27 +0100 Subject: [PATCH 12/14] logs and simplifications --- beets/autotag/mb.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index a024fb1d4..a860fad84 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -296,18 +296,19 @@ def album_info(release): # on chunks of tracks to recover the same information in this case. if ntracks > BROWSE_MAXTRACKS: log.debug(u'Album {} has too many tracks', release['id']) - log.info(u'Fetching recordings in batches of ' + str(BROWSE_CHUNKSIZE)) recording_list = [] for i in range(0, ntracks, BROWSE_CHUNKSIZE): + log.debug(u'Retrieving tracks starting at {}', i) recording_list.extend(musicbrainzngs.browse_recordings( release=release['id'], limit=BROWSE_CHUNKSIZE, includes=BROWSE_INCLUDES, - offset=BROWSE_CHUNKSIZE * i)['recording-list']) + offset=i)['recording-list']) + track_map = {} + for recording in recording_list: + track_map[recording['id']] = recording for medium in release['medium-list']: for recording in medium['track-list']: - recording_info = list(filter(lambda track: track['id'] == - recording['recording']['id'], - recording_list))[0] + recording_info = track_map[recording['recording']['id']] recording['recording'] = recording_info # Basic info. @@ -542,7 +543,6 @@ def album_for_id(releaseid): try: res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES) - log.info(u'Album ' + str(releaseid) + u' fetched from MusicBrainz') except musicbrainzngs.ResponseError: log.debug(u'Album ID match failed.') return None @@ -562,7 +562,6 @@ def track_for_id(releaseid): return try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) - log.info(u'Track ' + str(releaseid) + u' fetched from MusicBrainz') except musicbrainzngs.ResponseError: log.debug(u'Track ID match failed.') return None From 422bd456f59c1b53af549ea47a84281e2c448254 Mon Sep 17 00:00:00 2001 From: soergeld Date: Thu, 7 Jan 2021 19:49:50 +0100 Subject: [PATCH 13/14] prepare for inclusion of work-level-rels --- beets/autotag/mb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index a860fad84..971807044 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -73,6 +73,8 @@ RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', 'work-level-rels', 'artist-rels'] BROWSE_INCLUDES = ['artist-credits', 'work-rels', 'artist-rels', 'recording-rels', 'release-rels'] +if "work-level-rels" in musicbrainzngs.VALID_BROWSE_INCLUDES['recording']: + BROWSE_INCLUDES.append("work-level-rels") BROWSE_CHUNKSIZE = 100 BROWSE_MAXTRACKS = 500 TRACK_INCLUDES = ['artists', 'aliases'] From db6dbbf27b4a0ecccbc978bd5983725d1bc38b3d Mon Sep 17 00:00:00 2001 From: Dorian Soergel Date: Fri, 8 Jan 2021 17:32:37 +0100 Subject: [PATCH 14/14] simplification --- beets/autotag/mb.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/beets/autotag/mb.py b/beets/autotag/mb.py index 971807044..03ea5b382 100644 --- a/beets/autotag/mb.py +++ b/beets/autotag/mb.py @@ -305,9 +305,7 @@ def album_info(release): release=release['id'], limit=BROWSE_CHUNKSIZE, includes=BROWSE_INCLUDES, offset=i)['recording-list']) - track_map = {} - for recording in recording_list: - track_map[recording['id']] = recording + track_map = {r['id']: r for r in recording_list} for medium in release['medium-list']: for recording in medium['track-list']: recording_info = track_map[recording['recording']['id']]