Introduce new module beets.util.id_extractors

- We introduce a new submodule of beets.util named id_extractors.
- Parts of the ID extraction utilities required by metadata source plugins
  should live there.
- This also enables future use of those utilities from outside of
  metadata source plugins.
- Move Discogs ID extractor to the new module and change test_discogs to use
  the new location.
- Add spotify_id_regex variable to the new module.
This commit is contained in:
J0J0 Todos 2023-01-11 09:13:13 +01:00
parent 8bbaefbdc9
commit 0175a9aed8
3 changed files with 54 additions and 28 deletions

View file

@ -0,0 +1,49 @@
# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Helpers around the extraction of album/track ID's from metadata sources."""
import re
# A Spotify ID is exactly 22 alphanumeric characters: the base62
# representation, left-padded with zeros, of a randomly generated UUID4.
#
# NOTE(review): the bare `{}` and the doubled braces around `22` suggest that
# 'pattern' is a `str.format` template whose placeholder receives the URL type
# (presumably e.g. "album" or "track") before the regex is used -- confirm
# against the callers. 'match_group' is the index of the capture group that
# holds the ID itself; group 1 only captures the start-of-string/URL prefix.
spotify_id_regex = dict(
    pattern=r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})',
    match_group=2,
)
def extract_discogs_id_regex(album_id):
    """Return the Discogs release ID found in `album_id`, or None.

    Discogs IDs are simple integers. In order to avoid confusion with other
    metadata plugins, only very specific formats of the input string are
    recognized:

    - a plain integer, optionally wrapped in square brackets and prefixed
      by an 'r', as this is how Discogs displays the release ID on its
      webpage (``4354798``, ``r4354798``, ``[r4354798]``);
    - the current URL format: discogs.com/release/<id>-<name of release>;
    - the legacy URL format: discogs.com/<name of release>/release/<id>.

    See #291, #4080 and #4085 for the discussions leading up to these
    patterns. The regexes have been tested at
    https://regex101.com/r/wyLdB4/2

    `album_id` is the candidate ID string or URL. ``None`` is accepted and
    simply yields ``None``, so callers may pass the result of a ``dict.get``
    lookup without checking it first. The ID is returned as an ``int``.
    """
    # `re.search` raises TypeError on None; treat a missing value as
    # "no Discogs ID" instead of crashing the caller.
    if album_id is None:
        return None

    # Order matters only in that the first matching pattern wins.
    for pattern in (
        r'^\[?r?(?P<id>\d+)\]?$',
        r'discogs\.com/release/(?P<id>\d+)-',
        r'discogs\.com/[^/]+/release/(?P<id>\d+)',
    ):
        match = re.search(pattern, album_id)
        if match:
            return int(match.group('id'))
    return None

View file

@ -18,6 +18,7 @@ python3-discogs-client library.
import beets.ui
from beets import config
from beets.util.id_extractors import extract_discogs_id_regex
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.plugins import MetadataSourcePlugin, BeetsPlugin, get_distance
import confuse
@ -218,31 +219,6 @@ class DiscogsPlugin(BeetsPlugin):
# first 10 results, don't overwhelm with options
return candidates[:10]
@staticmethod
def extract_release_id_regex(album_id):
    """Return the Discogs release ID found in `album_id`, or None.

    Discogs IDs are simple integers. In order to avoid confusion with other
    metadata plugins, only very specific formats of the input string are
    recognized:

    - a plain integer, optionally wrapped in square brackets and prefixed
      by an 'r', as this is how Discogs displays the release ID on its
      webpage;
    - the current URL format: discogs.com/release/<id>-<name of release>;
    - the legacy URL format: discogs.com/<name of release>/release/<id>.

    See #291, #4080 and #4085 for the discussions leading up to these
    patterns. The regexes have been tested at
    https://regex101.com/r/wyLdB4/2

    `album_id` may be ``None`` (for example when a 'uri' lookup found
    nothing), in which case ``None`` is returned. The ID is returned as an
    ``int``.
    """
    # `re.search` raises TypeError on None; report "no ID" instead.
    if album_id is None:
        return None

    # The first pattern that matches wins.
    for pattern in (
        r'^\[?r?(?P<id>\d+)\]?$',
        r'discogs\.com/release/(?P<id>\d+)-',
        r'discogs\.com/[^/]+/release/(?P<id>\d+)',
    ):
        match = re.search(pattern, album_id)
        if match:
            return int(match.group('id'))
    return None
def album_for_id(self, album_id):
"""Fetches an album by its Discogs ID and returns an AlbumInfo object
or None if the album is not found.
@ -252,7 +228,7 @@ class DiscogsPlugin(BeetsPlugin):
self._log.debug('Searching for release {0}', album_id)
discogs_id = self.extract_release_id_regex(album_id)
discogs_id = extract_discogs_id_regex(album_id)
if not discogs_id:
return None
@ -365,7 +341,7 @@ class DiscogsPlugin(BeetsPlugin):
else:
genre = base_genre
discogs_albumid = self.extract_release_id_regex(result.data.get('uri'))
discogs_albumid = extract_discogs_id_regex(result.data.get('uri'))
# Extract information for the optional AlbumInfo fields that are
# contained on nested discogs fields.

View file

@ -21,6 +21,7 @@ from test._common import Bag
from test.helper import capture_log
from beets import config
from beets.util.id_extractors import extract_discogs_id_regex
from beetsplug.discogs import DiscogsPlugin
@ -371,7 +372,7 @@ class DGAlbumInfoTest(_common.TestCase):
('005b84a0-ecd6-39f1-b2f6-6eb48756b268', ''),
]
for test_pattern, expected in test_patterns:
match = DiscogsPlugin.extract_release_id_regex(test_pattern)
match = extract_discogs_id_regex(test_pattern)
if not match:
match = ''
self.assertEqual(match, expected)