mirror of
https://github.com/beetbox/beets.git
synced 2026-01-05 23:43:31 +01:00
Merge pull request #4633 from JOJ0/refactor_id_extraction
Refactor metadata source ID extraction utilities
This commit is contained in:
commit
40d27f5522
8 changed files with 161 additions and 50 deletions
|
|
@ -705,22 +705,27 @@ class MetadataSourcePlugin(metaclass=abc.ABCMeta):
|
|||
|
||||
return artist_string, artist_id
|
||||
|
||||
@staticmethod
def _get_id(url_type, id_, id_regex):
    """Parse an ID from its URL if necessary.

    :param url_type: Type of URL. Either 'album' or 'track'.
    :type url_type: str
    :param id_: Album/track ID or URL.
    :type id_: str
    :param id_regex: A dictionary containing a regular expression
        extracting an ID from an URL (if it's not an ID already) in
        'pattern' and the number of the match group in 'match_group'.
    :type id_regex: dict
    :return: Album/track ID, or None when nothing could be extracted.
    :rtype: str or None
    """
    # This is a static method, so no instance state (``self._log``,
    # ``self.data_source``, ``self.id_regex``) may be referenced here;
    # everything needed arrives through the parameters.
    # NOTE(review): ``log`` is expected to be a logger defined at module
    # level outside this hunk -- confirm.
    log.debug(
        "Extracting {} ID from '{}'", url_type, id_
    )
    # The pattern may carry a ``{}`` placeholder that is filled with the
    # URL type; ``str()`` lets non-string IDs be searched as well.
    match = re.search(id_regex['pattern'].format(url_type), str(id_))
    if match:
        id_ = match.group(id_regex['match_group'])
        # An empty or non-participating group counts as "no ID found".
        if id_:
            return id_
    return None
|
||||
|
|
|
|||
65
beets/util/id_extractors.py
Normal file
65
beets/util/id_extractors.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
# This file is part of beets.
|
||||
# Copyright 2016, Adrian Sampson.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
# "Software"), to deal in the Software without restriction, including
|
||||
# without limitation the rights to use, copy, modify, merge, publish,
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so, subject to
|
||||
# the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
|
||||
"""Helpers around the extraction of album/track ID's from metadata sources."""
|
||||
|
||||
import re
|
||||
|
||||
# Each ``*_id_regex`` dictionary stores a regular expression under 'pattern'
# and, under 'match_group', the number of the capture group holding the ID.
# A single ``{}`` inside a pattern is a ``str.format`` placeholder.

# Spotify IDs consist of 22 alphanumeric characters
# (zero-left-padded base62 representation of randomly generated UUID4)
# The doubled braces in ``{{22}}`` come out of ``str.format`` as the literal
# quantifier ``{22}``.
spotify_id_regex = {
    'pattern': r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})',
    'match_group': 2,
}

# Group 4 is the run of digits. The optional groups before it cover a
# lowercase path segment and the ``{}`` placeholder segment.
deezer_id_regex = {
    'pattern': r'(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)',
    'match_group': 4,
}

# This pattern has no ``{}`` placeholder: the URL form is fixed to
# ``/release/`` and the digits must end the string.
beatport_id_regex = {
    'pattern': r'(^|beatport\.com/release/.+/)(\d+)$',
    'match_group': 2,
}
|
||||
|
||||
# A note on Bandcamp: There is no such thing as a Bandcamp album or artist ID,
|
||||
# the URL can be used as the identifier. The Bandcamp metadata source plugin
|
||||
# works that way - https://github.com/unrblt/beets-bandcamp. Bandcamp album
|
||||
# URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum
|
||||
|
||||
|
||||
def extract_discogs_id_regex(album_id):
    """Return the Discogs release ID contained in ``album_id``, or None.

    :param album_id: A Discogs release ID or a Discogs release URL. Values
        that are not strings are converted with ``str()`` before matching.
    :type album_id: str
    :return: The release ID, or None if ``album_id`` is None or matches
        none of the supported formats.
    :rtype: int or None
    """
    # Discogs-IDs are simple integers. In order to avoid confusion with
    # other metadata plugins, we only look for very specific formats of the
    # input string:
    # - plain integer, optionally wrapped in brackets and prefixed by an
    #   'r', as this is how discogs displays the release ID on its webpage.
    # - legacy url format: discogs.com/<name of release>/release/<id>
    # - legacy url short format: discogs.com/release/<id>
    # - current url format: discogs.com/release/<id>-<name of release>
    # See #291, #4080 and #4085 for the discussions leading up to these
    # patterns.
    # Regex has been tested here https://regex101.com/r/TOu7kw/1

    # A missing value (e.g. an absent 'uri' entry) is simply "no ID" rather
    # than a TypeError raised from ``re.search``.
    if album_id is None:
        return None
    candidate = str(album_id)

    for pattern in (
        r'^\[?r?(?P<id>\d+)\]?$',
        r'discogs\.com/release/(?P<id>\d+)-?',
        r'discogs\.com/[^/]+/release/(?P<id>\d+)',
    ):
        match = re.search(pattern, candidate)
        if match:
            return int(match.group('id'))

    return None
|
||||
|
|
@ -28,6 +28,7 @@ import beets.ui
|
|||
from beets.autotag.hooks import AlbumInfo, TrackInfo
|
||||
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
|
||||
import confuse
|
||||
from beets.util.id_extractors import beatport_id_regex
|
||||
|
||||
|
||||
AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing)
|
||||
|
|
@ -267,6 +268,7 @@ class BeatportTrack(BeatportObject):
|
|||
|
||||
class BeatportPlugin(BeetsPlugin):
|
||||
data_source = 'Beatport'
|
||||
id_regex = beatport_id_regex
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
|
@ -380,11 +382,13 @@ class BeatportPlugin(BeetsPlugin):
|
|||
or None if the query is not a valid ID or release is not found.
|
||||
"""
|
||||
self._log.debug('Searching for release {0}', release_id)
|
||||
match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id)
|
||||
if not match:
|
||||
|
||||
release_id = self._get_id('album', release_id, self.id_regex)
|
||||
if release_id is None:
|
||||
self._log.debug('Not a valid Beatport release ID.')
|
||||
return None
|
||||
release = self.client.get_release(match.group(2))
|
||||
|
||||
release = self.client.get_release(release_id)
|
||||
if release:
|
||||
return self._get_album_info(release)
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ import requests
|
|||
from beets import ui
|
||||
from beets.autotag import AlbumInfo, TrackInfo
|
||||
from beets.plugins import MetadataSourcePlugin, BeetsPlugin
|
||||
from beets.util.id_extractors import deezer_id_regex
|
||||
|
||||
|
||||
class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
|
||||
|
|
@ -34,10 +35,7 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
|
|||
album_url = 'https://api.deezer.com/album/'
|
||||
track_url = 'https://api.deezer.com/track/'
|
||||
|
||||
id_regex = {
|
||||
'pattern': r'(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)',
|
||||
'match_group': 4,
|
||||
}
|
||||
id_regex = deezer_id_regex
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
|
@ -51,7 +49,7 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
|
|||
:return: AlbumInfo object for album.
|
||||
:rtype: beets.autotag.hooks.AlbumInfo or None
|
||||
"""
|
||||
deezer_id = self._get_id('album', album_id)
|
||||
deezer_id = self._get_id('album', album_id, self.id_regex)
|
||||
if deezer_id is None:
|
||||
return None
|
||||
|
||||
|
|
@ -154,7 +152,7 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
|
|||
:rtype: beets.autotag.hooks.TrackInfo or None
|
||||
"""
|
||||
if track_data is None:
|
||||
deezer_id = self._get_id('track', track_id)
|
||||
deezer_id = self._get_id('track', track_id, self.id_regex)
|
||||
if deezer_id is None:
|
||||
return None
|
||||
track_data = requests.get(self.track_url + deezer_id).json()
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ python3-discogs-client library.
|
|||
|
||||
import beets.ui
|
||||
from beets import config
|
||||
from beets.util.id_extractors import extract_discogs_id_regex
|
||||
from beets.autotag.hooks import AlbumInfo, TrackInfo
|
||||
from beets.plugins import MetadataSourcePlugin, BeetsPlugin, get_distance
|
||||
import confuse
|
||||
|
|
@ -218,31 +219,6 @@ class DiscogsPlugin(BeetsPlugin):
|
|||
# first 10 results, don't overwhelm with options
|
||||
return candidates[:10]
|
||||
|
||||
@staticmethod
|
||||
def extract_release_id_regex(album_id):
|
||||
"""Returns the Discogs_id or None."""
|
||||
# Discogs-IDs are simple integers. In order to avoid confusion with
|
||||
# other metadata plugins, we only look for very specific formats of the
|
||||
# input string:
|
||||
# - plain integer, optionally wrapped in brackets and prefixed by an
|
||||
# 'r', as this is how discogs displays the release ID on its webpage.
|
||||
# - legacy url format: discogs.com/<name of release>/release/<id>
|
||||
# - current url format: discogs.com/release/<id>-<name of release>
|
||||
# See #291, #4080 and #4085 for the discussions leading up to these
|
||||
# patterns.
|
||||
# Regex has been tested here https://regex101.com/r/wyLdB4/2
|
||||
|
||||
for pattern in [
|
||||
r'^\[?r?(?P<id>\d+)\]?$',
|
||||
r'discogs\.com/release/(?P<id>\d+)-',
|
||||
r'discogs\.com/[^/]+/release/(?P<id>\d+)',
|
||||
]:
|
||||
match = re.search(pattern, album_id)
|
||||
if match:
|
||||
return int(match.group('id'))
|
||||
|
||||
return None
|
||||
|
||||
def album_for_id(self, album_id):
|
||||
"""Fetches an album by its Discogs ID and returns an AlbumInfo object
|
||||
or None if the album is not found.
|
||||
|
|
@ -252,7 +228,7 @@ class DiscogsPlugin(BeetsPlugin):
|
|||
|
||||
self._log.debug('Searching for release {0}', album_id)
|
||||
|
||||
discogs_id = self.extract_release_id_regex(album_id)
|
||||
discogs_id = extract_discogs_id_regex(album_id)
|
||||
|
||||
if not discogs_id:
|
||||
return None
|
||||
|
|
@ -365,7 +341,7 @@ class DiscogsPlugin(BeetsPlugin):
|
|||
else:
|
||||
genre = base_genre
|
||||
|
||||
discogs_albumid = self.extract_release_id_regex(result.data.get('uri'))
|
||||
discogs_albumid = extract_discogs_id_regex(result.data.get('uri'))
|
||||
|
||||
# Extract information for the optional AlbumInfo fields that are
|
||||
# contained on nested discogs fields.
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ from beets.autotag.hooks import AlbumInfo, TrackInfo
|
|||
from beets.dbcore import types
|
||||
from beets.library import DateType
|
||||
from beets.plugins import BeetsPlugin, MetadataSourcePlugin
|
||||
from beets.util.id_extractors import spotify_id_regex
|
||||
|
||||
DEFAULT_WAITING_TIME = 5
|
||||
|
||||
|
|
@ -69,12 +70,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
|
|||
track_url = 'https://api.spotify.com/v1/tracks/'
|
||||
audio_features_url = 'https://api.spotify.com/v1/audio-features/'
|
||||
|
||||
# Spotify IDs consist of 22 alphanumeric characters
|
||||
# (zero-left-padded base62 representation of randomly generated UUID4)
|
||||
id_regex = {
|
||||
'pattern': r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})',
|
||||
'match_group': 2,
|
||||
}
|
||||
id_regex = spotify_id_regex
|
||||
|
||||
spotify_audio_features = {
|
||||
'acousticness': 'spotify_acousticness',
|
||||
|
|
@ -216,7 +212,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
|
|||
:return: AlbumInfo object for album
|
||||
:rtype: beets.autotag.hooks.AlbumInfo or None
|
||||
"""
|
||||
spotify_id = self._get_id('album', album_id)
|
||||
spotify_id = self._get_id('album', album_id, self.id_regex)
|
||||
if spotify_id is None:
|
||||
return None
|
||||
|
||||
|
|
@ -330,7 +326,7 @@ class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin):
|
|||
:rtype: beets.autotag.hooks.TrackInfo or None
|
||||
"""
|
||||
if track_data is None:
|
||||
spotify_id = self._get_id('track', track_id)
|
||||
spotify_id = self._get_id('track', track_id, self.id_regex)
|
||||
if spotify_id is None:
|
||||
return None
|
||||
track_data = self._handle_response(
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from test._common import Bag
|
|||
from test.helper import capture_log
|
||||
|
||||
from beets import config
|
||||
from beets.util.id_extractors import extract_discogs_id_regex
|
||||
|
||||
from beetsplug.discogs import DiscogsPlugin
|
||||
|
||||
|
|
@ -371,7 +372,7 @@ class DGAlbumInfoTest(_common.TestCase):
|
|||
('005b84a0-ecd6-39f1-b2f6-6eb48756b268', ''),
|
||||
]
|
||||
for test_pattern, expected in test_patterns:
|
||||
match = DiscogsPlugin.extract_release_id_regex(test_pattern)
|
||||
match = extract_discogs_id_regex(test_pattern)
|
||||
if not match:
|
||||
match = ''
|
||||
self.assertEqual(match, expected)
|
||||
|
|
|
|||
|
|
@ -26,6 +26,9 @@ from beets.library import Item
|
|||
from beets.dbcore import types
|
||||
from mediafile import MediaFile
|
||||
from beets.util import displayable_path, bytestring_path, syspath
|
||||
from beets.plugins import MetadataSourcePlugin
|
||||
from beets.util.id_extractors import spotify_id_regex, deezer_id_regex, \
|
||||
beatport_id_regex
|
||||
|
||||
from test.test_importer import ImportHelper, AutotagStub
|
||||
from test.test_ui_importer import TerminalImportSessionSetup
|
||||
|
|
@ -558,6 +561,69 @@ class PromptChoicesTest(TerminalImportSessionSetup, unittest.TestCase,
|
|||
require=ANY)
|
||||
|
||||
|
||||
class ParseSpotifyIDTest(unittest.TestCase):
    """Exercise ``MetadataSourcePlugin._get_id`` with ``spotify_id_regex``."""

    def test_parse_id_correct(self):
        # A bare ID is expected to come back unchanged.
        spotify_id = "39WqpoPgZxygo6YQjehLJJ"
        parsed = MetadataSourcePlugin._get_id(
            "album", spotify_id, spotify_id_regex)
        self.assertEqual(parsed, spotify_id)

    def test_parse_id_non_id_returns_none(self):
        # Text that is neither an ID nor a URL yields no result.
        parsed = MetadataSourcePlugin._get_id(
            "album", "blah blah", spotify_id_regex)
        self.assertIsNone(parsed)

    def test_parse_id_url_finds_id(self):
        # The ID is pulled out of a full album URL.
        spotify_id = "39WqpoPgZxygo6YQjehLJJ"
        album_url = "https://open.spotify.com/album/{}".format(spotify_id)
        parsed = MetadataSourcePlugin._get_id(
            "album", album_url, spotify_id_regex)
        self.assertEqual(parsed, spotify_id)
|
||||
|
||||
|
||||
class ParseDeezerIDTest(unittest.TestCase):
    """Exercise ``MetadataSourcePlugin._get_id`` with ``deezer_id_regex``."""

    def test_parse_id_correct(self):
        # A bare numeric ID is expected to come back unchanged.
        deezer_id = "176356382"
        parsed = MetadataSourcePlugin._get_id(
            "album", deezer_id, deezer_id_regex)
        self.assertEqual(parsed, deezer_id)

    def test_parse_id_non_id_returns_none(self):
        # Text that is neither an ID nor a URL yields no result.
        parsed = MetadataSourcePlugin._get_id(
            "album", "blah blah", deezer_id_regex)
        self.assertIsNone(parsed)

    def test_parse_id_url_finds_id(self):
        # The ID is pulled out of a full album URL.
        deezer_id = "176356382"
        album_url = "https://www.deezer.com/album/{}".format(deezer_id)
        parsed = MetadataSourcePlugin._get_id(
            "album", album_url, deezer_id_regex)
        self.assertEqual(parsed, deezer_id)
|
||||
|
||||
|
||||
class ParseBeatportIDTest(unittest.TestCase):
    """Exercise ``MetadataSourcePlugin._get_id`` with ``beatport_id_regex``."""

    def test_parse_id_correct(self):
        # A bare numeric ID is expected to come back unchanged.
        beatport_id = "3089651"
        parsed = MetadataSourcePlugin._get_id(
            "album", beatport_id, beatport_id_regex)
        self.assertEqual(parsed, beatport_id)

    def test_parse_id_non_id_returns_none(self):
        # Text that is neither an ID nor a URL yields no result.
        parsed = MetadataSourcePlugin._get_id(
            "album", "blah blah", beatport_id_regex)
        self.assertIsNone(parsed)

    def test_parse_id_url_finds_id(self):
        # The ID is pulled out of a full release URL.
        beatport_id = "3089651"
        release_url = "https://www.beatport.com/release/album-name/{}".format(
            beatport_id)
        parsed = MetadataSourcePlugin._get_id(
            "album", release_url, beatport_id_regex)
        self.assertEqual(parsed, beatport_id)
|
||||
|
||||
|
||||
def suite():
    """Load the tests defined in this module by its ``__name__``."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromName(__name__)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue