Move Discogs release-ID extraction into a shared helper module.

Summary of the patch below:
  * adds beets/util/id_extractors.py, containing `spotify_id_regex` and the
    module-level function `extract_discogs_id_regex`;
  * removes the `DiscogsPlugin.extract_release_id_regex` static method from
    beetsplug/discogs.py and switches its two call sites to the new helper;
  * updates test/test_discogs.py to import and call the new helper.

diff --git a/beets/util/id_extractors.py b/beets/util/id_extractors.py
new file mode 100644
index 000000000..ad46f877e
--- /dev/null
+++ b/beets/util/id_extractors.py
@@ -0,0 +1,49 @@
+# This file is part of beets.
+# Copyright 2016, Adrian Sampson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Helpers around the extraction of album/track ID's from metadata sources."""
+
+import re
+
+# Spotify IDs consist of 22 alphanumeric characters
+# (zero-left-padded base62 representation of randomly generated UUID4)
+spotify_id_regex = {
+    'pattern': r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})',
+    'match_group': 2,
+}
+
+
+def extract_discogs_id_regex(album_id):
+    """Returns the Discogs_id or None."""
+    # Discogs-IDs are simple integers. In order to avoid confusion with
+    # other metadata plugins, we only look for very specific formats of the
+    # input string:
+    # - plain integer, optionally wrapped in brackets and prefixed by an
+    #   'r', as this is how discogs displays the release ID on its webpage.
+    # - legacy url format: discogs.com/<name_of_release>/release/<id>
+    # - current url format: discogs.com/release/<id>-<name_of_release>
+    # See #291, #4080 and #4085 for the discussions leading up to these
+    # patterns.
+    # Regex has been tested here https://regex101.com/r/wyLdB4/2
+
+    for pattern in [
+        r'^\[?r?(?P<id>\d+)\]?$',
+        r'discogs\.com/release/(?P<id>\d+)-',
+        r'discogs\.com/[^/]+/release/(?P<id>\d+)',
+    ]:
+        match = re.search(pattern, album_id)
+        if match:
+            return int(match.group('id'))
+
+    return None
diff --git a/beetsplug/discogs.py b/beetsplug/discogs.py
index 103aa1107..c8798db88 100644
--- a/beetsplug/discogs.py
+++ b/beetsplug/discogs.py
@@ -18,6 +18,7 @@ python3-discogs-client library.
 
 import beets.ui
 from beets import config
+from beets.util.id_extractors import extract_discogs_id_regex
 from beets.autotag.hooks import AlbumInfo, TrackInfo
 from beets.plugins import MetadataSourcePlugin, BeetsPlugin, get_distance
 import confuse
@@ -218,31 +219,6 @@ class DiscogsPlugin(BeetsPlugin):
         # first 10 results, don't overwhelm with options
         return candidates[:10]
 
-    @staticmethod
-    def extract_release_id_regex(album_id):
-        """Returns the Discogs_id or None."""
-        # Discogs-IDs are simple integers. In order to avoid confusion with
-        # other metadata plugins, we only look for very specific formats of the
-        # input string:
-        # - plain integer, optionally wrapped in brackets and prefixed by an
-        #   'r', as this is how discogs displays the release ID on its webpage.
-        # - legacy url format: discogs.com/<name_of_release>/release/<id>
-        # - current url format: discogs.com/release/<id>-<name_of_release>
-        # See #291, #4080 and #4085 for the discussions leading up to these
-        # patterns.
-        # Regex has been tested here https://regex101.com/r/wyLdB4/2
-
-        for pattern in [
-            r'^\[?r?(?P<id>\d+)\]?$',
-            r'discogs\.com/release/(?P<id>\d+)-',
-            r'discogs\.com/[^/]+/release/(?P<id>\d+)',
-        ]:
-            match = re.search(pattern, album_id)
-            if match:
-                return int(match.group('id'))
-
-        return None
-
     def album_for_id(self, album_id):
         """Fetches an album by its Discogs ID and returns an AlbumInfo object
         or None if the album is not found.
@@ -252,7 +228,7 @@ class DiscogsPlugin(BeetsPlugin):
 
         self._log.debug('Searching for release {0}', album_id)
 
-        discogs_id = self.extract_release_id_regex(album_id)
+        discogs_id = extract_discogs_id_regex(album_id)
         if not discogs_id:
             return None
 
@@ -365,7 +341,7 @@ class DiscogsPlugin(BeetsPlugin):
         else:
             genre = base_genre
 
-        discogs_albumid = self.extract_release_id_regex(result.data.get('uri'))
+        discogs_albumid = extract_discogs_id_regex(result.data.get('uri'))
 
         # Extract information for the optional AlbumInfo fields that are
         # contained on nested discogs fields.
diff --git a/test/test_discogs.py b/test/test_discogs.py
index c2aa7682c..25b9962b0 100644
--- a/test/test_discogs.py
+++ b/test/test_discogs.py
@@ -21,6 +21,7 @@ from test._common import Bag
 from test.helper import capture_log
 
 from beets import config
+from beets.util.id_extractors import extract_discogs_id_regex
 
 from beetsplug.discogs import DiscogsPlugin
 
@@ -371,7 +372,7 @@ class DGAlbumInfoTest(_common.TestCase):
             ('005b84a0-ecd6-39f1-b2f6-6eb48756b268', ''),
         ]
         for test_pattern, expected in test_patterns:
-            match = DiscogsPlugin.extract_release_id_regex(test_pattern)
+            match = extract_discogs_id_regex(test_pattern)
             if not match:
                 match = ''
             self.assertEqual(match, expected)