# This file is part of beets.
# Copyright 2016, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

"""Helpers around the extraction of album/track ID's from metadata sources."""

import re

# Each ``*_id_regex`` dictionary below pairs a regex 'pattern' with the
# index ('match_group') of the capture group that holds the ID itself.
# A '{}' inside a pattern is a ``str.format`` placeholder; presumably the
# caller fills in the URL type (e.g. 'album' or 'track') -- verify against
# the plugins using these. Literal regex braces are therefore doubled.

# Spotify IDs consist of 22 alphanumeric characters
# (zero-left-padded base62 representation of randomly generated UUID4)
spotify_id_regex = {
    'pattern': r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})',
    'match_group': 2,
}

deezer_id_regex = {
    'pattern': r'(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)',
    'match_group': 4,
}

beatport_id_regex = {
    'pattern': r'(^|beatport\.com/release/.+/)(\d+)$',
    'match_group': 2,
}

# A note on Bandcamp: There is no such thing as a Bandcamp album or artist ID,
# the URL can be used as the identifier. The Bandcamp metadata source plugin
# works that way - https://github.com/unrblt/beets-bandcamp. Bandcamp album
# URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum


def extract_discogs_id_regex(album_id):
    """Return the Discogs release ID found in `album_id`, or None.

    `album_id` is a string holding either a bare release ID or a Discogs
    release URL. The ID is returned as an `int`; `None` is returned when
    the string matches none of the recognized formats.
    """
    # Discogs-IDs are simple integers. In order to avoid confusion with
    # other metadata plugins, we only look for very specific formats of the
    # input string:
    # - plain integer, optionally wrapped in brackets and prefixed by an
    #   'r', as this is how discogs displays the release ID on its webpage.
    # - legacy url format: discogs.com/<name of release>/release/<id>
    # - current url format: discogs.com/release/<id>-<name of release>
    # See #291, #4080 and #4085 for the discussions leading up to these
    # patterns.
    # Regex has been tested here https://regex101.com/r/wyLdB4/2
    #
    # Every pattern captures the digits in a group named 'id', which is
    # what the lookup below reads. Patterns are tried in order and the
    # first one that matches wins.
    for pattern in (
        r'^\[?r?(?P<id>\d+)\]?$',
        r'discogs\.com/release/(?P<id>\d+)-',
        r'discogs\.com/[^/]+/release/(?P<id>\d+)',
    ):
        match = re.search(pattern, album_id)
        if match:
            return int(match.group('id'))

    return None