mirror of
https://github.com/beetbox/beets.git
synced 2025-12-20 23:53:15 +01:00
64 lines
2.4 KiB
Python
64 lines
2.4 KiB
Python
# This file is part of beets.
|
|
# Copyright 2016, Adrian Sampson.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
|
|
"""Helpers around the extraction of album/track ID's from metadata sources."""
|
|
|
|
import re
|
|
|
|
# A Spotify ID is 22 alphanumeric characters long: the base62 form of a
# randomly generated UUID4, left-padded with zeros.
# The `{}` in the pattern is a str.format placeholder (presumably for the URL
# type, e.g. album or track -- confirm against callers); the doubled braces
# keep the `{22}` quantifier literal once the pattern has been formatted.
spotify_id_regex = dict(
    pattern=r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})',
    match_group=2,  # capture group holding the ID itself
)
|
|
|
|
# Matches a numeric ID, either bare (anchored at the start of the string) or
# inside a deezer.com URL with an optional lowercase path segment and an
# optional `{}` segment. The `{}` is a str.format placeholder (presumably the
# URL type, e.g. album or track -- confirm against callers).
deezer_id_regex = dict(
    pattern=r'(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)',
    match_group=4,  # capture group holding the numeric ID
)
|
|
|
|
# Matches a numeric ID at the very end of the string, either bare or as the
# last path segment of a beatport.com/release/<name>/<id> URL.
beatport_id_regex = dict(
    pattern=r'(^|beatport\.com/release/.+/)(\d+)$',
    match_group=2,  # capture group holding the numeric ID
)
|
|
|
|
# A note on Bandcamp: there is no such thing as a Bandcamp album or artist
# ID; the URL itself serves as the identifier. The Bandcamp metadata source
# plugin works that way - https://github.com/unrblt/beets-bandcamp. Bandcamp
# album URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum
|
|
|
|
|
|
def extract_discogs_id_regex(album_id):
    """Return the Discogs release ID contained in ``album_id``, or None.

    Discogs IDs are simple integers. In order to avoid confusion with other
    metadata plugins, only very specific formats of the input string are
    accepted:

    - a plain integer, optionally wrapped in brackets and prefixed by an
      'r', as this is how Discogs displays the release ID on its webpage;
    - the legacy URL format: discogs.com/<name of release>/release/<id>;
    - the current URL format: discogs.com/release/<id>-<name of release>;
    - the short URL format: discogs.com/release/<id>, i.e. the current
      format without the trailing name.

    See #291, #4080 and #4085 for the discussions leading up to these
    patterns.

    :param album_id: the string (bare ID or URL) to search.
    :returns: the release ID as an ``int``, or ``None`` if no pattern matches.
    """
    # The bracket/URL patterns were originally tested at
    # https://regex101.com/r/wyLdB4/2; the short URL form was added afterwards.
    patterns = (
        r'^\[?r?(?P<id>\d+)\]?$',
        # `\b` instead of a mandatory '-' so that URLs without a release name
        # (end of string, '/', '?', '#') match too, while an ID that runs
        # straight into letters or '_' is still rejected.
        r'discogs\.com/release/(?P<id>\d+)\b',
        r'discogs\.com/[^/]+/release/(?P<id>\d+)',
    )

    for pattern in patterns:
        match = re.search(pattern, album_id)
        if match:
            return int(match.group('id'))

    return None
|