mirror of
https://github.com/beetbox/beets.git
synced 2025-12-26 02:24:33 +01:00
In the case that Mutagen throws an exception while trying to read a file, we throw an UnreadableFileError, which is a new superclass for FileTypeError.
617 lines
22 KiB
Python
617 lines
22 KiB
Python
# This file is part of beets.
|
|
# Copyright 2010, Adrian Sampson.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
|
|
"""Handles low-level interfacing for files' tags. Wraps Mutagen to
|
|
automatically detect file types and provide a unified interface for a useful
|
|
subset of music files' tags.
|
|
|
|
Usage:
|
|
|
|
>>> f = MediaFile('Lucy.mp3')
|
|
>>> f.title
|
|
u'Lucy in the Sky with Diamonds'
|
|
>>> f.artist = 'The Beatles'
|
|
>>> f.save()
|
|
|
|
A field will always return a reasonable value of the correct type, even if no
|
|
tag is present. If no value is available, the value will be false (e.g., zero
|
|
or the empty string).
|
|
"""
|
|
|
|
import mutagen
|
|
import mutagen.mp3
|
|
import mutagen.flac
|
|
import mutagen.monkeysaudio
|
|
import datetime
|
|
import re
|
|
|
|
__all__ = ['FileTypeError', 'MediaFile']
|
|
|
|
# Currently allowed values for type:
|
|
# mp3, mp4, flac, ogg, ape
|
|
|
|
# Raised for any file MediaFile can't read.
|
|
class UnreadableFileError(IOError):
|
|
pass
|
|
|
|
# Raised for files that don't seem to have a type MediaFile supports.
|
|
class FileTypeError(UnreadableFileError):
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#### flags used to define fields' behavior ####
|
|
|
|
class Enumeration(object):
|
|
def __init__(self, *values):
|
|
for value, equiv in zip(values, range(1, len(values)+1)):
|
|
setattr(self, value, equiv)
|
|
# determine style of packing if any
|
|
packing = Enumeration('SLASHED', # pair delimited by /
|
|
'TUPLE', # a python tuple of 2 items
|
|
'DATE' # YYYY-MM-DD
|
|
)
|
|
|
|
class StorageStyle(object):
|
|
"""Parameterizes the storage behavior of a single field for a certain tag
|
|
format.
|
|
"""
|
|
def __init__(self,
|
|
# The Mutagen key used to access the data for this field.
|
|
key,
|
|
# Store item as a single object or as first element of a list.
|
|
list_elem = True,
|
|
# Which type the value is stored as (unicode, int, or bool).
|
|
as_type = unicode,
|
|
# If this value is packed in a multiple-value storage unit,
|
|
# which type of packing (in the packing enum). Otherwise,
|
|
# None. (Makes as_type irrelevant).
|
|
packing = None,
|
|
# If the value is packed, in which position it is stored.
|
|
pack_pos = 0,
|
|
# ID3 storage only: match against this 'desc' field as well
|
|
# as the key.
|
|
id3_desc = None):
|
|
self.key = key
|
|
self.list_elem = list_elem
|
|
self.as_type = as_type
|
|
self.packing = packing
|
|
self.pack_pos = pack_pos
|
|
self.id3_desc = id3_desc
|
|
|
|
|
|
|
|
|
|
#### dealing with packings ####
|
|
|
|
class Packed(object):
|
|
"""Makes a packed list of values subscriptable. To access the packed output
|
|
after making changes, use packed_thing.items.
|
|
"""
|
|
|
|
def __init__(self, items, packstyle, none_val=0, out_type=int):
|
|
"""Create a Packed object for subscripting the packed values in items.
|
|
The items are packed using packstyle, which is a value from the
|
|
packing enum. none_val is returned from a request when no suitable
|
|
value is found in the items. Vales are converted to out_type before
|
|
they are returned.
|
|
"""
|
|
self.items = items
|
|
self.packstyle = packstyle
|
|
self.none_val = none_val
|
|
self.out_type = out_type
|
|
|
|
def __getitem__(self, index):
|
|
if not isinstance(index, int):
|
|
raise TypeError('index must be an integer')
|
|
|
|
if self.items is None:
|
|
return self.none_val
|
|
|
|
items = self.items
|
|
if self.packstyle == packing.DATE:
|
|
# Remove time information from dates. Usually delimited by
|
|
# a "T" or a space.
|
|
items = re.sub(r'[Tt ].*$', '', unicode(items))
|
|
|
|
# transform from a string packing into a list we can index into
|
|
if self.packstyle == packing.SLASHED:
|
|
seq = unicode(items).split('/')
|
|
elif self.packstyle == packing.DATE:
|
|
seq = unicode(items).split('-')
|
|
elif self.packstyle == packing.TUPLE:
|
|
seq = items # tuple: items is already indexable
|
|
|
|
try:
|
|
out = seq[index]
|
|
except:
|
|
out = None
|
|
|
|
if out is None or out == self.none_val or out == '':
|
|
return self.out_type(self.none_val)
|
|
else:
|
|
return self.out_type(out)
|
|
|
|
def __setitem__(self, index, value):
|
|
|
|
if self.packstyle in (packing.SLASHED, packing.TUPLE):
|
|
# SLASHED and TUPLE are always two-item packings
|
|
length = 2
|
|
else:
|
|
# DATE can have up to three fields
|
|
length = 3
|
|
|
|
# make a list of the items we'll pack
|
|
new_items = []
|
|
for i in range(length):
|
|
if i == index:
|
|
next_item = value
|
|
else:
|
|
next_item = self[i]
|
|
new_items.append(next_item)
|
|
|
|
if self.packstyle == packing.DATE:
|
|
# Truncate the items wherever we reach an invalid (none) entry.
|
|
# This prevents dates like 2008-00-05.
|
|
for i, item in enumerate(new_items):
|
|
if item == self.none_val or item is None:
|
|
del(new_items[i:]) # truncate
|
|
break
|
|
|
|
if self.packstyle == packing.SLASHED:
|
|
self.items = '/'.join(map(unicode, new_items))
|
|
elif self.packstyle == packing.DATE:
|
|
field_lengths = [4, 2, 2] # YYYY-MM-DD
|
|
elems = []
|
|
for i, item in enumerate(new_items):
|
|
elems.append( ('%0' + str(field_lengths[i]) + 'i') % item )
|
|
self.items = '-'.join(elems)
|
|
elif self.packstyle == packing.TUPLE:
|
|
self.items = new_items
|
|
|
|
|
|
|
|
class MediaField(object):
|
|
"""A descriptor providing access to a particular (abstract) metadata
|
|
field. out_type is the type that users of MediaFile should see and can
|
|
be unicode, int, or bool. id3, mp4, and flac are StorageStyle instances
|
|
parameterizing the field's storage for each type.
|
|
"""
|
|
|
|
def __init__(self,
|
|
# The field's semantic (exterior) type.
|
|
out_type = unicode,
|
|
# A hash whose keys are 'mp3', 'mp4', and 'flac' and whose values
|
|
# are StorageStyle instances parameterizing the field's storage for
|
|
# each type.
|
|
**kwargs
|
|
):
|
|
|
|
self.out_type = out_type
|
|
if not set(['mp3', 'mp4', 'etc']) == set(kwargs):
|
|
raise TypeError('MediaField constructor must have keyword '
|
|
'arguments mp3, mp4, and etc')
|
|
self.styles = kwargs
|
|
|
|
def _fetchdata(self, obj, style):
|
|
"""Get the value associated with this descriptor's field stored
|
|
with the given StorageStyle. Unwraps from a list if necessary.
|
|
"""
|
|
# fetch the value, which may be a scalar or a list
|
|
if obj.type == 'mp3':
|
|
if style.id3_desc is not None: # also match on 'desc' field
|
|
frames = obj.mgfile.tags.getall(style.key)
|
|
entry = None
|
|
for frame in frames:
|
|
if frame.desc == style.id3_desc:
|
|
entry = frame.text
|
|
break
|
|
if entry is None: # no desc match
|
|
return None
|
|
else:
|
|
try:
|
|
entry = obj.mgfile[style.key].text
|
|
except KeyError:
|
|
return None
|
|
|
|
else: # Not MP3.
|
|
try:
|
|
entry = obj.mgfile[style.key]
|
|
except KeyError:
|
|
return None
|
|
|
|
# possibly index the list
|
|
if style.list_elem:
|
|
if entry: # List must have at least one value.
|
|
return entry[0]
|
|
else:
|
|
return None
|
|
else:
|
|
return entry
|
|
|
|
def _storedata(self, obj, val, style):
|
|
"""Store val for this descriptor's field in the tag dictionary
|
|
according to the provided StorageStyle. Store it as a single-item list
|
|
if necessary.
|
|
"""
|
|
# wrap as a list if necessary
|
|
if style.list_elem: out = [val]
|
|
else: out = val
|
|
|
|
if obj.type == 'mp3':
|
|
if style.id3_desc is not None: # match on desc field
|
|
frames = obj.mgfile.tags.getall(style.key)
|
|
|
|
# try modifying in place
|
|
found = False
|
|
for frame in frames:
|
|
if frame.desc == style.id3_desc:
|
|
frame.text = out
|
|
found = True
|
|
break
|
|
|
|
# need to make a new frame?
|
|
if not found:
|
|
frame = mutagen.id3.Frames[style.key](
|
|
encoding=3, desc=style.id3_desc, text=val)
|
|
obj.mgfile.tags.add(frame)
|
|
|
|
else: # no match on desc; just replace based on key
|
|
frame = mutagen.id3.Frames[style.key](encoding=3, text=val)
|
|
obj.mgfile.tags.setall(style.key, [frame])
|
|
|
|
else: # Not MP3.
|
|
obj.mgfile[style.key] = out
|
|
|
|
def _styles(self, obj):
|
|
if obj.type in ('mp3', 'mp4'):
|
|
styles = self.styles[obj.type]
|
|
else:
|
|
styles = self.styles['etc'] # sane styles
|
|
|
|
# Make sure we always return a list of styles, even when given
|
|
# a single style for convenience.
|
|
if isinstance(styles, StorageStyle):
|
|
return [styles]
|
|
else:
|
|
return styles
|
|
|
|
def __get__(self, obj, owner):
|
|
"""Retrieve the value of this metadata field.
|
|
"""
|
|
# Fetch the data using the various StorageStyles.
|
|
styles = self._styles(obj)
|
|
for style in styles:
|
|
# Use the first style that returns a reasonable value.
|
|
out = self._fetchdata(obj, style)
|
|
if out:
|
|
break
|
|
|
|
if style.packing:
|
|
out = Packed(out, style.packing)[style.pack_pos]
|
|
|
|
# return the appropriate type
|
|
if self.out_type == int:
|
|
if out is None:
|
|
return 0
|
|
elif isinstance(out, int) or isinstance(out, float):
|
|
# Just a number.
|
|
return int(out)
|
|
else:
|
|
# Process any other type as a string.
|
|
if not isinstance(out, basestring):
|
|
out = unicode(out)
|
|
# Get a number from the front of the string.
|
|
out = re.match('[0-9]*', out.strip()).group(0)
|
|
if not out:
|
|
return 0
|
|
else:
|
|
return int(out)
|
|
elif self.out_type == bool:
|
|
if out is None:
|
|
return False
|
|
else:
|
|
return bool(int(out)) # should work for strings, bools, ints
|
|
elif self.out_type == unicode:
|
|
if out is None:
|
|
return u''
|
|
else:
|
|
return unicode(out)
|
|
else:
|
|
return out
|
|
|
|
def __set__(self, obj, val):
|
|
"""Set the value of this metadata field.
|
|
"""
|
|
# Store using every StorageStyle available.
|
|
styles = self._styles(obj)
|
|
for style in styles:
|
|
|
|
if style.packing:
|
|
p = Packed(self._fetchdata(obj, style), style.packing)
|
|
p[style.pack_pos] = val
|
|
out = p.items
|
|
|
|
else: # unicode, integer, or boolean scalar
|
|
out = val
|
|
|
|
# deal with Nones according to abstract type if present
|
|
if out is None:
|
|
if self.out_type == int:
|
|
out = 0
|
|
elif self.out_type == bool:
|
|
out = False
|
|
elif self.out_type == unicode:
|
|
out = u''
|
|
# We trust that packed values are handled above.
|
|
|
|
# convert to correct storage type (irrelevant for
|
|
# packed values)
|
|
if style.as_type == unicode:
|
|
if out is None:
|
|
out = u''
|
|
else:
|
|
if self.out_type == bool:
|
|
# store bools as 1,0 instead of True,False
|
|
out = unicode(int(out))
|
|
else:
|
|
out = unicode(out)
|
|
elif style.as_type == int:
|
|
if out is None:
|
|
out = 0
|
|
else:
|
|
out = int(out)
|
|
elif style.as_type == bool:
|
|
out = bool(out)
|
|
|
|
# store the data
|
|
self._storedata(obj, out, style)
|
|
|
|
class CompositeDateField(object):
|
|
"""A MediaFile field for conveniently accessing the year, month, and day
|
|
fields as a datetime.date object. Allows both getting and setting of the
|
|
component fields.
|
|
"""
|
|
def __init__(self, year_field, month_field, day_field):
|
|
"""Create a new date field from the indicated MediaFields for the
|
|
component values.
|
|
"""
|
|
self.year_field = year_field
|
|
self.month_field = month_field
|
|
self.day_field = day_field
|
|
|
|
def __get__(self, obj, owner):
|
|
"""Return a datetime.date object whose components indicating the
|
|
smallest valid date whose components are at least as large as the
|
|
three component fields (that is, if year == 1999, month == 0, and
|
|
day == 0, then date == datetime.date(1999, 1, 1)). If the components
|
|
indicate an invalid date (e.g., if month == 47), datetime.date.min is
|
|
returned.
|
|
"""
|
|
try:
|
|
return datetime.date(max(self.year_field.__get__(obj, owner),
|
|
datetime.MINYEAR),
|
|
max(self.month_field.__get__(obj, owner), 1),
|
|
max(self.day_field.__get__(obj, owner), 1)
|
|
)
|
|
except ValueError: # Out of range values.
|
|
return datetime.date.min
|
|
|
|
def __set__(self, obj, val):
|
|
"""Set the year, month, and day fields to match the components of the
|
|
provided datetime.date object.
|
|
"""
|
|
self.year_field.__set__(obj, val.year)
|
|
self.month_field.__set__(obj, val.month)
|
|
self.day_field.__set__(obj, val.day)
|
|
|
|
|
|
|
|
class MediaFile(object):
|
|
"""Represents a multimedia file on disk and provides access to its
|
|
metadata.
|
|
"""
|
|
|
|
def __init__(self, path):
|
|
"""Constructs a new MediaFile reflecting the file at path. May
|
|
throw UnreadableFileError.
|
|
"""
|
|
unreadable_exc = (
|
|
mutagen.mp3.HeaderNotFoundError,
|
|
mutagen.flac.FLACNoHeaderError,
|
|
mutagen.monkeysaudio.MonkeysAudioHeaderError,
|
|
)
|
|
try:
|
|
self.mgfile = mutagen.File(path)
|
|
except unreadable_exc:
|
|
raise UnreadableFileError('Mutagen could not read file')
|
|
|
|
if self.mgfile is None: # Mutagen couldn't guess the type
|
|
raise FileTypeError('file type unsupported by Mutagen')
|
|
elif type(self.mgfile).__name__ == 'M4A' or \
|
|
type(self.mgfile).__name__ == 'MP4':
|
|
self.type = 'mp4'
|
|
elif type(self.mgfile).__name__ == 'ID3' or \
|
|
type(self.mgfile).__name__ == 'MP3':
|
|
self.type = 'mp3'
|
|
elif type(self.mgfile).__name__ == 'FLAC':
|
|
self.type = 'flac'
|
|
elif type(self.mgfile).__name__ == 'OggVorbis':
|
|
self.type = 'ogg'
|
|
elif type(self.mgfile).__name__ == 'MonkeysAudio':
|
|
self.type = 'ape'
|
|
else:
|
|
raise FileTypeError('file type %s unsupported by MediaFile' %
|
|
type(self.mgfile).__name__)
|
|
|
|
# add a set of tags if it's missing
|
|
if not self.mgfile.tags:
|
|
self.mgfile.add_tags()
|
|
|
|
def save(self):
|
|
self.mgfile.save()
|
|
|
|
|
|
#### field definitions ####
|
|
|
|
title = MediaField(
|
|
mp3 = StorageStyle('TIT2'),
|
|
mp4 = StorageStyle("\xa9nam"),
|
|
etc = StorageStyle('title'),
|
|
)
|
|
artist = MediaField(
|
|
mp3 = StorageStyle('TPE1'),
|
|
mp4 = StorageStyle("\xa9ART"),
|
|
etc = StorageStyle('artist'),
|
|
)
|
|
album = MediaField(
|
|
mp3 = StorageStyle('TALB'),
|
|
mp4 = StorageStyle("\xa9alb"),
|
|
etc = StorageStyle('album'),
|
|
)
|
|
genre = MediaField(
|
|
mp3 = StorageStyle('TCON'),
|
|
mp4 = StorageStyle("\xa9gen"),
|
|
etc = StorageStyle('genre'),
|
|
)
|
|
composer = MediaField(
|
|
mp3 = StorageStyle('TCOM'),
|
|
mp4 = StorageStyle("\xa9wrt"),
|
|
etc = StorageStyle('composer'),
|
|
)
|
|
grouping = MediaField(
|
|
mp3 = StorageStyle('TIT1'),
|
|
mp4 = StorageStyle("\xa9grp"),
|
|
etc = StorageStyle('grouping'),
|
|
)
|
|
year = MediaField(out_type=int,
|
|
mp3 = StorageStyle('TDRC',
|
|
packing = packing.DATE,
|
|
pack_pos = 0),
|
|
mp4 = StorageStyle("\xa9day",
|
|
packing = packing.DATE,
|
|
pack_pos = 0),
|
|
etc = [StorageStyle('date',
|
|
packing = packing.DATE,
|
|
pack_pos = 0),
|
|
StorageStyle('year')]
|
|
)
|
|
month = MediaField(out_type=int,
|
|
mp3 = StorageStyle('TDRC',
|
|
packing = packing.DATE,
|
|
pack_pos = 1),
|
|
mp4 = StorageStyle("\xa9day",
|
|
packing = packing.DATE,
|
|
pack_pos = 1),
|
|
etc = StorageStyle('date',
|
|
packing = packing.DATE,
|
|
pack_pos = 1)
|
|
)
|
|
day = MediaField(out_type=int,
|
|
mp3 = StorageStyle('TDRC',
|
|
packing = packing.DATE,
|
|
pack_pos = 2),
|
|
mp4 = StorageStyle("\xa9day",
|
|
packing = packing.DATE,
|
|
pack_pos = 2),
|
|
etc = StorageStyle('date',
|
|
packing = packing.DATE,
|
|
pack_pos = 2)
|
|
)
|
|
date = CompositeDateField(year, month, day)
|
|
track = MediaField(out_type = int,
|
|
mp3 = StorageStyle('TRCK',
|
|
packing = packing.SLASHED,
|
|
pack_pos = 0),
|
|
mp4 = StorageStyle('trkn',
|
|
packing = packing.TUPLE,
|
|
pack_pos = 0),
|
|
etc = [StorageStyle('track'),
|
|
StorageStyle('tracknumber')]
|
|
)
|
|
tracktotal = MediaField(out_type = int,
|
|
mp3 = StorageStyle('TRCK',
|
|
packing = packing.SLASHED,
|
|
pack_pos = 1),
|
|
mp4 = StorageStyle('trkn',
|
|
packing = packing.TUPLE,
|
|
pack_pos = 1),
|
|
etc = [StorageStyle('tracktotal'),
|
|
StorageStyle('trackc'),
|
|
StorageStyle('totaltracks')]
|
|
)
|
|
disc = MediaField(out_type = int,
|
|
mp3 = StorageStyle('TPOS',
|
|
packing = packing.SLASHED,
|
|
pack_pos = 0),
|
|
mp4 = StorageStyle('disk',
|
|
packing = packing.TUPLE,
|
|
pack_pos = 0),
|
|
etc = [StorageStyle('disc'),
|
|
StorageStyle('discnumber')]
|
|
)
|
|
disctotal = MediaField(out_type = int,
|
|
mp3 = StorageStyle('TPOS',
|
|
packing = packing.SLASHED,
|
|
pack_pos = 1),
|
|
mp4 = StorageStyle('disk',
|
|
packing = packing.TUPLE,
|
|
pack_pos = 1),
|
|
etc = [StorageStyle('disctotal'),
|
|
StorageStyle('discc'),
|
|
StorageStyle('totaldiscs')]
|
|
)
|
|
lyrics = MediaField(
|
|
mp3 = StorageStyle('USLT',
|
|
list_elem = False,
|
|
id3_desc = u''),
|
|
mp4 = StorageStyle("\xa9lyr"),
|
|
etc = StorageStyle('lyrics')
|
|
)
|
|
comments = MediaField(
|
|
mp3 = StorageStyle('COMM', id3_desc = u''),
|
|
mp4 = StorageStyle("\xa9cmt"),
|
|
etc = [StorageStyle('description'),
|
|
StorageStyle('comment')]
|
|
)
|
|
bpm = MediaField(out_type = int,
|
|
mp3 = StorageStyle('TBPM'),
|
|
mp4 = StorageStyle('tmpo', as_type = int),
|
|
etc = StorageStyle('bpm')
|
|
)
|
|
comp = MediaField(out_type = bool,
|
|
mp3 = StorageStyle('TCMP'),
|
|
mp4 = StorageStyle('cpil',
|
|
list_elem = False,
|
|
as_type = bool),
|
|
etc = StorageStyle('compilation')
|
|
)
|
|
|
|
@property
|
|
def length(self):
|
|
return self.mgfile.info.length
|
|
|
|
@property
|
|
def bitrate(self):
|
|
if self.type in ('flac', 'ape'):
|
|
# Simulate bitrate for lossless formats.
|
|
#fixme: The utility of this guess is questionable.
|
|
return self.mgfile.info.sample_rate * \
|
|
self.mgfile.info.bits_per_sample
|
|
else:
|
|
return self.mgfile.info.bitrate
|
|
|