mirror of
https://github.com/beetbox/beets.git
synced 2025-12-06 16:42:42 +01:00
The library mode was slow due to use of item.formatted(), which
runs multiple database queries per item. The --include-keys option
didn't help because it filtered the output only after the queries had already
happened.
This commit reworks filtering so that we only query the fields
that the user requests. In many cases this can dramatically speed up
execution.
Before:
> /usr/bin/time beet export -l Bob Dylan > /dev/null
13.42user 21.56system 0:35.71elapsed 97%CPU (0avgtext+0avgdata 52396maxresident)k
> /usr/bin/time beet export -l -i artist,title,path Bob Dylan > /dev/null
13.25user 21.17system 0:35.14elapsed 97%CPU (0avgtext+0avgdata 52112maxresident)k
After:
> /usr/bin/time beet export -l Bob Dylan > /dev/null
1.46user 0.09system 0:01.60elapsed 97%CPU (0avgtext+0avgdata 51188maxresident)k
> /usr/bin/time beet export -l -i artist,title,path Bob Dylan > /dev/null
0.50user 0.07system 0:00.58elapsed 97%CPU (0avgtext+0avgdata 50632maxresident)k
227 lines
7.8 KiB
Python
227 lines
7.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
# This file is part of beets.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
|
|
"""Exports data from beets
|
|
"""
|
|
|
|
from __future__ import division, absolute_import, print_function
|
|
|
|
import sys
|
|
import codecs
|
|
import json
|
|
import csv
|
|
from xml.etree import ElementTree
|
|
|
|
from datetime import datetime, date
|
|
from beets.plugins import BeetsPlugin
|
|
from beets import ui
|
|
from beets import util
|
|
import mediafile
|
|
from beetsplug.info import all_library_fields, all_tag_fields, expand_key_list
|
|
from beetsplug.info import library_data, tag_data
|
|
|
|
|
|
class ExportEncoder(json.JSONEncoder):
    """JSON encoder that also understands ``date`` and ``datetime`` values.

    JSON has no native date type, so such objects are emitted as their
    ISO 8601 string (``isoformat()``). Every other unsupported object is
    handed to the stock encoder, which raises ``TypeError``.
    """

    def default(self, o):
        # Guard clause: anything that is not a date-like object is not our
        # business, so defer to the base implementation.
        if not isinstance(o, (datetime, date)):
            return super(ExportEncoder, self).default(o)
        return o.isoformat()
|
|
|
|
|
|
class ExportPlugin(BeetsPlugin):
    """Plugin that adds the ``export`` subcommand.

    The command collects per-item data (library fields or file tags) and
    writes it through an ``ExportFormat`` to a file or to standard output.
    """

    def __init__(self):
        """Register the plugin's default configuration values."""
        super(ExportPlugin, self).__init__()

        json_formatting_options = {
            'ensure_ascii': False,
            'indent': 4,
            'separators': (',', ': '),
            'sort_keys': True
        }

        # JSON Lines needs exactly one record per physical line, so it must
        # not inherit the pretty-printing 'indent' used for plain JSON.
        jsonlines_formatting_options = {
            'ensure_ascii': False,
            'separators': (',', ': '),
            'sort_keys': True
        }

        self.config.add({
            'default_format': 'json',
            'json': {
                # JSON module formatting options.
                'formatting': json_formatting_options,
            },
            'jsonlines': {
                # JSON Lines formatting options.
                'formatting': jsonlines_formatting_options,
            },
            'csv': {
                # CSV module formatting options.
                'formatting': {
                    # The delimiter used to separate columns.
                    'delimiter': ',',
                    # The dialect to use when formatting the file output.
                    'dialect': 'excel'
                }
            },
            'xml': {
                # XML module formatting options.
                'formatting': {}
            }
            # TODO: Use something like the edit plugin
            # 'item_fields': []
        })

    def commands(self):
        """Build the ``export`` subcommand and return it in a list."""
        # TODO: Add option to use albums

        cmd = ui.Subcommand('export', help=u'export data from beets')
        cmd.func = self.run
        cmd.parser.add_option(
            u'-l', u'--library', action='store_true',
            help=u'show library fields instead of tags',
        )
        cmd.parser.add_option(
            u'--append', action='store_true', default=False,
            help=u'if should append data to the file',
        )
        cmd.parser.add_option(
            u'-i', u'--include-keys', default=[],
            action='append', dest='included_keys',
            help=u'comma separated list of keys to show',
        )
        cmd.parser.add_option(
            u'-o', u'--output',
            help=u'path for the output file. If not given, will print the data'
        )
        # No parser-level default here: run() falls back to the
        # 'default_format' configuration value (initially 'json') when the
        # option is omitted. A hard-coded default would always shadow it.
        cmd.parser.add_option(
            u'-f', u'--format', default=None,
            help=u"the output format: json (default), jsonlines, csv, or xml"
        )
        return [cmd]

    def run(self, lib, opts, args):
        """Execute the ``export`` command.

        ``lib`` and ``args`` are forwarded to the data collector
        (``library_data`` with ``--library``, otherwise ``tag_data``);
        ``opts`` carries the parsed command-line options. For the
        ``jsonlines`` format each record is written as soon as it has been
        collected; for every other format all records are gathered first
        and written in a single call.
        """
        file_path = opts.output
        file_mode = 'a' if opts.append else 'w'
        file_format = opts.format or self.config['default_format'].get(str)
        file_format_is_line_based = (file_format == 'jsonlines')
        format_options = self.config[file_format]['formatting'].get(dict)

        export_format = ExportFormat.factory(
            file_type=file_format,
            file_path=file_path,
            file_mode=file_mode
        )

        try:
            items = []
            data_collector = library_data if opts.library else tag_data

            all_fields = (all_library_fields() if opts.library
                          else all_tag_fields())
            # Every -i occurrence may itself hold a comma separated list.
            included_keys = []
            for keys in opts.included_keys:
                included_keys.extend(keys.split(','))
            included_keys = expand_key_list(included_keys, all_fields)

            for data_emitter in data_collector(lib, ui.decargs(args)):
                try:
                    # The emitter also returns the item, which is not
                    # needed here.
                    data, _ = data_emitter(included_keys)
                except (mediafile.UnreadableFileError, IOError) as ex:
                    self._log.error(u'cannot read file: {0}', ex)
                    continue

                # Byte strings are converted with displayable_path() before
                # they reach the serializers.
                for key, value in data.items():
                    if isinstance(value, bytes):
                        data[key] = util.displayable_path(value)

                if file_format_is_line_based:
                    export_format.export(data, **format_options)
                else:
                    items.append(data)

            if not file_format_is_line_based:
                export_format.export(items, **format_options)
        finally:
            # A real file is only opened when an output path was given;
            # otherwise the stream is standard output and must stay open.
            if file_path:
                export_format.out_stream.close()
|
|
|
|
|
|
class ExportFormat(object):
    """Base class for the supported output formats.

    Holds the destination stream; concrete formats implement ``export``.
    """

    def __init__(self, file_path, file_mode=u'w', encoding=u'utf-8'):
        """Remember the target settings and pick the output stream.

        A truthy ``file_path`` is opened through ``codecs.open`` with the
        given mode and encoding; otherwise output goes to ``sys.stdout``.
        """
        self.path = file_path
        self.mode = file_mode
        self.encoding = encoding
        # Write/append to a file when a path was supplied, else use stdout.
        if self.path:
            self.out_stream = codecs.open(self.path, self.mode, self.encoding)
        else:
            self.out_stream = sys.stdout

    @classmethod
    def factory(cls, file_type, **kwargs):
        """Instantiate the format class matching ``file_type``.

        ``kwargs`` are passed on to the chosen constructor. Raises
        ``NotImplementedError`` for an unknown ``file_type``.
        """
        if file_type == "json":
            return JsonFormat(**kwargs)
        if file_type == "jsonlines":
            return JsonFormat(newline_at_end=True, **kwargs)
        if file_type == "csv":
            return CSVFormat(**kwargs)
        if file_type == "xml":
            return XMLFormat(**kwargs)
        raise NotImplementedError()

    def export(self, data, **kwargs):
        """Write ``data`` to the output stream; subclasses must override."""
        raise NotImplementedError()
|
|
|
|
|
|
class JsonFormat(ExportFormat):
    """Writes the data as JSON, optionally followed by a newline."""

    def __init__(self, file_path, file_mode=u'w', encoding=u'utf-8',
                 newline_at_end=False):
        """Set up the stream and remember whether to end with a newline."""
        super(JsonFormat, self).__init__(file_path, file_mode, encoding)
        self.newline_at_end = newline_at_end

    def export(self, data, **kwargs):
        """Serialize ``data`` to the output stream.

        ``kwargs`` are forwarded to ``json.dump``; dates are handled by
        ``ExportEncoder``.
        """
        stream = self.out_stream
        json.dump(data, stream, cls=ExportEncoder, **kwargs)
        if not self.newline_at_end:
            return
        stream.write('\n')
|
|
|
|
|
|
class CSVFormat(ExportFormat):
    """Writes the data as CSV."""

    def __init__(self, file_path, file_mode=u'w', encoding=u'utf-8'):
        """Set up the output stream via the base class."""
        super(CSVFormat, self).__init__(file_path, file_mode, encoding)

    def export(self, data, **kwargs):
        """Write a header row followed by one row per record in ``data``.

        The column names are the keys of the first record; with no records
        the header is empty. ``kwargs`` are forwarded to
        ``csv.DictWriter``.
        """
        if data:
            columns = list(data[0].keys())
        else:
            columns = []
        csv_writer = csv.DictWriter(
            self.out_stream, fieldnames=columns, **kwargs
        )
        csv_writer.writeheader()
        csv_writer.writerows(data)
|
|
|
|
|
|
class XMLFormat(ExportFormat):
    """Saves in a xml file"""

    def __init__(self, file_path, file_mode=u'w', encoding=u'utf-8'):
        """Set up the output stream via the base class."""
        super(XMLFormat, self).__init__(file_path, file_mode, encoding)

    @staticmethod
    def _to_text(value):
        """Return ``value`` in a form usable as XML element text.

        ``None`` and text strings are passed through unchanged. Byte
        strings are decoded as UTF-8 (undecodable bytes are replaced),
        dates use their ISO 8601 form, and anything else is converted with
        the text type. ElementTree's serializer only accepts string text,
        so raw numbers or booleans would otherwise abort the export.
        """
        text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
        if value is None or isinstance(value, text_type):
            return value
        if isinstance(value, bytes):
            return value.decode('utf-8', 'replace')
        if isinstance(value, date):
            # Also covers datetime, which is a subclass of date.
            return text_type(value.isoformat())
        return text_type(value)

    def export(self, data, **kwargs):
        """Write ``data`` as a ``library``/``tracks``/``track`` XML tree.

        Each dict in ``data`` becomes one ``track`` element holding one
        child element per key. ``kwargs`` are forwarded to
        ``ElementTree.tostring``.
        """
        # Creates the XML file structure.
        library = ElementTree.Element(u'library')
        tracks = ElementTree.SubElement(library, u'tracks')
        if data and isinstance(data[0], dict):
            for item in data:
                track = ElementTree.SubElement(tracks, u'track')
                for key, value in item.items():
                    track_details = ElementTree.SubElement(track, key)
                    track_details.text = self._to_text(value)
        # Depending on the version of python the encoding needs to change
        try:
            data = ElementTree.tostring(library, encoding='unicode', **kwargs)
        except LookupError:
            data = ElementTree.tostring(library, encoding='utf-8', **kwargs)

        self.out_stream.write(data)
|