mirror of
https://github.com/beetbox/beets.git
synced 2025-12-16 05:34:47 +01:00
Optimise FormattedMapping when querying a specific set of fields
This changes greatly improves the speed of `beet export` and `beet info` when the `--include-keys` option is used. It also removes the globbing feature of `--include-keys` that was added in #1295. (See #3762 for discussion). Listing all fields for an item requires querying the database to find any flex attributes. This is slow when done for every item being exported. We already have a way for the user to specify a fixed set of keys, but we previously queried everything and filtered it afterwards. The new approach is more efficient. Code that iterates through all fields now have to handle invalid field names. The export and info plugins output invalid fields as None. Timings before: > /usr/bin/time beet export -i title,path,artist -l Bob Dylan 13.26user 20.22system 0:34.01elapsed 98%CPU (0avgtext+0avgdata 52544maxresident)k > /usr/bin/time beet export -l Bob Dylan 12.93user 20.15system 0:33.58elapsed 98%CPU (0avgtext+0avgdata 53632maxresident)k Timings after: > /usr/bin/time beet export -l Bob Dylan 13.33user 20.17system 0:34.02elapsed 98%CPU (0avgtext+0avgdata 53500maxresident)k > /usr/bin/time beet export -i title,path,artist -l Bob Dylan 0.49user 0.07system 0:00.56elapsed 98%CPU (0avgtext+0avgdata 50496maxresident)k Notice the dramatic speedup in the last example!
This commit is contained in:
parent
e1a901dce5
commit
2fa3717731
7 changed files with 80 additions and 85 deletions
|
|
@ -52,15 +52,24 @@ class FormattedMapping(Mapping):
|
|||
The accessor `mapping[key]` returns the formatted version of
|
||||
`model[key]` as a unicode string.
|
||||
|
||||
The `included_keys` parameter allows filtering the fields that are
|
||||
returned. By default all fields are returned. Limiting to specific keys can
|
||||
avoid expensive per-item database queries.
|
||||
|
||||
If `for_path` is true, all path separators in the formatted values
|
||||
are replaced.
|
||||
"""
|
||||
|
||||
def __init__(self, model, for_path=False, compute_keys=True):
|
||||
ALL_KEYS = '*'
|
||||
|
||||
def __init__(self, model, included_keys=ALL_KEYS, for_path=False):
|
||||
self.for_path = for_path
|
||||
self.model = model
|
||||
if compute_keys:
|
||||
self.model_keys = model.keys(True)
|
||||
if included_keys == self.ALL_KEYS:
|
||||
# Performance note: this triggers a database query.
|
||||
self.model_keys = self.model.keys(True)
|
||||
else:
|
||||
self.model_keys = included_keys
|
||||
|
||||
def __getitem__(self, key):
|
||||
if key in self.model_keys:
|
||||
|
|
@ -605,11 +614,11 @@ class Model(object):
|
|||
|
||||
_formatter = FormattedMapping
|
||||
|
||||
def formatted(self, for_path=False):
|
||||
def formatted(self, included_keys=_formatter.ALL_KEYS, for_path=False):
|
||||
"""Get a mapping containing all values on this object formatted
|
||||
as human-readable unicode strings.
|
||||
"""
|
||||
return self._formatter(self, for_path)
|
||||
return self._formatter(self, included_keys, for_path)
|
||||
|
||||
def evaluate_template(self, template, for_path=False):
|
||||
"""Evaluate a template (a string or a `Template` object) using
|
||||
|
|
@ -619,7 +628,7 @@ class Model(object):
|
|||
# Perform substitution.
|
||||
if isinstance(template, six.string_types):
|
||||
template = functemplate.template(template)
|
||||
return template.substitute(self.formatted(for_path),
|
||||
return template.substitute(self.formatted(for_path=for_path),
|
||||
self._template_funcs())
|
||||
|
||||
# Parsing.
|
||||
|
|
|
|||
|
|
@ -374,12 +374,19 @@ class FormattedItemMapping(dbcore.db.FormattedMapping):
|
|||
Album-level fields take precedence if `for_path` is true.
|
||||
"""
|
||||
|
||||
def __init__(self, item, for_path=False):
|
||||
ALL_KEYS = '*'
|
||||
|
||||
def __init__(self, item, included_keys=ALL_KEYS, for_path=False):
|
||||
# We treat album and item keys specially here,
|
||||
# so exclude transitive album keys from the model's keys.
|
||||
super(FormattedItemMapping, self).__init__(item, for_path,
|
||||
compute_keys=False)
|
||||
self.model_keys = item.keys(computed=True, with_album=False)
|
||||
super(FormattedItemMapping, self).__init__(item, included_keys=[],
|
||||
for_path=for_path)
|
||||
self.included_keys = included_keys
|
||||
if included_keys == self.ALL_KEYS:
|
||||
# Performance note: this triggers a database query.
|
||||
self.model_keys = item.keys(computed=True, with_album=False)
|
||||
else:
|
||||
self.model_keys = included_keys
|
||||
self.item = item
|
||||
|
||||
@lazy_property
|
||||
|
|
@ -390,10 +397,14 @@ class FormattedItemMapping(dbcore.db.FormattedMapping):
|
|||
def album_keys(self):
|
||||
album_keys = []
|
||||
if self.album:
|
||||
for key in self.album.keys(computed=True):
|
||||
if key in Album.item_keys \
|
||||
or key not in self.item._fields.keys():
|
||||
album_keys.append(key)
|
||||
if self.included_keys == self.ALL_KEYS:
|
||||
# Performance note: this triggers a database query.
|
||||
for key in self.album.keys(computed=True):
|
||||
if key in Album.item_keys \
|
||||
or key not in self.item._fields.keys():
|
||||
album_keys.append(key)
|
||||
else:
|
||||
album_keys = self.included_keys
|
||||
return album_keys
|
||||
|
||||
@property
|
||||
|
|
@ -422,12 +433,15 @@ class FormattedItemMapping(dbcore.db.FormattedMapping):
|
|||
# `artist` and `albumartist` fields fall back to one another.
|
||||
# This is helpful in path formats when the album artist is unset
|
||||
# on as-is imports.
|
||||
if key == 'artist' and not value:
|
||||
return self._get('albumartist')
|
||||
elif key == 'albumartist' and not value:
|
||||
return self._get('artist')
|
||||
else:
|
||||
return value
|
||||
try:
|
||||
if key == 'artist' and not value:
|
||||
return self._get('albumartist')
|
||||
elif key == 'albumartist' and not value:
|
||||
return self._get('artist')
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
return value
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.all_keys)
|
||||
|
|
@ -1703,7 +1717,7 @@ class DefaultTemplateFunctions(object):
|
|||
return res
|
||||
|
||||
# Flatten disambiguation value into a string.
|
||||
disam_value = album.formatted(True).get(disambiguator)
|
||||
disam_value = album.formatted(for_path=True).get(disambiguator)
|
||||
|
||||
# Return empty string if disambiguator is empty.
|
||||
if disam_value:
|
||||
|
|
|
|||
|
|
@ -26,8 +26,9 @@ from xml.etree import ElementTree
|
|||
from datetime import datetime, date
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets import ui
|
||||
from beets import util
|
||||
import mediafile
|
||||
from beetsplug.info import make_key_filter, library_data, tag_data
|
||||
from beetsplug.info import library_data, tag_data
|
||||
|
||||
|
||||
class ExportEncoder(json.JSONEncoder):
|
||||
|
|
@ -129,16 +130,18 @@ class ExportPlugin(BeetsPlugin):
|
|||
for keys in opts.included_keys:
|
||||
included_keys.extend(keys.split(','))
|
||||
|
||||
key_filter = make_key_filter(included_keys)
|
||||
|
||||
for data_emitter in data_collector(lib, ui.decargs(args)):
|
||||
try:
|
||||
data, item = data_emitter()
|
||||
data, item = data_emitter(included_keys or '*')
|
||||
except (mediafile.UnreadableFileError, IOError) as ex:
|
||||
self._log.error(u'cannot read file: {0}', ex)
|
||||
continue
|
||||
|
||||
data = key_filter(data)
|
||||
for key, value in data.items():
|
||||
if isinstance(value, bytes):
|
||||
data[key] = util.displayable_path(value)
|
||||
|
||||
items += [data]
|
||||
|
||||
if file_format_is_line_based:
|
||||
export_format.export(data, **format_options)
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@
|
|||
from __future__ import division, absolute_import, print_function
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from beets.plugins import BeetsPlugin
|
||||
from beets import ui
|
||||
|
|
@ -42,15 +41,29 @@ def tag_data(lib, args):
|
|||
yield tag_data_emitter(item.path)
|
||||
|
||||
|
||||
def tag_fields():
|
||||
fields = set(mediafile.MediaFile.readable_fields())
|
||||
fields.add('art')
|
||||
return fields
|
||||
|
||||
|
||||
def tag_data_emitter(path):
|
||||
def emitter():
|
||||
fields = list(mediafile.MediaFile.readable_fields())
|
||||
fields.remove('images')
|
||||
def emitter(included_keys):
|
||||
if included_keys == '*':
|
||||
fields = tag_fields()
|
||||
else:
|
||||
fields = included_keys
|
||||
if 'images' in fields:
|
||||
# We can't serialize the image data.
|
||||
fields.remove('images')
|
||||
mf = mediafile.MediaFile(syspath(path))
|
||||
tags = {}
|
||||
for field in fields:
|
||||
tags[field] = getattr(mf, field)
|
||||
tags['art'] = mf.art is not None
|
||||
if field == 'art':
|
||||
tags[field] = mf.art is not None
|
||||
else:
|
||||
tags[field] = getattr(mf, field, None)
|
||||
|
||||
# create a temporary Item to take advantage of __format__
|
||||
item = Item.from_path(syspath(path))
|
||||
|
||||
|
|
@ -64,8 +77,8 @@ def library_data(lib, args):
|
|||
|
||||
|
||||
def library_data_emitter(item):
|
||||
def emitter():
|
||||
data = dict(item.formatted())
|
||||
def emitter(included_keys):
|
||||
data = dict(item.formatted(included_keys=included_keys))
|
||||
|
||||
return data, item
|
||||
return emitter
|
||||
|
|
@ -185,18 +198,16 @@ class InfoPlugin(BeetsPlugin):
|
|||
included_keys.extend(keys.split(','))
|
||||
# Drop path even if user provides it multiple times
|
||||
included_keys = [k for k in included_keys if k != 'path']
|
||||
key_filter = make_key_filter(included_keys)
|
||||
|
||||
first = True
|
||||
summary = {}
|
||||
for data_emitter in data_collector(lib, ui.decargs(args)):
|
||||
try:
|
||||
data, item = data_emitter()
|
||||
data, item = data_emitter(included_keys or '*')
|
||||
except (mediafile.UnreadableFileError, IOError) as ex:
|
||||
self._log.error(u'cannot read file: {0}', ex)
|
||||
continue
|
||||
|
||||
data = key_filter(data)
|
||||
if opts.summarize:
|
||||
update_summary(summary, data)
|
||||
else:
|
||||
|
|
@ -211,34 +222,3 @@ class InfoPlugin(BeetsPlugin):
|
|||
|
||||
if opts.summarize:
|
||||
print_data(summary)
|
||||
|
||||
|
||||
def make_key_filter(include):
|
||||
"""Return a function that filters a dictionary.
|
||||
|
||||
The returned filter takes a dictionary and returns another
|
||||
dictionary that only includes the key-value pairs where the key
|
||||
glob-matches one of the keys in `include`.
|
||||
"""
|
||||
# By default, if no field inclusions are specified, include
|
||||
# everything but `path`.
|
||||
if not include:
|
||||
def filter_(data):
|
||||
return {k: v for k, v in data.items()
|
||||
if k != 'path'}
|
||||
return filter_
|
||||
|
||||
matchers = []
|
||||
for key in include:
|
||||
key = re.escape(key)
|
||||
key = key.replace(r'\*', '.*')
|
||||
matchers.append(re.compile(key + '$'))
|
||||
|
||||
def filter_(data):
|
||||
filtered = {}
|
||||
for key, value in data.items():
|
||||
if any([m.match(key) for m in matchers]):
|
||||
filtered[key] = value
|
||||
return filtered
|
||||
|
||||
return filter_
|
||||
|
|
|
|||
|
|
@ -212,6 +212,8 @@ Other new things:
|
|||
used to target a maximum image filesize.
|
||||
* :doc:`/plugins/badfiles`: Checkers can now be run during import with the
|
||||
``check_on_import`` config option.
|
||||
* :doc:`/plugins/export`: big speedups when `--include-keys` option is used
|
||||
Thanks to :user:`ssssam`.
|
||||
|
||||
Fixes:
|
||||
|
||||
|
|
|
|||
|
|
@ -20,14 +20,12 @@ your library::
|
|||
|
||||
If you just want to see specific properties you can use the
|
||||
``--include-keys`` option to filter them. The argument is a
|
||||
comma-separated list of simple glob patterns where ``*`` matches any
|
||||
string. For example::
|
||||
comma-separated list of field names. For example::
|
||||
|
||||
$ beet info -i 'title,mb*' beatles
|
||||
$ beet info -i 'title,mb_artistid' beatles
|
||||
|
||||
Will only show the ``title`` property and all properties starting with
|
||||
``mb``. You can add the ``-i`` option multiple times to the command
|
||||
line.
|
||||
Will only show the ``title`` and ``mb_artistid`` properties. You can add the
|
||||
``-i`` option multiple times to the command line.
|
||||
|
||||
Additional command-line options include:
|
||||
|
||||
|
|
|
|||
|
|
@ -92,17 +92,6 @@ class InfoTest(unittest.TestCase, TestHelper):
|
|||
self.assertIn(u'title: [various]', out)
|
||||
self.remove_mediafile_fixtures()
|
||||
|
||||
def test_include_pattern(self):
|
||||
item, = self.add_item_fixtures()
|
||||
item.album = 'xxxx'
|
||||
item.store()
|
||||
|
||||
out = self.run_with_output('info', '--library', 'album:xxxx',
|
||||
'--include-keys', '*lbu*')
|
||||
self.assertIn(displayable_path(item.path), out)
|
||||
self.assertNotIn(u'title:', out)
|
||||
self.assertIn(u'album: xxxx', out)
|
||||
|
||||
def test_custom_format(self):
|
||||
self.add_item_fixtures()
|
||||
out = self.run_with_output('info', '--library', '--format',
|
||||
|
|
|
|||
Loading…
Reference in a new issue