Merge pull request #3762 from beetbox/sam/export-speed

Optimise FormattedMapping when querying a specific set of fields
This commit is contained in:
Sam Thursfield 2021-05-03 15:10:07 +02:00 committed by GitHub
commit 7254611a38
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 80 additions and 85 deletions

View file

@ -52,15 +52,24 @@ class FormattedMapping(Mapping):
The accessor `mapping[key]` returns the formatted version of
`model[key]` as a unicode string.
The `included_keys` parameter allows filtering the fields that are
returned. By default all fields are returned. Limiting to specific keys can
avoid expensive per-item database queries.
If `for_path` is true, all path separators in the formatted values
are replaced.
"""
def __init__(self, model, for_path=False, compute_keys=True):
ALL_KEYS = '*'
def __init__(self, model, included_keys=ALL_KEYS, for_path=False):
self.for_path = for_path
self.model = model
if compute_keys:
self.model_keys = model.keys(True)
if included_keys == self.ALL_KEYS:
# Performance note: this triggers a database query.
self.model_keys = self.model.keys(True)
else:
self.model_keys = included_keys
def __getitem__(self, key):
if key in self.model_keys:
@ -605,11 +614,11 @@ class Model(object):
_formatter = FormattedMapping
def formatted(self, for_path=False):
def formatted(self, included_keys=_formatter.ALL_KEYS, for_path=False):
"""Get a mapping containing all values on this object formatted
as human-readable unicode strings.
"""
return self._formatter(self, for_path)
return self._formatter(self, included_keys, for_path)
def evaluate_template(self, template, for_path=False):
"""Evaluate a template (a string or a `Template` object) using
@ -619,7 +628,7 @@ class Model(object):
# Perform substitution.
if isinstance(template, six.string_types):
template = functemplate.template(template)
return template.substitute(self.formatted(for_path),
return template.substitute(self.formatted(for_path=for_path),
self._template_funcs())
# Parsing.

View file

@ -374,12 +374,19 @@ class FormattedItemMapping(dbcore.db.FormattedMapping):
Album-level fields take precedence if `for_path` is true.
"""
def __init__(self, item, for_path=False):
ALL_KEYS = '*'
def __init__(self, item, included_keys=ALL_KEYS, for_path=False):
# We treat album and item keys specially here,
# so exclude transitive album keys from the model's keys.
super(FormattedItemMapping, self).__init__(item, for_path,
compute_keys=False)
self.model_keys = item.keys(computed=True, with_album=False)
super(FormattedItemMapping, self).__init__(item, included_keys=[],
for_path=for_path)
self.included_keys = included_keys
if included_keys == self.ALL_KEYS:
# Performance note: this triggers a database query.
self.model_keys = item.keys(computed=True, with_album=False)
else:
self.model_keys = included_keys
self.item = item
@lazy_property
@ -390,10 +397,14 @@ class FormattedItemMapping(dbcore.db.FormattedMapping):
def album_keys(self):
album_keys = []
if self.album:
for key in self.album.keys(computed=True):
if key in Album.item_keys \
or key not in self.item._fields.keys():
album_keys.append(key)
if self.included_keys == self.ALL_KEYS:
# Performance note: this triggers a database query.
for key in self.album.keys(computed=True):
if key in Album.item_keys \
or key not in self.item._fields.keys():
album_keys.append(key)
else:
album_keys = self.included_keys
return album_keys
@property
@ -422,12 +433,15 @@ class FormattedItemMapping(dbcore.db.FormattedMapping):
# `artist` and `albumartist` fields fall back to one another.
# This is helpful in path formats when the album artist is unset
# on as-is imports.
if key == 'artist' and not value:
return self._get('albumartist')
elif key == 'albumartist' and not value:
return self._get('artist')
else:
return value
try:
if key == 'artist' and not value:
return self._get('albumartist')
elif key == 'albumartist' and not value:
return self._get('artist')
except KeyError:
pass
return value
def __iter__(self):
return iter(self.all_keys)
@ -1703,7 +1717,7 @@ class DefaultTemplateFunctions(object):
return res
# Flatten disambiguation value into a string.
disam_value = album.formatted(True).get(disambiguator)
disam_value = album.formatted(for_path=True).get(disambiguator)
# Return empty string if disambiguator is empty.
if disam_value:

View file

@ -26,8 +26,9 @@ from xml.etree import ElementTree
from datetime import datetime, date
from beets.plugins import BeetsPlugin
from beets import ui
from beets import util
import mediafile
from beetsplug.info import make_key_filter, library_data, tag_data
from beetsplug.info import library_data, tag_data
class ExportEncoder(json.JSONEncoder):
@ -129,16 +130,18 @@ class ExportPlugin(BeetsPlugin):
for keys in opts.included_keys:
included_keys.extend(keys.split(','))
key_filter = make_key_filter(included_keys)
for data_emitter in data_collector(lib, ui.decargs(args)):
try:
data, item = data_emitter()
data, item = data_emitter(included_keys or '*')
except (mediafile.UnreadableFileError, IOError) as ex:
self._log.error(u'cannot read file: {0}', ex)
continue
data = key_filter(data)
for key, value in data.items():
if isinstance(value, bytes):
data[key] = util.displayable_path(value)
items += [data]
if file_format_is_line_based:
export_format.export(data, **format_options)

View file

@ -19,7 +19,6 @@
from __future__ import division, absolute_import, print_function
import os
import re
from beets.plugins import BeetsPlugin
from beets import ui
@ -42,15 +41,29 @@ def tag_data(lib, args):
yield tag_data_emitter(item.path)
def tag_fields():
fields = set(mediafile.MediaFile.readable_fields())
fields.add('art')
return fields
def tag_data_emitter(path):
def emitter():
fields = list(mediafile.MediaFile.readable_fields())
fields.remove('images')
def emitter(included_keys):
if included_keys == '*':
fields = tag_fields()
else:
fields = included_keys
if 'images' in fields:
# We can't serialize the image data.
fields.remove('images')
mf = mediafile.MediaFile(syspath(path))
tags = {}
for field in fields:
tags[field] = getattr(mf, field)
tags['art'] = mf.art is not None
if field == 'art':
tags[field] = mf.art is not None
else:
tags[field] = getattr(mf, field, None)
# create a temporary Item to take advantage of __format__
item = Item.from_path(syspath(path))
@ -64,8 +77,8 @@ def library_data(lib, args):
def library_data_emitter(item):
def emitter():
data = dict(item.formatted())
def emitter(included_keys):
data = dict(item.formatted(included_keys=included_keys))
return data, item
return emitter
@ -185,18 +198,16 @@ class InfoPlugin(BeetsPlugin):
included_keys.extend(keys.split(','))
# Drop path even if user provides it multiple times
included_keys = [k for k in included_keys if k != 'path']
key_filter = make_key_filter(included_keys)
first = True
summary = {}
for data_emitter in data_collector(lib, ui.decargs(args)):
try:
data, item = data_emitter()
data, item = data_emitter(included_keys or '*')
except (mediafile.UnreadableFileError, IOError) as ex:
self._log.error(u'cannot read file: {0}', ex)
continue
data = key_filter(data)
if opts.summarize:
update_summary(summary, data)
else:
@ -211,34 +222,3 @@ class InfoPlugin(BeetsPlugin):
if opts.summarize:
print_data(summary)
def make_key_filter(include):
"""Return a function that filters a dictionary.
The returned filter takes a dictionary and returns another
dictionary that only includes the key-value pairs where the key
glob-matches one of the keys in `include`.
"""
# By default, if no field inclusions are specified, include
# everything but `path`.
if not include:
def filter_(data):
return {k: v for k, v in data.items()
if k != 'path'}
return filter_
matchers = []
for key in include:
key = re.escape(key)
key = key.replace(r'\*', '.*')
matchers.append(re.compile(key + '$'))
def filter_(data):
filtered = {}
for key, value in data.items():
if any([m.match(key) for m in matchers]):
filtered[key] = value
return filtered
return filter_

View file

@ -212,6 +212,8 @@ Other new things:
used to target a maximum image filesize.
* :doc:`/plugins/badfiles`: Checkers can now be run during import with the
``check_on_import`` config option.
* :doc:`/plugins/export`: big speedups when `--include-keys` option is used
Thanks to :user:`ssssam`.
Fixes:

View file

@ -20,14 +20,12 @@ your library::
If you just want to see specific properties you can use the
``--include-keys`` option to filter them. The argument is a
comma-separated list of simple glob patterns where ``*`` matches any
string. For example::
comma-separated list of field names. For example::
$ beet info -i 'title,mb*' beatles
$ beet info -i 'title,mb_artistid' beatles
Will only show the ``title`` property and all properties starting with
``mb``. You can add the ``-i`` option multiple times to the command
line.
Will only show the ``title`` and ``mb_artistid`` properties. You can add the
``-i`` option multiple times to the command line.
Additional command-line options include:

View file

@ -92,17 +92,6 @@ class InfoTest(unittest.TestCase, TestHelper):
self.assertIn(u'title: [various]', out)
self.remove_mediafile_fixtures()
def test_include_pattern(self):
item, = self.add_item_fixtures()
item.album = 'xxxx'
item.store()
out = self.run_with_output('info', '--library', 'album:xxxx',
'--include-keys', '*lbu*')
self.assertIn(displayable_path(item.path), out)
self.assertNotIn(u'title:', out)
self.assertIn(u'album: xxxx', out)
def test_custom_format(self):
self.add_item_fixtures()
out = self.run_with_output('info', '--library', '--format',