From 60160c38a03e61355865ed9c2e221b8bf9580ba5 Mon Sep 17 00:00:00 2001 From: Pierre Rust Date: Wed, 7 May 2014 00:03:05 +0200 Subject: [PATCH 1/4] Shortcut some tests in Model initialization When Model objects (Albums and Items) are loaded from the database a lot of time is wasted checking if the object is dirty and if fields are fixed or flexattr. This change alow bypassing these checks when we know that the data is correct ; it should only be used when loading Model from the database. With this, listing all tracks in my test database (8200 items) takes 4 seconds while it took 31 seconds previously. --- beets/dbcore/db.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 0c03f39ea..ba89ab233 100644 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -95,7 +95,7 @@ class Model(object): # Basic operation. - def __init__(self, db=None, **values): + def __init__(self, db=None, fixed=None, flexattr=None, **values): """Create a new object with an optional Database association and initial field values. """ @@ -105,6 +105,19 @@ class Model(object): self._values_flex = {} # Initial contents. + # For fields that are explicitly given as fixed or flex, we skip + # the checks made by update() to speed things up when loading objects + # from the database + if fixed: + for (key, value) in fixed.iteritems(): + # read path buffers. + if key == 'path': + value = str(value) + self._values_fixed[key] = self._fields[key].normalize(value) + if flexattr: + for (key, value) in flexattr: + self._values_flex[key] = value + self.update(values) self.clear_dirty() @@ -492,13 +505,11 @@ class Results(object): (row['id'],) ) values = dict(row) - values.update( - dict((row['key'], row['value']) for row in flex_rows) - ) + flex_values = dict((row['key'], row['value']) for row in flex_rows) # Construct the Python object and yield it if it passes the # predicate. - obj = self.model_class(self.db, **values) + obj = self.model_class(self.db, values, flex_values) if not self.query or self.query.match(obj): yield obj From 6d5c5824b52316ee9685dd1d0f8e0679d49b190b Mon Sep 17 00:00:00 2001 From: Pierre Rust Date: Thu, 8 May 2014 10:44:44 +0200 Subject: [PATCH 2/4] Move Model initialization in a separate method The idea is that we can apply special treatment in subclasses when needed, especially for path normalization. --- beets/dbcore/db.py | 26 +++++++++++++------------- beets/library.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index ba89ab233..4353c14f8 100644 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -105,19 +105,7 @@ class Model(object): self._values_flex = {} # Initial contents. - # For fields that are explicitly given as fixed or flex, we skip - # the checks made by update() to speed things up when loading objects - # from the database - if fixed: - for (key, value) in fixed.iteritems(): - # read path buffers. - if key == 'path': - value = str(value) - self._values_fixed[key] = self._fields[key].normalize(value) - if flexattr: - for (key, value) in flexattr: - self._values_flex[key] = value - + self._bulk_update(fixed, flexattr) self.update(values) self.clear_dirty() @@ -208,6 +196,18 @@ class Model(object): for key, value in values.items(): self[key] = value + def _bulk_update(self, fixed, flexattr): + """Assign all values in the fixed and flex dicts. + Using _bulk_update() bypasses many tests made by update() and + should only be used when loading data from the db. + """ + if fixed: + for (key, value) in fixed.items(): + self._values_fixed[key] = self._fields[key].normalize(value) + if flexattr: + for (key, value) in flexattr.items(): + self._values_flex[key] = value + def items(self): """Iterate over (key, value) pairs that this object contains. Computed fields are not included. diff --git a/beets/library.py b/beets/library.py index 3a2ccd62c..402fea44c 100644 --- a/beets/library.py +++ b/beets/library.py @@ -295,6 +295,25 @@ class Item(LibModel): if self.mtime == 0 and 'mtime' in values: self.mtime = values['mtime'] + def _bulk_update(self, fixed, flexattr): + # For fields that are explicitly given as fixed or flex, we skip + # the checks made by update() to speed things up when loading objects + # from the database + if fixed: + for (key, value) in fixed.items(): + # read path buffers. + if key == 'path': + if isinstance(value, unicode): + value = bytestring_path(value) + elif isinstance(value, buffer): + value = str(value) + self._values_fixed[key] = self._fields[key].normalize(value) + if flexattr: + for (key, value) in flexattr.items(): + self._values_flex[key] = value + + # TODO : set mtimes ? + def get_album(self): """Get the Album object that this item belongs to, if any, or None if the item is a singleton or is not associated with a @@ -687,6 +706,23 @@ class Album(LibModel): getters['path'] = Album.item_dir return getters + def _bulk_update(self, fixed, flexattr): + # For fields that are explicitly given as fixed or flex, we skip + # the checks made by update() to speed things up when loading objects + # from the database + if fixed: + for (key, value) in fixed.items(): + # read path buffers. + if key == 'artpath': + if isinstance(value, buffer): + value = bytes(value) + elif isinstance(value, unicode): + value = bytestring_path(value) + self._values_fixed[key] = self._fields[key].normalize(value) + if flexattr: + for (key, value) in flexattr.items(): + self._values_flex[key] = value + def __setitem__(self, key, value): """Set the value of an album attribute.""" if key == 'artpath': From 505add7e5c8d9aaff8337f3a8f3e7babe5b3ea1d Mon Sep 17 00:00:00 2001 From: Pierre Rust Date: Fri, 9 May 2014 08:51:10 +0200 Subject: [PATCH 3/4] Avoid duplicating _bulk_update into subclasses --- beets/dbcore/db.py | 10 ++++++++-- beets/library.py | 48 ++++++++++++++-------------------------------- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 4353c14f8..79645c599 100644 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -203,10 +203,16 @@ class Model(object): """ if fixed: for (key, value) in fixed.items(): - self._values_fixed[key] = self._fields[key].normalize(value) + self._set_fixed_attr(key, value) if flexattr: for (key, value) in flexattr.items(): - self._values_flex[key] = value + self._set_flex_attr(key, value) + + def _set_fixed_attr(self, key, value): + self._values_fixed[key] = self._fields[key].normalize(value) + + def _set_flex_attr(self, key, value): + self._values_flex[key] = value def items(self): """Iterate over (key, value) pairs that this object contains. diff --git a/beets/library.py b/beets/library.py index 402fea44c..bf4df16a4 100644 --- a/beets/library.py +++ b/beets/library.py @@ -295,24 +295,13 @@ class Item(LibModel): if self.mtime == 0 and 'mtime' in values: self.mtime = values['mtime'] - def _bulk_update(self, fixed, flexattr): - # For fields that are explicitly given as fixed or flex, we skip - # the checks made by update() to speed things up when loading objects - # from the database - if fixed: - for (key, value) in fixed.items(): - # read path buffers. - if key == 'path': - if isinstance(value, unicode): - value = bytestring_path(value) - elif isinstance(value, buffer): - value = str(value) - self._values_fixed[key] = self._fields[key].normalize(value) - if flexattr: - for (key, value) in flexattr.items(): - self._values_flex[key] = value - - # TODO : set mtimes ? + def _set_fixed_attr(self, key, value): + if key == 'path': + if isinstance(value, unicode): + value = bytestring_path(value) + elif isinstance(value, buffer): + value = str(value) + super(Item, self)._set_fixed_attr(key, value) def get_album(self): """Get the Album object that this item belongs to, if any, or @@ -706,22 +695,13 @@ class Album(LibModel): getters['path'] = Album.item_dir return getters - def _bulk_update(self, fixed, flexattr): - # For fields that are explicitly given as fixed or flex, we skip - # the checks made by update() to speed things up when loading objects - # from the database - if fixed: - for (key, value) in fixed.items(): - # read path buffers. - if key == 'artpath': - if isinstance(value, buffer): - value = bytes(value) - elif isinstance(value, unicode): - value = bytestring_path(value) - self._values_fixed[key] = self._fields[key].normalize(value) - if flexattr: - for (key, value) in flexattr.items(): - self._values_flex[key] = value + def _set_fixed_attr(self, key, value): + if key == 'artpath': + if isinstance(value, unicode): + value = bytestring_path(value) + elif isinstance(value, buffer): + value = bytes(value) + super(Album, self)._set_fixed_attr(key, value) def __setitem__(self, key, value): """Set the value of an album attribute.""" From d48c604b2fdddcd021c8f1e461ba84e351967e71 Mon Sep 17 00:00:00 2001 From: Adrian Sampson Date: Sat, 10 May 2014 14:02:45 -0700 Subject: [PATCH 4/4] Simplify model initialization shortcut This avoids multiple paths to setting values. It also moves the buffer check into PathType.normalize, where it belongs. This is slightly faster than the previous iteration: about 8 vs. 9 seconds to list about 13k songs on my machine. --- beets/dbcore/db.py | 38 +++++++++++++++++--------------------- beets/library.py | 38 +++++++++++++------------------------- 2 files changed, 30 insertions(+), 46 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 79645c599..a2d7b3513 100644 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -95,7 +95,7 @@ class Model(object): # Basic operation. - def __init__(self, db=None, fixed=None, flexattr=None, **values): + def __init__(self, db=None, **values): """Create a new object with an optional Database association and initial field values. """ @@ -105,10 +105,24 @@ class Model(object): self._values_flex = {} # Initial contents. - self._bulk_update(fixed, flexattr) self.update(values) self.clear_dirty() + @classmethod + def _awaken(cls, db=None, fixed_values=None, flex_values=None): + """Create an object with values drawn from the database. + + This is a performance optimization: the checks involved with + ordinary construction are bypassed. + """ + obj = cls(db) + if fixed_values: + for key, value in fixed_values.items(): + obj._values_fixed[key] = cls._fields[key].normalize(value) + if flex_values: + obj._values_flex.update(flex_values) + return obj + def __repr__(self): return '{0}({1})'.format( type(self).__name__, @@ -196,24 +210,6 @@ class Model(object): for key, value in values.items(): self[key] = value - def _bulk_update(self, fixed, flexattr): - """Assign all values in the fixed and flex dicts. - Using _bulk_update() bypasses many tests made by update() and - should only be used when loading data from the db. - """ - if fixed: - for (key, value) in fixed.items(): - self._set_fixed_attr(key, value) - if flexattr: - for (key, value) in flexattr.items(): - self._set_flex_attr(key, value) - - def _set_fixed_attr(self, key, value): - self._values_fixed[key] = self._fields[key].normalize(value) - - def _set_flex_attr(self, key, value): - self._values_flex[key] = value - def items(self): """Iterate over (key, value) pairs that this object contains. Computed fields are not included. @@ -515,7 +511,7 @@ class Results(object): # Construct the Python object and yield it if it passes the # predicate. - obj = self.model_class(self.db, values, flex_values) + obj = self.model_class._awaken(self.db, values, flex_values) if not self.query or self.query.match(obj): yield obj diff --git a/beets/library.py b/beets/library.py index bf4df16a4..7c1bd5075 100644 --- a/beets/library.py +++ b/beets/library.py @@ -94,6 +94,19 @@ class PathType(types.Type): def parse(self, string): return normpath(bytestring_path(string)) + def normalize(self, value): + if isinstance(value, unicode): + # Paths stored internally as encoded bytes. + return bytestring_path(value) + + elif isinstance(value, buffer): + # SQLite must store bytestings as buffers to avoid decoding. + # We unwrap buffers to bytes. + return bytes(value) + + else: + return value + # Special path format key. PF_KEY_DEFAULT = 'default' @@ -295,14 +308,6 @@ class Item(LibModel): if self.mtime == 0 and 'mtime' in values: self.mtime = values['mtime'] - def _set_fixed_attr(self, key, value): - if key == 'path': - if isinstance(value, unicode): - value = bytestring_path(value) - elif isinstance(value, buffer): - value = str(value) - super(Item, self)._set_fixed_attr(key, value) - def get_album(self): """Get the Album object that this item belongs to, if any, or None if the item is a singleton or is not associated with a @@ -695,23 +700,6 @@ class Album(LibModel): getters['path'] = Album.item_dir return getters - def _set_fixed_attr(self, key, value): - if key == 'artpath': - if isinstance(value, unicode): - value = bytestring_path(value) - elif isinstance(value, buffer): - value = bytes(value) - super(Album, self)._set_fixed_attr(key, value) - - def __setitem__(self, key, value): - """Set the value of an album attribute.""" - if key == 'artpath': - if isinstance(value, unicode): - value = bytestring_path(value) - elif isinstance(value, buffer): - value = bytes(value) - super(Album, self).__setitem__(key, value) - def items(self): """Returns an iterable over the items associated with this album.