Various database optimizations

This commit is contained in:
Kovid Goyal 2008-11-01 16:51:22 -07:00
parent d02ec242d5
commit 114c1e799e
5 changed files with 129 additions and 46 deletions

View file

@ -185,7 +185,7 @@ def sort(self, col, order, reset=True):
if not self.db:
return
ascending = order == Qt.AscendingOrder
self.db.refresh(self.cols[col], ascending)
self.db.sort(self.cols[col], ascending)
self.research()
if reset:
self.clear_caches()

View file

@ -654,10 +654,6 @@ def edit_metadata(self, checked, bulk=None):
for row in rows:
d = MetadataSingleDialog(self, row.row(),
self.library_view.model().db)
self.connect(d, SIGNAL('accepted()'), partial(self.metadata_edited, d.id), Qt.QueuedConnection)
def metadata_edited(self, id):
self.library_view.model().refresh_ids([id], self.library_view.currentIndex().row())
def edit_bulk_metadata(self, checked):
'''

View file

@ -1,3 +1,12 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' Code to manage ebook library'''
''' Code to manage ebook library'''
import re
title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
def title_sort(title):
match = title_pat.search(title)
if match:
prep = match.group(1)
title = title.replace(prep, '') + ', ' + prep
return title.strip()

View file

@ -7,7 +7,7 @@
The database used to store ebook metadata
'''
import os, re, sys, shutil, cStringIO, glob, collections, textwrap, \
operator, itertools, functools, traceback
itertools, functools, traceback
from itertools import repeat
from datetime import datetime
@ -15,6 +15,7 @@
from PyQt4.QtGui import QApplication, QPixmap, QImage
__app = None
from calibre.library import title_sort
from calibre.library.database import LibraryDatabase
from calibre.library.sqlite import connect, IntegrityError
from calibre.utils.search_query_parser import SearchQueryParser
@ -218,8 +219,8 @@ def remove(self, id):
if id in self._map_filtered:
self._map_filtered.remove(id)
def set(self, row, col, val):
id = self._map_filtered[row]
def set(self, row, col, val, row_is_id=False):
id = row if row_is_id else self._map_filtered[row]
self._data[id][col] = val
def index(self, id, cache=False):
@ -242,6 +243,12 @@ def books_added(self, ids, conn):
self._data[id] = conn.get('SELECT * from meta WHERE id=?', (id,))[0]
self._map[0:0] = ids
self._map_filtered[0:0] = ids
def books_deleted(self, ids):
for id in ids:
self._data[id] = None
if id in self._map: self._map.remove(id)
if id in self._map_filtered: self._map_filtered.remove(id)
def refresh(self, db, field=None, ascending=True):
temp = db.conn.get('SELECT * FROM meta')
@ -254,19 +261,24 @@ def refresh(self, db, field=None, ascending=True):
self._map_filtered = list(self._map)
def seriescmp(self, x, y):
ans = self.strcmp(self._data[x][9], self._data[y][9])
try:
ans = cmp(self._data[x][9].lower(), self._data[y][9].lower()) if str else\
cmp(self._data[x][9], self._data[y][9])
except AttributeError: # Some entries may be None
ans = cmp(self._data[x][9], self._data[y][9])
if ans != 0: return ans
return cmp(self._data[x][10], self._data[y][10])
def cmp(self, loc, x, y, str=True):
ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if str else\
try:
ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if str else\
cmp(self._data[x][loc], self._data[y][loc])
except AttributeError: # Some entries may be None
ans = cmp(self._data[x][loc], self._data[y][loc])
if ans != 0: return ans
return cmp(self._data[x][11].lower(), self._data[y][11].lower())
def sort(self, field, ascending):
import time
start = time.time()
field = field.lower().strip()
if field in ('author', 'tag', 'comment'):
field += 's'
@ -274,9 +286,10 @@ def sort(self, field, ascending):
elif field == 'title': field = 'sort'
elif field == 'author': field = 'author_sort'
fcmp = self.seriescmp if field == 'series' else \
functools.partial(self.cmp, FIELD_MAP[field], field not in ('size', 'rating', 'timestamp'))
functools.partial(self.cmp, FIELD_MAP[field],
str=field not in ('size', 'rating', 'timestamp'))
self._map.sort(cmp=fcmp, reverse=not ascending)
print time.time() - start
def search(self, query):
if not query or not query.strip():
@ -348,7 +361,6 @@ def __init__(self, library_path, row_factory=False):
self.user_version += 1
self.data = ResultCache()
self.data.refresh()
self.search = self.data.search
self.refresh = functools.partial(self.data.refresh, self)
self.sort = functools.partial(self.data.refresh, self)
@ -356,6 +368,16 @@ def __init__(self, library_path, row_factory=False):
self.refresh_ids = functools.partial(self.data.refresh_ids, self.conn)
self.row = self.data.row
self.refresh()
def get_property(idx, index_is_id=False, loc=-1):
row = self.data._data[idx] if index_is_id else self.data[idx]
return row[loc]
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags', 'title'):
setattr(self, prop, functools.partial(get_property, loc=FIELD_MAP['comments' if prop == 'comment' else prop]))
def initialize_database(self):
from calibre.resources import metadata_sqlite
self.conn.executescript(metadata_sqlite)
@ -395,7 +417,7 @@ def upgrade_version_2(self):
def last_modified(self):
''' Return last modified time as a UTC datetime object'''
return datetime.utcfromtimestamp(os.stat(self.dbpath).st_mtime)
def path(self, index, index_is_id=False):
'Return the relative path to the directory containing this books files as a unicode string.'
id = index if index_is_id else self.id(index)
@ -617,6 +639,11 @@ def add_format(self, index, format, stream, index_is_id=False, path=None):
self.conn.execute('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
(id, format.upper(), size, name))
self.conn.commit()
try:
fmts = [f.strip().upper() for f in self.data[self.data.row(id)][FIELD_MAP['formats']].split(',')]
except AttributeError:
fmts = []
self.data.set(id, FIELD_MAP['formats'], ','.join(fmts+[format.upper()]), row_is_id=True)
self.notify('metadata', [id])
def delete_book(self, id):
@ -633,6 +660,7 @@ def delete_book(self, id):
self.conn.execute('DELETE FROM books WHERE id=?', (id,))
self.conn.commit()
self.clean()
self.data.books_deleted([id])
self.notify('delete', [id])
def remove_format(self, index, format, index_is_id=False):
@ -649,6 +677,9 @@ def remove_format(self, index, format, index_is_id=False):
pass
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
self.conn.commit()
fmts = [f.strip().upper() for f in self.data[self.data.row(id)][FIELD_MAP['formats']].split(',')]
fmts.remove(format.upper())
self.data.set(id, FIELD_MAP['formats'], ','.join(fmts), row_is_id=True)
self.notify('metadata', [id])
def clean(self):
@ -706,7 +737,7 @@ def set(self, row, column, val):
elif column == 'publisher':
self.set_publisher(id, val, notify=False)
elif column == 'rating':
self.set_rating(id, val)
self.set_rating(id, val, notify=False)
elif column == 'tags':
self.set_tags(id, val.split(','), append=False, notify=False)
self.data.refresh_ids(self.conn, [id])
@ -726,11 +757,11 @@ def set_metadata(self, id, mi):
authors += a.split('&')
self.set_authors(id, authors, notify=False)
if mi.author_sort:
self.set_author_sort(id, mi.author_sort)
self.set_author_sort(id, mi.author_sort, notify=False)
if mi.publisher:
self.set_publisher(id, mi.publisher, notify=False)
if mi.rating:
self.set_rating(id, mi.rating)
self.set_rating(id, mi.rating, notify=False)
if mi.series:
self.set_series(id, mi.series, notify=False)
if mi.cover_data[1] is not None:
@ -738,9 +769,11 @@ def set_metadata(self, id, mi):
if mi.tags:
self.set_tags(id, mi.tags, notify=False)
if mi.comments:
self.set_comment(id, mi.comments)
self.set_comment(id, mi.comments, notify=False)
if mi.isbn and mi.isbn.strip():
self.set_isbn(id, mi.isbn)
self.set_isbn(id, mi.isbn, notify=False)
if mi.series_index and mi.series_index > 0:
self.set_series_index(id, mi.series_index, notify=False)
self.set_path(id, True)
self.notify('metadata', [id])
@ -748,6 +781,8 @@ def set_authors(self, id, authors, notify=True):
'''
`authors`: A list of authors.
'''
if not authors:
authors = [_('Unknown')]
self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,))
self.conn.execute('DELETE FROM authors WHERE (SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) < 1')
for a in authors:
@ -767,8 +802,12 @@ def set_authors(self, id, authors, notify=True):
self.conn.execute('INSERT INTO books_authors_link(book, author) VALUES (?,?)', (id, aid))
except IntegrityError: # Sometimes books specify the same author twice in their metadata
pass
self.conn.commit()
self.set_path(id, True)
self.notify('metadata', [id])
self.data.set(id, FIELD_MAP['authors'], ','.join([a.replace(',', '|') for a in authors]), row_is_id=True)
self.data.set(id, FIELD_MAP['author_sort'], self.data[self.data.row(id)][FIELD_MAP['authors']], row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_title(self, id, title, notify=True):
if not title:
@ -777,7 +816,11 @@ def set_title(self, id, title, notify=True):
title = title.decode(preferred_encoding, 'replace')
self.conn.execute('UPDATE books SET title=? WHERE id=?', (title, id))
self.set_path(id, True)
self.notify('metadata', [id])
self.data.set(id, FIELD_MAP['title'], title, row_is_id=True)
self.data.set(id, FIELD_MAP['sort'], title_sort(title), row_is_id=True)
self.conn.commit()
if notify:
self.notify('metadata', [id])
def set_publisher(self, id, publisher, notify=True):
self.conn.execute('DELETE FROM books_publishers_link WHERE book=?',(id,))
@ -791,8 +834,10 @@ def set_publisher(self, id, publisher, notify=True):
else:
aid = self.conn.execute('INSERT INTO publishers(name) VALUES (?)', (publisher,)).lastrowid
self.conn.execute('INSERT INTO books_publishers_link(book, publisher) VALUES (?,?)', (id, aid))
self.conn.commit()
self.notify('metadata', [id])
self.conn.commit()
self.data.set(id, FIELD_MAP['publisher'], publisher, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_tags(self, id, tags, append=False, notify=True):
'''
@ -819,7 +864,16 @@ def set_tags(self, id, tags, append=False, notify=True):
self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)',
(id, tid))
self.conn.commit()
self.notify('metadata', [id])
try:
otags = [t.strip() for t in self.data[self.data.row(id)][FIELD_MAP['tags']].split(',')]
except AttributeError:
otags = []
if not append:
otags = []
tags = ','.join(otags+tags)
self.data.set(id, FIELD_MAP['tags'], tags, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_series(self, id, series, notify=True):
@ -841,7 +895,9 @@ def set_series(self, id, series, notify=True):
self.data.set(row, 9, series)
except ValueError:
pass
self.notify('metadata', [id])
self.data.set(id, FIELD_MAP['series'], series, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_series_index(self, id, idx, notify=True):
if idx is None:
@ -855,7 +911,42 @@ def set_series_index(self, id, idx, notify=True):
self.data.set(row, 10, idx)
except ValueError:
pass
self.notify('metadata', [id])
self.data.set(id, FIELD_MAP['series_index'], int(idx), row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_rating(self, id, rating, notify=True):
rating = int(rating)
self.conn.execute('DELETE FROM books_ratings_link WHERE book=?',(id,))
rat = self.conn.get('SELECT id FROM ratings WHERE rating=?', (rating,), all=False)
rat = rat if rat else self.conn.execute('INSERT INTO ratings(rating) VALUES (?)', (rating,)).lastrowid
self.conn.execute('INSERT INTO books_ratings_link(book, rating) VALUES (?,?)', (id, rat))
self.conn.commit()
self.data.set(id, FIELD_MAP['rating'], rating, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_comment(self, id, text, notify=True):
self.conn.execute('DELETE FROM comments WHERE book=?', (id,))
self.conn.execute('INSERT INTO comments(book,text) VALUES (?,?)', (id, text))
self.conn.commit()
self.data.set(id, FIELD_MAP['comments'], text, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_author_sort(self, id, sort, notify=True):
self.conn.execute('UPDATE books SET author_sort=? WHERE id=?', (sort, id))
self.conn.commit()
self.data.set(id, FIELD_MAP['author_sort'], sort, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_isbn(self, id, isbn, notify=True):
self.conn.execute('UPDATE books SET isbn=? WHERE id=?', (isbn, id))
self.conn.commit()
self.data.set(id, FIELD_MAP['isbn'], isbn, row_is_id=True)
if notify:
self.notify('metadata', [id])
def add_books(self, paths, formats, metadata, uris=[], add_duplicates=True):
'''
@ -880,6 +971,7 @@ def add_books(self, paths, formats, metadata, uris=[], add_duplicates=True):
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
(mi.title, uri, series_index, aus))
id = obj.lastrowid
self.data.books_added([id], self.conn)
ids.append(id)
self.set_path(id, True)
self.conn.commit()
@ -891,8 +983,6 @@ def add_books(self, paths, formats, metadata, uris=[], add_duplicates=True):
if not hasattr(path, 'read'):
stream.close()
self.conn.commit()
if ids:
self.data.books_added(ids, self.conn)
if duplicates:
paths = tuple(duplicate[0] for duplicate in duplicates)
formats = tuple(duplicate[1] for duplicate in duplicates)
@ -909,6 +999,7 @@ def import_book(self, mi, formats):
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
(mi.title, None, series_index, aus))
id = obj.lastrowid
self.data.books_added([id], self.conn)
self.set_path(id, True)
self.set_metadata(id, mi)
for path in formats:
@ -916,7 +1007,6 @@ def import_book(self, mi, formats):
stream = open(path, 'rb')
self.add_format(id, ext, stream, index_is_id=True)
self.conn.commit()
self.data.books_added([id], self.conn)
self.notify('add', [id])
def move_library_to(self, newloc, progress=None):
@ -966,8 +1056,6 @@ def move_library_to(self, newloc, progress=None):
def __iter__(self):
if len(self.data._data) == 0:
self.refresh('timestamp', True)
for record in self.data._data:
if record is not None:
yield record
@ -991,8 +1079,6 @@ def get_data_as_dict(self, prefix=None, authors_as_string=False):
prefix = self.library_path
FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'isbn'])
data = []
if len(self.data) == 0:
self.refresh(None, True)
for record in self.data:
if record is None: continue
x = {}

View file

@ -7,11 +7,12 @@
Wrapper for multi-threaded access to a single sqlite database connection. Serializes
all calls.
'''
import sqlite3 as sqlite, traceback, re, time
import sqlite3 as sqlite, traceback, time
from sqlite3 import IntegrityError
from threading import Thread
from Queue import Queue
from calibre.library import title_sort
class Concatenate(object):
'''String concatenation aggregator for sqlite'''
@ -61,15 +62,6 @@ def connect(self):
detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES)
self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
self.conn.create_aggregate('concat', 1, Concatenate)
title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
def title_sort(title):
match = title_pat.search(title)
if match:
prep = match.group(1)
title = title.replace(prep, '') + ', ' + prep
return title.strip()
self.conn.create_function('title_sort', 1, title_sort)
def run(self):