Run single metadata downloads in a worker process as well, to work around memory leaks in third-party plugins

This commit is contained in:
Kovid Goyal 2012-04-05 18:40:06 +05:30
parent 270d36f59f
commit fbddf37b80
4 changed files with 171 additions and 28 deletions

View file

@ -112,6 +112,18 @@ def get_cached_cover_urls(mi):
if url:
yield (p, url)
def dump_caches():
    '''
    Collect the caches of every metadata plugin that supports 'identify'.

    Returns a dict mapping each plugin's name to whatever that plugin's own
    ``dump_caches()`` returns.
    '''
    # Imported at call time, as the original does.
    from calibre.customize.ui import metadata_plugins
    snapshot = {}
    for plugin in metadata_plugins(['identify']):
        snapshot[plugin.name] = plugin.dump_caches()
    return snapshot
def load_caches(dump):
    '''
    Hand previously dumped caches back to the 'identify' metadata plugins.

    ``dump`` maps plugin names to cache dumps. A plugin with no entry in
    ``dump``, or with a falsy entry, is left untouched.
    '''
    from calibre.customize.ui import metadata_plugins
    # The plugin sequence is materialised before any plugin is modified.
    for plugin in list(metadata_plugins(['identify'])):
        saved = dump.get(plugin.name)
        if not saved:
            continue
        plugin.load_caches(saved)
def cap_author_token(token):
lt = lower(token)
if lt in ('von', 'de', 'el', 'van', 'le'):
@ -293,6 +305,16 @@ def cached_identifier_to_cover_url(self, id_):
with self.cache_lock:
return self._identifier_to_cover_url_cache.get(id_, None)
def dump_caches(self):
    '''
    Return shallow copies of the ISBN->identifier and identifier->cover URL
    caches, keyed by 'isbn_to_identifier' and 'identifier_to_cover'.

    Both copies are taken while holding ``self.cache_lock``.
    '''
    with self.cache_lock:
        isbn_map = self._isbn_to_identifier_cache.copy()
        cover_map = self._identifier_to_cover_url_cache.copy()
    return {'isbn_to_identifier': isbn_map, 'identifier_to_cover': cover_map}
def load_caches(self, dump):
    '''
    Merge a dump (keys 'isbn_to_identifier' and 'identifier_to_cover') into
    this object's two caches while holding ``self.cache_lock``.

    Existing entries are kept unless the dump supplies the same key.
    '''
    with self.cache_lock:
        isbn_cache = self._isbn_to_identifier_cache
        isbn_cache.update(dump['isbn_to_identifier'])
        cover_cache = self._identifier_to_cover_url_cache
        cover_cache.update(dump['identifier_to_cover'])
# }}}
# Utility functions {{{

View file

@ -8,14 +8,17 @@
__docformat__ = 'restructuredtext en'
import os
from threading import Event
from threading import Event, Thread
from Queue import Queue, Empty
from io import BytesIO
from calibre.utils.date import as_utc
from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.ebooks.metadata.sources.covers import (download_cover,
run_download)
from calibre.ebooks.metadata.sources.base import dump_caches, load_caches
from calibre.utils.logging import GUILog
from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF
@ -93,3 +96,31 @@ def main(do_identify, covers, metadata, ensure_fields):
return failed_ids, failed_covers, all_failed
def single_identify(title, authors, identifiers):
    '''
    Run one identify query and return its outcome as a 3-tuple:
    (list of results each converted with metadata_to_opf, dump_caches(),
    log.dump()).
    '''
    log = GUILog()
    found = identify(log, Event(), title=title, authors=authors,
            identifiers=identifiers)
    opfs = []
    for mi in found:
        opfs.append(metadata_to_opf(mi))
    return opfs, dump_caches(), log.dump()
def single_covers(title, authors, identifiers, caches):
    '''
    Download covers in a background thread and write each one to the
    current working directory as it arrives.

    :param caches: cache dump passed to ``load_caches()`` before starting.
    :return: ``log.dump()`` of the log used for the download.

    Each result is written to ``<plugin name>,,<width>,,<height>,,<fmt>.cover``
    and then a directory with the same name plus ``.done`` is created. The
    directory is created only after the file has been closed, so its
    existence marks the cover file as completely written.
    '''
    load_caches(caches)
    log = GUILog()
    results = Queue()
    worker = Thread(target=run_download, args=(log, results, Event()),
            kwargs=dict(title=title, authors=authors, identifiers=identifiers))
    worker.daemon = True
    worker.start()

    def save(result):
        # Write one cover to disk, then create its completion marker.
        plugin, width, height, fmt, data = result
        name = '%s,,%s,,%s,,%s.cover'%(plugin.name, width, height, fmt)
        with open(name, 'wb') as f:
            f.write(data)
        os.mkdir(name+'.done')

    while worker.is_alive():
        try:
            result = results.get(True, 1)
        except Empty:
            continue
        save(result)

    # The thread may have queued results and exited while a cover was being
    # written above; without this drain those results would be silently lost.
    # NOTE(review): assumes run_download only ever queues 5-tuples - confirm.
    while True:
        try:
            result = results.get_nowait()
        except Empty:
            break
        save(result)
    return log.dump()

View file

@ -8,11 +8,16 @@
__docformat__ = 'restructuredtext en'
DEBUG_DIALOG = False
# Test: turn fields off, error
# handling
# Do some testing in windows as filesystem model is different
# Imports {{{
import os, time
from threading import Thread, Event
from operator import attrgetter
from Queue import Queue, Empty
from io import BytesIO
from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt,
QApplication, QDialog, QVBoxLayout, QLabel, QDialogButtonBox,
@ -24,16 +29,17 @@
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.logging import GUILog as Log
from calibre.ebooks.metadata.sources.identify import (identify,
urls_from_identifiers)
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import OPF
from calibre.gui2 import error_dialog, NONE, rating_font
from calibre.utils.date import (utcnow, fromordinal, format_date,
UNDEFINED_DATE, as_utc)
from calibre.library.comments import comments_to_html
from calibre import force_unicode
from calibre.utils.config import tweaks
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.ptempfile import TemporaryDirectory
# }}}
class RichTextDelegate(QStyledItemDelegate): # {{{
@ -357,7 +363,7 @@ def color_to_string(col):
class IdentifyWorker(Thread): # {{{
def __init__(self, log, abort, title, authors, identifiers):
def __init__(self, log, abort, title, authors, identifiers, caches):
Thread.__init__(self)
self.daemon = True
@ -367,6 +373,7 @@ def __init__(self, log, abort, title, authors, identifiers):
self.results = []
self.error = None
self.caches = caches
def sample_results(self):
m1 = Metadata('The Great Gatsby', ['Francis Scott Fitzgerald'])
@ -390,25 +397,36 @@ def run(self):
if DEBUG_DIALOG:
self.results = self.sample_results()
else:
self.results = identify(self.log, self.abort, title=self.title,
authors=self.authors, identifiers=self.identifiers)
res = fork_job(
'calibre.ebooks.metadata.sources.worker',
'single_identify', (self.title, self.authors,
self.identifiers), no_output=True, abort=self.abort)
self.results, caches, log_dump = res['result']
self.results = [OPF(BytesIO(r), basedir=os.getcwdu(),
populate_spine=False).to_book_metadata() for r in self.results]
self.caches.update(caches)
self.log.load(log_dump)
for i, result in enumerate(self.results):
result.gui_rank = i
except WorkerError as e:
self.error = force_unicode(e.orig_tb)
except:
import traceback
self.error = force_unicode(traceback.format_exc())
# }}}
class IdentifyWidget(QWidget): # {{{
rejected = pyqtSignal()
results_found = pyqtSignal()
book_selected = pyqtSignal(object)
book_selected = pyqtSignal(object, object)
def __init__(self, log, parent=None):
QWidget.__init__(self, parent)
self.log = log
self.abort = Event()
self.caches = {}
self.l = l = QGridLayout()
self.setLayout(l)
@ -421,7 +439,7 @@ def __init__(self, log, parent=None):
l.addWidget(self.top, 0, 0)
self.results_view = ResultsView(self)
self.results_view.book_selected.connect(self.book_selected.emit)
self.results_view.book_selected.connect(self.emit_book_selected)
self.get_result = self.results_view.get_result
l.addWidget(self.results_view, 1, 0)
@ -455,6 +473,9 @@ def __init__(self, log, parent=None):
</script>
''')
def emit_book_selected(self, book):
    '''
    Emit ``book_selected`` with the given book and this widget's ``caches``.
    '''
    notify = self.book_selected.emit
    notify(book, self.caches)
def start(self, title=None, authors=None, identifiers={}):
self.log.clear()
self.log('Starting download')
@ -470,7 +491,7 @@ def start(self, title=None, authors=None, identifiers={}):
self.log(unicode(self.query.text()))
self.worker = IdentifyWorker(self.log, self.abort, title,
authors, identifiers)
authors, identifiers, self.caches)
self.worker.start()
@ -513,20 +534,20 @@ def cancel(self):
class CoverWorker(Thread): # {{{
def __init__(self, log, abort, title, authors, identifiers):
def __init__(self, log, abort, title, authors, identifiers, caches):
Thread.__init__(self)
self.daemon = True
self.log, self.abort = log, abort
self.title, self.authors, self.identifiers = (title, authors,
identifiers)
self.caches = caches
self.rq = Queue()
self.error = None
def fake_run(self):
images = ['donate.png', 'config.png', 'column.png', 'eject.png', ]
import time
time.sleep(2)
for pl, im in zip(metadata_plugins(['cover']), images):
self.rq.put((pl, 1, 1, 'png', I(im, data=True)))
@ -536,12 +557,56 @@ def run(self):
if DEBUG_DIALOG:
self.fake_run()
else:
from calibre.ebooks.metadata.sources.covers import run_download
run_download(self.log, self.rq, self.abort, title=self.title,
authors=self.authors, identifiers=self.identifiers)
self.run_fork()
except WorkerError as e:
self.error = force_unicode(e.orig_tb)
except:
import traceback
self.error = force_unicode(traceback.format_exc())
def run_fork(self):
    '''
    Run ``single_covers`` through fork_job with a temporary directory as its
    working directory, while a helper thread runs ``monitor_tdir`` on that
    directory.

    The log dump returned by the job is appended to ``self.log``. The
    monitor thread is always told to stop and joined, even if the job fails.
    '''
    with TemporaryDirectory('_single_metadata_download') as tdir:
        self.keep_going = True
        monitor = Thread(target=self.monitor_tdir, args=(tdir,))
        monitor.daemon = True
        monitor.start()
        try:
            job_args = (self.title, self.authors, self.identifiers,
                    self.caches)
            outcome = fork_job('calibre.ebooks.metadata.sources.worker',
                    'single_covers', job_args, cwd=tdir, no_output=True,
                    abort=self.abort)
            self.log.append_dump(outcome['result'])
        finally:
            # Clearing the flag ends the monitor loop; join waits for its
            # final scan of the directory.
            self.keep_going = False
            monitor.join()
def scan_once(self, tdir, seen):
    '''
    Queue every finished cover file in ``tdir`` that has not been handled yet.

    A file is considered finished when it ends with ``.cover`` and a sibling
    entry named ``<file>.done`` exists. Its name (minus the extension) is
    split on ``,,`` into plugin name, width, height and format; the tuple
    ``(plugin_name, width, height, fmt, data)`` is put on ``self.rq``.

    :param tdir: directory to scan.
    :param seen: set of file names already handled; updated in place.

    Errors are reported with a printed traceback and never raised. A name
    that cannot be parsed is added to ``seen`` so it is reported once instead
    of on every scan; a file that cannot be read is left out of ``seen`` so a
    later scan retries it.
    '''
    import traceback
    for x in os.listdir(tdir):
        if x in seen:
            continue
        if not x.endswith('.cover'):
            continue
        if not os.path.exists(os.path.join(tdir, x+'.done')):
            continue
        name = x.rpartition('.')[0]
        try:
            plugin_name, width, height, fmt = name.split(',,')
            width, height = int(width), int(height)
        except ValueError:
            # The name will never become parseable, so do not retry it.
            traceback.print_exc()
            seen.add(x)
            continue
        try:
            with open(os.path.join(tdir, x), 'rb') as f:
                data = f.read()
        except Exception:
            # Possibly transient (e.g. file still locked): retry next scan.
            traceback.print_exc()
            continue
        seen.add(x)
        self.rq.put((plugin_name, width, height, fmt, data))
def monitor_tdir(self, tdir):
    '''
    Call ``scan_once`` on ``tdir`` once a second for as long as
    ``self.keep_going`` is true, then once more after it turns false.
    '''
    processed = set()
    while True:
        if not self.keep_going:
            break
        time.sleep(1)
        self.scan_once(tdir, processed)
    # Final pass, made after keep_going has been cleared, to pick up
    # anything that appeared since the last periodic scan.
    self.scan_once(tdir, processed)
# }}}
class CoversModel(QAbstractListModel): # {{{
@ -620,16 +685,19 @@ def index_for_plugin(self, plugin):
idx = self.plugin_map.get(plugin, 0)
return self.index(idx)
def update_result(self, plugin, width, height, data):
try:
idx = self.plugin_map[plugin]
except:
def update_result(self, plugin_name, width, height, data):
idx = None
for plugin, i in self.plugin_map.iteritems():
if plugin.name == plugin_name:
idx = i
break
if idx is None:
return
pmap = QPixmap()
pmap.loadFromData(data)
if pmap.isNull():
return
self.covers[idx] = self.get_item(plugin.name, pmap, waiting=False)
self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False)
self.dataChanged.emit(self.index(idx), self.index(idx))
def cover_pixmap(self, index):
@ -709,7 +777,7 @@ def __init__(self, log, current_cover, parent=None):
def reset_covers(self):
self.covers_view.reset_covers()
def start(self, book, current_cover, title, authors):
def start(self, book, current_cover, title, authors, caches):
self.continue_processing = True
self.abort.clear()
self.book, self.current_cover = book, current_cover
@ -721,7 +789,7 @@ def start(self, book, current_cover, title, authors):
self.covers_view.start()
self.worker = CoverWorker(self.log, self.abort, self.title,
self.authors, book.identifiers)
self.authors, book.identifiers, caches)
self.worker.start()
QTimer.singleShot(50, self.check)
self.covers_view.setFocus(Qt.OtherFocusReason)
@ -766,8 +834,8 @@ def process_results(self):
def process_result(self, result):
if not self.continue_processing:
return
plugin, width, height, fmt, data = result
self.covers_view.model().update_result(plugin, width, height, data)
plugin_name, width, height, fmt, data = result
self.covers_view.model().update_result(plugin_name, width, height, data)
def cleanup(self):
self.covers_view.delegate.stop_animation()
@ -894,7 +962,7 @@ def __init__(self, current_cover=None, parent=None):
def view_log(self):
self._lv = LogViewer(self.log, self)
def book_selected(self, book):
def book_selected(self, book, caches):
self.next_button.setVisible(False)
self.ok_button.setVisible(True)
self.prev_button.setVisible(True)
@ -902,7 +970,7 @@ def book_selected(self, book):
self.stack.setCurrentIndex(1)
self.log('\n\n')
self.covers_widget.start(book, self.current_cover,
self.title, self.authors)
self.title, self.authors, caches)
def back_clicked(self):
self.next_button.setVisible(True)
@ -993,7 +1061,7 @@ def start(self, title, authors, identifiers):
book = Metadata(title, authors)
book.identifiers = identifiers
self.covers_widget.start(book, self.current_cover,
title, authors)
title, authors, {})
return self.exec_()
def view_log(self):

View file

@ -122,6 +122,18 @@ def html(self):
end = self.normal if self.data else u''
return u''.join(self.data) + end
def dump(self):
    '''
    Return this stream's state as the list ``[data, plain_text, last_col]``.

    The ``data`` and ``plain_text`` objects are returned by reference, not
    copied.
    '''
    state = [self.data, self.plain_text]
    state.append(self.last_col)
    return state
def load(self, dump):
    '''
    Replace this stream's state with a three-item dump of the form
    ``(data, plain_text, last_col)``.
    '''
    # Unpack fully first so a malformed dump changes nothing.
    data, plain_text, last_col = dump
    self.data = data
    self.plain_text = plain_text
    self.last_col = last_col
def append_dump(self, dump):
    '''
    Append a three-item dump ``(data, plain_text, last_col)`` to this stream:
    the first two items extend the existing sequences in place and
    ``last_col`` is overwritten with the third.
    '''
    extra_data, extra_text, final_col = dump
    for target, extra in ((self.data, extra_data),
                          (self.plain_text, extra_text)):
        target.extend(extra)
    self.last_col = final_col
class Log(object):
@ -186,4 +198,14 @@ def html(self):
def plain_text(self):
return u''.join(self.outputs[0].plain_text)
def dump(self):
    '''
    Return the dump of the first output stream (``self.outputs[0]``).
    '''
    primary = self.outputs[0]
    return primary.dump()
def load(self, dump):
    '''
    Load ``dump`` into the first output stream (``self.outputs[0]``) and
    return whatever that stream's ``load`` returns.
    '''
    primary = self.outputs[0]
    return primary.load(dump)
def append_dump(self, dump):
    '''
    Append ``dump`` to the first output stream (``self.outputs[0]``) and
    return whatever that stream's ``append_dump`` returns.
    '''
    primary = self.outputs[0]
    return primary.append_dump(dump)
default_log = Log()