From a679086e53de749dc8d451cda6a3e83315362dcf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 11 Sep 2008 17:08:48 -0700 Subject: [PATCH] IGN:Remove temporary files as soon as possible, rather than only at program exit. Fixes for various minor regressions. --- src/calibre/constants.py | 35 ++++++++++- src/calibre/ebooks/lrf/feeds/convert_from.py | 45 +++++++------- src/calibre/ebooks/metadata/opf2.py | 23 +++++-- src/calibre/gui2/main.py | 37 ++++++++---- src/calibre/library/database.py | 11 +++- src/calibre/library/database2.py | 24 +++++++- src/calibre/parallel.py | 19 +++++- src/calibre/ptempfile.py | 63 ++++++++++---------- src/calibre/startup.py | 38 +----------- src/calibre/utils/config.py | 3 +- src/calibre/web/feeds/news.py | 48 +++++++++------ src/calibre/web/feeds/recipes/economist.py | 4 +- src/calibre/web/fetch/simple.py | 32 +++++++--- 13 files changed, 239 insertions(+), 143 deletions(-) diff --git a/src/calibre/constants.py b/src/calibre/constants.py index ce68d8cfc4..64577cc56e 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -8,7 +8,7 @@ Various run time constants. 
''' -import sys, locale, codecs +import sys, locale, codecs, os from calibre.utils.terminfo import TerminalController terminal_controller = TerminalController(sys.stdout) @@ -28,3 +28,36 @@ winerror = __import__('winerror') if iswindows else None win32api = __import__('win32api') if iswindows else None fcntl = None if iswindows else __import__('fcntl') + +################################################################################ +plugins = None +if plugins is None: + # Load plugins + def load_plugins(): + plugins = {} + if isfrozen: + if iswindows: + plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins') + sys.path.insert(1, os.path.dirname(sys.executable)) + elif isosx: + plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins') + elif islinux: + plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins') + sys.path.insert(0, plugin_path) + else: + import pkg_resources + plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins') + sys.path.insert(0, plugin_path) + + for plugin in ['pictureflow', 'lzx', 'msdes'] + \ + (['winutil'] if iswindows else []) + \ + (['usbobserver'] if isosx else []): + try: + p, err = __import__(plugin), '' + except Exception, err: + p = None + err = str(err) + plugins[plugin] = (p, err) + return plugins + + plugins = load_plugins() \ No newline at end of file diff --git a/src/calibre/ebooks/lrf/feeds/convert_from.py b/src/calibre/ebooks/lrf/feeds/convert_from.py index b401d6572d..dd1e21aa98 100644 --- a/src/calibre/ebooks/lrf/feeds/convert_from.py +++ b/src/calibre/ebooks/lrf/feeds/convert_from.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -8,8 +8,9 @@ from calibre.ebooks.lrf.html.convert_from import process_file from calibre.web.feeds.main import option_parser as feeds_option_parser from calibre.web.feeds.main import run_recipe -from calibre.ptempfile import 
PersistentTemporaryDirectory +from calibre.ptempfile import TemporaryDirectory from calibre import sanitize_file_name, strftime +from calibre.ebooks import ConversionError import sys, os @@ -34,25 +35,27 @@ def main(args=sys.argv, notification=None, handler=None): recipe_arg = args[1] if len(args) > 1 else None - tdir = PersistentTemporaryDirectory('_feeds2lrf') - opts.output_dir = tdir - - recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler) - - htmlfile = os.path.join(tdir, 'index.html') - if not os.access(htmlfile, os.R_OK): - raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg) - - lparser = lrf_option_parser('') - ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0] - parser.merge_options(ropts, opts) - - if not opts.output: - ext = '.lrs' if opts.lrs else '.lrf' - fname = recipe.title + strftime(recipe.timefmt)+ext - opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname)) - print 'Generating LRF...' - process_file(htmlfile, opts) + with TemporaryDirectory('_feeds2lrf') as tdir: + opts.output_dir = tdir + + recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler) + + htmlfile = os.path.join(tdir, 'index.html') + if not os.access(htmlfile, os.R_OK): + raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg) + + lparser = lrf_option_parser('') + ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0] + parser.merge_options(ropts, opts) + + if not opts.output: + ext = '.lrs' if opts.lrs else '.lrf' + fname = recipe.title + strftime(recipe.timefmt)+ext + opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname)) + print 'Generating LRF...' 
+ process_file(htmlfile, opts) + if os.stat(opts.output).st_size < 100: # This can happen if the OS runs out of file handles + raise ConversionError(_('Failed to convert downloaded recipe: ')+recipe_arg) return 0 if __name__ == '__main__': diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 5a3a74fd89..51b5035290 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -15,15 +15,24 @@ class MetadataField(object): - def __init__(self, name, is_dc=True): + def __init__(self, name, is_dc=True, formatter=None): self.name = name self.is_dc = is_dc + self.formatter = formatter def __get__(self, obj, type=None): ans = obj.get_metadata_element(self.name) if ans is None: - return u'' - return obj.get_text(ans) + return None + ans = obj.get_text(ans) + if ans is None: + return ans + if self.formatter is not None: + try: + ans = self.formatter(ans) + except: + return None + return ans def __set__(self, obj, val): elem = obj.get_metadata_element(self.name) @@ -60,8 +69,8 @@ class OPF(object): comments = MetadataField('description') category = MetadataField('category') series = MetadataField('series', is_dc=False) - series_index = MetadataField('series_index', is_dc=False) - rating = MetadataField('rating', is_dc=False) + series_index = MetadataField('series_index', is_dc=False, formatter=int) + rating = MetadataField('rating', is_dc=False, formatter=int) def __init__(self, stream, basedir): @@ -194,12 +203,14 @@ def testReading(self): self.assertEqual(opf.author_sort, 'Monkey') self.assertEqual(opf.tags, ['One', 'Two']) self.assertEqual(opf.isbn, '123456789') + self.assertEqual(opf.series, None) + self.assertEqual(opf.series_index, None) def testWriting(self): for test in [('title', 'New & Title'), ('authors', ['One', 'Two']), ('author_sort', "Kitchen"), ('tags', ['Three']), - ('isbn', 'a'), ('rating', '3')]: + ('isbn', 'a'), ('rating', 3)]: setattr(self.opf, *test) 
self.assertEqual(getattr(self.opf, test[0]), test[1]) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 6dfe69fcaa..2d20d05b7a 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -786,6 +786,10 @@ def news_fetched(self, job): if to_device: self.status_bar.showMessage(_('News fetched. Uploading to device.'), 2000) self.persistent_files.append(pt) + try: + os.remove(pt.name) + except: + pass ############################################################################ @@ -846,6 +850,7 @@ def convert_bulk_others(self, rows): of = PersistentTemporaryFile('.lrf') of.close() cover = self.library_view.model().db.cover(row) + cf = None if cover: cf = PersistentTemporaryFile('.jpeg') cf.write(cover) @@ -858,7 +863,7 @@ def convert_bulk_others(self, rows): description=_('Convert book %d of %d (%s)')%(i+1, len(rows), repr(mi.title))) - self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, + self.conversion_jobs[job] = (cf, pt, of, d.output_format, self.library_view.model().db.id(row)) res = [] for row in bad_rows: @@ -882,12 +887,13 @@ def convert_bulk(self, checked): if mi.title: options.title = mi.title if mi.authors: - opts.author = ','.join(mi.authors) + options.author = ','.join(mi.authors) data = None for fmt in ['cbz', 'cbr']: try: data = self.library_view.model().db.format(row, fmt.upper()) - break + if data: + break except: continue @@ -933,7 +939,6 @@ def convert_single_others(self, rows): 'any2lrf', args=[cmdline], description=_('Convert book: ')+d.title()) - self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, d.id) changed = True if changed: @@ -984,14 +989,22 @@ def convert_single(self, checked): self.library_view.model().research() def book_converted(self, job): - of, fmt, book_id = self.conversion_jobs.pop(job)[2:] - if job.exception is not None: - self.job_exception(job) - return - data = open(of.name, 'rb') - self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True) - 
data.close() - self.status_bar.showMessage(job.description + (' completed'), 2000) + cf, pt, of, fmt, book_id = self.conversion_jobs.pop(job) + try: + if job.exception is not None: + self.job_exception(job) + return + data = open(of.name, 'rb') + self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True) + data.close() + self.status_bar.showMessage(job.description + (' completed'), 2000) + finally: + for f in (cf, of, pt): + try: + if os.path.exists(f.name): + os.remove(f.name) + except: + pass #############################View book###################################### diff --git a/src/calibre/library/database.py b/src/calibre/library/database.py index 10f27b1734..ff0c8bb07b 100644 --- a/src/calibre/library/database.py +++ b/src/calibre/library/database.py @@ -976,9 +976,15 @@ def series(self, index, index_is_id=False): return ans[0] def series_index(self, index, index_is_id=False): + ans = None if not index_is_id: - return self.data[index][10] - return self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0] + ans = self.data[index][10] + else: + ans = self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0] + try: + return int(ans) + except: + return 1 def books_in_series(self, series_id): ''' @@ -1229,6 +1235,7 @@ def remove_unused_series(self): self.conn.commit() def set_series_index(self, id, idx): + idx = int(idx) self.conn.execute('UPDATE books SET series_index=? 
WHERE id=?', (int(idx), id)) self.conn.commit() row = self.row(id) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 173108afb8..fae2f73a5c 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -513,6 +513,21 @@ def set_cover(self, id, data): p.loadFromData(data) p.save(path) + def formats(self, index, index_is_id=False): + ''' Return available formats as a comma separated list ''' + id = index if index_is_id else self.id(index) + path = os.path.join(self.library_path, self.path(id, index_is_id=True)) + formats = self.conn.execute('SELECT format FROM data WHERE book=?', (id,)).fetchall() + name = self.conn.execute('SELECT name FROM data WHERE book=?', (id,)).fetchone()[0] + formats = map(lambda x:x[0], formats) + ans = [] + for format in formats: + _format = ('.' + format.lower()) if format else '' + if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK): + ans.append(format) + return ','.join(ans) + + def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'): ''' Return the ebook format as a bytestring or `None` if the format doesn't exist, @@ -529,7 +544,7 @@ def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'): if os.access(path, os.R_OK|os.W_OK): f = open(path, mode) return f if as_file else f.read() - self.remove_format(id, format, index_is_id=True) + self.remove_format(id, format, index_is_id=True) def add_format(self, index, format, stream, index_is_id=False, path=None): id = index if index_is_id else self.id(index) @@ -571,8 +586,10 @@ def remove_format(self, index, format, index_is_id=False): if name: ext = ('.' + format.lower()) if format else '' path = os.path.join(path, name+ext) - if os.access(path, os.W_OK): + try: os.remove(path) + except: + pass self.conn.execute('DELETE FROM data WHERE book=? 
AND format=?', (id, format.upper())) self.conn.commit() @@ -664,6 +681,9 @@ def set_series(self, id, series): self.data.set(row, 9, series) def set_series_index(self, id, idx): + if idx is None: + idx = 1 + idx = int(idx) self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id)) self.conn.commit() row = self.row(id) diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py index 498b012e4b..b7f3b7bfc0 100644 --- a/src/calibre/parallel.py +++ b/src/calibre/parallel.py @@ -25,7 +25,7 @@ is buffered and asynchronous to prevent the job from being IO bound. ''' import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \ - subprocess, socket, collections, binascii, re, thread, tempfile + subprocess, socket, collections, binascii, re, thread, tempfile, atexit from select import select from threading import RLock, Thread, Event from math import ceil @@ -855,8 +855,14 @@ def get_func(name): func = getattr(module, func) return func, kwdargs, notification +_atexit = collections.deque() +def myatexit(func, *args, **kwargs): + _atexit.append((func, args, kwargs)) + def work(client_socket, func, args, kwdargs): sys.stdout.last_report = time.time() + orig = atexit.register + atexit.register = myatexit try: func, kargs, notification = get_func(func) if notification is not None and hasattr(sys.stdout, 'notify'): @@ -867,7 +873,18 @@ def work(client_socket, func, args, kwdargs): sys.stdout.send() return res finally: + atexit.register = orig sys.stdout.last_report = None + while True: + try: + func, args, kwargs = _atexit.pop() + except IndexError: + break + try: + func(*args, **kwargs) + except (Exception, SystemExit): + continue + time.sleep(5) # Give any in progress BufferedSend time to complete diff --git a/src/calibre/ptempfile.py b/src/calibre/ptempfile.py index 00c9976bd2..fd28c87f58 100644 --- a/src/calibre/ptempfile.py +++ b/src/calibre/ptempfile.py @@ -9,30 +9,6 @@ from calibre import __version__, __appname__ -class 
_TemporaryFileWrapper(object): - """ - Temporary file wrapper - - This class provides a wrapper around files opened for - temporary use. In particular, it seeks to automatically - remove the file when the object is deleted. - """ - - def __init__(self, _file, name): - self.file = _file - self.name = name - atexit.register(cleanup, name) - - def __getattr__(self, name): - _file = self.__dict__['file'] - a = getattr(_file, name) - if type(a) != type(0): - setattr(self, name, a) - return a - - def __del__(self): - self.close() - def cleanup(path): try: import os @@ -41,18 +17,36 @@ def cleanup(path): except: pass -def PersistentTemporaryFile(suffix="", prefix="", dir=None): +class PersistentTemporaryFile(object): """ - Return a temporary file that is available even after being closed on + A file-like object that is a temporary file that is available even after being closed on all platforms. It is automatically deleted on normal program termination. - Uses tempfile.mkstemp to create the file. The file is opened in mode 'wb'.
""" - if prefix == None: - prefix = "" - fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, - dir=dir) - _file = os.fdopen(fd, 'w+b') - return _TemporaryFileWrapper(_file, name) + _file = None + + def __init__(self, suffix="", prefix="", dir=None, mode='w+b'): + if prefix == None: + prefix = "" + fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, + dir=dir) + self._file = os.fdopen(fd, 'w+b') + self._name = name + atexit.register(cleanup, name) + + def __getattr__(self, name): + if name == 'name': + return self.__dict__['_name'] + return getattr(self.__dict__['_file'], name) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + def __del__(self): + self.close() + def PersistentTemporaryDirectory(suffix='', prefix='', dir=None): ''' @@ -64,6 +58,9 @@ def PersistentTemporaryDirectory(suffix='', prefix='', dir=None): return tdir class TemporaryDirectory(str): + ''' + A temporary directory to be used in a with statement.
+ ''' def __init__(self, suffix='', prefix='', dir=None): self.suffix = suffix self.prefix = prefix diff --git a/src/calibre/startup.py b/src/calibre/startup.py index a5d4582013..fc2749323a 100644 --- a/src/calibre/startup.py +++ b/src/calibre/startup.py @@ -13,14 +13,14 @@ import __builtin__ __builtin__.__dict__['_'] = lambda s: s -from calibre.constants import iswindows, isosx, islinux, isfrozen,\ - preferred_encoding -from calibre.translations.msgfmt import make +from calibre.constants import iswindows, preferred_encoding, plugins from calibre.utils.config import prefs +from calibre.translations.msgfmt import make _run_once = False if not _run_once: _run_once = True + ################################################################################ # Setup translations @@ -74,38 +74,6 @@ def set_translator(): except: pass - ################################################################################ - # Load plugins - def load_plugins(): - plugins = {} - if isfrozen: - if iswindows: - plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins') - sys.path.insert(1, os.path.dirname(sys.executable)) - elif isosx: - plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins') - elif islinux: - plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins') - sys.path.insert(0, plugin_path) - else: - import pkg_resources - plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins') - sys.path.insert(0, plugin_path) - - for plugin in ['pictureflow', 'lzx', 'msdes'] + \ - (['winutil'] if iswindows else []) + \ - (['usbobserver'] if isosx else []): - try: - p, err = __import__(plugin), '' - except Exception, err: - p = None - err = str(err) - plugins[plugin] = (p, err) - return plugins - - plugins = load_plugins() - - ################################################################################ # Improve builtin path functions to handle unicode sensibly diff --git a/src/calibre/utils/config.py 
b/src/calibre/utils/config.py index 184aea604a..c20a49bb09 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -13,12 +13,11 @@ from optparse import IndentedHelpFormatter from PyQt4.QtCore import QString from calibre.constants import terminal_controller, iswindows, isosx, \ - __appname__, __version__, __author__ + __appname__, __version__, __author__, plugins from calibre.utils.lock import LockError, ExclusiveFile from collections import defaultdict if iswindows: - from calibre import plugins config_dir = plugins['winutil'][0].special_folder_path(plugins['winutil'][0].CSIDL_APPDATA) if not os.access(config_dir, os.W_OK|os.X_OK): config_dir = os.path.expanduser('~') diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 530f15b9ab..f606985d07 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -313,7 +313,9 @@ def index_to_soup(self, url_or_raw): `url_or_raw`: Either a URL or the downloaded index page as a string ''' if re.match(r'\w+://', url_or_raw): - raw = self.browser.open(url_or_raw).read() + f = self.browser.open(url_or_raw) + raw = f.read() + f.close() if not raw: raise RuntimeError('Could not fetch index from %s'%url_or_raw) else: @@ -544,7 +546,10 @@ def feed2index(self, feed): if bn: img = os.path.join(imgdir, 'feed_image_%d%s'%(self.image_counter, os.path.splitext(bn))) try: - open(img, 'wb').write(self.browser.open(feed.image_url).read()) + with open(img, 'wb') as fi: + r = self.browser.open(feed.image_url) + fi.write(r.read()) + r.close() self.image_counter += 1 feed.image_url = img self.image_map[feed.image_url] = img @@ -588,12 +593,11 @@ def fetch_obfuscated_article(self, url, dir, logger, f, a, num_of_feeds): return self._fetch_article(url, dir, logger, f, a, num_of_feeds) def fetch_embedded_article(self, article, dir, logger, 
f, a, num_of_feeds): - pt = PersistentTemporaryFile('_feeds2disk.html') templ = templates.EmbeddedContent() raw = templ.generate(article).render('html') - open(pt.name, 'wb').write(raw) - pt.close() - url = ('file:'+pt.name) if iswindows else ('file://'+pt.name) + with PersistentTemporaryFile('_feeds2disk.html') as f: + f.write(raw) + url = ('file:'+f.name) if iswindows else ('file://'+f.name) return self._fetch_article(url, dir, logger, f, a, num_of_feeds) @@ -618,7 +622,8 @@ def build_index(self): index = os.path.join(self.output_dir, 'index.html') html = self.feeds2index(feeds) - open(index, 'wb').write(html) + with open(index, 'wb') as fi: + fi.write(html) self.jobs = [] for f, feed in enumerate(feeds): @@ -670,7 +675,8 @@ def build_index(self): for f, feed in enumerate(feeds): html = self.feed2index(feed) feed_dir = os.path.join(self.output_dir, 'feed_%d'%f) - open(os.path.join(feed_dir, 'index.html'), 'wb').write(html) + with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi: + fi.write(html) self.create_opf(feeds) self.report_progress(1, _('Feeds downloaded to %s')%index) @@ -689,8 +695,10 @@ def download_cover(self): ext = ext.lower() if ext else 'jpg' self.report_progress(1, _('Downloading cover from %s')%cu) cpath = os.path.join(self.output_dir, 'cover.'+ext) - cfile = open(cpath, 'wb') - cfile.write(self.browser.open(cu).read()) + with open(cpath, 'wb') as cfile: + r = self.browser.open(cu) + cfile.write(r.read()) + r.close() self.cover_path = cpath @@ -729,7 +737,8 @@ def feed_index(num, parent): entries.append(relp.replace(os.sep, '/')) last = sp - src = open(last, 'rb').read().decode('utf-8') + with open(last, 'rb') as fi: + src = fi.read().decode('utf-8') soup = BeautifulSoup(src) body = soup.find('body') if body is not None: @@ -740,7 +749,8 @@ def feed_index(num, parent): center=self.center_navbar) elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') body.insert(len(body.contents), elem) - open(last, 
'wb').write(unicode(soup).encode('utf-8')) + with open(last, 'wb') as fi: + fi.write(unicode(soup).encode('utf-8')) if len(feeds) > 1: for i, f in enumerate(feeds): @@ -755,7 +765,9 @@ def feed_index(num, parent): opf.create_spine(entries) opf.set_toc(toc) - opf.render(open(opf_path, 'wb'), open(ncx_path, 'wb')) + with open(opf_path, 'wb') as opf_file: + with open(ncx_path, 'wb') as ncx_file: + opf.render(opf_file, ncx_file) def article_downloaded(self, request, result): @@ -800,12 +812,13 @@ def parse_feeds(self): else: title, url = obj self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url)) - parsed_feeds.append(feed_from_xml(self.browser.open(url).read(), + f = self.browser.open(url) + parsed_feeds.append(feed_from_xml(f.read(), title=title, oldest_article=self.oldest_article, max_articles_per_feed=self.max_articles_per_feed, get_article_url=self.get_article_url)) - + f.close() return parsed_feeds @classmethod @@ -891,7 +904,8 @@ def create_opf(self): mi = OPFCreator(self.output_dir, mi) mi.create_manifest_from_files_in([self.output_dir]) mi.create_spine([os.path.join(self.output_dir, 'index.html')]) - mi.render(open(os.path.join(self.output_dir, 'index.opf'), 'wb')) + with open(os.path.join(self.output_dir, 'index.opf'), 'wb') as opf_file: + mi.render(opf_file) def download(self): index = os.path.abspath(self.custom_index()) diff --git a/src/calibre/web/feeds/recipes/economist.py b/src/calibre/web/feeds/recipes/economist.py index 8794886d21..3c75bd4237 100644 --- a/src/calibre/web/feeds/recipes/economist.py +++ b/src/calibre/web/feeds/recipes/economist.py @@ -33,14 +33,14 @@ def get_browser(self): return br def parse_index(self): - soup = BeautifulSoup(self.browser.open(self.INDEX).read(), + soup = BeautifulSoup(self.browser.open(self.INDEX).read(), convertEntities=BeautifulSoup.HTML_ENTITIES) index_started = False feeds = {} ans = [] key = None for tag in soup.findAll(['h1', 'h2']): - text = ''.join(tag.findAll(text=True)) + text = 
''.join(tag.findAll(text=True)) if tag.name == 'h1': if 'Classified ads' in text: break diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 749c57bcde..9b8f666c4d 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -44,11 +44,10 @@ def save_soup(soup, target): if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path): tag[key] = relpath(path, selfdir).replace(os.sep, '/') - f = open(target, 'wb') html = unicode(soup) - f.write(html.encode('utf-8')) - f.close() - + with open(target, 'wb') as f: + f.write(html.encode('utf-8')) + class RecursiveFetcher(object, LoggingInterface): LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in @@ -59,6 +58,7 @@ class RecursiveFetcher(object, LoggingInterface): # ) # ) CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE) + default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__ def __init__(self, options, logger, image_map={}, css_map={}, job_info=None): LoggingInterface.__init__(self, logger) @@ -99,7 +99,7 @@ def __init__(self, options, logger, image_map={}, css_map={}, job_info=None): def get_soup(self, src): nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) nmassage.extend(self.preprocess_regexps) - soup = BeautifulSoup(xml_to_unicode(src, self.verbose)[0], markupMassage=nmassage) + soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage) if self.keep_only_tags: body = Tag(soup, 'body') @@ -145,6 +145,8 @@ def fetch_url(self, url): if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer self.log_debug('Connection reset by peer retrying in 1 second.') time.sleep(1) + if hasattr(f, 'close'): + f.close() f = self.browser.open(url) else: raise err @@ -196,11 +198,14 @@ def process_stylesheets(self, soup, baseurl): except Exception, err: self.log_warning('Could not fetch stylesheet %s', iurl) self.log_debug('Error: %s', 
str(err), exc_info=True) + if hasattr(f, 'close'): f.close() continue stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') with self.stylemap_lock: self.stylemap[iurl] = stylepath - open(stylepath, 'wb').write(f.read()) + with open(stylepath, 'wb') as x: + x.write(f.read()) + f.close() tag['href'] = stylepath else: for ns in tag.findAll(text=True): @@ -219,12 +224,15 @@ def process_stylesheets(self, soup, baseurl): except Exception, err: self.log_warning('Could not fetch stylesheet %s', iurl) self.log_debug('Error: %s', str(err), exc_info=True) + if hasattr(f, 'close'): f.close() continue c += 1 stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') with self.stylemap_lock: self.stylemap[iurl] = stylepath - open(stylepath, 'wb').write(f.read()) + with open(stylepath, 'wb') as x: + x.write(f.read()) + f.close() ns.replaceWith(src.replace(m.group(1), stylepath)) @@ -250,6 +258,7 @@ def process_images(self, soup, baseurl): except Exception, err: self.log_warning('Could not fetch image %s', iurl) self.log_debug('Error: %s', str(err), exc_info=True) + if hasattr(f, 'close'): f.close() continue c += 1 fname = sanitize_file_name('img'+str(c)+ext) @@ -258,7 +267,9 @@ def process_images(self, soup, baseurl): imgpath = os.path.join(diskpath, fname) with self.imagemap_lock: self.imagemap[iurl] = imgpath - open(imgpath, 'wb').write(f.read()) + with open(imgpath, 'wb') as x: + x.write(f.read()) + f.close() tag['src'] = imgpath def absurl(self, baseurl, tag, key, filter=True): @@ -327,6 +338,7 @@ def process_links(self, soup, baseurl, recursion_level, into_dir='links'): self.current_dir = linkdiskpath f = self.fetch_url(iurl) dsrc = f.read() + f.close() if len(dsrc) == 0 or \ len(re.compile('', re.DOTALL).sub('', dsrc).strip()) == 0: raise ValueError('No content at URL %s'%iurl) @@ -378,7 +390,9 @@ def process_links(self, soup, baseurl, recursion_level, into_dir='links'): return res def __del__(self): - socket.setdefaulttimeout(self.default_timeout) + dt = getattr(self, 
'default_timeout', None) + if dt is not None: + socket.setdefaulttimeout(dt) def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.com')): parser = OptionParser(usage=usage)