IGN: Remove temporary files as soon as possible, rather than only at program exit. Fixes for various minor regressions.

This commit is contained in:
Kovid Goyal 2008-09-11 17:08:48 -07:00
parent 6fee09b9d2
commit a679086e53
13 changed files with 239 additions and 143 deletions

View file

@ -8,7 +8,7 @@
Various run time constants.
'''
import sys, locale, codecs
import sys, locale, codecs, os
from calibre.utils.terminfo import TerminalController
terminal_controller = TerminalController(sys.stdout)
@ -28,3 +28,36 @@
winerror = __import__('winerror') if iswindows else None
win32api = __import__('win32api') if iswindows else None
fcntl = None if iswindows else __import__('fcntl')
################################################################################
plugins = None
if plugins is None:
# Load plugins
# NOTE(review): this is a diff rendering; the original indentation of the
# block below has been stripped, so structure must be inferred from syntax.
def load_plugins():
# Import calibre's compiled extension modules and record any import errors.
# Frozen builds keep the plugins next to the executable (or inside the
# frameworks/frozen path on OS X/Linux); source installs locate them via
# pkg_resources. Returns a dict: plugin name -> (module or None, error str).
plugins = {}
if isfrozen:
if iswindows:
plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins')
sys.path.insert(1, os.path.dirname(sys.executable))
elif isosx:
plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins')
elif islinux:
plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins')
sys.path.insert(0, plugin_path)
else:
import pkg_resources
plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
sys.path.insert(0, plugin_path)
# Platform-specific plugins are only attempted on their own platform.
for plugin in ['pictureflow', 'lzx', 'msdes'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:
p, err = __import__(plugin), ''
except Exception, err:
# Record the failure instead of raising so one broken plugin does
# not prevent the rest of calibre from starting.
p = None
err = str(err)
plugins[plugin] = (p, err)
return plugins
plugins = load_plugins()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
@ -8,8 +8,9 @@
from calibre.ebooks.lrf.html.convert_from import process_file
from calibre.web.feeds.main import option_parser as feeds_option_parser
from calibre.web.feeds.main import run_recipe
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ptempfile import TemporaryDirectory
from calibre import sanitize_file_name, strftime
from calibre.ebooks import ConversionError
import sys, os
@ -34,25 +35,27 @@ def main(args=sys.argv, notification=None, handler=None):
recipe_arg = args[1] if len(args) > 1 else None
tdir = PersistentTemporaryDirectory('_feeds2lrf')
opts.output_dir = tdir
recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler)
htmlfile = os.path.join(tdir, 'index.html')
if not os.access(htmlfile, os.R_OK):
raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg)
lparser = lrf_option_parser('')
ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0]
parser.merge_options(ropts, opts)
if not opts.output:
ext = '.lrs' if opts.lrs else '.lrf'
fname = recipe.title + strftime(recipe.timefmt)+ext
opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
print 'Generating LRF...'
process_file(htmlfile, opts)
with TemporaryDirectory('_feeds2lrf') as tdir:
opts.output_dir = tdir
recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler)
htmlfile = os.path.join(tdir, 'index.html')
if not os.access(htmlfile, os.R_OK):
raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg)
lparser = lrf_option_parser('')
ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0]
parser.merge_options(ropts, opts)
if not opts.output:
ext = '.lrs' if opts.lrs else '.lrf'
fname = recipe.title + strftime(recipe.timefmt)+ext
opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
print 'Generating LRF...'
process_file(htmlfile, opts)
if os.stat(opts.output).st_size < 100: # This can happen if the OS runs out of file handles
raise ConversionError(_('Failed to convert downloaded recipe: ')+recipe_arg)
return 0
if __name__ == '__main__':

View file

@ -15,15 +15,24 @@
class MetadataField(object):
def __init__(self, name, is_dc=True):
def __init__(self, name, is_dc=True, formatter=None):
self.name = name
self.is_dc = is_dc
self.formatter = formatter
def __get__(self, obj, type=None):
ans = obj.get_metadata_element(self.name)
if ans is None:
return u''
return obj.get_text(ans)
return None
ans = obj.get_text(ans)
if ans is None:
return ans
if self.formatter is not None:
try:
ans = self.formatter(ans)
except:
return None
return ans
def __set__(self, obj, val):
elem = obj.get_metadata_element(self.name)
@ -60,8 +69,8 @@ class OPF(object):
comments = MetadataField('description')
category = MetadataField('category')
series = MetadataField('series', is_dc=False)
series_index = MetadataField('series_index', is_dc=False)
rating = MetadataField('rating', is_dc=False)
series_index = MetadataField('series_index', is_dc=False, formatter=int)
rating = MetadataField('rating', is_dc=False, formatter=int)
def __init__(self, stream, basedir):
@ -194,12 +203,14 @@ def testReading(self):
self.assertEqual(opf.author_sort, 'Monkey')
self.assertEqual(opf.tags, ['One', 'Two'])
self.assertEqual(opf.isbn, '123456789')
self.assertEqual(opf.series, None)
self.assertEqual(opf.series_index, None)
def testWriting(self):
for test in [('title', 'New & Title'), ('authors', ['One', 'Two']),
('author_sort', "Kitchen"), ('tags', ['Three']),
('isbn', 'a'), ('rating', '3')]:
('isbn', 'a'), ('rating', 3)]:
setattr(self.opf, *test)
self.assertEqual(getattr(self.opf, test[0]), test[1])

View file

@ -786,6 +786,10 @@ def news_fetched(self, job):
if to_device:
self.status_bar.showMessage(_('News fetched. Uploading to device.'), 2000)
self.persistent_files.append(pt)
try:
os.remove(pt.name)
except:
pass
############################################################################
@ -846,6 +850,7 @@ def convert_bulk_others(self, rows):
of = PersistentTemporaryFile('.lrf')
of.close()
cover = self.library_view.model().db.cover(row)
cf = None
if cover:
cf = PersistentTemporaryFile('.jpeg')
cf.write(cover)
@ -858,7 +863,7 @@ def convert_bulk_others(self, rows):
description=_('Convert book %d of %d (%s)')%(i+1, len(rows), repr(mi.title)))
self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format,
self.conversion_jobs[job] = (cf, pt, of, d.output_format,
self.library_view.model().db.id(row))
res = []
for row in bad_rows:
@ -882,12 +887,13 @@ def convert_bulk(self, checked):
if mi.title:
options.title = mi.title
if mi.authors:
opts.author = ','.join(mi.authors)
options.author = ','.join(mi.authors)
data = None
for fmt in ['cbz', 'cbr']:
try:
data = self.library_view.model().db.format(row, fmt.upper())
break
if data:
break
except:
continue
@ -933,7 +939,6 @@ def convert_single_others(self, rows):
'any2lrf', args=[cmdline],
description=_('Convert book: ')+d.title())
self.conversion_jobs[job] = (d.cover_file, pt, of, d.output_format, d.id)
changed = True
if changed:
@ -984,14 +989,22 @@ def convert_single(self, checked):
self.library_view.model().research()
def book_converted(self, job):
of, fmt, book_id = self.conversion_jobs.pop(job)[2:]
if job.exception is not None:
self.job_exception(job)
return
data = open(of.name, 'rb')
self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True)
data.close()
self.status_bar.showMessage(job.description + (' completed'), 2000)
cf, pt, of, fmt, book_id = self.conversion_jobs.pop(job)
try:
if job.exception is not None:
self.job_exception(job)
return
data = open(of.name, 'rb')
self.library_view.model().db.add_format(book_id, fmt, data, index_is_id=True)
data.close()
self.status_bar.showMessage(job.description + (' completed'), 2000)
finally:
for f in (cf, of, pt):
try:
if os.path.exists(f.name):
os.remove(f.name)
except:
pass
#############################View book######################################

View file

@ -976,9 +976,15 @@ def series(self, index, index_is_id=False):
return ans[0]
def series_index(self, index, index_is_id=False):
ans = None
if not index_is_id:
return self.data[index][10]
return self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0]
ans = self.data[index][10]
else:
ans = self.conn.execute('SELECT series_index FROM books WHERE id=?', (index,)).fetchone()[0]
try:
return int(ans)
except:
return 1
def books_in_series(self, series_id):
'''
@ -1229,6 +1235,7 @@ def remove_unused_series(self):
self.conn.commit()
def set_series_index(self, id, idx):
idx = int(idx)
self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id))
self.conn.commit()
row = self.row(id)

View file

@ -513,6 +513,21 @@ def set_cover(self, id, data):
p.loadFromData(data)
p.save(path)
def formats(self, index, index_is_id=False):
''' Return available formats as a comma separated list '''
id = index if index_is_id else self.id(index)
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
formats = self.conn.execute('SELECT format FROM data WHERE book=?', (id,)).fetchall()
name = self.conn.execute('SELECT name FROM data WHERE book=?', (id,)).fetchone()[0]
formats = map(lambda x:x[0], formats)
ans = []
for format in formats:
_format = ('.' + format.lower()) if format else ''
if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK):
ans.append(format)
return ','.join(ans)
def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'):
'''
Return the ebook format as a bytestring or `None` if the format doesn't exist,
@ -529,7 +544,7 @@ def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'):
if os.access(path, os.R_OK|os.W_OK):
f = open(path, mode)
return f if as_file else f.read()
self.remove_format(id, format, index_is_id=True)
self.remove_format(id, format, index_is_id=True)
def add_format(self, index, format, stream, index_is_id=False, path=None):
id = index if index_is_id else self.id(index)
@ -571,8 +586,10 @@ def remove_format(self, index, format, index_is_id=False):
if name:
ext = ('.' + format.lower()) if format else ''
path = os.path.join(path, name+ext)
if os.access(path, os.W_OK):
try:
os.remove(path)
except:
pass
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
self.conn.commit()
@ -664,6 +681,9 @@ def set_series(self, id, series):
self.data.set(row, 9, series)
def set_series_index(self, id, idx):
if idx is None:
idx = 1
idx = int(idx)
self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (int(idx), id))
self.conn.commit()
row = self.row(id)

View file

@ -25,7 +25,7 @@
is buffered and asynchronous to prevent the job from being IO bound.
'''
import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \
subprocess, socket, collections, binascii, re, thread, tempfile
subprocess, socket, collections, binascii, re, thread, tempfile, atexit
from select import select
from threading import RLock, Thread, Event
from math import ceil
@ -855,8 +855,14 @@ def get_func(name):
func = getattr(module, func)
return func, kwdargs, notification
_atexit = collections.deque()
def myatexit(func, *args, **kwargs):
_atexit.append((func, args, kwargs))
def work(client_socket, func, args, kwdargs):
sys.stdout.last_report = time.time()
orig = atexit.register
atexit.register = myatexit
try:
func, kargs, notification = get_func(func)
if notification is not None and hasattr(sys.stdout, 'notify'):
@ -867,7 +873,18 @@ def work(client_socket, func, args, kwdargs):
sys.stdout.send()
return res
finally:
atexit.register = orig
sys.stdout.last_report = None
while True:
try:
func, args, kwargs = _atexit.pop()
except IndexError:
break
try:
func(*args, **kwargs)
except (Exception, SystemExit):
continue
time.sleep(5) # Give any in progress BufferedSend time to complete

View file

@ -9,30 +9,6 @@
from calibre import __version__, __appname__
class _TemporaryFileWrapper(object):
"""
Temporary file wrapper
This class provides a wrapper around files opened for
temporary use. In particular, it seeks to automatically
remove the file when the object is deleted.
"""
# NOTE(review): per the hunk header and commit message, this class is being
# deleted by this commit, superseded by a class-based PersistentTemporaryFile.
def __init__(self, _file, name):
self.file = _file
self.name = name
# Register deletion of the file at interpreter exit; cleanup() is
# defined elsewhere in the same module.
atexit.register(cleanup, name)
def __getattr__(self, name):
# Delegate unknown attribute lookups to the wrapped file object.
# Non-integer attributes are cached on self so later lookups bypass
# __getattr__ entirely.
_file = self.__dict__['file']
a = getattr(_file, name)
if type(a) != type(0):
setattr(self, name, a)
return a
def __del__(self):
# Close (but do not delete) the file on garbage collection; deletion
# is handled by the atexit-registered cleanup().
self.close()
def cleanup(path):
try:
import os
@ -41,18 +17,36 @@ def cleanup(path):
except:
pass
def PersistentTemporaryFile(suffix="", prefix="", dir=None):
class PersistentTemporaryFile(object):
"""
Return a temporary file that is available even after being closed on
A file-like object that is a temporary file that is available even after being closed on
all platforms. It is automatically deleted on normal program termination.
Uses tempfile.mkstemp to create the file. The file is opened in mode 'wb'.
"""
if prefix == None:
prefix = ""
fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
dir=dir)
_file = os.fdopen(fd, 'w+b')
return _TemporaryFileWrapper(_file, name)
_file = None
def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
if prefix == None:
prefix = ""
fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
dir=dir)
self._file = os.fdopen(fd, 'w+b')
self._name = name
atexit.register(cleanup, name)
def __getattr__(self, name):
if name == 'name':
return self.__dict__['_name']
return getattr(self.__dict__['_file'], name)
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
def __del__(self):
self.close()
def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
'''
@ -64,6 +58,9 @@ def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
return tdir
class TemporaryDirectory(str):
'''
A temporary directory to be used in a with statement.
'''
def __init__(self, suffix='', prefix='', dir=None):
self.suffix = suffix
self.prefix = prefix

View file

@ -13,14 +13,14 @@
import __builtin__
__builtin__.__dict__['_'] = lambda s: s
from calibre.constants import iswindows, isosx, islinux, isfrozen,\
preferred_encoding
from calibre.translations.msgfmt import make
from calibre.constants import iswindows, preferred_encoding, plugins
from calibre.utils.config import prefs
from calibre.translations.msgfmt import make
_run_once = False
if not _run_once:
_run_once = True
################################################################################
# Setup translations
@ -74,38 +74,6 @@ def set_translator():
except:
pass
################################################################################
# Load plugins
def load_plugins():
plugins = {}
if isfrozen:
if iswindows:
plugin_path = os.path.join(os.path.dirname(sys.executable), 'plugins')
sys.path.insert(1, os.path.dirname(sys.executable))
elif isosx:
plugin_path = os.path.join(getattr(sys, 'frameworks_dir'), 'plugins')
elif islinux:
plugin_path = os.path.join(getattr(sys, 'frozen_path'), 'plugins')
sys.path.insert(0, plugin_path)
else:
import pkg_resources
plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
sys.path.insert(0, plugin_path)
for plugin in ['pictureflow', 'lzx', 'msdes'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:
p, err = __import__(plugin), ''
except Exception, err:
p = None
err = str(err)
plugins[plugin] = (p, err)
return plugins
plugins = load_plugins()
################################################################################
# Improve builtin path functions to handle unicode sensibly

View file

@ -13,12 +13,11 @@
from optparse import IndentedHelpFormatter
from PyQt4.QtCore import QString
from calibre.constants import terminal_controller, iswindows, isosx, \
__appname__, __version__, __author__
__appname__, __version__, __author__, plugins
from calibre.utils.lock import LockError, ExclusiveFile
from collections import defaultdict
if iswindows:
from calibre import plugins
config_dir = plugins['winutil'][0].special_folder_path(plugins['winutil'][0].CSIDL_APPDATA)
if not os.access(config_dir, os.W_OK|os.X_OK):
config_dir = os.path.expanduser('~')

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
@ -313,7 +313,9 @@ def index_to_soup(self, url_or_raw):
`url_or_raw`: Either a URL or the downloaded index page as a string
'''
if re.match(r'\w+://', url_or_raw):
raw = self.browser.open(url_or_raw).read()
f = self.browser.open(url_or_raw)
raw = f.read()
f.close()
if not raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else:
@ -544,7 +546,10 @@ def feed2index(self, feed):
if bn:
img = os.path.join(imgdir, 'feed_image_%d%s'%(self.image_counter, os.path.splitext(bn)))
try:
open(img, 'wb').write(self.browser.open(feed.image_url).read())
with open(img, 'wb') as fi:
r = self.browser.open(feed.image_url)
fi.write(r.read())
r.close()
self.image_counter += 1
feed.image_url = img
self.image_map[feed.image_url] = img
@ -588,12 +593,11 @@ def fetch_obfuscated_article(self, url, dir, logger, f, a, num_of_feeds):
return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds):
pt = PersistentTemporaryFile('_feeds2disk.html')
templ = templates.EmbeddedContent()
raw = templ.generate(article).render('html')
open(pt.name, 'wb').write(raw)
pt.close()
url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
with PersistentTemporaryFile('_feeds2disk.html') as f:
f.write(raw)
url = ('file:'+f.name) if iswindows else ('file://'+f.name)
return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
@ -618,7 +622,8 @@ def build_index(self):
index = os.path.join(self.output_dir, 'index.html')
html = self.feeds2index(feeds)
open(index, 'wb').write(html)
with open(index, 'wb') as fi:
fi.write(html)
self.jobs = []
for f, feed in enumerate(feeds):
@ -670,7 +675,8 @@ def build_index(self):
for f, feed in enumerate(feeds):
html = self.feed2index(feed)
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
open(os.path.join(feed_dir, 'index.html'), 'wb').write(html)
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
fi.write(html)
self.create_opf(feeds)
self.report_progress(1, _('Feeds downloaded to %s')%index)
@ -689,8 +695,10 @@ def download_cover(self):
ext = ext.lower() if ext else 'jpg'
self.report_progress(1, _('Downloading cover from %s')%cu)
cpath = os.path.join(self.output_dir, 'cover.'+ext)
cfile = open(cpath, 'wb')
cfile.write(self.browser.open(cu).read())
with open(cpath, 'wb') as cfile:
r = self.browser.open(cu)
cfile.write(r.read())
r.close()
self.cover_path = cpath
@ -729,7 +737,8 @@ def feed_index(num, parent):
entries.append(relp.replace(os.sep, '/'))
last = sp
src = open(last, 'rb').read().decode('utf-8')
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
@ -740,7 +749,8 @@ def feed_index(num, parent):
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
open(last, 'wb').write(unicode(soup).encode('utf-8'))
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) > 1:
for i, f in enumerate(feeds):
@ -755,7 +765,9 @@ def feed_index(num, parent):
opf.create_spine(entries)
opf.set_toc(toc)
opf.render(open(opf_path, 'wb'), open(ncx_path, 'wb'))
with open(opf_path, 'wb') as opf_file:
with open(ncx_path, 'wb') as ncx_file:
opf.render(opf_file, ncx_file)
def article_downloaded(self, request, result):
@ -800,12 +812,13 @@ def parse_feeds(self):
else:
title, url = obj
self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
parsed_feeds.append(feed_from_xml(self.browser.open(url).read(),
f = self.browser.open(url)
parsed_feeds.append(feed_from_xml(f.read(),
title=title,
oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed,
get_article_url=self.get_article_url))
f.close()
return parsed_feeds
@classmethod
@ -891,7 +904,8 @@ def create_opf(self):
mi = OPFCreator(self.output_dir, mi)
mi.create_manifest_from_files_in([self.output_dir])
mi.create_spine([os.path.join(self.output_dir, 'index.html')])
mi.render(open(os.path.join(self.output_dir, 'index.opf'), 'wb'))
with open(os.path.join(self.output_dir, 'index.opf'), 'wb') as opf_file:
mi.render(opf_file)
def download(self):
index = os.path.abspath(self.custom_index())

View file

@ -33,14 +33,14 @@ def get_browser(self):
return br
def parse_index(self):
soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
convertEntities=BeautifulSoup.HTML_ENTITIES)
index_started = False
feeds = {}
ans = []
key = None
for tag in soup.findAll(['h1', 'h2']):
text = ''.join(tag.findAll(text=True))
text = ''.join(tag.findAll(text=True))
if tag.name == 'h1':
if 'Classified ads' in text:
break

View file

@ -44,11 +44,10 @@ def save_soup(soup, target):
if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path):
tag[key] = relpath(path, selfdir).replace(os.sep, '/')
f = open(target, 'wb')
html = unicode(soup)
f.write(html.encode('utf-8'))
f.close()
with open(target, 'wb') as f:
f.write(html.encode('utf-8'))
class RecursiveFetcher(object, LoggingInterface):
LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
@ -59,6 +58,7 @@ class RecursiveFetcher(object, LoggingInterface):
# )
# )
CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE)
default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__
def __init__(self, options, logger, image_map={}, css_map={}, job_info=None):
LoggingInterface.__init__(self, logger)
@ -99,7 +99,7 @@ def __init__(self, options, logger, image_map={}, css_map={}, job_info=None):
def get_soup(self, src):
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
nmassage.extend(self.preprocess_regexps)
soup = BeautifulSoup(xml_to_unicode(src, self.verbose)[0], markupMassage=nmassage)
soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage)
if self.keep_only_tags:
body = Tag(soup, 'body')
@ -145,6 +145,8 @@ def fetch_url(self, url):
if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer
self.log_debug('Connection reset by peer retrying in 1 second.')
time.sleep(1)
if hasattr(f, 'close'):
f.close()
f = self.browser.open(url)
else:
raise err
@ -196,11 +198,14 @@ def process_stylesheets(self, soup, baseurl):
except Exception, err:
self.log_warning('Could not fetch stylesheet %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
if hasattr(f, 'close'): f.close()
continue
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
with self.stylemap_lock:
self.stylemap[iurl] = stylepath
open(stylepath, 'wb').write(f.read())
with open(stylepath, 'wb') as x:
x.write(f.read())
f.close()
tag['href'] = stylepath
else:
for ns in tag.findAll(text=True):
@ -219,12 +224,15 @@ def process_stylesheets(self, soup, baseurl):
except Exception, err:
self.log_warning('Could not fetch stylesheet %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
if hasattr(f, 'close'): f.close()
continue
c += 1
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
with self.stylemap_lock:
self.stylemap[iurl] = stylepath
open(stylepath, 'wb').write(f.read())
with open(stylepath, 'wb') as x:
x.write(f.read())
f.close()
ns.replaceWith(src.replace(m.group(1), stylepath))
@ -250,6 +258,7 @@ def process_images(self, soup, baseurl):
except Exception, err:
self.log_warning('Could not fetch image %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
if hasattr(f, 'close'): f.close()
continue
c += 1
fname = sanitize_file_name('img'+str(c)+ext)
@ -258,7 +267,9 @@ def process_images(self, soup, baseurl):
imgpath = os.path.join(diskpath, fname)
with self.imagemap_lock:
self.imagemap[iurl] = imgpath
open(imgpath, 'wb').write(f.read())
with open(imgpath, 'wb') as x:
x.write(f.read())
f.close()
tag['src'] = imgpath
def absurl(self, baseurl, tag, key, filter=True):
@ -327,6 +338,7 @@ def process_links(self, soup, baseurl, recursion_level, into_dir='links'):
self.current_dir = linkdiskpath
f = self.fetch_url(iurl)
dsrc = f.read()
f.close()
if len(dsrc) == 0 or \
len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
raise ValueError('No content at URL %s'%iurl)
@ -378,7 +390,9 @@ def process_links(self, soup, baseurl, recursion_level, into_dir='links'):
return res
def __del__(self):
socket.setdefaulttimeout(self.default_timeout)
dt = getattr(self, 'default_timeout', None)
if dt is not None:
socket.setdefaulttimeout(dt)
def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.com')):
parser = OptionParser(usage=usage)