mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-05 19:03:39 +02:00
Saving to disk is now fully customizable. The user can control the file and directory structure of the saved files as well as various other aspects of the save process.
This commit is contained in:
parent
6973d80602
commit
181802da53
10 changed files with 534 additions and 353 deletions
|
|
@ -153,14 +153,17 @@ def read_metadata(paths, result_queue, chunk=50, spare_server=None):
|
|||
t.start()
|
||||
return t
|
||||
|
||||
|
||||
###########################################################################
|
||||
############ Saving #####################
|
||||
###########################################################################
|
||||
|
||||
class SaveWorker(Thread):
|
||||
|
||||
def __init__(self, result_queue, db, ids, path, by_author=False,
|
||||
single_dir=False, single_format=None, spare_server=None):
|
||||
def __init__(self, result_queue, db, ids, path, opts, spare_server=None):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
self.path, self.by_author = path, by_author
|
||||
self.single_dir, self.single_format = single_dir, single_format
|
||||
self.path, self.opts = path, opts
|
||||
self.ids = ids
|
||||
self.library_path = db.library_path
|
||||
self.canceled = False
|
||||
|
|
@ -170,17 +173,22 @@ def __init__(self, result_queue, db, ids, path, by_author=False,
|
|||
self.start()
|
||||
|
||||
def run(self):
|
||||
from calibre.library.save_to_disk import config
|
||||
server = Server() if self.spare_server is None else self.spare_server
|
||||
ids = set(self.ids)
|
||||
tasks = server.split(list(ids))
|
||||
jobs = set([])
|
||||
c = config()
|
||||
recs = {}
|
||||
for pref in c.preferences:
|
||||
recs[pref.name] = getattr(self.opts, pref.name)
|
||||
|
||||
for i, task in enumerate(tasks):
|
||||
tids = [x[-1] for x in task]
|
||||
job = ParallelJob('save_book',
|
||||
'Save books (%d of %d)'%(i, len(tasks)),
|
||||
lambda x,y:x,
|
||||
args=[tids, self.library_path, self.path, self.single_dir,
|
||||
self.single_format, self.by_author])
|
||||
args=[tids, self.library_path, self.path, recs])
|
||||
jobs.add(job)
|
||||
server.add_job(job)
|
||||
|
||||
|
|
@ -192,9 +200,9 @@ def run(self):
|
|||
job.update(consume_notifications=False)
|
||||
while True:
|
||||
try:
|
||||
id, title, ok = job.notifications.get_nowait()[0]
|
||||
id, title, ok, tb = job.notifications.get_nowait()[0]
|
||||
if id in ids:
|
||||
self.result_queue.put((id, title, ok))
|
||||
self.result_queue.put((id, title, ok, tb))
|
||||
ids.remove(id)
|
||||
except Empty:
|
||||
break
|
||||
|
|
@ -221,23 +229,18 @@ def run(self):
|
|||
pass
|
||||
|
||||
|
||||
def save_book(task, library_path, path, single_dir, single_format,
|
||||
by_author, notification=lambda x,y:x):
|
||||
def save_book(task, library_path, path, recs, notification=lambda x,y:x):
|
||||
from calibre.library.database2 import LibraryDatabase2
|
||||
db = LibraryDatabase2(library_path)
|
||||
from calibre.library.save_to_disk import config, save_to_disk
|
||||
opts = config().parse()
|
||||
for name in recs:
|
||||
setattr(opts, name, recs[name])
|
||||
|
||||
def callback(id, title):
|
||||
notification((id, title, True))
|
||||
|
||||
def callback(id, title, failed, tb):
|
||||
notification((id, title, not failed, tb))
|
||||
return True
|
||||
|
||||
if single_format is None:
|
||||
failures = []
|
||||
db.export_to_dir(path, task, index_is_id=True, byauthor=by_author,
|
||||
callback=callback, single_dir=single_dir)
|
||||
else:
|
||||
failures = db.export_single_format_to_dir(path, task, single_format,
|
||||
index_is_id=True, callback=callback)
|
||||
|
||||
for id, title in failures:
|
||||
notification((id, title, False))
|
||||
save_to_disk(db, task, path, opts, callback)
|
||||
|
||||
|
|
|
|||
|
|
@ -295,13 +295,13 @@ def infos(self):
|
|||
|
||||
class Saver(QObject):
|
||||
|
||||
def __init__(self, parent, db, callback, rows, path,
|
||||
by_author=False, single_dir=False, single_format=None,
|
||||
def __init__(self, parent, db, callback, rows, path, opts,
|
||||
spare_server=None):
|
||||
QObject.__init__(self, parent)
|
||||
self.pd = ProgressDialog(_('Saving...'), parent=parent)
|
||||
self.spare_server = spare_server
|
||||
self.db = db
|
||||
self.opts = opts
|
||||
self.pd.setModal(True)
|
||||
self.pd.show()
|
||||
self.pd.set_min(0)
|
||||
|
|
@ -315,8 +315,8 @@ def __init__(self, parent, db, callback, rows, path,
|
|||
self.failures = set([])
|
||||
|
||||
from calibre.ebooks.metadata.worker import SaveWorker
|
||||
self.worker = SaveWorker(self.rq, db, self.ids, path, by_author,
|
||||
single_dir, single_format, spare_server=self.spare_server)
|
||||
self.worker = SaveWorker(self.rq, db, self.ids, path, self.opts,
|
||||
spare_server=self.spare_server)
|
||||
self.connect(self.pd, SIGNAL('canceled()'), self.canceled)
|
||||
self.timer = QTimer(self)
|
||||
self.connect(self.timer, SIGNAL('timeout()'), self.update)
|
||||
|
|
@ -344,15 +344,14 @@ def update(self):
|
|||
return
|
||||
|
||||
try:
|
||||
id, title, ok = self.rq.get_nowait()
|
||||
id, title, ok, tb = self.rq.get_nowait()
|
||||
except Empty:
|
||||
return
|
||||
self.pd.value += 1
|
||||
self.ids.remove(id)
|
||||
if not isinstance(title, unicode):
|
||||
title = str(title).decode('utf-8', preferred_encoding)
|
||||
title = str(title).decode(preferred_encoding, 'replace')
|
||||
self.pd.set_msg(_('Saved')+' '+title)
|
||||
if not ok:
|
||||
self.failures.add(title)
|
||||
|
||||
self.failures.add((title, tb))
|
||||
|
||||
|
|
|
|||
|
|
@ -52,13 +52,10 @@ def __init__(self, parent=None):
|
|||
table = u'<table>%s</table>'%(u'\n'.join(rows))
|
||||
self.template_variables.setText(table)
|
||||
|
||||
self.opt_read_metadata_from_filename.setChecked(prefs['read_file_metadata'])
|
||||
self.metadata_box.setEnabled(self.opt_read_metadata_from_filename.isChecked())
|
||||
self.opt_read_metadata_from_filename.setChecked(not prefs['read_file_metadata'])
|
||||
self.filename_pattern = FilenamePattern(self)
|
||||
self.metadata_box.layout().insertWidget(0, self.filename_pattern)
|
||||
|
||||
|
||||
|
||||
def validate(self):
|
||||
tmpl = preprocess_template(self.opt_template.text())
|
||||
fa = {}
|
||||
|
|
@ -82,7 +79,7 @@ def save_settings(self):
|
|||
for x in ('formats', 'template', 'timefmt'):
|
||||
c.set(x, unicode(getattr(self, 'opt_'+x).text()).strip())
|
||||
self.opt_template.save_history('save_to_disk_template_history')
|
||||
prefs['read_file_metadata'] = bool(self.opt_read_metadata_from_filename.isChecked())
|
||||
prefs['read_file_metadata'] = not bool(self.opt_read_metadata_from_filename.isChecked())
|
||||
pattern = self.filename_pattern.commit()
|
||||
prefs['filename_pattern'] = pattern
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@
|
|||
<item>
|
||||
<widget class="QCheckBox" name="opt_read_metadata_from_filename">
|
||||
<property name="text">
|
||||
<string>Read metadata from &file name</string>
|
||||
<string>Read metadata only from &file name</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
@ -174,22 +174,5 @@
|
|||
</customwidget>
|
||||
</customwidgets>
|
||||
<resources/>
|
||||
<connections>
|
||||
<connection>
|
||||
<sender>opt_read_metadata_from_filename</sender>
|
||||
<signal>toggled(bool)</signal>
|
||||
<receiver>metadata_box</receiver>
|
||||
<slot>setEnabled(bool)</slot>
|
||||
<hints>
|
||||
<hint type="sourcelabel">
|
||||
<x>159</x>
|
||||
<y>81</y>
|
||||
</hint>
|
||||
<hint type="destinationlabel">
|
||||
<x>178</x>
|
||||
<y>122</y>
|
||||
</hint>
|
||||
</hints>
|
||||
</connection>
|
||||
</connections>
|
||||
<connections/>
|
||||
</ui>
|
||||
|
|
|
|||
|
|
@ -1064,11 +1064,14 @@ def save_to_disk(self, checked, single_dir=False, single_format=None):
|
|||
|
||||
if self.current_view() is self.library_view:
|
||||
from calibre.gui2.add import Saver
|
||||
from calibre.library.save_to_disk import config
|
||||
opts = config().parse()
|
||||
if single_format is not None:
|
||||
opts.formats = single_format
|
||||
if single_dir:
|
||||
opts.template = '{title} - {authors}'
|
||||
self._saver = Saver(self, self.library_view.model().db,
|
||||
Dispatcher(self._books_saved), rows, path,
|
||||
by_author=self.library_view.model().by_author,
|
||||
single_dir=single_dir,
|
||||
single_format=single_format,
|
||||
Dispatcher(self._books_saved), rows, path, opts,
|
||||
spare_server=self.spare_server)
|
||||
|
||||
else:
|
||||
|
|
@ -1078,19 +1081,20 @@ def save_to_disk(self, checked, single_dir=False, single_format=None):
|
|||
|
||||
|
||||
def _books_saved(self, path, failures, error):
|
||||
single_format = self._saver.worker.single_format
|
||||
self._saver = None
|
||||
if error:
|
||||
return error_dialog(self, _('Error while saving'),
|
||||
_('There was an error while saving.'),
|
||||
error, show=True)
|
||||
if failures and single_format:
|
||||
single_format = single_format.upper()
|
||||
if failures:
|
||||
failures = [u'%s\n\t%s'%
|
||||
(title, '\n\t'.join(err.splitlines())) for title, err in
|
||||
failures]
|
||||
|
||||
warning_dialog(self, _('Could not save some books'),
|
||||
_('Could not save some books') + ', ' +
|
||||
(_('as the %s format is not available for them.')%single_format) +
|
||||
_('Click the show details button to see which ones.'),
|
||||
'\n'.join(failures), show=True)
|
||||
u'\n\n'.join(failures), show=True)
|
||||
QDesktopServices.openUrl(QUrl.fromLocalFile(path))
|
||||
|
||||
def books_saved(self, job):
|
||||
|
|
|
|||
|
|
@ -498,10 +498,7 @@ def do_export(db, ids, dir, opts):
|
|||
prints('Failed to save the following books:')
|
||||
for id, title, tb in failures:
|
||||
prints(str(id)+':', title)
|
||||
if tb:
|
||||
prints('\t'+'\n\t'.join(tb.splitlines()))
|
||||
else:
|
||||
prints('\tRequested formats not available')
|
||||
prints('\t'+'\n\t'.join(tb.splitlines()))
|
||||
prints(' ')
|
||||
|
||||
def command_export(args, dbpath):
|
||||
|
|
|
|||
|
|
@ -176,7 +176,8 @@ def save_book_to_disk(id, db, root, opts, length):
|
|||
traceback.print_exc()
|
||||
stream.seek(0)
|
||||
data = stream.read()
|
||||
with open(base_path+'.'+fmt, 'wb') as f:
|
||||
fmt_path = base_path+'.'+str(fmt)
|
||||
with open(fmt_path, 'wb') as f:
|
||||
f.write(data)
|
||||
|
||||
return not written, id, mi.title
|
||||
|
|
@ -189,7 +190,9 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
|
|||
|
||||
:param:`ids` iterable of book ids to save from the database.
|
||||
:param:`callback` is an optional callable that is called after each
|
||||
book is processed with the arguments: id, title and failed
|
||||
book is processed with the arguments: id, title, failed, traceback.
|
||||
If the callback returns False, further processing is terminated and
|
||||
the function returns.
|
||||
:return: A list of failures. Each element of the list is a tuple
|
||||
(id, title, traceback)
|
||||
'''
|
||||
|
|
@ -209,13 +212,14 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
|
|||
tb = ''
|
||||
try:
|
||||
failed, id, title = save_book_to_disk(x, db, root, opts, length)
|
||||
tb = _('Requested formats not available')
|
||||
except:
|
||||
failed, id, title = True, x, db.title(x, index_is_id=True)
|
||||
tb = traceback.format_exc()
|
||||
if failed:
|
||||
failures.append((id, title, tb))
|
||||
if callable(callback):
|
||||
if not callback(int(id), title, failed):
|
||||
if not callback(int(id), title, failed, tb):
|
||||
break
|
||||
return failures
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -379,6 +379,7 @@ def __init__(self, description):
|
|||
self.remove_opt = self.remove = self.option_set.remove_opt
|
||||
self.parse_string = self.option_set.parse_string
|
||||
self.get_option = self.option_set.get_option
|
||||
self.preferences = self.option_set.preferences
|
||||
|
||||
def update(self, other):
|
||||
self.option_set.update(other.option_set)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Guardian(BasicNewsRecipe):
|
||||
|
||||
|
|
@ -16,14 +17,33 @@ class Guardian(BasicNewsRecipe):
|
|||
language = _('English')
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
|
||||
remove_javascript = True
|
||||
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
|
||||
remove_tags_before = dict(id='main-article-info')
|
||||
remove_tags_after = dict(id='article-wrapper')
|
||||
remove_tags_after = dict(id='content')
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':["video-content","videos-third-column"]}),
|
||||
dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}),
|
||||
dict(name='ul', attrs={'class':["pagination"]}),
|
||||
dict(name='ul', attrs={'id':["content-actions"]}),
|
||||
]
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
extra_css = 'h2 {font-size: medium;} \n h1 {text-align: left;}'
|
||||
extra_css = '''
|
||||
.article-attributes{font-size: x-small; font-family:Arial,Helvetica,sans-serif;}
|
||||
.h1{font-size: large ;font-family:georgia,serif; font-weight:bold;}
|
||||
.stand-first-alone{color:#666666; font-size:small; font-family:Arial,Helvetica,sans-serif;}
|
||||
.caption{color:#666666; font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
|
||||
#article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;}
|
||||
.main-article-info{font-family:Arial,Helvetica,sans-serif;}
|
||||
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;}
|
||||
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
('Front Page', 'http://www.guardian.co.uk/rss'),
|
||||
|
|
@ -37,3 +57,21 @@ class Guardian(BasicNewsRecipe):
|
|||
('Comment','http://www.guardian.co.uk/commentisfree/rss'),
|
||||
]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
|
||||
for item in soup.findAll(face=True):
|
||||
del item['face']
|
||||
for tag in soup.findAll(name=['ul','li']):
|
||||
tag.name = 'div'
|
||||
|
||||
return soup
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue