Single proc bg processing, optionally split by site & accumulate results -- experimental

This commit is contained in:
Jim Miller 2025-05-17 13:37:54 -05:00
parent 0e9f60f8a6
commit 1432241319
9 changed files with 346 additions and 54 deletions

View file

@ -416,6 +416,11 @@ class ConfigWidget(QWidget):
prefs['auto_reject_from_email'] = self.imap_tab.auto_reject_from_email.isChecked()
prefs['update_existing_only_from_email'] = self.imap_tab.update_existing_only_from_email.isChecked()
prefs['download_from_email_immediately'] = self.imap_tab.download_from_email_immediately.isChecked()
prefs['single_proc_jobs'] = self.other_tab.single_proc_jobs.isChecked()
prefs['site_split_jobs'] = self.other_tab.site_split_jobs.isChecked()
prefs['reconsolidate_jobs'] = self.other_tab.reconsolidate_jobs.isChecked()
prefs.save_to_db()
self.plugin_action.set_popup_mode()
@ -1294,6 +1299,39 @@ class OtherTab(QWidget):
view_prefs_button.clicked.connect(self.view_prefs)
self.l.addWidget(view_prefs_button)
label = QLabel("<h3>Temporary Settings</h3>"
"<p>These are experimental settings that change the way FanFicFare "
"handles background processing.</p>"
"<p>In past, FFF split story downloads into separate "
"processes <i>in the background job</i>.</p>"
"<p>Advantages of new version:<ul>"
"<li>Download job <i>actually stops</i> when job is stopped or Calibre quits. No more <i>open_pages_in_browser</i> calls after you've quit Calibre.</li>"
"<li>Job Details (aka Job log) updates real time, you can watch downloads in progress.</li>"
"<li>Job start is quicker by several seconds.</li>"
"</ul></p>"
"<p>Disadvantages of new version:<ul>"
"<li>Downloads from different sites only done in parallel if you also check 'Split downloads...'.</li>"
"<li>If split, you will get a separate 'Proceed to update library' question for each site, unless you also check 'Reconsolidate split downloads...'. But it also means you can update your library sooner for sites that finish quicker</li>"
"</ul></p>"
)
label.setWordWrap(True)
self.l.addWidget(label)
self.single_proc_jobs = QCheckBox(_('Use New Single Process Background Jobs'),self)
self.single_proc_jobs.setToolTip(_("Uncheck to go back to old multi-process BG jobs"))
self.single_proc_jobs.setChecked(prefs['single_proc_jobs'])
self.l.addWidget(self.single_proc_jobs)
self.site_split_jobs = QCheckBox(_('Split downloads into separate background jobs by site'),self)
self.site_split_jobs.setToolTip(_("Launches a separate background Job for each site in the list of stories to download/update."))
self.site_split_jobs.setChecked(prefs['site_split_jobs'])
self.l.addWidget(self.site_split_jobs)
self.reconsolidate_jobs = QCheckBox(_('Reconsolidate split downloads before updating library'),self)
self.reconsolidate_jobs.setToolTip(_("Hold all downloads/updates launched together until they all finish. Otherwise, there will be a 'Proceed to update' dialog for each site."))
self.reconsolidate_jobs.setChecked(prefs['reconsolidate_jobs'])
self.l.addWidget(self.reconsolidate_jobs)
self.l.insertStretch(-1)
def reset_dialogs(self):

View file

@ -31,6 +31,7 @@ import os
import re
import sys
import threading
import copy
from io import BytesIO
from functools import partial
from datetime import datetime, time
@ -193,6 +194,7 @@ class FanFicFarePlugin(InterfaceAction):
self.menu.aboutToShow.connect(self.about_to_show_menu)
self.imap_pass = None
self.download_job_manager = DownloadJobManager()
def initialization_complete(self):
# otherwise configured hot keys won't work until the menu's
@ -1745,12 +1747,7 @@ class FanFicFarePlugin(InterfaceAction):
calonly = False
break
if calonly:
class NotJob(object):
def __init__(self,result):
self.failed=False
self.result=result
notjob = NotJob(book_list)
self.download_list_completed(notjob,options=options)
self._do_download_list_completed(book_list,options=options)
return
self.do_mark_series_anthologies(options.get('mark_anthology_ids',set()))
@ -1780,6 +1777,20 @@ class FanFicFarePlugin(InterfaceAction):
msgl)
return
### *Don't* split anthology.
if merge:
self.dispatch_bg_job(_("Anthology"), book_list, copy.copy(options), merge)
elif prefs['site_split_jobs']: ### YYY Split list into sites, one BG job per site
sites_lists = defaultdict(list)
[ sites_lists[x['site']].append(x) for x in book_list if x['good'] ]
for site in sites_lists.keys():
site_list = sites_lists[site]
self.dispatch_bg_job(site, site_list, copy.copy(options), merge)
else:
self.dispatch_bg_job(None, book_list, copy.copy(options), merge)
def dispatch_bg_job(self, site, book_list, options, merge):
options['site'] = site
basic_cachefile = PersistentTemporaryFile(suffix='.basic_cache',
dir=options['tdir'])
options['basic_cache'].save_cache(basic_cachefile.name)
@ -1799,15 +1810,29 @@ class FanFicFarePlugin(InterfaceAction):
# get libs from plugin zip.
options['plugin_path'] = self.interface_action_base_plugin.plugin_path
func = 'arbitrary_n'
cpus = self.gui.job_manager.server.pool_size
args = ['calibre_plugins.fanficfare_plugin.jobs', 'do_download_worker',
(book_list, options, cpus, merge)]
desc = _('Download %s FanFiction Book(s)') % sum(1 for x in book_list if x['good'])
if prefs['single_proc_jobs']: ## YYY Single BG job
args = ['calibre_plugins.fanficfare_plugin.jobs',
'do_download_worker_single',
(site, book_list, options, merge)]
else: ## MultiBG Job split by site
cpus = self.gui.job_manager.server.pool_size
args = ['calibre_plugins.fanficfare_plugin.jobs',
'do_download_worker_multiproc',
(site, book_list, options, cpus, merge)]
if site:
desc = _('Download %s FanFiction Book(s) for %s') % (sum(1 for x in book_list if x['good']),site)
else:
desc = _('Download %s FanFiction Book(s)') % sum(1 for x in book_list if x['good'])
job = self.gui.job_manager.run_job(
self.Dispatcher(partial(self.download_list_completed,options=options,merge=merge)),
func, args=args,
self.Dispatcher(partial(self.download_list_completed,
options=options,merge=merge)),
'arbitrary_n',
args=args,
description=desc)
self.download_job_manager.get_batch(options['tdir']).add_job(site,job)
job.tdir=options['tdir']
job.site=site
self.gui.jobs_pointer.start()
self.gui.status_bar.show_message(_('Starting %d FanFicFare Downloads')%len(book_list),3000)
@ -1956,8 +1981,13 @@ class FanFicFarePlugin(InterfaceAction):
logger.debug(_('Finished Adding/Updating %d books.')%(len(update_list) + len(add_list)))
self.gui.status_bar.show_message(_('Finished Adding/Updating %d books.')%(len(update_list) + len(add_list)), 3000)
remove_dir(options['tdir'])
logger.debug("removed tdir")
batch = self.download_job_manager.get_batch(options['tdir'])
batch.finish_job(options['site'])
if batch.all_done():
remove_dir(options['tdir'])
logger.debug("removed tdir(%s)"%options['tdir'])
else:
logger.debug("DIDN'T removed tdir(%s)"%options['tdir'])
if 'Count Pages' in self.gui.iactions and len(prefs['countpagesstats']) and len(all_ids):
cp_plugin = self.gui.iactions['Count Pages']
@ -1990,14 +2020,31 @@ class FanFicFarePlugin(InterfaceAction):
self.gui.iactions['Convert Books'].auto_convert_auto_add(all_not_calonly_ids)
def download_list_completed(self, job, options={}, merge=False):
    '''
    Dispatcher callback invoked when a background download job finishes.

    When 'reconsolidate_jobs' is set, results are held until every job
    in the batch (keyed by shared tdir) has finished, then processed
    together; otherwise this job's results are processed immediately
    with its site shown in the proceed dialog.

    :param job: calibre job object; carries .tdir and .site (attached
                at dispatch time), plus .failed and .result.
    :param options: download options dict (not mutated here).
    :param merge: True when this was an anthology/merge download.
    '''
    tdir = job.tdir
    site = job.site
    logger.debug("Batch Job:%s %s"%(tdir,site))
    batch = self.download_job_manager.get_batch(tdir)
    if job.failed:
        self.gui.job_exception(job, dialog_title='Failed to Download Stories')
        return
    showsite = None
    if prefs['reconsolidate_jobs']: # YYY batch update
        batch.finish_job(site)
        if not batch.all_done():
            # Other site jobs in this batch are still running; wait
            # for the last one to finish before updating the library.
            return
        book_list = batch.get_results()
    else:
        # job.failed was already handled by the early return above, so
        # no need to re-check it here (the original 'elif not
        # job.failed' was always true).
        showsite = site
        book_list = job.result
    return self._do_download_list_completed(book_list, options, merge, showsite)
def _do_download_list_completed(self, book_list, options={},merge=False,showsite=None):
self.previous = self.gui.library_view.currentIndex()
db = self.gui.current_db
book_list = job.result
good_list = [ x for x in book_list if x['good'] ]
bad_list = [ x for x in book_list if not x['good'] ]
chapter_error_list = [ x for x in book_list if 'chapter_error_count' in x ]
@ -2048,6 +2095,8 @@ class FanFicFarePlugin(InterfaceAction):
do_update_func = self.do_download_merge_update
else:
if showsite:
msgl.append(_('Downloading from %s')%showsite)
msgl.extend([
_('See log for details.'),
_('Proceed with updating your library?')])
@ -3153,3 +3202,43 @@ def pretty_book(d, indent=0, spacer=' '):
return '\n'.join(['%s%s:\n%s' % (kindent, k, pretty_book(v, indent + 1, spacer))
for k, v in d.items()])
return "%s%s"%(kindent, d)
from collections.abc import Iterable # import directly from collections for Python < 3.3
class DownloadBatch():
    '''
    Tracks one batch of background download jobs that share a temp
    directory (tdir), keyed by site.  Used to know when all jobs in a
    batch have finished and to accumulate their results in launch
    order.
    '''
    def __init__(self, tdir=None):
        self.runningjobs = dict() # still-running jobs, keyed by site
        self.jobsorder = []       # all jobs, in the order added
        self.tdir = tdir

    def add_job(self, site, job):
        self.runningjobs[site] = job
        self.jobsorder.append(job)

    def finish_job(self, site):
        # pop with a default instead of the original bare try/except:
        # tolerates double-finish or an unknown site without masking
        # unrelated exceptions.
        self.runningjobs.pop(site, None)

    def all_done(self):
        # True once every job added has been finished.
        return len(self.runningjobs) == 0

    def get_results(self):
        '''Concatenate results of all jobs in launch order.

        Jobs that failed or produced no result (non-iterable .result)
        are silently skipped.
        '''
        retlist = []
        for j in self.jobsorder:
            ## failed / no result
            if isinstance(j.result, Iterable):
                retlist.extend(j.result)
        return retlist
class DownloadJobManager():
    '''
    Registry of DownloadBatch objects, keyed by the batch's temp
    directory (tdir).
    '''
    def __init__(self):
        self.batches = {}

    def get_batch(self, batch):
        # 'batch' is the job tdir.  Create on first request; pass the
        # tdir through so DownloadBatch.tdir is actually populated
        # (the original called DownloadBatch() with no args, leaving
        # it None).
        if batch not in self.batches:
            self.batches[batch] = DownloadBatch(tdir=batch)
        return self.batches[batch]

    def remove_batch(self, batch):
        del self.batches[batch]

View file

@ -32,11 +32,12 @@ except NameError:
#
# ------------------------------------------------------------------------------
def do_download_worker(book_list,
options,
cpus,
merge=False,
notification=lambda x,y:x):
def do_download_worker_multiproc(site,
book_list,
options,
cpus,
merge,
notification=lambda x,y:x):
'''
Coordinator job, to launch child jobs to do downloads.
This is run as a worker job in the background to keep the UI more
@ -142,43 +143,86 @@ def do_download_worker(book_list,
## Can't use individual count--I've seen stories all reported
## finished before results of all jobs processed.
if jobs_running == 0:
book_list = sorted(book_list,key=lambda x : x['listorder'])
logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
good_lists = defaultdict(list)
bad_lists = defaultdict(list)
for book in book_list:
if book['good']:
good_lists[book['status']].append(book)
else:
bad_lists[book['status']].append(book)
order = [_('Add'),
_('Update'),
_('Meta'),
_('Different URL'),
_('Rejected'),
_('Skipped'),
_('Bad'),
_('Error'),
]
j = 0
for d in [ good_lists, bad_lists ]:
for status in order:
if d[status]:
l = d[status]
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
for book in l:
book['reportorder'] = j
j += 1
del d[status]
# just in case a status is added but doesn't appear in order.
for status in d.keys():
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
ret_list = finish_download(book_list)
break
server.close()
# return the book list as the job result
return ret_list
def do_download_worker_single(site,
                              book_list,
                              options,
                              merge,
                              notification=lambda x,y:x):
    '''
    Download the given stories sequentially inside this single worker
    process (as opposed to do_download_worker_multiproc, which farms
    books out to child processes).

    :param site: site label for this job, or None (unused here, kept
                 for signature symmetry with the multiproc worker).
    :param book_list: list of book dicts; only those with ['good']
                      count toward progress totals.
    :param options: download options dict.
    :param merge: True for anthology/merge downloads.
    :param notification: callable(fraction, message) for job progress.
    :returns: the finished, sorted book list (job result).
    '''
    logger.info(options['version'])
    ## same info debug calibre prints out at startup. For when users
    ## give me job output instead of debug log.
    from calibre.debug import print_basic_debug_info
    print_basic_debug_info(sys.stderr)
    notification(0.01, _('Downloading FanFiction Stories'))
    count = 0
    # per-book completed fraction, keyed by url.  Dict comprehension
    # replaces the original side-effect-only list comprehension
    # ([ totals.update(...) for ... ]).
    totals = { x['url']:0.0 for x in book_list if x['good'] }
    # logger.debug(sites_lists.keys())
    def do_indiv_notif(percent, msg):
        # msg is the book's url; each book contributes an equal share
        # of the overall progress bar.
        totals[msg] = percent/len(totals)
        notification(max(0.01, sum(totals.values())),
                     _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
    donelist = []
    for book in book_list:
        # logger.info("%s"%book['url'])
        donelist.append(do_download_for_worker(book,options,merge,do_indiv_notif))
        count += 1
    return finish_download(donelist)
def finish_download(donelist):
    '''
    Post-process a completed list of book result dicts: sort by
    original list order, log a per-status summary, and assign each
    book a 'reportorder' so results accumulated from multiple
    background jobs can be interleaved in a stable status-major order.

    :param donelist: list of book dicts with 'listorder', 'good',
                     'status', 'url', 'comment' keys.
    :returns: the sorted book list (used as the job result).
    '''
    book_list = sorted(donelist,key=lambda x : x['listorder'])
    logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
    good_lists = defaultdict(list)
    bad_lists = defaultdict(list)
    for book in book_list:
        if book['good']:
            good_lists[book['status']].append(book)
        else:
            bad_lists[book['status']].append(book)
    order = [_('Add'),
             _('Update'),
             _('Meta'),
             _('Different URL'),
             _('Rejected'),
             _('Skipped'),
             _('Bad'),
             _('Error'),
             ]
    stnum = 0
    for d in [ good_lists, bad_lists ]:
        for status in order:
            stnum += 1
            # pop() instead of indexing: 'if d[status]' on a
            # defaultdict inserts an empty list for every status not
            # present, which the leftover loop below would then log as
            # a spurious empty section.
            l = d.pop(status, None)
            if l:
                logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
                for book in l:
                    # Add prior listorder to 10000 * status num for
                    # ordering of accumulated results with multiple bg
                    # jobs
                    book['reportorder'] = stnum*10000 + book['listorder']
        # just in case a status is added but doesn't appear in order.
        for status in d.keys():
            logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
    # return the book list as the job result
    return book_list

View file

@ -197,6 +197,10 @@ default_prefs['auto_reject_from_email'] = False
default_prefs['update_existing_only_from_email'] = False
default_prefs['download_from_email_immediately'] = False
default_prefs['single_proc_jobs'] = True
default_prefs['site_split_jobs'] = False
default_prefs['reconsolidate_jobs'] = False
def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS):
db.prefs.set_namespaced(PREFS_NAMESPACE,
setting,

View file

@ -33,6 +33,9 @@ from .. import configurable as configurable
from . import base_adapter
from . import base_efiction_adapter
from . import adapter_test1
from . import adapter_test2
from . import adapter_test3
from . import adapter_test4
from . import adapter_fanfictionnet
from . import adapter_fictionalleyarchiveorg
from . import adapter_fictionpresscom

View file

@ -29,6 +29,13 @@ from ..six import ensure_text
from .base_adapter import BaseSiteAdapter, makeDate
try: # just a way to switch between CLI and PI
## webbrowser.open doesn't work on some linux flavors.
## piggyback Calibre's version.
from calibre.gui2 import safe_open_url as open_url
except :
from webbrowser import open as open_url
class TestSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url):
@ -321,6 +328,8 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
rt = random.uniform(t*0.5, t*1.5)
logger.debug("random sleep(%0.2f-%0.2f):%0.2f"%(t*0.5, t*1.5,rt))
time.sleep(rt)
# open_url("https://echo.free.beeceptor.com/%s.%s"%(self.story.getMetadata('siteabbrev'),
# self.story.getMetadata('storyId')))
if "chapter=1" in url :
text=u'''

View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from .adapter_test1 import TestSiteAdapter
class Test2SiteAdapter(TestSiteAdapter):
    '''Test-site adapter registered under test2.com; behavior is
    entirely inherited from TestSiteAdapter.'''

    def __init__(self, config, url):
        super(Test2SiteAdapter, self).__init__(config, url)

    @staticmethod
    def getSiteDomain():
        return 'test2.com'
def getClass():
    # Module-level hook: the adapter registry calls getClass() to
    # obtain the adapter class this module provides.
    return Test2SiteAdapter

View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from .adapter_test1 import TestSiteAdapter
class Test3SiteAdapter(TestSiteAdapter):
    '''Test-site adapter registered under test3.com; behavior is
    entirely inherited from TestSiteAdapter.'''

    def __init__(self, config, url):
        super(Test3SiteAdapter, self).__init__(config, url)

    @staticmethod
    def getSiteDomain():
        return 'test3.com'
def getClass():
    # Module-level hook: the adapter registry calls getClass() to
    # obtain the adapter class this module provides.
    return Test3SiteAdapter

View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from .adapter_test1 import TestSiteAdapter
class Test4SiteAdapter(TestSiteAdapter):
    '''Test-site adapter registered under test4.com; behavior is
    entirely inherited from TestSiteAdapter.'''

    def __init__(self, config, url):
        super(Test4SiteAdapter, self).__init__(config, url)

    @staticmethod
    def getSiteDomain():
        return 'test4.com'
def getClass():
    # Module-level hook: the adapter registry calls getClass() to
    # obtain the adapter class this module provides.
    return Test4SiteAdapter