Single proc bg processing, optionally split by site & accumulate results -- experimental

This commit is contained in:
Jim Miller 2025-05-17 13:37:54 -05:00
parent 0e9f60f8a6
commit 1432241319
9 changed files with 346 additions and 54 deletions

View file

@ -416,6 +416,11 @@ class ConfigWidget(QWidget):
prefs['auto_reject_from_email'] = self.imap_tab.auto_reject_from_email.isChecked()
prefs['update_existing_only_from_email'] = self.imap_tab.update_existing_only_from_email.isChecked()
prefs['download_from_email_immediately'] = self.imap_tab.download_from_email_immediately.isChecked()
prefs['single_proc_jobs'] = self.other_tab.single_proc_jobs.isChecked()
prefs['site_split_jobs'] = self.other_tab.site_split_jobs.isChecked()
prefs['reconsolidate_jobs'] = self.other_tab.reconsolidate_jobs.isChecked()
prefs.save_to_db()
self.plugin_action.set_popup_mode()
@ -1294,6 +1299,39 @@ class OtherTab(QWidget):
view_prefs_button.clicked.connect(self.view_prefs)
self.l.addWidget(view_prefs_button)
label = QLabel("<h3>Temporary Settings</h3>"
"<p>These are experimental settings that change the way FanFicFare "
"handles background processing.</p>"
"<p>In past, FFF split story downloads into separate "
"processes <i>in the background job</i>.</p>"
"<p>Advantages of new version:<ul>"
"<li>Download job <i>actually stops</i> when job is stopped or Calibre quits. No more <i>open_pages_in_browser</i> calls after you've quit Calibre.</li>"
"<li>Job Details (aka Job log) updates real time, you can watch downloads in progress.</li>"
"<li>Job start is quicker by several seconds.</li>"
"</ul></p>"
"<p>Disadvantages of new version:<ul>"
"<li>Downloads from different sites only done in parallel if you also check 'Split downloads...'.</li>"
"<li>If split, you will get a separate 'Proceed to update library' question for each site, unless you also check 'Reconsolidate split downloads...'. But it also means you can update your library sooner for sites that finish quicker</li>"
"</ul></p>"
)
label.setWordWrap(True)
self.l.addWidget(label)
self.single_proc_jobs = QCheckBox(_('Use New Single Process Background Jobs'),self)
self.single_proc_jobs.setToolTip(_("Uncheck to go back to old multi-process BG jobs"))
self.single_proc_jobs.setChecked(prefs['single_proc_jobs'])
self.l.addWidget(self.single_proc_jobs)
self.site_split_jobs = QCheckBox(_('Split downloads into separate background jobs by site'),self)
self.site_split_jobs.setToolTip(_("Launches a separate background Job for each site in the list of stories to download/update."))
self.site_split_jobs.setChecked(prefs['site_split_jobs'])
self.l.addWidget(self.site_split_jobs)
self.reconsolidate_jobs = QCheckBox(_('Reconsolidate split downloads before updating library'),self)
self.reconsolidate_jobs.setToolTip(_("Hold all downloads/updates launched together until they all finish. Otherwise, there will be a 'Proceed to update' dialog for each site."))
self.reconsolidate_jobs.setChecked(prefs['reconsolidate_jobs'])
self.l.addWidget(self.reconsolidate_jobs)
self.l.insertStretch(-1)
def reset_dialogs(self):

View file

@ -31,6 +31,7 @@ import os
import re
import sys
import threading
import copy
from io import BytesIO
from functools import partial
from datetime import datetime, time
@ -193,6 +194,7 @@ class FanFicFarePlugin(InterfaceAction):
self.menu.aboutToShow.connect(self.about_to_show_menu)
self.imap_pass = None
self.download_job_manager = DownloadJobManager()
def initialization_complete(self):
# otherwise configured hot keys won't work until the menu's
@ -1745,12 +1747,7 @@ class FanFicFarePlugin(InterfaceAction):
calonly = False
break
if calonly:
class NotJob(object):
def __init__(self,result):
self.failed=False
self.result=result
notjob = NotJob(book_list)
self.download_list_completed(notjob,options=options)
self._do_download_list_completed(book_list,options=options)
return
self.do_mark_series_anthologies(options.get('mark_anthology_ids',set()))
@ -1780,6 +1777,20 @@ class FanFicFarePlugin(InterfaceAction):
msgl)
return
### *Don't* split anthology.
if merge:
self.dispatch_bg_job(_("Anthology"), book_list, copy.copy(options), merge)
elif prefs['site_split_jobs']: ### YYY Split list into sites, one BG job per site
sites_lists = defaultdict(list)
[ sites_lists[x['site']].append(x) for x in book_list if x['good'] ]
for site in sites_lists.keys():
site_list = sites_lists[site]
self.dispatch_bg_job(site, site_list, copy.copy(options), merge)
else:
self.dispatch_bg_job(None, book_list, copy.copy(options), merge)
def dispatch_bg_job(self, site, book_list, options, merge):
options['site'] = site
basic_cachefile = PersistentTemporaryFile(suffix='.basic_cache',
dir=options['tdir'])
options['basic_cache'].save_cache(basic_cachefile.name)
@ -1799,15 +1810,29 @@ class FanFicFarePlugin(InterfaceAction):
# get libs from plugin zip.
options['plugin_path'] = self.interface_action_base_plugin.plugin_path
func = 'arbitrary_n'
cpus = self.gui.job_manager.server.pool_size
args = ['calibre_plugins.fanficfare_plugin.jobs', 'do_download_worker',
(book_list, options, cpus, merge)]
desc = _('Download %s FanFiction Book(s)') % sum(1 for x in book_list if x['good'])
if prefs['single_proc_jobs']: ## YYY Single BG job
args = ['calibre_plugins.fanficfare_plugin.jobs',
'do_download_worker_single',
(site, book_list, options, merge)]
else: ## MultiBG Job split by site
cpus = self.gui.job_manager.server.pool_size
args = ['calibre_plugins.fanficfare_plugin.jobs',
'do_download_worker_multiproc',
(site, book_list, options, cpus, merge)]
if site:
desc = _('Download %s FanFiction Book(s) for %s') % (sum(1 for x in book_list if x['good']),site)
else:
desc = _('Download %s FanFiction Book(s)') % sum(1 for x in book_list if x['good'])
job = self.gui.job_manager.run_job(
self.Dispatcher(partial(self.download_list_completed,options=options,merge=merge)),
func, args=args,
self.Dispatcher(partial(self.download_list_completed,
options=options,merge=merge)),
'arbitrary_n',
args=args,
description=desc)
self.download_job_manager.get_batch(options['tdir']).add_job(site,job)
job.tdir=options['tdir']
job.site=site
self.gui.jobs_pointer.start()
self.gui.status_bar.show_message(_('Starting %d FanFicFare Downloads')%len(book_list),3000)
@ -1956,8 +1981,13 @@ class FanFicFarePlugin(InterfaceAction):
logger.debug(_('Finished Adding/Updating %d books.')%(len(update_list) + len(add_list)))
self.gui.status_bar.show_message(_('Finished Adding/Updating %d books.')%(len(update_list) + len(add_list)), 3000)
remove_dir(options['tdir'])
logger.debug("removed tdir")
batch = self.download_job_manager.get_batch(options['tdir'])
batch.finish_job(options['site'])
if batch.all_done():
remove_dir(options['tdir'])
logger.debug("removed tdir(%s)"%options['tdir'])
else:
logger.debug("DIDN'T removed tdir(%s)"%options['tdir'])
if 'Count Pages' in self.gui.iactions and len(prefs['countpagesstats']) and len(all_ids):
cp_plugin = self.gui.iactions['Count Pages']
@ -1990,14 +2020,31 @@ class FanFicFarePlugin(InterfaceAction):
self.gui.iactions['Convert Books'].auto_convert_auto_add(all_not_calonly_ids)
def download_list_completed(self, job, options={}, merge=False):
    '''
    Dispatcher callback invoked when a background download job finishes.

    When 'reconsolidate_jobs' is set, results are held until every job
    in the batch (keyed by shared tdir) has finished, then processed
    together; otherwise this job's results are processed immediately
    with its site shown in the proceed dialog.

    :param job: calibre job object; carries .tdir and .site (attached
                at dispatch time), plus .failed and .result.
    :param options: download options dict (not mutated here).
    :param merge: True when this was an anthology/merge download.
    '''
    tdir = job.tdir
    site = job.site
    logger.debug("Batch Job:%s %s"%(tdir,site))
    batch = self.download_job_manager.get_batch(tdir)
    if job.failed:
        self.gui.job_exception(job, dialog_title='Failed to Download Stories')
        return
    showsite = None
    if prefs['reconsolidate_jobs']: # YYY batch update
        batch.finish_job(site)
        if not batch.all_done():
            # Other site jobs in this batch are still running; wait
            # for the last one to finish before updating the library.
            return
        book_list = batch.get_results()
    else:
        # job.failed was already handled by the early return above, so
        # no need to re-check it here (the original 'elif not
        # job.failed' was always true).
        showsite = site
        book_list = job.result
    return self._do_download_list_completed(book_list, options, merge, showsite)
def _do_download_list_completed(self, book_list, options={},merge=False,showsite=None):
self.previous = self.gui.library_view.currentIndex()
db = self.gui.current_db
book_list = job.result
good_list = [ x for x in book_list if x['good'] ]
bad_list = [ x for x in book_list if not x['good'] ]
chapter_error_list = [ x for x in book_list if 'chapter_error_count' in x ]
@ -2048,6 +2095,8 @@ class FanFicFarePlugin(InterfaceAction):
do_update_func = self.do_download_merge_update
else:
if showsite:
msgl.append(_('Downloading from %s')%showsite)
msgl.extend([
_('See log for details.'),
_('Proceed with updating your library?')])
@ -3153,3 +3202,43 @@ def pretty_book(d, indent=0, spacer=' '):
return '\n'.join(['%s%s:\n%s' % (kindent, k, pretty_book(v, indent + 1, spacer))
for k, v in d.items()])
return "%s%s"%(kindent, d)
from collections.abc import Iterable # import directly from collections for Python < 3.3
class DownloadBatch():
    '''
    Tracks one batch of background download jobs that share a temp
    directory (tdir), keyed by site.  Used to know when all jobs in a
    batch have finished and to accumulate their results in launch
    order.
    '''
    def __init__(self, tdir=None):
        self.runningjobs = dict() # still-running jobs, keyed by site
        self.jobsorder = []       # all jobs, in the order added
        self.tdir = tdir

    def add_job(self, site, job):
        self.runningjobs[site] = job
        self.jobsorder.append(job)

    def finish_job(self, site):
        # pop with a default instead of the original bare try/except:
        # tolerates double-finish or an unknown site without masking
        # unrelated exceptions.
        self.runningjobs.pop(site, None)

    def all_done(self):
        # True once every job added has been finished.
        return len(self.runningjobs) == 0

    def get_results(self):
        '''Concatenate results of all jobs in launch order.

        Jobs that failed or produced no result (non-iterable .result)
        are silently skipped.
        '''
        retlist = []
        for j in self.jobsorder:
            ## failed / no result
            if isinstance(j.result, Iterable):
                retlist.extend(j.result)
        return retlist
class DownloadJobManager():
    '''
    Registry of DownloadBatch objects, keyed by the batch's temp
    directory (tdir).
    '''
    def __init__(self):
        self.batches = {}

    def get_batch(self, batch):
        # 'batch' is the job tdir.  Create on first request; pass the
        # tdir through so DownloadBatch.tdir is actually populated
        # (the original called DownloadBatch() with no args, leaving
        # it None).
        if batch not in self.batches:
            self.batches[batch] = DownloadBatch(tdir=batch)
        return self.batches[batch]

    def remove_batch(self, batch):
        del self.batches[batch]

View file

@ -32,11 +32,12 @@ except NameError:
#
# ------------------------------------------------------------------------------
def do_download_worker(book_list,
options,
cpus,
merge=False,
notification=lambda x,y:x):
def do_download_worker_multiproc(site,
book_list,
options,
cpus,
merge,
notification=lambda x,y:x):
'''
Coordinator job, to launch child jobs to do downloads.
This is run as a worker job in the background to keep the UI more
@ -142,43 +143,86 @@ def do_download_worker(book_list,
## Can't use individual count--I've seen stories all reported
## finished before results of all jobs processed.
if jobs_running == 0:
book_list = sorted(book_list,key=lambda x : x['listorder'])
logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
good_lists = defaultdict(list)
bad_lists = defaultdict(list)
for book in book_list:
if book['good']:
good_lists[book['status']].append(book)
else:
bad_lists[book['status']].append(book)
order = [_('Add'),
_('Update'),
_('Meta'),
_('Different URL'),
_('Rejected'),
_('Skipped'),
_('Bad'),
_('Error'),
]
j = 0
for d in [ good_lists, bad_lists ]:
for status in order:
if d[status]:
l = d[status]
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
for book in l:
book['reportorder'] = j
j += 1
del d[status]
# just in case a status is added but doesn't appear in order.
for status in d.keys():
logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
ret_list = finish_download(book_list)
break
server.close()
# return the book list as the job result
return ret_list
def do_download_worker_single(site,
                              book_list,
                              options,
                              merge,
                              notification=lambda x,y:x):
    '''
    Download the given stories sequentially inside this single worker
    process (as opposed to do_download_worker_multiproc, which farms
    books out to child processes).

    :param site: site label for this job, or None (unused here, kept
                 for signature symmetry with the multiproc worker).
    :param book_list: list of book dicts; only those with ['good']
                      count toward progress totals.
    :param options: download options dict.
    :param merge: True for anthology/merge downloads.
    :param notification: callable(fraction, message) for job progress.
    :returns: the finished, sorted book list (job result).
    '''
    logger.info(options['version'])
    ## same info debug calibre prints out at startup. For when users
    ## give me job output instead of debug log.
    from calibre.debug import print_basic_debug_info
    print_basic_debug_info(sys.stderr)
    notification(0.01, _('Downloading FanFiction Stories'))
    count = 0
    # per-book completed fraction, keyed by url.  Dict comprehension
    # replaces the original side-effect-only list comprehension
    # ([ totals.update(...) for ... ]).
    totals = { x['url']:0.0 for x in book_list if x['good'] }
    # logger.debug(sites_lists.keys())
    def do_indiv_notif(percent, msg):
        # msg is the book's url; each book contributes an equal share
        # of the overall progress bar.
        totals[msg] = percent/len(totals)
        notification(max(0.01, sum(totals.values())),
                     _('%(count)d of %(total)d stories finished downloading')%{'count':count,'total':len(totals)})
    donelist = []
    for book in book_list:
        # logger.info("%s"%book['url'])
        donelist.append(do_download_for_worker(book,options,merge,do_indiv_notif))
        count += 1
    return finish_download(donelist)
def finish_download(donelist):
    '''
    Post-process a completed list of book result dicts: sort by
    original list order, log a per-status summary, and assign each
    book a 'reportorder' so results accumulated from multiple
    background jobs can be interleaved in a stable status-major order.

    :param donelist: list of book dicts with 'listorder', 'good',
                     'status', 'url', 'comment' keys.
    :returns: the sorted book list (used as the job result).
    '''
    book_list = sorted(donelist,key=lambda x : x['listorder'])
    logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(status)s %(url)s %(comment)s" % book for book in book_list])))
    good_lists = defaultdict(list)
    bad_lists = defaultdict(list)
    for book in book_list:
        if book['good']:
            good_lists[book['status']].append(book)
        else:
            bad_lists[book['status']].append(book)
    order = [_('Add'),
             _('Update'),
             _('Meta'),
             _('Different URL'),
             _('Rejected'),
             _('Skipped'),
             _('Bad'),
             _('Error'),
             ]
    stnum = 0
    for d in [ good_lists, bad_lists ]:
        for status in order:
            stnum += 1
            # pop() instead of indexing: 'if d[status]' on a
            # defaultdict inserts an empty list for every status not
            # present, which the leftover loop below would then log as
            # a spurious empty section.
            l = d.pop(status, None)
            if l:
                logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in l])))
                for book in l:
                    # Add prior listorder to 10000 * status num for
                    # ordering of accumulated results with multiple bg
                    # jobs
                    book['reportorder'] = stnum*10000 + book['listorder']
        # just in case a status is added but doesn't appear in order.
        for status in d.keys():
            logger.info("\n"+status+"\n%s\n"%("\n".join([book['url'] for book in d[status]])))
    # return the book list as the job result
    return book_list

View file

@ -197,6 +197,10 @@ default_prefs['auto_reject_from_email'] = False
default_prefs['update_existing_only_from_email'] = False
default_prefs['download_from_email_immediately'] = False
default_prefs['single_proc_jobs'] = True
default_prefs['site_split_jobs'] = False
default_prefs['reconsolidate_jobs'] = False
def set_library_config(library_config,db,setting=PREFS_KEY_SETTINGS):
db.prefs.set_namespaced(PREFS_NAMESPACE,
setting,

View file

@ -33,6 +33,9 @@ from .. import configurable as configurable
from . import base_adapter
from . import base_efiction_adapter
from . import adapter_test1
from . import adapter_test2
from . import adapter_test3
from . import adapter_test4
from . import adapter_fanfictionnet
from . import adapter_fictionalleyarchiveorg
from . import adapter_fictionpresscom

View file

@ -29,6 +29,13 @@ from ..six import ensure_text
from .base_adapter import BaseSiteAdapter, makeDate
try: # just a way to switch between CLI and PI
## webbrowser.open doesn't work on some linux flavors.
## piggyback Calibre's version.
from calibre.gui2 import safe_open_url as open_url
except :
from webbrowser import open as open_url
class TestSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url):
@ -321,6 +328,8 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
rt = random.uniform(t*0.5, t*1.5)
logger.debug("random sleep(%0.2f-%0.2f):%0.2f"%(t*0.5, t*1.5,rt))
time.sleep(rt)
# open_url("https://echo.free.beeceptor.com/%s.%s"%(self.story.getMetadata('siteabbrev'),
# self.story.getMetadata('storyId')))
if "chapter=1" in url :
text=u'''

View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from .adapter_test1 import TestSiteAdapter
class Test2SiteAdapter(TestSiteAdapter):
    '''Test-site adapter registered under test2.com; behavior is
    entirely inherited from TestSiteAdapter.'''

    def __init__(self, config, url):
        super(Test2SiteAdapter, self).__init__(config, url)

    @staticmethod
    def getSiteDomain():
        return 'test2.com'
def getClass():
    # Module-level hook: the adapter registry calls getClass() to
    # obtain the adapter class this module provides.
    return Test2SiteAdapter

View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from .adapter_test1 import TestSiteAdapter
class Test3SiteAdapter(TestSiteAdapter):
    '''Test-site adapter registered under test3.com; behavior is
    entirely inherited from TestSiteAdapter.'''

    def __init__(self, config, url):
        super(Test3SiteAdapter, self).__init__(config, url)

    @staticmethod
    def getSiteDomain():
        return 'test3.com'
def getClass():
    # Module-level hook: the adapter registry calls getClass() to
    # obtain the adapter class this module provides.
    return Test3SiteAdapter

View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2019 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
from .adapter_test1 import TestSiteAdapter
class Test4SiteAdapter(TestSiteAdapter):
    '''Test-site adapter registered under test4.com; behavior is
    entirely inherited from TestSiteAdapter.'''

    def __init__(self, config, url):
        super(Test4SiteAdapter, self).__init__(config, url)

    @staticmethod
    def getSiteDomain():
        return 'test4.com'
def getClass():
    # Module-level hook: the adapter registry calls getClass() to
    # obtain the adapter class this module provides.
    return Test4SiteAdapter