Add tweak_fg_sleep feature set for ffnet, make ffnet chapter look ahead optional

2026-01-18 14:12:58 +01:00 · 2014-02-24 21:08:56 -06:00 · 2014-02-24 21:08:56 -06:00 · 6ef28cee6a
commit 6ef28cee6a
parent fc3431098d
11 changed files with 107 additions and 43 deletions
--- a/app.yaml
+++ b/app.yaml
@ -1,6 +1,6 @@
 # ffd-retief-hrd fanfictiondownloader
 application: fanfictiondownloader
-version: 4-4-92
+version: 4-4-93
 runtime: python27
 api_version: 1
 threadsafe: true
--- a/calibre-plugin/init.py
+++ b/calibre-plugin/init.py
@ -42,7 +42,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
    description         = _('UI plugin to download FanFiction stories from various sites.')
    supported_platforms = ['windows', 'osx', 'linux']
    author              = 'Jim Miller'
-    version             = (1, 8, 11)
+    version             = (1, 8, 12)
    minimum_calibre_version = (1, 13, 0)

    #: This field defines the GUI plugin class that contains all the code
--- a/calibre-plugin/config.py
+++ b/calibre-plugin/config.py
@ -55,7 +55,7 @@ from calibre_plugins.fanfictiondownloader_plugin.dialogs \
            EditTextDialog, RejectUrlEntry)
    
 from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.adapters \
-    import (getConfigSections, getNormalStoryURL)
+    import getConfigSections

 from calibre_plugins.fanfictiondownloader_plugin.common_utils \
    import ( KeyboardConfigDialog, PrefsViewerDialog )
--- a/calibre-plugin/ffdl_plugin.py
+++ b/calibre-plugin/ffdl_plugin.py
@ -660,13 +660,13 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
        #print("update_dialog()")
        
        db = self.gui.current_db
-        book_list = map( self.make_book_id_only, id_list )
+        books = map( self.make_book_id_only, id_list )

-        for j, book in enumerate(book_list):
+        for j, book in enumerate(books):
            book['listorder'] = j
-            
+
        LoopProgressDialog(self.gui,
-                           book_list,
+                           books,
                           partial(self.populate_book_from_calibre_id, db=self.gui.current_db),
                           self.update_dialog_finish,
                           init_label=_("Collecting stories for update..."),
@ -718,6 +718,9 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
            url_list = split_text_to_urls(books)
            books = self.convert_urls_to_books(url_list)

+        ## for tweak_fg_sleep
+        options['ffnetcount']=len(filter(lambda x : x['site']=='www.fanfiction.net', books))
+
        options['version'] = self.version
        logger.debug(self.version)
        
@ -815,6 +818,18 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
        
        options['personal.ini'] = get_ffdl_personalini()
        adapter = get_ffdl_adapter(url,fileform)
+        # reduce foreground sleep time for ffnet when few books.
+        if 'ffnetcount' in options and \
+                adapter.getConfig('tweak_fg_sleep') and \
+                adapter.getSiteDomain() == 'www.fanfiction.net':
+            minslp = float(adapter.getConfig('min_fg_sleep'))
+            maxslp = float(adapter.getConfig('max_fg_sleep'))
+            dwnlds = float(adapter.getConfig('max_fg_sleep_at_downloads'))
+            m = (maxslp-minslp) / (dwnlds-1)
+            b = minslp - m
+            slp = min(maxslp,m*float(options['ffnetcount'])+b)
+            #print("m:%s b:%s = %s"%(m,b,slp))
+            adapter.set_sleep(slp)

        ## three tries, that's enough if both user/pass & is_adult needed,
        ## or a couple tries of one or the other
@ -1629,7 +1644,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):

        if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']):

-            logger.debug("Do Generate Cover added:%s gcnewonly:%s"%(book['added'],prefs['gcnewonly']))
+            #logger.debug("Do Generate Cover added:%s gcnewonly:%s"%(book['added'],prefs['gcnewonly']))
            
            # force a refresh if generating cover so complex composite
            # custom columns are current and correct
@ -1858,13 +1873,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
            book['status'] = _('Not Found')
        else:
            # get normalized url or None.
-            book['url'] = self.is_good_downloader_url(url)
-            if book['url'] == None:
+            urlsitetuple = adapters.getNormalStoryURLSite(url)
+            if urlsitetuple == None:
                book['url'] = url
                book['comment'] = _("URL is not a valid story URL.")
                book['good'] = False
                book['icon']='dialog_error.png'
                book['status'] = _('Bad URL')
+            else:
+                (book['url'],book['site'])=urlsitetuple
    
    def get_story_url(self, db, book_id=None, path=None):
        if book_id == None:
--- a/defaults.ini
+++ b/defaults.ini
@ -1215,6 +1215,11 @@ user_agent:
 ## Change this to false to use them anyway.
 never_make_cover: true

+## fanfiction.net is blocking people more aggressively.  This is the
+## delay, in seconds, before each request.  If you download only a few
+## stories, and not often, you can likely get by with reducing it.
+slow_down_sleep_time:4
+
 ## Extra metadata that this adapter knows about.  See [dramione.org]
 ## for examples of how to use them.
 extra_valid_entries:reviews,favs,follows
@ -1223,13 +1228,20 @@ extra_valid_entries:reviews,favs,follows
 ## to be romantic pairings.
 ships_label:Pairings

-## Date formats used by FFDL.  Published and Update don't have time.
+## Date formats used by FFDL.  Published and Updated dates don't usually
+## include a time of day, but they do now on ffnet.
 ## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
 ## Note that ini format requires % to be escaped as %%.
 #dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
 datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
 dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S

+## ffnet used to have a tendency to send out update notices in email
+## before all their servers were showing the update on the first
+## chapter.  Checking for the next chapter costs another server request
+## and doesn't seem to be needed lately, so it now defaults to off.
+check_next_chapter:false
+
 [www.fanfiktion.de]
 ## Some sites require login (or login for some rated stories) The
 ## program can prompt you, or you can save it in config.  In
--- a/fanficdownloader/adapters/init.py
+++ b/fanficdownloader/adapters/init.py
@ -146,6 +146,13 @@ for x in imports():
            __domain_map[site]=cls

 def getNormalStoryURL(url):
+    r = getNormalStoryURLSite(url)
+    if r:
+        return r[0]
+    else:
+        return None
+
+def getNormalStoryURLSite(url):
    if not getNormalStoryURL.__dummyconfig:
        getNormalStoryURL.__dummyconfig = Configuration("test1.com","EPUB")
    # pulling up an adapter is pretty low over-head.  If
@ -153,10 +160,11 @@ def getNormalStoryURL(url):
    try:
        adapter = getAdapter(getNormalStoryURL.__dummyconfig,url)
        url = adapter.url
+        site = adapter.getSiteDomain()
        del adapter
-        return url
+        return (url,site)
    except:
-        return None;
+        return None

 # kludgey function static/singleton
 getNormalStoryURL.__dummyconfig = None
--- a/fanficdownloader/adapters/adapter_fanfictionnet.py
+++ b/fanficdownloader/adapters/adapter_fanfictionnet.py
@ -107,28 +107,31 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
        if "not found. Please check to see you are not using an outdated url." in data:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  'Chapter not found. Please check to see you are not using an outdated url.'" % url)

-        try:
-            # rather nasty way to check for a newer chapter.  ffnet has a
-            # tendency to send out update notices in email before all
-            # their servers are showing the update on the first chapter.
+        if self.getConfig('check_next_chapter'):
            try:
-                chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
-            # get chapter part of url.
+                ## ffnet used to have a tendency to send out update
+                ## notices in email before all their servers were
+                ## showing the update on the first chapter.  It
+                ## generates another server request and doesn't seem
+                ## to be needed lately, so now default it to off.
+                try:
+                    chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
+                # get chapter part of url.
+                except:
+                    chapcount = 1
+                chapter = url.split('/',)[5]
+                tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
+                                                self.story.getMetadata('storyId'),
+                                                chapcount+1)
+                logger.debug('=Trying newer chapter: %s' % tryurl)
+                newdata = self._fetchUrl(tryurl)
+                if "not found. Please check to see you are not using an outdated url." \
+                        not in newdata:
+                    logger.debug('=======Found newer chapter: %s' % tryurl)
+                    soup = bs.BeautifulSoup(newdata)
            except:
-                chapcount = 1
-            chapter = url.split('/',)[5]
-            tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
-                                            self.story.getMetadata('storyId'),
-                                            chapcount+1)
-            logger.debug('=Trying newer chapter: %s' % tryurl)
-            newdata = self._fetchUrl(tryurl)
-            if "not found. Please check to see you are not using an outdated url." \
-                    not in newdata:
-                logger.debug('=======Found newer chapter: %s' % tryurl)
-                soup = bs.BeautifulSoup(newdata)
-        except:
-            pass
-        
+                pass
+            
        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"^/u/\d+"))
        self.story.setMetadata('authorId',a['href'].split('/')[2])
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@ -50,8 +50,7 @@ class TestSiteAdapter(BaseSiteAdapter):
    def extractChapterUrlsAndMetadata(self):
        idstr = self.story.getMetadata('storyId')
        idnum = int(idstr)
-        if self.getConfig('slow_down_sleep_time'):
-            time.sleep(float(self.getConfig('slow_down_sleep_time')))
+        self.do_sleep()

        if idnum >= 1000:
            logger.warn("storyId:%s - Custom INI data will be used."%idstr)
@ -298,8 +297,7 @@ Some more longer description.  "I suck at summaries!"  "Better than it sounds!"

    def getChapterText(self, url):
        logger.debug('Getting chapter text from: %s' % url)
-        if self.getConfig('slow_down_sleep_time'):
-            time.sleep(float(self.getConfig('slow_down_sleep_time')))
+        self.do_sleep()
        if self.story.getMetadata('storyId').startswith('670') or \
                self.story.getMetadata('storyId').startswith('672'):
            time.sleep(1.0)
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@ -76,6 +76,8 @@ class BaseSiteAdapter(Configurable):
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False
+
+        self.override_sleep = None
        
        self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
        ## Specific UA because too many sites are blocking the default python UA.
@ -147,8 +149,7 @@ class BaseSiteAdapter(Configurable):

    # Assumes application/x-www-form-urlencoded.  parameters, headers are dict()s
    def _postUrl(self, url, parameters={}, headers={}):
-        if self.getConfig('slow_down_sleep_time'):
-            time.sleep(float(self.getConfig('slow_down_sleep_time')))
+        self.do_sleep()

        ## u2.Request assumes POST when data!=None.  Also assumes data
        ## is application/x-www-form-urlencoded.
@ -166,11 +167,20 @@ class BaseSiteAdapter(Configurable):
            return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters)).read()
        else:
            return self.opener.open(url.replace(' ','%20')).read()
+
+    def set_sleep(self,val):
+        print("\n===========\n set sleep time %s\n==========="%val)
+        self.override_sleep = val
    
+    def do_sleep(self):
+        if self.override_sleep:
+            time.sleep(float(self.override_sleep))
+        elif self.getConfig('slow_down_sleep_time'):
+            time.sleep(float(self.getConfig('slow_down_sleep_time')))
+        
    # parameters is a dict()
    def _fetchUrl(self, url, parameters=None):
-        if self.getConfig('slow_down_sleep_time'):
-            time.sleep(float(self.getConfig('slow_down_sleep_time')))
+        self.do_sleep()

        excpt=None
        for sleeptime in [0, 0.5, 4, 9]:
--- a/index.html
+++ b/index.html
@ -76,7 +76,7 @@
 	    If you have any problems with this application, please
 	    report them in
 	    the <a href="http://groups.google.com/group/fanfic-downloader">FanFictionDownLoader Google Group</a>.  The
-	    <a href="http://4-4-91.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
+	    <a href="http://4-4-92.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
 	  </p>
 	  <div id='error'>
 	    {{ error_message }}
--- a/plugin-defaults.ini
+++ b/plugin-defaults.ini
@ -1207,6 +1207,15 @@ never_make_cover: true
 ## reducing this sleep.
 slow_down_sleep_time:4

+## ffnet is sensitive to too many hits; users are sensitive to long
+## foreground waits during initial metadata collection.  If enabled,
+## foreground sleep scales linearly from min_fg_sleep (1 ffnet story)
+## to max_fg_sleep (max_fg_sleep_at_downloads or more; must be > 1).
+tweak_fg_sleep:true
+min_fg_sleep:1.0
+max_fg_sleep:4.0
+max_fg_sleep_at_downloads: 10
+
 ## Extra metadata that this adapter knows about.  See [dramione.org]
 ## for examples of how to use them.
 extra_valid_entries:reviews,favs,follows
@ -1215,13 +1224,20 @@ extra_valid_entries:reviews,favs,follows
 ## to be romantic pairings.
 ships_label:Pairings

-## Date formats used by FFDL.  Published and Update don't have time.
+## Date formats used by FFDL.  Published and Updated dates don't usually
+## include a time of day, but they do now on ffnet.
 ## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
 ## Note that ini format requires % to be escaped as %%.
 #dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
 datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
 dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S

+## ffnet used to have a tendency to send out update notices in email
+## before all their servers were showing the update on the first
+## chapter.  Checking for the next chapter costs another server request
+## and doesn't seem to be needed lately, so it now defaults to off.
+check_next_chapter:false
+
 [www.fanfiktion.de]
 ## Some sites require login (or login for some rated stories) The
 ## program can prompt you, or you can save it in config.  In