From 6ef28cee6a52938d4d77bfb014e321f9167e338e Mon Sep 17 00:00:00 2001
From: Jim Miller
Date: Mon, 24 Feb 2014 21:08:56 -0600
Subject: [PATCH] Add tweak_fg_sleep feature set for ffnet, make ffnet chapter
look ahead optional
---
app.yaml | 2 +-
calibre-plugin/__init__.py | 2 +-
calibre-plugin/config.py | 2 +-
calibre-plugin/ffdl_plugin.py | 31 ++++++++++---
defaults.ini | 14 +++++-
fanficdownloader/adapters/__init__.py | 12 +++++-
.../adapters/adapter_fanfictionnet.py | 43 ++++++++++---------
fanficdownloader/adapters/adapter_test1.py | 6 +--
fanficdownloader/adapters/base_adapter.py | 18 ++++++--
index.html | 2 +-
plugin-defaults.ini | 18 +++++++-
11 files changed, 107 insertions(+), 43 deletions(-)
diff --git a/app.yaml b/app.yaml
index 997f17a7..b4ae8cf7 100644
--- a/app.yaml
+++ b/app.yaml
@@ -1,6 +1,6 @@
# ffd-retief-hrd fanfictiondownloader
application: fanfictiondownloader
-version: 4-4-92
+version: 4-4-93
runtime: python27
api_version: 1
threadsafe: true
diff --git a/calibre-plugin/__init__.py b/calibre-plugin/__init__.py
index 630369ca..4f6d8085 100644
--- a/calibre-plugin/__init__.py
+++ b/calibre-plugin/__init__.py
@@ -42,7 +42,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = _('UI plugin to download FanFiction stories from various sites.')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
- version = (1, 8, 11)
+ version = (1, 8, 12)
minimum_calibre_version = (1, 13, 0)
#: This field defines the GUI plugin class that contains all the code
diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py
index a2a7251c..8a2dd92a 100644
--- a/calibre-plugin/config.py
+++ b/calibre-plugin/config.py
@@ -55,7 +55,7 @@ from calibre_plugins.fanfictiondownloader_plugin.dialogs \
EditTextDialog, RejectUrlEntry)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.adapters \
- import (getConfigSections, getNormalStoryURL)
+ import getConfigSections
from calibre_plugins.fanfictiondownloader_plugin.common_utils \
import ( KeyboardConfigDialog, PrefsViewerDialog )
diff --git a/calibre-plugin/ffdl_plugin.py b/calibre-plugin/ffdl_plugin.py
index a37c87d4..6c254d9c 100644
--- a/calibre-plugin/ffdl_plugin.py
+++ b/calibre-plugin/ffdl_plugin.py
@@ -660,13 +660,13 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
#print("update_dialog()")
db = self.gui.current_db
- book_list = map( self.make_book_id_only, id_list )
+ books = map( self.make_book_id_only, id_list )
- for j, book in enumerate(book_list):
+ for j, book in enumerate(books):
book['listorder'] = j
-
+
LoopProgressDialog(self.gui,
- book_list,
+ books,
partial(self.populate_book_from_calibre_id, db=self.gui.current_db),
self.update_dialog_finish,
init_label=_("Collecting stories for update..."),
@@ -718,6 +718,9 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
url_list = split_text_to_urls(books)
books = self.convert_urls_to_books(url_list)
+ ## for tweak_fg_sleep
+ options['ffnetcount']=len(filter(lambda x : x['site']=='www.fanfiction.net', books))
+
options['version'] = self.version
logger.debug(self.version)
@@ -815,6 +818,18 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
options['personal.ini'] = get_ffdl_personalini()
adapter = get_ffdl_adapter(url,fileform)
+ # reduce foreground sleep time for ffnet when few books.
+ if 'ffnetcount' in options and \
+ adapter.getConfig('tweak_fg_sleep') and \
+ adapter.getSiteDomain() == 'www.fanfiction.net':
+ minslp = float(adapter.getConfig('min_fg_sleep'))
+ maxslp = float(adapter.getConfig('max_fg_sleep'))
+ dwnlds = float(adapter.getConfig('max_fg_sleep_at_downloads'))
+ m = (maxslp-minslp) / (dwnlds-1)
+ b = minslp - m
+ slp = min(maxslp,m*float(options['ffnetcount'])+b)
+ #print("m:%s b:%s = %s"%(m,b,slp))
+ adapter.set_sleep(slp)
## three tries, that's enough if both user/pass & is_adult needed,
## or a couple tries of one or the other
@@ -1629,7 +1644,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']):
- logger.debug("Do Generate Cover added:%s gcnewonly:%s"%(book['added'],prefs['gcnewonly']))
+ #logger.debug("Do Generate Cover added:%s gcnewonly:%s"%(book['added'],prefs['gcnewonly']))
# force a refresh if generating cover so complex composite
# custom columns are current and correct
@@ -1858,13 +1873,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['status'] = _('Not Found')
else:
# get normalized url or None.
- book['url'] = self.is_good_downloader_url(url)
- if book['url'] == None:
+ urlsitetuple = adapters.getNormalStoryURLSite(url)
+ if urlsitetuple == None:
book['url'] = url
book['comment'] = _("URL is not a valid story URL.")
book['good'] = False
book['icon']='dialog_error.png'
book['status'] = _('Bad URL')
+ else:
+ (book['url'],book['site'])=urlsitetuple
def get_story_url(self, db, book_id=None, path=None):
if book_id == None:
diff --git a/defaults.ini b/defaults.ini
index 3dee6420..7538a77d 100644
--- a/defaults.ini
+++ b/defaults.ini
@@ -1215,6 +1215,11 @@ user_agent:
## Change this to false to use them anyway.
never_make_cover: true
+## fanfiction.net is blocking people more aggressively. If you
+## download fewer stories less often you can likely get by with
+## reducing this sleep.
+slow_down_sleep_time:4
+
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
@@ -1223,13 +1228,20 @@ extra_valid_entries:reviews,favs,follows
## to be romantic pairings.
ships_label:Pairings
-## Date formats used by FFDL. Published and Update don't have time.
+## Date formats used by FFDL. Published and Updated dates don't usually
+## include a time, but they do now on ffnet.
## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
#dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
+## ffnet used to have a tendency to send out update notices in email
+## before all their servers were showing the update on the first
+## chapter. Checking for a newer chapter costs another server request
+## and doesn't seem to be needed lately, so it now defaults to off.
+check_next_chapter:false
+
[www.fanfiktion.de]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
diff --git a/fanficdownloader/adapters/__init__.py b/fanficdownloader/adapters/__init__.py
index a3051e30..85c5f3c8 100644
--- a/fanficdownloader/adapters/__init__.py
+++ b/fanficdownloader/adapters/__init__.py
@@ -146,6 +146,13 @@ for x in imports():
__domain_map[site]=cls
def getNormalStoryURL(url):
+ r = getNormalStoryURLSite(url)
+ if r:
+ return r[0]
+ else:
+ return None
+
+def getNormalStoryURLSite(url):
if not getNormalStoryURL.__dummyconfig:
getNormalStoryURL.__dummyconfig = Configuration("test1.com","EPUB")
# pulling up an adapter is pretty low over-head. If
@@ -153,10 +160,11 @@ def getNormalStoryURL(url):
try:
adapter = getAdapter(getNormalStoryURL.__dummyconfig,url)
url = adapter.url
+ site = adapter.getSiteDomain()
del adapter
- return url
+ return (url,site)
except:
- return None;
+ return None
# kludgey function static/singleton
getNormalStoryURL.__dummyconfig = None
diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py
index 01856ab6..87306639 100644
--- a/fanficdownloader/adapters/adapter_fanfictionnet.py
+++ b/fanficdownloader/adapters/adapter_fanfictionnet.py
@@ -107,28 +107,31 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
if "not found. Please check to see you are not using an outdated url." in data:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url)
- try:
- # rather nasty way to check for a newer chapter. ffnet has a
- # tendency to send out update notices in email before all
- # their servers are showing the update on the first chapter.
+ if self.getConfig('check_next_chapter'):
try:
- chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
- # get chapter part of url.
+ ## ffnet used to have a tendency to send out update
+ ## notices in email before all their servers were
+ ## showing the update on the first chapter. It
+ ## generates another server request and doesn't seem
+ ## to be needed lately, so now default it to off.
+ try:
+ chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
+ # get chapter part of url.
+ except:
+ chapcount = 1
+ chapter = url.split('/',)[5]
+ tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
+ self.story.getMetadata('storyId'),
+ chapcount+1)
+ logger.debug('=Trying newer chapter: %s' % tryurl)
+ newdata = self._fetchUrl(tryurl)
+ if "not found. Please check to see you are not using an outdated url." \
+ not in newdata:
+ logger.debug('=======Found newer chapter: %s' % tryurl)
+ soup = bs.BeautifulSoup(newdata)
except:
- chapcount = 1
- chapter = url.split('/',)[5]
- tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
- self.story.getMetadata('storyId'),
- chapcount+1)
- logger.debug('=Trying newer chapter: %s' % tryurl)
- newdata = self._fetchUrl(tryurl)
- if "not found. Please check to see you are not using an outdated url." \
- not in newdata:
- logger.debug('=======Found newer chapter: %s' % tryurl)
- soup = bs.BeautifulSoup(newdata)
- except:
- pass
-
+ pass
+
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"^/u/\d+"))
self.story.setMetadata('authorId',a['href'].split('/')[2])
diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py
index d1dac36a..f54d499e 100644
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@@ -50,8 +50,7 @@ class TestSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
idstr = self.story.getMetadata('storyId')
idnum = int(idstr)
- if self.getConfig('slow_down_sleep_time'):
- time.sleep(float(self.getConfig('slow_down_sleep_time')))
+ self.do_sleep()
if idnum >= 1000:
logger.warn("storyId:%s - Custom INI data will be used."%idstr)
@@ -298,8 +297,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
- if self.getConfig('slow_down_sleep_time'):
- time.sleep(float(self.getConfig('slow_down_sleep_time')))
+ self.do_sleep()
if self.story.getMetadata('storyId').startswith('670') or \
self.story.getMetadata('storyId').startswith('672'):
time.sleep(1.0)
diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index 326ab2d1..0da91cfa 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -76,6 +76,8 @@ class BaseSiteAdapter(Configurable):
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
self.is_adult=False
+
+ self.override_sleep = None
self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
## Specific UA because too many sites are blocking the default python UA.
@@ -147,8 +149,7 @@ class BaseSiteAdapter(Configurable):
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
def _postUrl(self, url, parameters={}, headers={}):
- if self.getConfig('slow_down_sleep_time'):
- time.sleep(float(self.getConfig('slow_down_sleep_time')))
+ self.do_sleep()
## u2.Request assumes POST when data!=None. Also assumes data
## is application/x-www-form-urlencoded.
@@ -166,11 +167,20 @@ class BaseSiteAdapter(Configurable):
return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters)).read()
else:
return self.opener.open(url.replace(' ','%20')).read()
+
+ def set_sleep(self,val):
+ print("\n===========\n set sleep time %s\n==========="%val)
+ self.override_sleep = val
+ def do_sleep(self):
+ if self.override_sleep:
+ time.sleep(float(self.override_sleep))
+ elif self.getConfig('slow_down_sleep_time'):
+ time.sleep(float(self.getConfig('slow_down_sleep_time')))
+
# parameters is a dict()
def _fetchUrl(self, url, parameters=None):
- if self.getConfig('slow_down_sleep_time'):
- time.sleep(float(self.getConfig('slow_down_sleep_time')))
+ self.do_sleep()
excpt=None
for sleeptime in [0, 0.5, 4, 9]:
diff --git a/index.html b/index.html
index d3b0381e..cda1f59d 100644
--- a/index.html
+++ b/index.html
@@ -76,7 +76,7 @@
If you have any problems with this application, please
report them in
the FanFictionDownLoader Google Group. The
- Previous Version is also available for you to use if necessary.
+ Previous Version is also available for you to use if necessary.
{{ error_message }}
diff --git a/plugin-defaults.ini b/plugin-defaults.ini
index 52a60c16..e0239a9d 100644
--- a/plugin-defaults.ini
+++ b/plugin-defaults.ini
@@ -1207,6 +1207,15 @@ never_make_cover: true
## reducing this sleep.
slow_down_sleep_time:4
+## ffnet is sensitive to too many hits. Users are sensitive to long
+## waits during the initial metadata collection in the foreground.
+## With tweak_fg_sleep on, foreground sleep rises linearly from min_fg_sleep
+## (1 ffnet story) to max_fg_sleep (max_fg_sleep_at_downloads stories or more).
+tweak_fg_sleep:true
+min_fg_sleep:1.0
+max_fg_sleep:4.0
+max_fg_sleep_at_downloads: 10
+
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
@@ -1215,13 +1224,20 @@ extra_valid_entries:reviews,favs,follows
## to be romantic pairings.
ships_label:Pairings
-## Date formats used by FFDL. Published and Update don't have time.
+## Date formats used by FFDL. Published and Updated dates don't usually
+## include a time, but they do now on ffnet.
## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
#dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
+## ffnet used to have a tendency to send out update notices in email
+## before all their servers were showing the update on the first
+## chapter. Checking for a newer chapter costs another server request
+## and doesn't seem to be needed lately, so it now defaults to off.
+check_next_chapter:false
+
[www.fanfiktion.de]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In