Add tweak_fg_sleep feature set for ffnet, make ffnet chapter look ahead optional

This commit is contained in:
Jim Miller 2014-02-24 21:08:56 -06:00
parent fc3431098d
commit 6ef28cee6a
11 changed files with 107 additions and 43 deletions

View file

@ -1,6 +1,6 @@
# ffd-retief-hrd fanfictiondownloader
application: fanfictiondownloader
version: 4-4-92
version: 4-4-93
runtime: python27
api_version: 1
threadsafe: true

View file

@ -42,7 +42,7 @@ class FanFictionDownLoaderBase(InterfaceActionBase):
description = _('UI plugin to download FanFiction stories from various sites.')
supported_platforms = ['windows', 'osx', 'linux']
author = 'Jim Miller'
version = (1, 8, 11)
version = (1, 8, 12)
minimum_calibre_version = (1, 13, 0)
#: This field defines the GUI plugin class that contains all the code

View file

@ -55,7 +55,7 @@ from calibre_plugins.fanfictiondownloader_plugin.dialogs \
EditTextDialog, RejectUrlEntry)
from calibre_plugins.fanfictiondownloader_plugin.fanficdownloader.adapters \
import (getConfigSections, getNormalStoryURL)
import getConfigSections
from calibre_plugins.fanfictiondownloader_plugin.common_utils \
import ( KeyboardConfigDialog, PrefsViewerDialog )

View file

@ -660,13 +660,13 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
#print("update_dialog()")
db = self.gui.current_db
book_list = map( self.make_book_id_only, id_list )
books = map( self.make_book_id_only, id_list )
for j, book in enumerate(book_list):
for j, book in enumerate(books):
book['listorder'] = j
LoopProgressDialog(self.gui,
book_list,
books,
partial(self.populate_book_from_calibre_id, db=self.gui.current_db),
self.update_dialog_finish,
init_label=_("Collecting stories for update..."),
@ -718,6 +718,9 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
url_list = split_text_to_urls(books)
books = self.convert_urls_to_books(url_list)
## for tweak_fg_sleep
options['ffnetcount']=len(filter(lambda x : x['site']=='www.fanfiction.net', books))
options['version'] = self.version
logger.debug(self.version)
@ -815,6 +818,18 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
options['personal.ini'] = get_ffdl_personalini()
adapter = get_ffdl_adapter(url,fileform)
# reduce foreground sleep time for ffnet when few books.
if 'ffnetcount' in options and \
adapter.getConfig('tweak_fg_sleep') and \
adapter.getSiteDomain() == 'www.fanfiction.net':
minslp = float(adapter.getConfig('min_fg_sleep'))
maxslp = float(adapter.getConfig('max_fg_sleep'))
dwnlds = float(adapter.getConfig('max_fg_sleep_at_downloads'))
m = (maxslp-minslp) / (dwnlds-1)
b = minslp - m
slp = min(maxslp,m*float(options['ffnetcount'])+b)
#print("m:%s b:%s = %s"%(m,b,slp))
adapter.set_sleep(slp)
## three tries, that's enough if both user/pass & is_adult needed,
## or a couple tries of one or the other
@ -1629,7 +1644,7 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
if 'Generate Cover' in self.gui.iactions and (book['added'] or not prefs['gcnewonly']):
logger.debug("Do Generate Cover added:%s gcnewonly:%s"%(book['added'],prefs['gcnewonly']))
#logger.debug("Do Generate Cover added:%s gcnewonly:%s"%(book['added'],prefs['gcnewonly']))
# force a refresh if generating cover so complex composite
# custom columns are current and correct
@ -1858,13 +1873,15 @@ class FanFictionDownLoaderPlugin(InterfaceAction):
book['status'] = _('Not Found')
else:
# get normalized url or None.
book['url'] = self.is_good_downloader_url(url)
if book['url'] == None:
urlsitetuple = adapters.getNormalStoryURLSite(url)
if urlsitetuple == None:
book['url'] = url
book['comment'] = _("URL is not a valid story URL.")
book['good'] = False
book['icon']='dialog_error.png'
book['status'] = _('Bad URL')
else:
(book['url'],book['site'])=urlsitetuple
def get_story_url(self, db, book_id=None, path=None):
if book_id == None:

View file

@ -1215,6 +1215,11 @@ user_agent:
## Change this to false to use them anyway.
never_make_cover: true
## fanfiction.net is blocking people more aggressively. If you
## download fewer stories less often you can likely get by with
## reducing this sleep.
slow_down_sleep_time:4
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
@ -1223,13 +1228,20 @@ extra_valid_entries:reviews,favs,follows
## to be romantic pairings.
ships_label:Pairings
## Date formats used by FFDL. Published and Update don't have time.
## Date formats used by FFDL. Published and Update don't usually have
## time, but they do now on ffnet.
## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
#dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
## ffnet used to have a tendency to send out update notices in email
## before all their servers were showing the update on the first
## chapter. It generates another server request and doesn't seem to
## be needed lately, so now default it to off.
check_next_chapter:false
[www.fanfiktion.de]
## Some sites require login (or login for some rated stories). The
## program can prompt you, or you can save it in config. In

View file

@ -146,6 +146,13 @@ for x in imports():
__domain_map[site]=cls
def getNormalStoryURL(url):
    """Return the normalized story URL for *url*, or None.

    Delegates to getNormalStoryURLSite() and keeps only the URL part of
    the (url, site) pair it returns.
    """
    url_and_site = getNormalStoryURLSite(url)
    if not url_and_site:
        return None
    return url_and_site[0]
def getNormalStoryURLSite(url):
if not getNormalStoryURL.__dummyconfig:
getNormalStoryURL.__dummyconfig = Configuration("test1.com","EPUB")
# pulling up an adapter is pretty low over-head. If
@ -153,10 +160,11 @@ def getNormalStoryURL(url):
try:
adapter = getAdapter(getNormalStoryURL.__dummyconfig,url)
url = adapter.url
site = adapter.getSiteDomain()
del adapter
return url
return (url,site)
except:
return None;
return None
# kludgey function static/singleton
getNormalStoryURL.__dummyconfig = None

View file

@ -107,28 +107,31 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
if "not found. Please check to see you are not using an outdated url." in data:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! 'Chapter not found. Please check to see you are not using an outdated url.'" % url)
try:
# rather nasty way to check for a newer chapter. ffnet has a
# tendency to send out update notices in email before all
# their servers are showing the update on the first chapter.
if self.getConfig('check_next_chapter'):
try:
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
# get chapter part of url.
## ffnet used to have a tendency to send out update
## notices in email before all their servers were
## showing the update on the first chapter. It
## generates another server request and doesn't seem
## to be needed lately, so now default it to off.
try:
chapcount = len(soup.find('select', { 'name' : 'chapter' } ).findAll('option'))
# get chapter part of url.
except:
chapcount = 1
chapter = url.split('/',)[5]
tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
self.story.getMetadata('storyId'),
chapcount+1)
logger.debug('=Trying newer chapter: %s' % tryurl)
newdata = self._fetchUrl(tryurl)
if "not found. Please check to see you are not using an outdated url." \
not in newdata:
logger.debug('=======Found newer chapter: %s' % tryurl)
soup = bs.BeautifulSoup(newdata)
except:
chapcount = 1
chapter = url.split('/',)[5]
tryurl = "https://%s/s/%s/%d/"%(self.getSiteDomain(),
self.story.getMetadata('storyId'),
chapcount+1)
logger.debug('=Trying newer chapter: %s' % tryurl)
newdata = self._fetchUrl(tryurl)
if "not found. Please check to see you are not using an outdated url." \
not in newdata:
logger.debug('=======Found newer chapter: %s' % tryurl)
soup = bs.BeautifulSoup(newdata)
except:
pass
pass
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"^/u/\d+"))
self.story.setMetadata('authorId',a['href'].split('/')[2])

View file

@ -50,8 +50,7 @@ class TestSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
idstr = self.story.getMetadata('storyId')
idnum = int(idstr)
if self.getConfig('slow_down_sleep_time'):
time.sleep(float(self.getConfig('slow_down_sleep_time')))
self.do_sleep()
if idnum >= 1000:
logger.warn("storyId:%s - Custom INI data will be used."%idstr)
@ -298,8 +297,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
if self.getConfig('slow_down_sleep_time'):
time.sleep(float(self.getConfig('slow_down_sleep_time')))
self.do_sleep()
if self.story.getMetadata('storyId').startswith('670') or \
self.story.getMetadata('storyId').startswith('672'):
time.sleep(1.0)

View file

@ -76,6 +76,8 @@ class BaseSiteAdapter(Configurable):
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
self.is_adult=False
self.override_sleep = None
self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
## Specific UA because too many sites are blocking the default python UA.
@ -147,8 +149,7 @@ class BaseSiteAdapter(Configurable):
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
def _postUrl(self, url, parameters={}, headers={}):
if self.getConfig('slow_down_sleep_time'):
time.sleep(float(self.getConfig('slow_down_sleep_time')))
self.do_sleep()
## u2.Request assumes POST when data!=None. Also assumes data
## is application/x-www-form-urlencoded.
@ -166,11 +167,20 @@ class BaseSiteAdapter(Configurable):
return self.opener.open(url.replace(' ','%20'),urllib.urlencode(parameters)).read()
else:
return self.opener.open(url.replace(' ','%20')).read()
def set_sleep(self,val):
    """Override the sleep time used by do_sleep() with *val*."""
    # Diagnostic banner written to stdout each time the override is set.
    banner = "\n===========\n set sleep time %s\n===========" % val
    print(banner)
    self.override_sleep = val
def do_sleep(self):
    """Sleep for the override time if one is set, else the configured time.

    Uses self.override_sleep when it is not None; otherwise falls back
    to the 'slow_down_sleep_time' config value.  Does nothing when
    neither is available.
    """
    # Compare against None rather than truthiness so that an explicit
    # override of 0 means "don't sleep" instead of silently falling
    # back to the configured (longer) delay.
    if self.override_sleep is not None:
        time.sleep(float(self.override_sleep))
        return
    configured = self.getConfig('slow_down_sleep_time')
    if configured:
        time.sleep(float(configured))
# parameters is a dict()
def _fetchUrl(self, url, parameters=None):
if self.getConfig('slow_down_sleep_time'):
time.sleep(float(self.getConfig('slow_down_sleep_time')))
self.do_sleep()
excpt=None
for sleeptime in [0, 0.5, 4, 9]:

View file

@ -76,7 +76,7 @@
If you have any problems with this application, please
report them in
the <a href="http://groups.google.com/group/fanfic-downloader">FanFictionDownLoader Google Group</a>. The
<a href="http://4-4-91.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
<a href="http://4-4-92.fanfictiondownloader.appspot.com">Previous Version</a> is also available for you to use if necessary.
</p>
<div id='error'>
{{ error_message }}

View file

@ -1207,6 +1207,15 @@ never_make_cover: true
## reducing this sleep.
slow_down_sleep_time:4
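## ffnet is sensitive to too many hits, but users are sensitive to long
## waits during the initial metadata collection in the foreground.
## When tweak_fg_sleep is true, the foreground sleep before each ffnet
## request scales linearly with the number of ffnet stories being
## downloaded: min_fg_sleep seconds for a single story, rising to
## max_fg_sleep seconds at max_fg_sleep_at_downloads stories or more.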
tweak_fg_sleep:true
min_fg_sleep:1.0
max_fg_sleep:4.0
max_fg_sleep_at_downloads: 10
## Extra metadata that this adapter knows about. See [dramione.org]
## for examples of how to use them.
extra_valid_entries:reviews,favs,follows
@ -1215,13 +1224,20 @@ extra_valid_entries:reviews,favs,follows
## to be romantic pairings.
ships_label:Pairings
## Date formats used by FFDL. Published and Update don't have time.
## Date formats used by FFDL. Published and Update don't usually have
## time, but they do now on ffnet.
## See http://docs.python.org/library/datetime.html#strftime-strptime-behavior
## Note that ini format requires % to be escaped as %%.
#dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
## ffnet used to have a tendency to send out update notices in email
## before all their servers were showing the update on the first
## chapter. It generates another server request and doesn't seem to
## be needed lately, so now default it to off.
check_next_chapter:false
[www.fanfiktion.de]
## Some sites require login (or login for some rated stories). The
## program can prompt you, or you can save it in config. In