Remove 'extrasleep' feature--it was ugly--and increase slow_down_sleep_time for the couple of sites that used it. We can trust the users, right?

This commit is contained in:
Jim Miller 2021-01-31 11:34:38 -06:00
parent fad21498d2
commit f02b854343
7 changed files with 14 additions and 58 deletions

View file

@ -581,7 +581,7 @@ use_pagecache:true
## download fewer stories less often you can likely get by with
## reducing this sleep. The hard-coded 2sec sleep that used to be
## added on top of slow_down_sleep_time has been removed.
slow_down_sleep_time:4
slow_down_sleep_time:6
## exclude emoji and default avatars.
cover_exclusion_regexp:(/styles/|xenforo/avatars/avatar.*\.png|https://cdn\.jsdelivr\.net/gh/|https://cdn\.jsdelivr\.net/emojione)
@ -1127,6 +1127,7 @@ windows_eol: true
[test1.com]
use_pagecache:true
slow_down_sleep_time:2
extratags: FanFiction,Testing
# extracategories:Fafner
@ -2833,15 +2834,15 @@ skip_author_cover:true
## fanfiction.net is blocking people more aggressively. If you
## download fewer stories less often you can likely get by with
## reducing this sleep.
slow_down_sleep_time:8
slow_down_sleep_time:12
## ffnet is sensitive to too many hits. Users are sensitive to long
## waits during the initial metadata collection in the foreground.
## When used, these settings will speed up metadata downloads in the
## foreground linearly.
tweak_fg_sleep:true
min_fg_sleep:3.0
max_fg_sleep:8.0
min_fg_sleep:4.0
max_fg_sleep:12.0
max_fg_sleep_at_downloads:4
## Extra metadata that this adapter knows about. See [archiveofourown.org]

View file

@ -89,14 +89,6 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return self._get_site_url_pattern()
def get_request(self,url,extrasleep=1.0,usecache=True):
## ffnet (and, I assume, fpcom) tends to fail more if hit too
## fast. This is in addition to whatever the
## slow_down_sleep_time setting is.
return BaseSiteAdapter.get_request(self,url,
extrasleep=extrasleep,
usecache=usecache)
## not actually putting urltitle on multi-chapters below, but
## one-shots will have it, so this is still useful. normalized
## chapter URLs do NOT contain the story title.
@ -372,8 +364,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## AND explicitly put title URL back on chapter URL for fetch
## *only*--normalized chapter URL does NOT have urltitle
data = self.get_request(url+self.urltitle,
extrasleep=4.0)
data = self.get_request(url+self.urltitle)
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! FanFiction.net Site Error!" % url)

View file

@ -361,7 +361,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
elif 'test1.com' not in url:
## for chapter_urls setting.
origurl = url
(data,url) = self.get_request_redirected(url,extrasleep=2.0)
(data,url) = self.get_request_redirected(url)
if '#' in origurl and '#' not in url:
url = url + origurl[origurl.index('#'):]
if url != origurl:

View file

@ -88,16 +88,6 @@ class BaseXenForoForumAdapter(BaseSiteAdapter):
## need to accept http and https still.
return re.escape(self.getURLPrefix()).replace("https","https?")+r"(?P<tp>threads|posts)/(?P<title>.+\.)?(?P<id>\d+)/?[^#]*?(#?post-(?P<anchorpost>\d+))?$"
def get_request_redirected(self, url,
usecache=True,
extrasleep=2.0):
## We've been requested by the site(s) admin to rein in hits.
## This is in addition to whatever the slow_down_sleep_time
## setting is.
return BaseSiteAdapter.get_request_redirected(self,url,
usecache=usecache,
extrasleep=extrasleep)
## For adapters, especially base_xenforoforum to override. Make
## sure to return unchanged URL if it's NOT a chapter URL. This
## is most helpful for xenforoforum because threadmarks use

View file

@ -608,7 +608,7 @@ use_pagecache:true
## download fewer stories less often you can likely get by with
## reducing this sleep. The hard-coded 2sec sleep that used to be
## added on top of slow_down_sleep_time has been removed.
slow_down_sleep_time:4
slow_down_sleep_time:6
## exclude emoji and default avatars.
cover_exclusion_regexp:(/styles/|xenforo/avatars/avatar.*\.png|https://cdn\.jsdelivr\.net/gh/|https://cdn\.jsdelivr\.net/emojione)
@ -1158,6 +1158,7 @@ windows_eol: true
[test1.com]
use_pagecache:true
slow_down_sleep_time:2
extratags: FanFiction,Testing
# extracategories:Fafner
@ -2864,7 +2865,7 @@ skip_author_cover:true
## fanfiction.net is blocking people more aggressively. If you
## download fewer stories less often you can likely get by with
## reducing this sleep.
slow_down_sleep_time:8
slow_down_sleep_time:12
## Extra metadata that this adapter knows about. See [archiveofourown.org]
## for examples of how to use them.

View file

@ -78,7 +78,6 @@ class FetcherDecorator(object):
method,
url,
parameters=None,
extrasleep=None,
referer=None,
usecache=True):
## can use fetcher.getConfig()/getConfigList().
@ -86,7 +85,6 @@ class FetcherDecorator(object):
method,
url,
parameters=parameters,
extrasleep=extrasleep,
referer=referer,
usecache=usecache)
@ -99,7 +97,6 @@ class ProgressBarDecorator(FetcherDecorator):
method,
url,
parameters=None,
extrasleep=None,
referer=None,
usecache=True):
logger.debug("ProgressBarDecorator fetcher_do_request")
@ -107,7 +104,6 @@ class ProgressBarDecorator(FetcherDecorator):
method,
url,
parameters=parameters,
extrasleep=extrasleep,
referer=referer,
usecache=usecache)
## added ages ago for CLI to give a line of dots showing it's
@ -135,7 +131,6 @@ class SleepDecorator(FetcherDecorator):
method,
url,
parameters=None,
extrasleep=None,
referer=None,
usecache=True):
logger.debug("SleepDecorator fetcher_do_request")
@ -143,7 +138,6 @@ class SleepDecorator(FetcherDecorator):
method,
url,
parameters=parameters,
extrasleep=extrasleep,
referer=referer,
usecache=usecache)
@ -152,9 +146,6 @@ class SleepDecorator(FetcherDecorator):
# and other intermediate caches.
if not fetchresp.fromcache:
t = None
if extrasleep:
logger.debug("extra sleep:%s"%extrasleep)
time.sleep(float(extrasleep))
if self.sleep_override:
t = float(self.sleep_override)
elif fetcher.getConfig('slow_down_sleep_time'):
@ -233,7 +224,6 @@ class BasicCacheDecorator(FetcherDecorator):
method,
url,
parameters=None,
extrasleep=None,
referer=None,
usecache=True):
'''
@ -254,7 +244,6 @@ class BasicCacheDecorator(FetcherDecorator):
method,
url,
parameters=parameters,
extrasleep=extrasleep,
referer=referer,
usecache=usecache)
@ -336,14 +325,8 @@ class Fetcher(object):
def do_request(self, method, url,
parameters=None,
extrasleep=None,
referer=None,
usecache=True):
'''
extrasleep is primarily for the ffnet adapter, which has extra
sleeps. Passed into fetches so it can be bypassed on
cache hits.
'''
logger.debug("fetcher do_request")
headers = self.make_headers(url,referer=referer)
fetchresp = self.request(method,url,
@ -363,22 +346,18 @@ class Fetcher(object):
def post_request(self, url,
parameters=None,
extrasleep=None,
usecache=True):
fetchresp = self.do_request('POST',
self.condition_url(url),
parameters=parameters,
extrasleep=extrasleep,
usecache=usecache)
return fetchresp.content
def get_request_redirected(self, url,
extrasleep=None,
referer=None,
usecache=True):
fetchresp = self.do_request('GET',
self.condition_url(url),
extrasleep=extrasleep,
referer=referer,
usecache=usecache)
return (fetchresp.content,fetchresp.redirecturl)

View file

@ -101,29 +101,23 @@ class Requestable(Configurable):
return data
def get_request_redirected(self, url,
usecache=True,
extrasleep=None):
usecache=True):
(data,rurl) = self.configuration.get_fetcher().get_request_redirected(
url,
usecache=usecache,
extrasleep=extrasleep)[:2]
usecache=usecache)[:2]
data = self.decode_data(data)
return (data,rurl)
def get_request(self, url,
usecache=True,
extrasleep=None):
usecache=True):
return self.get_request_redirected(url,
usecache,
extrasleep)[0]
usecache)[0]
def get_request_raw(self, url,
extrasleep=None,
referer=None,
usecache=True): ## referer is used with raw for images.
return self.configuration.get_fetcher().get_request_redirected(
url,
extrasleep=extrasleep,
referer=referer,
usecache=usecache)[0]