mirror of
https://github.com/kemayo/leech
synced 2025-12-30 04:02:21 +01:00
Retry failed site-requests
This commit is contained in:
parent
27b677a444
commit
f1ac7c8bda
1 changed file with 9 additions and 1 deletion
|
|
@ -3,6 +3,7 @@ import glob
|
|||
import os
|
||||
import argparse
|
||||
import uuid
|
||||
import time
|
||||
import attr
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
|
@ -96,9 +97,16 @@ class Site:
|
|||
def _add_arguments(self, parser):
|
||||
pass
|
||||
|
||||
def _soup(self, url, method='html5lib', **kw):
|
||||
def _soup(self, url, method='html5lib', retry=3, retry_delay=10, **kw):
    """Fetch *url* via ``self.session`` and parse the body with BeautifulSoup.

    A falsy response is treated as a failed load (assumes a requests-style
    response object, which is falsy for 4xx/5xx statuses -- confirm). While
    retries remain, the call sleeps and then tries again.

    Args:
        url: Address passed to ``self.session.get``.
        method: Parser name handed to ``BeautifulSoup``.
        retry: Number of further attempts allowed after a failed load.
        retry_delay: Seconds to wait before retrying when the response
            carries no usable ``Retry-After`` header.
        **kw: Forwarded unchanged to ``self.session.get``.

    Returns:
        ``BeautifulSoup(page.text, method)`` for the first truthy response.

    Raises:
        SiteException: The response was falsy and no retries remain.
    """
    page = self.session.get(url, **kw)
    if not page:
        if retry and retry > 0:
            delay = retry_delay
            # Use .get(): a failed response often has no Retry-After header,
            # and a subscript lookup would raise KeyError instead of retrying.
            retry_after = page.headers.get('Retry-After')
            if retry_after:
                try:
                    # Clamp at zero; time.sleep() rejects negative values.
                    delay = max(int(retry_after), 0)
                except (TypeError, ValueError):
                    # Retry-After may also be an HTTP-date rather than a
                    # number of seconds; keep the default delay in that case.
                    delay = retry_delay
            print("Load failed: waiting {}s to retry ({})".format(delay, page))
            time.sleep(delay)
            return self._soup(url, method=method, retry=retry - 1, retry_delay=retry_delay, **kw)
        raise SiteException("Couldn't fetch", url)
    return BeautifulSoup(page.text, method)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue