From f1ac7c8bdae09403989ffed581df35465fe93197 Mon Sep 17 00:00:00 2001
From: David Lynch
Date: Tue, 31 Oct 2017 00:27:54 -0500
Subject: [PATCH] Retry failed site-requests

---
 sites/__init__.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/sites/__init__.py b/sites/__init__.py
index 70ab656..24161be 100644
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -3,6 +3,7 @@ import glob
 import os
 import argparse
 import uuid
+import time
 import attr
 from bs4 import BeautifulSoup
 
@@ -96,9 +97,26 @@ class Site:
     def _add_arguments(self, parser):
         pass
 
-    def _soup(self, url, method='html5lib', **kw):
+    def _soup(self, url, method='html5lib', retry=3, retry_delay=10, **kw):
+        """Fetch ``url`` via ``self.session`` and parse it with BeautifulSoup.
+
+        A falsy response is retried up to ``retry`` more times, sleeping
+        ``retry_delay`` seconds (or the response's integer ``Retry-After``
+        value, when present) before each attempt.  Raises SiteException
+        once no retries remain.
+        """
         page = self.session.get(url, **kw)
         if not page:
+            if retry and retry > 0:
+                delay = retry_delay
+                # Retry-After is optional and may be an HTTP-date rather
+                # than a number of seconds; only an integer overrides.
+                retry_after = page.headers.get('Retry-After', '').strip()
+                if retry_after.isdecimal():
+                    delay = int(retry_after)
+                print("Load failed: waiting {}s to retry ({})".format(delay, page))
+                time.sleep(delay)
+                return self._soup(url, method=method, retry=retry - 1, retry_delay=retry_delay, **kw)
             raise SiteException("Couldn't fetch", url)
         return BeautifulSoup(page.text, method)
 