From d50f23d07b854d8dfd50a0a1de92bf051c275a1b Mon Sep 17 00:00:00 2001
From: David Lynch
Date: Fri, 12 Feb 2021 16:02:55 -0600
Subject: [PATCH] Special exception for hitting a cloudflare captcha page

Fanfiction.net is currently doing this, so let's at least acknowledge it

Refs #53
---
 sites/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sites/__init__.py b/sites/__init__.py
index cd90332..55342ac 100644
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -137,6 +137,8 @@ class Site:
     def _soup(self, url, method='html5lib', retry=3, retry_delay=10, **kw):
         page = self.session.get(url, **kw)
         if not page:
+            if page.status_code == 403 and page.headers.get('Server', False) == 'cloudflare' and "captcha-bypass" in page.text:
+                raise SiteException("Couldn't fetch, probably because of Cloudflare protection", url)
             if retry and retry > 0:
                 delay = retry_delay
                 if 'Retry-After' in page.headers: