From 9c6395b75955ab790b5443db111bd1cf2dce0b07 Mon Sep 17 00:00:00 2001 From: cryzed Date: Tue, 18 Apr 2017 18:47:50 +0200 Subject: [PATCH] Isolate change of httplib._MAXHEADERS to getChapterText() --- fanficfare/adapters/adapter_royalroadl.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/fanficfare/adapters/adapter_royalroadl.py b/fanficfare/adapters/adapter_royalroadl.py index 1d611999..247d8702 100644 --- a/fanficfare/adapters/adapter_royalroadl.py +++ b/fanficfare/adapters/adapter_royalroadl.py @@ -15,6 +15,7 @@ # limitations under the License. # +import contextlib from datetime import datetime import httplib import logging @@ -26,16 +27,24 @@ from ..htmlcleanup import stripHTML from base_adapter import BaseSiteAdapter logger = logging.getLogger(__name__) -# Fix "http.client.HTTPException: got more than 100 headers" issue. RoyalRoadL's webserver seems to be misconfigured and -# sends more than 100 headers for some stories (probably Set-Cookie). This simply increases the maximum header limit to -# 1000 -- changing this state globally isn't an issue, since it should be backwards-compatible with all other adapters. -httplib._MAXHEADERS = 1000 def getClass(): return RoyalRoadAdapter +# Using a context manager for this guarantees that the original max headers value is restored, even when an uncaught +# exception is raised +@contextlib.contextmanager +def httplib_max_headers(number): + original_max_headers = httplib._MAXHEADERS + httplib._MAXHEADERS = number + try: + yield + finally: + httplib._MAXHEADERS = original_max_headers + + # Class name has to be unique. Our convention is camel case the # sitename with Adapter at the end. www is skipped. class RoyalRoadAdapter(BaseSiteAdapter): @@ -162,13 +171,16 @@ class RoyalRoadAdapter(BaseSiteAdapter): self.setCoverImage(url,cover_url) # some content is show as tables, this will preserve them - # grab the text for an individual chapter. 
def getChapterText(self, url): logger.debug('Getting chapter text from: %s' % url) - soup = self.make_soup(self._fetchUrl(url)) + # Work around "http.client.HTTPException: got more than 100 headers" issue. RoyalRoadL's webserver seems to be + # misconfigured and sends more than 100 headers for some stories (probably Set-Cookie). This simply increases + # the maximum header limit to 1000 temporarily. Also see: https://github.com/JimmXinu/FanFicFare/pull/174 + with httplib_max_headers(1000): + soup = self.make_soup(self._fetchUrl(url)) div = soup.find('div',{'class':"chapter-inner chapter-content"})