diff --git a/fanficfare/adapters/adapter_literotica.py b/fanficfare/adapters/adapter_literotica.py index a9dd474f..dc8f2f34 100644 --- a/fanficfare/adapters/adapter_literotica.py +++ b/fanficfare/adapters/adapter_literotica.py @@ -151,9 +151,9 @@ class LiteroticaSiteAdapter(BaseSiteAdapter): # logger.debug("Chapter/Story URL: <%s> " % self.url) try: - (data1,opened) = self._fetchUrlOpened(self.url) + (data1,rurl) = self.get_request_redirected(self.url) ## for language domains - self._setURL(opened.geturl()) + self._setURL(rurl) logger.debug("set opened url:%s"%self.url) except HTTPError as e: if e.code in [404, 410]: diff --git a/fanficfare/adapters/adapter_storiesonlinenet.py b/fanficfare/adapters/adapter_storiesonlinenet.py index 32d79054..0ea2642f 100644 --- a/fanficfare/adapters/adapter_storiesonlinenet.py +++ b/fanficfare/adapters/adapter_storiesonlinenet.py @@ -118,14 +118,12 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter): ## and finestories. ## fetch 'v' code, post action and redirected domain from login page. - (data,opened) = self._fetchUrlOpened(loginUrl, - usecache=False) + (data,useurl) = self.get_request_redirected(loginUrl,usecache=False) # logger.debug(data) if not self.needToLoginCheck(data): ## hitting login URL reminds system we're logged in? logger.debug("don't need to login") return - useurl = opened.geturl() soup = self.make_soup(data) params = {} params['v']=soup.find('input', {'name':'v'})['value'] diff --git a/fanficfare/adapters/adapter_test1.py b/fanficfare/adapters/adapter_test1.py index 780f112b..a410d1e3 100644 --- a/fanficfare/adapters/adapter_test1.py +++ b/fanficfare/adapters/adapter_test1.py @@ -364,8 +364,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" elif 'test1.com' not in url: ## for chapter_urls setting. 
origurl = url - (data,opened) = self._fetchUrlOpened(url,extrasleep=2.0) - url = opened.geturl() + (data,url) = self.get_request_redirected(url,extrasleep=2.0) if '#' in origurl and '#' not in url: url = url + origurl[origurl.index('#'):] if url != origurl: diff --git a/fanficfare/adapters/base_xenforoforum_adapter.py b/fanficfare/adapters/base_xenforoforum_adapter.py index 48648e3c..a02fe271 100644 --- a/fanficfare/adapters/base_xenforoforum_adapter.py +++ b/fanficfare/adapters/base_xenforoforum_adapter.py @@ -90,15 +90,15 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): ## need to accept http and https still. return re.escape(self.getURLPrefix()).replace("https","https?")+r"(?P<tp>threads|posts)/(?P<title>.+\.)?(?P<id>\d+)/?[^#]*?(#?post-(?P<anchorpost>\d+))?$" - def _fetchUrlOpened(self, url, - usecache=True, - extrasleep=2.0): + def get_request_redirected(self, url, + usecache=True, + extrasleep=2.0): ## We've been requested by the site(s) admin to rein in hits. ## This is in additional to what ever the slow_down_sleep_time ## setting is. - return BaseSiteAdapter._fetchUrlOpened(self,url, - usecache=usecache, - extrasleep=extrasleep) + return BaseSiteAdapter.get_request_redirected(self,url, + usecache=usecache, + extrasleep=extrasleep) ## For adapters, especially base_xenforoforum to override. Make ## sure to return unchanged URL if it's NOT a chapter URL. This @@ -435,23 +435,20 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): logger.info("url: "+useurl) try: - (data,opened) = self._fetchUrlOpened(useurl) - useurl = opened.geturl() + (data,useurl) = self.get_request_redirected(useurl) logger.info("use useurl: "+useurl) # can't login before initial fetch--need a cookie. 
if self.getConfig('always_login',False): self.performLogin(data) - (data,opened) = self._fetchUrlOpened(self.url, - usecache=False) - useurl = opened.geturl() + (data,useurl) = self.get_request_redirected(self.url, + usecache=False) logger.info("use useurl: "+useurl) except HTTPError as e: # QQ gives 403, SV at least gives 404. Which unfortunately if e.code == 403 or self.getConfig('always_login',False): self.performLogin(data) - (data,opened) = self._fetchUrlOpened(self.url, - usecache=False) - useurl = opened.geturl() + (data,useurl) = self.get_request_redirected(self.url, + usecache=False) logger.info("use useurl: "+useurl) elif e.code == 404: raise exceptions.StoryDoesNotExist(self.url) @@ -716,8 +713,7 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): souptag = self.get_cache_post(url) if not souptag: - (data,opened) = self._fetchUrlOpened(url) - url = unicode(opened.geturl()) + (data,url) = self.get_request_redirected(url) if '#' in origurl and '#' not in url: url = url + origurl[origurl.index('#'):] logger.debug("chapter URL redirected to: %s"%url) diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index e545f666..bad8d07e 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -915,7 +915,7 @@ class Configuration(ConfigParser): #### *_filelist feature was added. 
def set_sleep(self,val): return self.fetcher.set_sleep(val) - + def get_empty_cookiejar(self): return self.fetcher.get_empty_cookiejar() @@ -1006,25 +1006,25 @@ class Configurable(object): parameters=parameters, usecache=usecache) - def _fetchUrlOpened(self, url, - usecache=True, - extrasleep=None): + def get_request_redirected(self, url, + usecache=True, + extrasleep=None): return self.configuration.\ - fetcher._fetchUrlOpened(url, - usecache=usecache, - extrasleep=extrasleep) + fetcher.get_request_redirected(url, + usecache=usecache, + extrasleep=extrasleep) def _fetchUrl(self, url, usecache=True, extrasleep=None): - return self._fetchUrlOpened(url, + return self.get_request_redirected(url, usecache, extrasleep)[0] def _fetchUrlRaw(self, url, extrasleep=None, usecache=True, referer=None): ## referer is used with raw for images. - return self._fetchUrlRawOpened(url, + return self._fetchUrlRawUrl(url, extrasleep, usecache, referer=referer)[0] diff --git a/fanficfare/fetcher.py b/fanficfare/fetcher.py index f6aaed08..b5e2dc38 100644 --- a/fanficfare/fetcher.py +++ b/fanficfare/fetcher.py @@ -303,15 +303,15 @@ class Fetcher(object): def _fetchUrl(self, url, usecache=True, extrasleep=None): - return self._fetchUrlOpened(url, - parameters, - usecache, - extrasleep)[0] + return self.get_request_redirected(url, + parameters, + usecache, + extrasleep)[0] # parameters is a dict() - def _fetchUrlOpened(self, url, - usecache=True, - extrasleep=None): + def get_request_redirected(self, url, + usecache=True, + extrasleep=None): excpt=None if url.startswith("file://"): @@ -324,21 +324,21 @@ class Fetcher(object): logger.debug("retry sleep:%s"%sleeptime) time.sleep(sleeptime) try: - (data,opened)=self._fetchUrlRawOpened(url, + (data,rurl)=self._fetchUrlRawUrl(url, usecache=usecache, extrasleep=extrasleep) - return (self._do_reduce_zalgo(self._decode(data)),opened) + return (self._do_reduce_zalgo(self._decode(data)),rurl) except HTTPError as he: excpt=he if he.code in 
(403,404,410): logger.debug("Caught an exception reading URL: %s Exception %s."%(unicode(safe_url(url)),unicode(he))) break # break out on 404 ## trekfanfiction.net has started returning the page, - ## but with a 500 code. We can use the HTTPError as - ## the 'opened' in such case. + ## but with a 500 code. We can get the url from the + ## HTTPError in such case. if he.code == 500 and 'trekfanfiction.net' in url: data = he.read() - return (self._do_reduce_zalgo(self._decode(data)),he) + return (self._do_reduce_zalgo(self._decode(data)),he.geturl()) except Exception as e: excpt=e logger.debug("Caught an exception reading URL: %s sleeptime(%s) Exception %s."%(unicode(safe_url(url)),sleeptime,unicode(e))) @@ -354,7 +354,7 @@ class Fetcher(object): logger.debug(excpt, exc_info=True) raise(excpt) - def _fetchUrlRawOpened(self, url, + def _fetchUrlRawUrl(self, url, extrasleep=None, usecache=True, referer=None): @@ -366,12 +366,6 @@ class Fetcher(object): cache hits. ''' method='GET' - class FakeOpened: - def __init__(self,data,url): - self.data=data - self.url=url - def geturl(self): return self.url - def read(self): return self.data if not url.startswith('file:'): # file fetches fail on + for space url = quote_plus(ensure_binary(url),safe=';/?:@&=+$,%&#') @@ -382,7 +376,7 @@ class Fetcher(object): if usecache and self._has_cachekey(cachekey) and not cachekey.startswith('file:'): logger.debug("#####################################\npagecache(%s) HIT: %s"%(method,safe_url(cachekey))) data,redirecturl = self._get_from_pagecache(cachekey) - return (data,FakeOpened(data,redirecturl)) + return (data,redirecturl) logger.debug("#####################################\npagecache(%s) MISS: %s"%(method,safe_url(cachekey))) # print(self.get_pagecache().keys()) @@ -430,14 +424,13 @@ class Fetcher(object): None #fp ) data = resp.content - opened = FakeOpened(data,resp.url) self._progressbar() ## postURL saves data to the pagecache *after* _decode() while ## fetchRaw saves it *before* 
_decode()--because raw. - self._set_to_pagecache(cachekey,data,opened.url) + self._set_to_pagecache(cachekey,data,resp.url) - return (data,opened) + return (data,resp.url) class UrllibFetcher(Fetcher):