Extend page caching to AO3, fimf, portkey and buffynfaith.net.

This commit is contained in:
Jim Miller 2014-09-05 12:57:30 -05:00
parent b682d0ba6b
commit 530d7b0ab5
4 changed files with 37 additions and 14 deletions

View file

@ -123,6 +123,13 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
else:
return True
def use_pagecache(self):
    '''
    Tell the caller that this adapter works with the page cache.

    Adapters that will work with the page cache implement this
    method and have it return True; this adapter does so.
    '''
    return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
@ -155,8 +162,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url,data)
data = self._fetchUrl(url)
meta = self._fetchUrl(metaurl)
data = self._fetchUrl(url,usecache=False)
meta = self._fetchUrl(metaurl,usecache=False)
# use BeautifulSoup HTML parser to make everything easier to find.
soup = bs.BeautifulSoup(data)

View file

@ -101,6 +101,13 @@ class BuffyNFaithNetAdapter(BaseSiteAdapter):
r"(vie|ovr)&id=(?P<id>\d+)(&ch=(?P<ch>\d+))?$"
return p
def use_pagecache(self):
    '''
    Tell the caller that this adapter works with the page cache.

    Adapters that will work with the page cache implement this
    method and have it return True; this adapter does so.
    '''
    return True
def extractChapterUrlsAndMetadata(self):
dateformat = "%d %B %Y"
@ -109,7 +116,6 @@ class BuffyNFaithNetAdapter(BaseSiteAdapter):
#set a cookie to get past adult check
if self.is_adult or self.getConfig("is_adult"):
cookieproc = urllib2.HTTPCookieProcessor()
cookie = cl.Cookie(version=0, name='my_age', value='yes',
port=None, port_specified=False,
domain=self.getSiteDomain(), domain_specified=False, domain_initial_dot=False,
@ -121,8 +127,7 @@ class BuffyNFaithNetAdapter(BaseSiteAdapter):
comment_url=None,
rest={'HttpOnly': None},
rfc2109=False)
cookieproc.cookiejar.set_cookie(cookie)
self.opener = urllib2.build_opener(cookieproc)
self.cookiejar.set_cookie(cookie)
self.setHeader()
try:

View file

@ -61,10 +61,16 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r"https?://(www|mobile)\.fimfiction\.(net|com)/story/\d+/?.*"
def extractChapterUrlsAndMetadata(self):
def use_pagecache(self):
    '''
    Tell the caller that this adapter works with the page cache.

    Adapters that will work with the page cache implement this
    method and have it return True; this adapter does so.
    '''
    return True
def doExtractChapterUrlsAndMetadata(self,get_cover=True):
if self.is_adult or self.getConfig("is_adult"):
cookieproc = urllib2.HTTPCookieProcessor()
cookie = cl.Cookie(version=0, name='view_mature', value='true',
port=None, port_specified=False,
domain=self.getSiteDomain(), domain_specified=False, domain_initial_dot=False,
@ -76,8 +82,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
comment_url=None,
rest={'HttpOnly': None},
rfc2109=False)
cookieproc.cookiejar.set_cookie(cookie)
self.opener = urllib2.build_opener(cookieproc)
self.cookiejar.set_cookie(cookie)
try:
apiResponse = urllib2.urlopen("http://www.fimfiction.net/api/story.php?story=%s" % (self.story.getMetadata("storyId"))).read()
@ -183,7 +188,8 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
if coverurl.startswith('//'): # fix for img urls missing 'http:'
coverurl = "http:"+coverurl
self.setCoverImage(self.url,coverurl)
if get_cover:
self.setCoverImage(self.url,coverurl)
# fimf has started including extra stuff inside the description div.
descdivstr = u"%s"%soup.find("div", {"class":"description"})

View file

@ -79,6 +79,13 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/story/")+r"\d+(/\d+)?$"
def use_pagecache(self):
    '''
    Tell the caller that this adapter works with the page cache.

    Adapters that will work with the page cache implement this
    method and have it return True; this adapter does so.
    '''
    return True
## Getting the chapter list and the meta data, plus 'is adult' checking.
def extractChapterUrlsAndMetadata(self):
@ -88,7 +95,6 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
# portkey screws around with using a different URL to set the
# cookie and it's a pain. So... cheat!
if self.is_adult or self.getConfig("is_adult"):
cookieproc = urllib2.HTTPCookieProcessor()
cookie = cl.Cookie(version=0, name='verify17', value='1',
port=None, port_specified=False,
domain=self.getSiteDomain(), domain_specified=False, domain_initial_dot=False,
@ -99,9 +105,8 @@ class PortkeyOrgAdapter(BaseSiteAdapter): # XXX
comment=None,
comment_url=None,
rest={'HttpOnly': None},
rfc2109=False)
cookieproc.cookiejar.set_cookie(cookie)
self.opener = urllib2.build_opener(cookieproc)
rfc2109=False)
self.cookiejar.set_cookie(cookie)
try:
data = self._fetchUrl(url)