mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
Refactor _fetchUrl() to get_request()
This commit is contained in:
parent
3ba65f922b
commit
75b1cc23b5
111 changed files with 392 additions and 392 deletions
|
|
@ -75,7 +75,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -194,7 +194,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -213,7 +213,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
# issues with different SGML parsers in python. This is a
|
||||
# nasty hack, but it works.
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist("Code: 404. {0}".format(url))
|
||||
|
|
@ -267,7 +267,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
||||
try:
|
||||
adata = self._fetchUrl(author_Url)
|
||||
adata = self.get_request(author_Url)
|
||||
except HTTPError as e:
|
||||
if e.code in 404:
|
||||
raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
|
||||
|
|
@ -305,7 +305,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
|
||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
||||
try:
|
||||
adata = self._fetchUrl(author_Url)
|
||||
adata = self.get_request(author_Url)
|
||||
except HTTPError as e:
|
||||
if e.code in 404:
|
||||
raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
|
||||
|
|
@ -398,7 +398,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
|
|||
#Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
chaptertag = soup.find('div',{'class' : 'pagination'}).parent.findNext('td')
|
||||
if None == chaptertag:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -135,7 +135,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# Since the warning text can change by warning level, let's
|
||||
# look for the warning pass url. ksarchive uses
|
||||
|
|
@ -159,7 +159,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -267,7 +267,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -286,7 +286,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'class' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
params['utf8'] = u'\x2713' # utf8 *is* required now. hex code works better than actual character for some reason. u'✓'
|
||||
|
||||
# authenticity_token now comes from a completely separate json call.
|
||||
token_json = json.loads(self._fetchUrl('https://' + self.getSiteDomain() + "/token_dispenser.json"))
|
||||
token_json = json.loads(self.get_request('https://' + self.getSiteDomain() + "/token_dispenser.json"))
|
||||
params['authenticity_token'] = token_json['token']
|
||||
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/users/login'
|
||||
|
|
@ -162,8 +162,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
logger.info("metaurl: "+metaurl)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
meta = self._fetchUrl(metaurl)
|
||||
data = self.get_request(url)
|
||||
meta = self.get_request(metaurl)
|
||||
|
||||
if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." in meta:
|
||||
if self.addurl:
|
||||
|
|
@ -172,7 +172,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
metasoup = self.make_soup(meta)
|
||||
a = metasoup.find('a',text='Proceed')
|
||||
metaurl = 'https://'+self.host+a['href']
|
||||
meta = self._fetchUrl(metaurl)
|
||||
meta = self.get_request(metaurl)
|
||||
else:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
|
|
@ -189,8 +189,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data) or \
|
||||
( self.getConfig("always_login") and 'href="/users/logout"' not in data ):
|
||||
self.performLogin(url,data)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
meta = self._fetchUrl(metaurl,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
meta = self.get_request(metaurl,usecache=False)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
|
@ -428,7 +428,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
logger.debug("USE view_full_work")
|
||||
## Assumed view_adult=true was cookied during metadata
|
||||
if not self.full_work_soup:
|
||||
self.full_work_soup = self.make_soup(self._fetchUrl(self.url+"?view_full_work=true"+self.addurl.replace('?','&')))
|
||||
self.full_work_soup = self.make_soup(self.get_request(self.url+"?view_full_work=true"+self.addurl.replace('?','&')))
|
||||
## AO3 has had several cases now where chapter numbers
|
||||
## are missing, breaking the link between
|
||||
## <div id=chapter-##> and Chapter ##.
|
||||
|
|
@ -445,7 +445,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
if whole_dl_soup:
|
||||
chapter_dl_soup = self.full_work_chapters[index]
|
||||
else:
|
||||
whole_dl_soup = chapter_dl_soup = self.make_soup(self._fetchUrl(url+self.addurl))
|
||||
whole_dl_soup = chapter_dl_soup = self.make_soup(self.get_request(url+self.addurl))
|
||||
if None == chapter_dl_soup:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
|
|
@ -584,7 +584,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
addurl=""
|
||||
# just to get an authenticity_token.
|
||||
data = self._fetchUrl(url+addurl)
|
||||
data = self.get_request(url+addurl)
|
||||
# login the session.
|
||||
self.performLogin(url,data)
|
||||
# get the list page with logged in session.
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -104,7 +104,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+author['href'])
|
||||
self.story.setMetadata('author',author.string)
|
||||
|
||||
authorSoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
authorSoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
chapter=soup.find('select',{'name':'chapter'}).findAll('option')
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div',{'class':"chapter bordersolid"}).findNext('div').findNext('div')
|
||||
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
try:
|
||||
data1 = self._fetchUrl(self.url)
|
||||
data1 = self.get_request(self.url)
|
||||
soup1 = self.make_soup(data1)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
|
|
@ -148,7 +148,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
|
|||
logger.debug('Getting chapter text from <%s>' % url)
|
||||
#logger.info('Getting chapter text from <%s>' % url)
|
||||
|
||||
data1 = self._fetchUrl(url)
|
||||
data1 = self.get_request(url)
|
||||
soup1 = self.make_soup(data1)
|
||||
|
||||
# get story text
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -125,7 +125,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
|
@ -141,14 +141,14 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
try:
|
||||
# in case link points somewhere other than the first chapter
|
||||
a = soup.findAll('option')[1]['value']
|
||||
self.story.setMetadata('storyId',a.split('=',)[1])
|
||||
url = 'http://'+self.host+'/'+a
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
except:
|
||||
pass
|
||||
|
||||
|
|
@ -240,7 +240,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
soup = self.make_soup(data) # some chapters seem to be hanging up on those tags, so it is safer to close them
|
||||
|
||||
|
|
|
|||
|
|
@ -98,8 +98,8 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
if subHref:
|
||||
#does not work when using https - 403
|
||||
subUrl = 'http://' + self.getSiteDomain() + subHref['href']
|
||||
self._fetchUrl(subUrl)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
self.get_request(subUrl)
|
||||
data = self.get_request(url,usecache=False)
|
||||
soup = self.make_soup(data)
|
||||
check = soup.find('div',{'class':'click-to-read-full'})
|
||||
if check:
|
||||
|
|
@ -121,7 +121,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
logger.info("url: "+url)
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
|
|
@ -136,7 +136,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
# always login if not already to avoid lots of headaches
|
||||
self.performLogin(url,data)
|
||||
# refresh website after logging in
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# subscription check
|
||||
|
|
@ -201,7 +201,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
# story description
|
||||
try:
|
||||
jsonlink = soup.find('script',string=re.compile(r'/api/forewords/[0-9]+/foreword_[0-9a-z]+.json')).get_text().split('"')[1] # grabs url from quotation marks
|
||||
fore_json = json.loads(self._fetchUrl(jsonlink))
|
||||
fore_json = json.loads(self.get_request(jsonlink))
|
||||
content = self.make_soup(fore_json['post']).find('body') # BS4 adds <html><body> if not present.
|
||||
a = content.find('div', {'id':'story-description'})
|
||||
except:
|
||||
|
|
@ -261,13 +261,13 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
try:
|
||||
# <script>var postApi = "https://www.asianfanfics.com/api/chapters/4791923/chapter_46d32e413d1a702a26f7637eabbfb6f3.json";</script>
|
||||
jsonlink = soup.find('script',string=re.compile(r'/api/chapters/[0-9]+/chapter_[0-9a-z]+.json')).get_text().split('"')[1] # grabs url from quotation marks
|
||||
chap_json = json.loads(self._fetchUrl(jsonlink))
|
||||
chap_json = json.loads(self.get_request(jsonlink))
|
||||
content = self.make_soup(chap_json['post']).find('body') # BS4 adds <html><body> if not present.
|
||||
content.name='div' # change body to a div.
|
||||
if self.getConfig('inject_chapter_title'):
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
data = self.get_request(self.url)
|
||||
soup = self.make_soup(data)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
|
|
@ -133,7 +133,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
time.sleep(1)
|
||||
logger.warning('A problem retrieving the author information. Trying Again')
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
data = self.get_request(self.url)
|
||||
soup = self.make_soup(data)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
|
|
@ -187,7 +187,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
#Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
chaptertag = soup.find('div',{'class' : 'storyblock'})
|
||||
|
||||
# Some of the stories have the chapters in <pre> sections, so have to check for that
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug("URL: "+self.url)
|
||||
soup = self.make_soup(self._fetchUrl(self.url))
|
||||
soup = self.make_soup(self.get_request(self.url))
|
||||
|
||||
# Since no 404 error code we have to raise the exception ourselves.
|
||||
# A title that is just 'by' indicates that there is no author name
|
||||
|
|
@ -99,7 +99,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
# Get the URL to the author's page and find the correct story entry to
|
||||
# scrape the metadata
|
||||
author_url = urlparse.urljoin(self.url, soup.find('a', {'class': 'headline'})['href'])
|
||||
soup = self.make_soup(self._fetchUrl(author_url))
|
||||
soup = self.make_soup(self.get_request(author_url))
|
||||
|
||||
# Ignore first list_box div, it only contains the author information
|
||||
for list_box in soup('div', {'class': 'list_box'})[1:]:
|
||||
|
|
@ -188,7 +188,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
def getChapterText(self, url):
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
storytext_div = soup.find('div', {'class': 'tl'})
|
||||
storytext_div = storytext_div.find('div', {'class': ''})
|
||||
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -185,7 +185,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -297,7 +297,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'http://'+self.host+'/fiction/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -316,7 +316,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -133,7 +133,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# Since the warning text can change by warning level, let's
|
||||
# look for the warning pass url. ksarchive uses
|
||||
|
|
@ -157,7 +157,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -266,7 +266,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^efiction/viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -285,7 +285,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -203,7 +203,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -222,7 +222,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
data = self.get_request(self.url)
|
||||
except HTTPError as exception:
|
||||
if exception.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
|
||||
|
|
@ -106,7 +106,7 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
|
|||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
content = soup.select_one('#content')
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -212,7 +212,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -234,7 +234,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -196,7 +196,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -221,7 +221,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -133,7 +133,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# Since the warning text can change by warning level, let's
|
||||
# look for the warning pass url. ksarchive uses
|
||||
|
|
@ -157,7 +157,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -265,7 +265,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -284,7 +284,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -108,7 +108,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -209,7 +209,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -228,7 +228,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -139,7 +139,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url,soup)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
|
|
@ -172,7 +172,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
self.add_chapter(chapter,'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
|
||||
|
||||
|
||||
asoup = self.make_soup(self._fetchUrl(alink))
|
||||
asoup = self.make_soup(self.get_request(alink))
|
||||
|
||||
if 'fanfiction' in self.section:
|
||||
asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')
|
||||
|
|
@ -262,7 +262,7 @@ class DokugaComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'chtext'})
|
||||
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -132,7 +132,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
|
|
@ -147,7 +147,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -261,7 +261,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -283,7 +283,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'class' : 'listbox'})
|
||||
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -120,7 +120,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
# raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
|
@ -184,7 +184,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
|
||||
# Need author page for most of the metadata.
|
||||
logger.debug("fetching author page: (%s)"%authurl)
|
||||
authsoup = self.make_soup(self._fetchUrl(authurl))
|
||||
authsoup = self.make_soup(self.get_request(authurl))
|
||||
#print("authsoup:%s"%authsoup)
|
||||
|
||||
storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
|
||||
|
|
@ -271,7 +271,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
|
||||
i=1
|
||||
|
|
@ -291,7 +291,7 @@ class EFPFanFicNet(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'class' : 'storia'})
|
||||
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -108,7 +108,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -219,7 +219,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -240,7 +240,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ class FaerieArchiveComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -195,7 +195,7 @@ class FaerieArchiveComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
series_url = 'http://'+self.host+'/'+series['href']
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -219,7 +219,7 @@ class FaerieArchiveComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorUrl','https://{0}/'.format(self.parsedUrl.netloc))
|
||||
|
||||
loginUrl = self.story.getMetadata('authorUrl')+'account/'
|
||||
loginsoup = self.make_soup(self._fetchUrl(loginUrl))
|
||||
loginsoup = self.make_soup(self.get_request(loginUrl))
|
||||
if True:
|
||||
# if self.performLogin(loginUrl, loginsoup):
|
||||
# Now go hunting for all the meta data and the chapter list.
|
||||
|
|
@ -266,7 +266,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
|
|||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
story = soup.find('div',{'class':'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -148,7 +148,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
|
|
@ -163,7 +163,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -287,7 +287,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -306,7 +306,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ class FanficHuAdapter(BaseSiteAdapter):
|
|||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
soup = self.make_soup(self._fetchUrl(self.url + '&i=1'))
|
||||
soup = self.make_soup(self.get_request(self.url + '&i=1'))
|
||||
|
||||
if ensure_text(soup.title.string).strip(u' :') == u'írta':
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -91,7 +91,7 @@ class FanficHuAdapter(BaseSiteAdapter):
|
|||
self.add_chapter(option.string, url)
|
||||
|
||||
author_url = urlparse.urljoin(self.BASE_URL, soup.find('a', href=lambda href: href and href.startswith('viewuser.php?uid='))['href'])
|
||||
soup = self.make_soup(self._fetchUrl(author_url))
|
||||
soup = self.make_soup(self.get_request(author_url))
|
||||
|
||||
story_id = self.story.getMetadata('storyId')
|
||||
for table in soup('table', {'class': 'mainnav'}):
|
||||
|
|
@ -180,7 +180,7 @@ class FanficHuAdapter(BaseSiteAdapter):
|
|||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
def getChapterText(self, url):
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
story_cell = soup.find('form', action='viewstory.php').parent.parent
|
||||
|
||||
for div in story_cell('div'):
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
|||
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['name']))
|
||||
## must need a cookie or something.
|
||||
self._fetchUrl(loginUrl, usecache=False)
|
||||
self.get_request(loginUrl, usecache=False)
|
||||
d = self.post_request(loginUrl, params, usecache=False)
|
||||
|
||||
if self.needToLoginCheck(d):
|
||||
|
|
@ -138,7 +138,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
|||
logger.info("url: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -170,7 +170,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
|||
if self.story.getMetadata('rating') != 'General' and self.needToLoginCheck(data):
|
||||
self.performLogin(url)
|
||||
# reload after login.
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
soup = self.make_soup(data)
|
||||
fichead = soup.find('div',class_='FicHead')
|
||||
|
||||
|
|
@ -325,7 +325,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
|||
logger.debug("USE view_full_work")
|
||||
## Assumed view_adult=true was cookied during metadata
|
||||
if not self.full_work_soup:
|
||||
self.full_work_soup = self.make_soup(self._fetchUrl(
|
||||
self.full_work_soup = self.make_soup(self.get_request(
|
||||
'https://' + self.getSiteDomain() + '/read.php?id='+self.story.getMetadata('storyId')))
|
||||
|
||||
whole_dl_soup = self.full_work_soup
|
||||
|
|
@ -334,7 +334,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
|
|||
self.use_full_work_soup = False
|
||||
logger.warning("c%s not found in view_full_work--ending use_view_full_work"%(index))
|
||||
if chapter_div == None:
|
||||
whole_dl_soup = self.make_soup(self._fetchUrl(url))
|
||||
whole_dl_soup = self.make_soup(self.get_request(url))
|
||||
chapter_div = whole_dl_soup.find('div',{'id':'c%s'%(index)})
|
||||
if None == chapter_div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -197,7 +197,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
|||
series_name = stripHTML(seriesa)
|
||||
series_url = 'https://'+self.host+'/archive/'+seriesa['href']
|
||||
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
# logger.debug(storyas)
|
||||
j=1
|
||||
|
|
@ -224,7 +224,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -157,7 +157,7 @@ class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
|
|
@ -256,7 +256,7 @@ class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'http://'+self.host+'/efiction/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -275,7 +275,7 @@ class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -72,11 +72,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
def getSiteURLPattern(self):
|
||||
return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
|
||||
|
||||
def _fetchUrl(self,url,extrasleep=1.0,usecache=True):
|
||||
def get_request(self,url,extrasleep=1.0,usecache=True):
|
||||
## ffnet(and, I assume, fpcom) tends to fail more if hit too
|
||||
## fast. This is in additional to what ever the
|
||||
## slow_down_sleep_time setting is.
|
||||
return BaseSiteAdapter._fetchUrl(self,url,
|
||||
return BaseSiteAdapter.get_request(self,url,
|
||||
extrasleep=extrasleep,
|
||||
usecache=usecache)
|
||||
|
||||
|
|
@ -104,7 +104,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
#logger.debug("\n===================\n%s\n===================\n"%data)
|
||||
soup = self.make_soup(data)
|
||||
except HTTPError as e:
|
||||
|
|
@ -142,7 +142,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
chapcount+1,
|
||||
self.urltitle)
|
||||
logger.debug('=Trying newer chapter: %s' % tryurl)
|
||||
newdata = self._fetchUrl(tryurl)
|
||||
newdata = self.get_request(tryurl)
|
||||
if "not found. Please check to see you are not using an outdated url." not in newdata \
|
||||
and "This request takes too long to process, it is timed out by the server." not in newdata:
|
||||
logger.debug('=======Found newer chapter: %s' % tryurl)
|
||||
|
|
@ -177,7 +177,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
self.story.addToList('category',stripHTML(categories[1]))
|
||||
elif 'Crossover' in categories[0]['href']:
|
||||
caturl = "https://%s%s"%(self.getSiteDomain(),categories[0]['href'])
|
||||
catsoup = self.make_soup(self._fetchUrl(caturl))
|
||||
catsoup = self.make_soup(self.get_request(caturl))
|
||||
found = False
|
||||
for a in catsoup.findAll('a',href=re.compile(r"^/crossovers/.+?/\d+/")):
|
||||
self.story.addToList('category',stripHTML(a))
|
||||
|
|
@ -309,7 +309,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
authimg_url = ""
|
||||
if cover_url and self.getConfig('skip_author_cover'):
|
||||
authsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
try:
|
||||
img = authsoup.select_one('img.lazy.cimage')
|
||||
authimg_url=img['data-original']
|
||||
|
|
@ -373,7 +373,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
## AND explicitly put title URL back on chapter URL for fetch
|
||||
## *only*--normalized chapter URL does NOT have urltitle
|
||||
data = self._fetchUrl(url+self.urltitle,
|
||||
data = self.get_request(url+self.urltitle,
|
||||
extrasleep=4.0)
|
||||
|
||||
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -129,7 +129,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
|
||||
if "Uhr ist diese Geschichte nur nach einer" in data:
|
||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Auserhalb der Zeit von 23:00 Uhr bis 04:00 Uhr ist diese Geschichte nur nach einer erfolgreichen Altersverifikation zuganglich.")
|
||||
|
|
@ -192,11 +192,11 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
|
||||
## Get description from own URL:
|
||||
## /?a=v&storyid=46ccbef30000616306614050&s=1
|
||||
descsoup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"/?a=v&storyid="+self.story.getMetadata('storyId')+"&s=1"))
|
||||
descsoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"/?a=v&storyid="+self.story.getMetadata('storyId')+"&s=1"))
|
||||
self.setDescription(url,stripHTML(descsoup))
|
||||
|
||||
# #find metadata on the author's page
|
||||
# asoup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
|
||||
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
|
||||
# tr=asoup.findAll('tr')
|
||||
# for i in range(1,len(tr)):
|
||||
# a = tr[i].find('a')
|
||||
|
|
@ -217,7 +217,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
|
|||
logger.debug('Getting chapter text from: %s' % url)
|
||||
time.sleep(0.5) ## ffde has "floodlock" protection
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'storytext'})
|
||||
for a in div.findAll('script'):
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class FastNovelNetAdapter(BaseSiteAdapter):
|
|||
logger.debug('URL: %s', self.url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
data = self.get_request(self.url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
|
||||
|
|
@ -139,7 +139,7 @@ class FastNovelNetAdapter(BaseSiteAdapter):
|
|||
self.add_chapter(title, 'https://' + self.host + a["href"])
|
||||
|
||||
def getChapterText(self, url):
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
story = soup.select_one('#chapter-body')
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
url=self.url
|
||||
logger.debug("URL: "+url)
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -176,7 +176,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
## after site change, I don't see word count anywhere.
|
||||
# pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
|
||||
# pr='https://'+self.host+pr['href']
|
||||
# pr = self.make_soup(self._fetchUrl(pr))
|
||||
# pr = self.make_soup(self.get_request(pr))
|
||||
# pr=pr.findAll('div', {'class' : 'part_text'})
|
||||
# i=0
|
||||
# for part in pr:
|
||||
|
|
@ -241,7 +241,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
chapter = soup.find('div', {'id' : 'content'})
|
||||
if chapter == None: ## still needed?
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
## posting on list doesn't work, but doesn't hurt, either.
|
||||
data = self.post_request(url,params)
|
||||
else:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
return data
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
|
@ -108,7 +108,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("Normalizing to URL: "+url)
|
||||
## title's right there...
|
||||
self.story.setMetadata('title',stripHTML(storya))
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
|
||||
else:
|
||||
|
|
@ -138,7 +138,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
## Go scrape the rest of the metadata from the author's page.
|
||||
data = self._fetchUrl(self.story.getMetadata('authorUrl'))
|
||||
data = self.get_request(self.story.getMetadata('authorUrl'))
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# <dl><dt><a class = "Rid story" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/TMH.html">
|
||||
|
|
@ -193,7 +193,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
# find <!-- headerend --> & <!-- footerstart --> and
|
||||
# replaced with matching div pair for easier parsing.
|
||||
# Yes, it's an evil kludge, but what can ya do? Using
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
## need to pull empty login page first to get authenticity_token
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['identifier']))
|
||||
soup = self.make_soup(self._fetchUrl(loginUrl,usecache=False))
|
||||
soup = self.make_soup(self.get_request(loginUrl,usecache=False))
|
||||
params['_token']=soup.find('input', {'name':'_token'})['value']
|
||||
|
||||
d = self.post_request(loginUrl, params, usecache=False)
|
||||
|
|
@ -131,11 +131,11 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
url = self.url
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
if self.needToLoginCheck(data):
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
## detect old storyUrl, switch to new storyUrl:
|
||||
|
|
@ -146,7 +146,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
# logger.debug(canonlink)
|
||||
self._setURL(canonlink)
|
||||
url = self.url
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
else:
|
||||
# in case title changed
|
||||
|
|
@ -179,7 +179,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
## find story url, might need to spin through author's pages.
|
||||
while authpagea and not authstorya:
|
||||
logger.debug(authpagea)
|
||||
authsoup = self.make_soup(self._fetchUrl(authpagea['href']))
|
||||
authsoup = self.make_soup(self.get_request(authpagea['href']))
|
||||
authpagea = authsoup.find('a',{'class':'page-link','rel':'next'})
|
||||
# CSS selectors don't allow : or / unquoted, which
|
||||
# BS4(and dependencies) didn't used to enforce.
|
||||
|
|
@ -233,7 +233,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
|||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
|
||||
metadata_url = "https://fiction.live/api/node/{s_id}/"
|
||||
response = self._fetchUrl(metadata_url.format(s_id = self.story_id))
|
||||
response = self.get_request(metadata_url.format(s_id = self.story_id))
|
||||
|
||||
if not response: # this is how fiction.live responds to nonsense urls -- HTTP200 with empty response
|
||||
raise exceptions.StoryDoesNotExist("Empty response for " + self.url)
|
||||
|
|
@ -93,7 +93,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
|||
## get metadata for multi route chapters
|
||||
if 'multiRoute' in data and data['multiRoute'] == True:
|
||||
route_metadata_url = "https://fiction.live/api/anonkun/routes/{s_id}/"
|
||||
response = self._fetchUrl(route_metadata_url.format(s_id = self.story_id))
|
||||
response = self.get_request(route_metadata_url.format(s_id = self.story_id))
|
||||
|
||||
if not response: # this is how fiction.live responds to nonsense urls -- HTTP200 with empty response
|
||||
raise exceptions.StoryDoesNotExist("Empty response for " + self.url)
|
||||
|
|
@ -270,7 +270,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
|
|||
"chapter" : self.format_chapter
|
||||
}
|
||||
|
||||
response = self._fetchUrl(url)
|
||||
response = self.get_request(url)
|
||||
data = json.loads(response)
|
||||
|
||||
if data == []:
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId')
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
keep_summary_html = self.getConfig('keep_summary_html')
|
||||
for row in soup.find('table')('tr'):
|
||||
|
|
@ -149,7 +149,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
if self.getConfig("download_text_version",False):
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
element = soup.find('pre')
|
||||
element.name = 'div'
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
# <div style="margin-left:10ex;margin-right:10ex">
|
||||
## fetching SWI version now instead of text.
|
||||
htmlurl = url.replace('readtextstory','readhtmlstory')
|
||||
soup = self.make_soup(self._fetchUrl(htmlurl))
|
||||
soup = self.make_soup(self.get_request(htmlurl))
|
||||
div = soup.find('div',style="margin-left:10ex;margin-right:10ex")
|
||||
if div:
|
||||
return self.utf8FromSoup(htmlurl,div)
|
||||
|
|
@ -183,7 +183,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
logger.debug("Story With Images(SWI) not found, falling back to HTML.")
|
||||
|
||||
## fetching html version now instead of text.
|
||||
soup = self.make_soup(self._fetchUrl(url.replace('readtextstory','readxstory')))
|
||||
soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory')))
|
||||
|
||||
# remove first hr and everything before
|
||||
remove = soup.find('hr')
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ class FictionPadSiteAdapter(BaseSiteAdapter):
|
|||
params['login']))
|
||||
|
||||
## need to pull empty login page first to get authenticity_token
|
||||
soup = self.make_soup(self._fetchUrl(loginUrl))
|
||||
soup = self.make_soup(self.get_request(loginUrl))
|
||||
params['authenticity_token']=soup.find('input', {'name':'authenticity_token'})['value']
|
||||
|
||||
data = self.post_request(loginUrl, params)
|
||||
|
|
@ -114,10 +114,10 @@ class FictionPadSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
if "This is a mature story. Please sign in to read it." in data:
|
||||
self.performLogin()
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
find = "wordyarn.config.page = "
|
||||
data = data[data.index(find)+len(find):]
|
||||
|
|
@ -186,7 +186,7 @@ class FictionPadSiteAdapter(BaseSiteAdapter):
|
|||
if not url:
|
||||
data = u"<em>This chapter has no text.</em>"
|
||||
else:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(u"<div id='story'>"+data+u"</div>")
|
||||
return self.utf8FromSoup(url,soup)
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
# non-existent/removed story urls get thrown to the front page.
|
||||
if "<h4>Featured Story</h4>" in data:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -107,7 +107,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
if soup.find("div",{"class":"blocked"}) or soup.find("li",{"class":"blocked"}):
|
||||
if self.performLogin(url): # performLogin raises
|
||||
# FailedToLogin if it fails.
|
||||
soup = self.make_soup(self._fetchUrl(url,usecache=False))
|
||||
soup = self.make_soup(self.get_request(url,usecache=False))
|
||||
|
||||
divstory = soup.find('div',id='story')
|
||||
storya = divstory.find('a',href=re.compile(r"^/story/\d+$"))
|
||||
|
|
@ -118,7 +118,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("Normalizing to URL: "+url)
|
||||
self._setURL(url)
|
||||
try:
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -129,7 +129,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
if soup.find("div",{"class":"blocked"}) or soup.find("li",{"class":"blocked"}):
|
||||
if self.performLogin(url): # performLogin raises
|
||||
# FailedToLogin if it fails.
|
||||
soup = self.make_soup(self._fetchUrl(url,usecache=False))
|
||||
soup = self.make_soup(self.get_request(url,usecache=False))
|
||||
|
||||
# title - first h4 tag will be title.
|
||||
titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
|
||||
|
|
@ -222,7 +222,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
span = soup.find('div', {'id' : 'storytext'})
|
||||
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
try:
|
||||
# don't use cache if manual is_adult--should only happen
|
||||
# if it's an adult story and they don't have is_adult in ini.
|
||||
data = self.do_fix_blockquotes(self._fetchUrl(self.url,
|
||||
data = self.do_fix_blockquotes(self.get_request(self.url,
|
||||
usecache=(not self.is_adult)))
|
||||
soup = self.make_soup(data)
|
||||
except HTTPError as e:
|
||||
|
|
@ -301,7 +301,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
#groups
|
||||
groupButton = soup.find('button', {'data-click':'showAll'})
|
||||
if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}):
|
||||
groupResponse = self._fetchUrl("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
|
||||
groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
|
||||
groupData = json.loads(groupResponse)
|
||||
groupList = self.make_soup(groupData["content"])
|
||||
else:
|
||||
|
|
@ -373,7 +373,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
|
|
@ -395,6 +395,6 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
def before_get_urls_from_page(self,url,normalize):
|
||||
## Unlike most that show the links to 'adult' stories, but protect
|
||||
## them, FimF doesn't even show them if not logged in.
|
||||
# data = self._fetchUrl(url)
|
||||
# data = self.get_request(url)
|
||||
if self.getConfig("is_adult"):
|
||||
self.set_adult_cookie()
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: " + url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -116,7 +116,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|||
## used below if total words from site not found
|
||||
|
||||
# fetch author page to get story description.
|
||||
authorsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
authorsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
for story in authorsoup.find_all('article',class_='story-summary'):
|
||||
storya = story.find('h3').find('a',href=re.compile(r"^/viewstory.php\?psid="+self.story.getMetadata('storyId')))
|
||||
|
|
@ -173,7 +173,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
div = soup.find('div', {'class' : 'storytext-container'})
|
||||
if None == div:
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter):
|
|||
url = url+"?enterAgree=1"
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -180,7 +180,7 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
div = soup.select_one("section#viewChapter div.boxbody")
|
||||
if None == div:
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -115,7 +115,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
|
||||
|
||||
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
for list in asoup.findAll('div', {'class' : re.compile('listbox')}):
|
||||
a = list.find('a')
|
||||
|
|
@ -194,7 +194,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -216,7 +216,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -194,7 +194,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
series_url = self.getProtocol()+self.host+'/stories/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -216,7 +216,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -132,7 +132,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
|
|
@ -160,7 +160,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -267,7 +267,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -132,7 +132,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
|
|
@ -147,7 +147,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -252,7 +252,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -274,7 +274,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('Error 404: {0}'.format(self.url))
|
||||
|
|
@ -141,7 +141,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
|
|||
if 'Submission blocked' in data:
|
||||
if self.performLogin(url,soup): # performLogin raises
|
||||
# FailedToLogin if it fails.
|
||||
soup = self.make_soup(self._fetchUrl(url,usecache=False))
|
||||
soup = self.make_soup(self.get_request(url,usecache=False))
|
||||
|
||||
# removing all of the scripts
|
||||
for tag in soup.findAll('script'):
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -134,7 +134,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -289,7 +289,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -310,7 +310,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ class LCFanFicComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
url = self.url
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('Error 404: {0}'.format(self.url))
|
||||
|
|
|
|||
|
|
@ -178,7 +178,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# get the author page
|
||||
try:
|
||||
dataAuth = self._fetchUrl(authorurl)
|
||||
dataAuth = self.get_request(authorurl)
|
||||
soupAuth = self.make_soup(dataAuth)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soupAuth.findAll(text=lambda text:isinstance(text, Comment))]
|
||||
|
|
@ -356,7 +356,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
raw_page = self._fetchUrl(url)
|
||||
raw_page = self.get_request(url)
|
||||
page_soup = self.make_soup(raw_page)
|
||||
pages = page_soup.find('select', {'name' : 'page'})
|
||||
page_nums = [page.text for page in pages.findAll('option')] if pages else 0
|
||||
|
|
@ -373,7 +373,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
|
|||
for page_no in range(2, len(page_nums) + 1):
|
||||
page_url = url + "?page=%s" % page_no
|
||||
# logger.debug("page_url= %s" % page_url)
|
||||
raw_page = self._fetchUrl(page_url)
|
||||
raw_page = self.get_request(page_url)
|
||||
fullhtml += self.getPageText(raw_page, url)
|
||||
|
||||
# logger.debug(fullhtml)
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -217,7 +217,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -323,7 +323,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
# issues with different SGML parsers in python. This is a
|
||||
# nasty hack, but it works.
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -203,7 +203,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -222,7 +222,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -230,7 +230,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
|
|||
|
||||
def _makeChapter(self, url):
|
||||
"""Creates a chapter object given a URL."""
|
||||
document = self.make_soup(self._fetchUrl(url))
|
||||
document = self.make_soup(self.get_request(url))
|
||||
chapter = Chapter(self._getParsingConfiguration(), url, document)
|
||||
return chapter
|
||||
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
|||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
try:
|
||||
data1 = self._fetchUrl(self.url)
|
||||
data1 = self.get_request(self.url)
|
||||
soup1 = self.make_soup(data1)
|
||||
#strip comments from soup
|
||||
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
|
||||
|
|
@ -165,7 +165,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
|||
All content is in article#mcstories, with chapter headers in h3
|
||||
"""
|
||||
logger.debug('Getting chapter text from <%s>' % url)
|
||||
data1 = self._fetchUrl(url)
|
||||
data1 = self.get_request(url)
|
||||
soup1 = self.make_soup(data1)
|
||||
|
||||
#strip comments from soup
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url) # w/o trailing / gets 'chapter list' page even for one-shots.
|
||||
data = self.get_request(url) # w/o trailing / gets 'chapter list' page even for one-shots.
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
logger.error("404 on %s"%url)
|
||||
|
|
@ -205,7 +205,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# print("data:%s"%data)
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -132,7 +132,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
|
|
@ -147,7 +147,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -256,7 +256,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -278,7 +278,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -139,7 +139,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -258,7 +258,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -279,7 +279,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('404 error: {}'.format(url))
|
||||
|
|
@ -192,7 +192,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
|
|||
self.setDescription(url, cdata)
|
||||
|
||||
def getChapterText(self, url):
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
if self.getConfig('fix_excess_space', True):
|
||||
data = fix_excess_space(data)
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ class NovelTroveComSiteAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('Error 404: {0}'.format(self.url))
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -125,7 +125,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
|
@ -142,14 +142,14 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
try:
|
||||
# in case link points somewhere other than the first chapter
|
||||
a = soup.findAll('option')[1]['value']
|
||||
self.story.setMetadata('storyId',a.split('=',)[1])
|
||||
url = 'http://'+self.host+'/'+a
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
except:
|
||||
pass
|
||||
|
||||
|
|
@ -246,7 +246,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
data = data.replace('<div align="left"', '<div align="left">')
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
try:
|
||||
if self.getConfig('force_login'):
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -122,7 +122,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
|
@ -157,14 +157,14 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
self.add_chapter(chapter,'https://'+self.host+chapter['value'])
|
||||
if i == 0:
|
||||
self.story.setMetadata('storyId',chapter['value'].split('/')[3])
|
||||
head = self.make_soup(self._fetchUrl('https://'+self.host+chapter['value'])).findAll('b')
|
||||
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
|
||||
for b in head:
|
||||
if b.text == "Updated":
|
||||
date = b.nextSibling.string.split(': ')[1].split(',')
|
||||
self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
|
||||
|
||||
if i == (len(chapters)-1):
|
||||
head = self.make_soup(self._fetchUrl('https://'+self.host+chapter['value'])).findAll('b')
|
||||
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
|
||||
for b in head:
|
||||
if b.text == "Updated":
|
||||
date = b.nextSibling.string.split(': ')[1].split(',')
|
||||
|
|
@ -173,7 +173,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
|
||||
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
info = asoup.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$"))
|
||||
while info != None:
|
||||
|
|
@ -209,7 +209,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
chapter=self.make_soup('<div class="story"></div>')
|
||||
for p in soup.findAll(['p','blockquote']):
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -113,7 +113,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -226,7 +226,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -248,7 +248,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -185,7 +185,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/fanfiction/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -212,7 +212,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -161,7 +161,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
|
||||
#set constant meta for this site:
|
||||
#Set Language = Spanish
|
||||
|
|
@ -268,7 +268,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'cuerpoHistoria'})
|
||||
if None == div:
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -132,7 +132,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# Since the warning text can change by warning level, let's
|
||||
# look for the warning pass url. ksarchive uses
|
||||
|
|
@ -156,7 +156,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -264,7 +264,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -283,7 +283,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -107,7 +107,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -212,7 +212,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/missingpieces/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -234,7 +234,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story1'})
|
||||
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -108,7 +108,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -147,7 +147,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
self.add_chapter(self.story.getMetadata('title'),url)
|
||||
|
||||
|
||||
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
for list in asoup.findAll('div', {'class' : re.compile('listbox')}):
|
||||
a = list.find('a')
|
||||
if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
|
||||
|
|
@ -225,7 +225,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
series_url = 'https://'+self.host+'/atp/'+series['href']
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -249,7 +249,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ class QuotevComAdapter(BaseSiteAdapter):
|
|||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
data = self.get_request(self.url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
|
||||
|
|
@ -143,7 +143,7 @@ class QuotevComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
def getChapterText(self, url):
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
rescontent = soup.find('div', id='rescontent')
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -242,7 +242,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
# misconfigured and sends more than 100 headers for some stories (probably Set-Cookie). This simply increases
|
||||
# the maximum header limit to 1000 temporarily. Also see: https://github.com/JimmXinu/FanFicFare/pull/174
|
||||
with httplib_max_headers(1000):
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div',{'class':"chapter-inner chapter-content"})
|
||||
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -184,7 +184,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -307,7 +307,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'http://'+self.host+'/fanfics/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -326,7 +326,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -116,7 +116,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -210,7 +210,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -232,7 +232,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
@ -244,7 +244,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
|
|||
def get_urls_from_page(self,url,normalize):
|
||||
from ..geturls import get_urls_from_html
|
||||
# this way it uses User-Agent or other special settings.
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
## I can't find when or why exactly this was added, but it was
|
||||
## in the old code, so here it remains.
|
||||
soup = self.make_soup(data)
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -245,7 +245,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# Extra metadata from URL + /stats/
|
||||
# Again we know the storyID is valid from before, so this shouldn't raise an exception, and if it does we might want to know about it..
|
||||
data = self._fetchUrl(url + 'stats/')
|
||||
data = self.get_request(url + 'stats/')
|
||||
soup = self.make_soup(data)
|
||||
|
||||
def find_stats_data(element, row, metadata):
|
||||
|
|
@ -268,7 +268,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'chp_raw'})
|
||||
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -156,7 +156,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
# The actual text that is used to announce you need to be an
|
||||
# adult varies from site to site. Again, print data before
|
||||
|
|
@ -282,7 +282,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'https://'+self.host+'/fanfics/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -301,7 +301,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ class BFAArchiveShriftwebOrgSiteAdapter(BaseSiteAdapter):
|
|||
cut down on the size of the file
|
||||
'''
|
||||
try:
|
||||
page_data = self._fetchUrl(page)
|
||||
page_data = self.get_request(page)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('404 error: {}'.format(page))
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -126,7 +126,7 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
|
|||
#logger.debug("Series Url: "+seriesUrl)
|
||||
|
||||
# Get Series page and convert to soup
|
||||
seriesPageSoup = self.make_soup(self._fetchUrl(seriesUrl+"&offset=0"))
|
||||
seriesPageSoup = self.make_soup(self.get_request(seriesUrl+"&offset=0"))
|
||||
## &offset=0 is the same as the first page, by adding
|
||||
## that, the page cache will save us from fetching it
|
||||
## twice in the loop below.
|
||||
|
|
@ -142,7 +142,7 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
|
|||
#get urls from all subpages and append to list
|
||||
i=1
|
||||
for seriesPagePageUrl in seriesPageUrlList:
|
||||
seriesPagePageSoup = self.make_soup(self._fetchUrl('https://'+self.host+'/archive/home/'+seriesPagePageUrl['href']))
|
||||
seriesPagePageSoup = self.make_soup(self.get_request('https://'+self.host+'/archive/home/'+seriesPagePageUrl['href']))
|
||||
storyHeaders = seriesPagePageSoup.findAll('h5')
|
||||
## can't just search for story URLs, some story
|
||||
## descs also contain story URLs. Looks like only
|
||||
|
|
@ -270,7 +270,7 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
# No convenient way to get story without the rest of the page, so get whole page and strip unneeded sections
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -112,7 +112,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# need(or easier) to pull other metadata from the author's list page.
|
||||
authsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
# remove author profile incase they've put the story URL in their bio.
|
||||
profile = authsoup.find('div',{'id':'profile'})
|
||||
|
|
@ -224,7 +224,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -243,10 +243,10 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
# soup = self.make_soup(self._fetchUrl(url))
|
||||
# soup = self.make_soup(self.get_request(url))
|
||||
# BeautifulSoup objects to <p> inside <span>, which
|
||||
# technically isn't allowed.
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
# not the most unique thing in the world, but it appears to be
|
||||
# the best we can do here.
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ class SpikeluverComAdapter(BaseSiteAdapter):
|
|||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('http','https?') + r'\d+$'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
soup = self.make_soup(self._fetchUrl(self.url + self.METADATA_URL_SUFFIX))
|
||||
soup = self.make_soup(self.get_request(self.url + self.METADATA_URL_SUFFIX))
|
||||
|
||||
errortext_div = soup.find('div', {'class': 'errortext'})
|
||||
if errortext_div:
|
||||
|
|
@ -78,7 +78,7 @@ class SpikeluverComAdapter(BaseSiteAdapter):
|
|||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
||||
url = ''.join([self.url, self.METADATA_URL_SUFFIX, self.AGE_CONSENT_URL_SUFFIX])
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
pagetitle_div = soup.find('div', id='pagetitle')
|
||||
self.story.setMetadata('title', stripHTML(pagetitle_div.a))
|
||||
|
|
@ -199,5 +199,5 @@ class SpikeluverComAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
url += self.AGE_CONSENT_URL_SUFFIX
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
return self.utf8FromSoup(url, soup.find('div', id='story'))
|
||||
|
|
|
|||
|
|
@ -93,14 +93,14 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
if "fatal MySQL error was encountered" in data:
|
||||
raise exceptions.FailedToDownload("Site SQL Error--bad story")
|
||||
|
|
@ -117,7 +117,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorUrl','https://'+self.host+'/peja/cgi-bin/'+author['href'])
|
||||
self.story.setMetadata('author',author.string)
|
||||
|
||||
authorSoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
authorSoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
# There are scripts within the metadata sections, so we need to
|
||||
# take them out [GComyn]
|
||||
|
|
@ -225,7 +225,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/peja/cgi-bin/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -248,7 +248,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
chaptext = soup.find('div',{'id':"story"}).find('span')
|
||||
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -97,7 +97,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('authorId',aut['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+aut['href'])
|
||||
self.story.setMetadata('author',aut.string)
|
||||
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
|
||||
|
||||
a.find('em').extract()
|
||||
self.story.setMetadata('title',stripHTML(a))
|
||||
|
|
@ -142,7 +142,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
|
|||
params = {'confirmAge':'1'}
|
||||
data = self.post_request(url,params)
|
||||
else:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
data = data[data.index('<table width="90%" align="center">'):]
|
||||
data.replace("<body","<notbody").replace("<BODY","<NOTBODY")
|
||||
|
|
|
|||
|
|
@ -194,7 +194,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url+":i")
|
||||
data = self.get_request(url+":i")
|
||||
# logger.debug(data)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
|
|
@ -208,7 +208,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
try:
|
||||
data = self._fetchUrl(url+":i",usecache=False)
|
||||
data = self.get_request(url+":i",usecache=False)
|
||||
except HTTPError as e:
|
||||
if e.code in (404, 410):
|
||||
raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
|
||||
|
|
@ -333,7 +333,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
while not story_found:
|
||||
page = page + 1
|
||||
try:
|
||||
data = self._fetchUrl(self.story.getList('authorUrl')[0] + "/" + unicode(page))
|
||||
data = self.get_request(self.story.getList('authorUrl')[0] + "/" + unicode(page))
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.FailedToDownload("Story not found in Author's list--Set Access Level to Full Access and change Listings Theme back to "+self.getTheme())
|
||||
|
|
@ -365,7 +365,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('seriesUrl',seriesUrl)
|
||||
series_name = stripHTML(a)
|
||||
# logger.debug("Series name= %s" % series_name)
|
||||
series_soup = self.make_soup(self._fetchUrl(seriesUrl))
|
||||
series_soup = self.make_soup(self.get_request(seriesUrl))
|
||||
if series_soup:
|
||||
# logger.debug("Retrieving Series - looking for name")
|
||||
series_name = stripHTML(series_soup.find('h1', {'id' : 'ptitle'}))
|
||||
|
|
@ -375,7 +375,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
# Check if series is in a universe
|
||||
if self.has_universes:
|
||||
universe_url = self.story.getList('authorUrl')[0] + "&type=uni"
|
||||
universes_soup = self.make_soup(self._fetchUrl(universe_url) )
|
||||
universes_soup = self.make_soup(self.get_request(universe_url) )
|
||||
# logger.debug("Universe url='{0}'".format(universe_url))
|
||||
if universes_soup:
|
||||
universes = universes_soup.findAll('div', {'class' : 'ser-box'})
|
||||
|
|
@ -409,7 +409,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
universe_name = stripHTML(a)
|
||||
universeUrl = 'https://'+self.host+a['href']
|
||||
# logger.debug("Retrieving Universe - about to get page - universeUrl='{0}".format(universeUrl))
|
||||
universe_soup = self.make_soup(self._fetchUrl(universeUrl))
|
||||
universe_soup = self.make_soup(self.get_request(universeUrl))
|
||||
# logger.debug("Retrieving Universe - have page")
|
||||
if universe_soup:
|
||||
# logger.debug("Retrieving Universe - looking for name")
|
||||
|
|
@ -512,7 +512,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
# The story text is wrapped in article tags. Most of the page header and
|
||||
# footer are outside of this.
|
||||
|
|
@ -531,7 +531,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
|
|||
pager.extract()
|
||||
|
||||
for ur in urls:
|
||||
soup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+ur['href']))
|
||||
soup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+ur['href']))
|
||||
|
||||
pagetag = soup.find('article')
|
||||
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ class SugarQuillNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
|
|
@ -129,7 +129,7 @@ class SugarQuillNetAdapter(BaseSiteAdapter):
|
|||
author_Url = self.story.getMetadata('authorUrl').replace('&','&')
|
||||
logger.debug('Getting the author page: {0}'.format(author_Url))
|
||||
try:
|
||||
adata = self._fetchUrl(author_Url)
|
||||
adata = self.get_request(author_Url)
|
||||
except HTTPError as e:
|
||||
if e.code in 404:
|
||||
raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
|
||||
|
|
@ -159,7 +159,7 @@ class SugarQuillNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
chap = soup.find('td',{'class':'content_pane'})
|
||||
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
|
|||
url=self.url
|
||||
logger.debug("URL: "+url)
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -149,7 +149,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
|
|||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
chapter = soup.find('div', {'id' : 'content'})
|
||||
|
||||
chapter_header = chapter.find('h1', id = re.compile("chapter"))
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -125,7 +125,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
|
|||
url = self.url+'&index=1'+addurl
|
||||
logger.debug("Changing URL: "+url)
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
|
||||
if "This story contains mature content which may include violence, sexual situations, and coarse language" in data:
|
||||
raise exceptions.AdultCheckRequired(self.url)
|
||||
|
|
@ -219,7 +219,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -238,7 +238,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
span = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -139,7 +139,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
|
|
@ -154,7 +154,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -258,7 +258,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -122,7 +122,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
|
@ -156,7 +156,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
index = 1
|
||||
found = 0
|
||||
while found == 0:
|
||||
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')+"&page="+unicode(index)))
|
||||
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')+"&page="+unicode(index)))
|
||||
|
||||
for info in asoup.findAll('td', {'class' : 'highlightcolor1'}):
|
||||
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
|
||||
|
|
@ -223,7 +223,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.findAll('table')[2].findAll('td')[1]
|
||||
for a in div.findAll('div'):
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ class TomParisDormComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
|
|
@ -192,7 +192,7 @@ class TomParisDormComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ class TrekFanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
on the size of the file
|
||||
'''
|
||||
try:
|
||||
page_data = self._fetchUrl(page)
|
||||
page_data = self.get_request(page)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('404 error: {}'.format(page))
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -136,7 +136,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
m = re.search(r"'viewstory.php\?sid=\d+((?:&ageconsent=ok)?&warning=\d+)'",data)
|
||||
if m != None:
|
||||
|
|
@ -151,7 +151,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL 2nd try: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -285,7 +285,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/efiction/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -304,7 +304,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
# <input type='text' id='urealname' name='urealname' value=''/>
|
||||
# <input type='password' id='password' name='6bb3fcd148d148629223690bf19733b8'/>
|
||||
# <input type='submit' value='Login' name='loginsubmit'/>
|
||||
soup = self.make_soup(self._fetchUrl(loginUrl))
|
||||
soup = self.make_soup(self.get_request(loginUrl))
|
||||
## FYI, this will fail if cookiejar is shared, but
|
||||
## use_pagecache is false.
|
||||
params['ctkn']=soup.find('input', {'name':'ctkn'})['value']
|
||||
|
|
@ -118,7 +118,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def setSiteMaxRating(self,url,data=None,soup=None):
|
||||
if not data:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if self.is_adult or self.getConfig("is_adult"):
|
||||
|
|
@ -131,7 +131,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
data = self.post_request("https://"+self.getSiteDomain()+'/setmaxrating.php',params)
|
||||
# refetch story page.
|
||||
## XXX - needs cache invalidate? Or at least check that it this needs doing...
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
data = self.get_request(url,usecache=False)
|
||||
soup = self.make_soup(data)
|
||||
return (data,soup)
|
||||
|
||||
|
|
@ -149,7 +149,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
#print("data:%s"%data)
|
||||
soup = self.make_soup(data)
|
||||
except HTTPError as e:
|
||||
|
|
@ -180,7 +180,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
try:
|
||||
# going to pull part of the meta data from *primary* author list page.
|
||||
logger.debug("**AUTHOR** URL: "+authorurl)
|
||||
authordata = self._fetchUrl(authorurl)
|
||||
authordata = self.get_request(authorurl)
|
||||
descurl=authorurl
|
||||
authorsoup = self.make_soup(authordata)
|
||||
# author can have several pages, scan until we find it.
|
||||
|
|
@ -198,7 +198,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
# raise exceptions.AdultCheckRequired(self.url)
|
||||
nextpage = 'https://'+self.host+nextarrow['href']
|
||||
logger.debug("**AUTHOR** nextpage URL: "+nextpage)
|
||||
authordata = self._fetchUrl(nextpage)
|
||||
authordata = self.get_request(nextpage)
|
||||
#logger.info("authsoup:%s"%authorsoup)
|
||||
descurl=nextpage
|
||||
authorsoup = self.make_soup(authordata)
|
||||
|
|
@ -219,7 +219,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
# going to pull part of the meta data from author list page.
|
||||
infourl = 'https://'+self.host+ainfo['href']
|
||||
logger.debug("**StoryInfo** URL: "+infourl)
|
||||
infodata = self._fetchUrl(infourl)
|
||||
infodata = self.get_request(infourl)
|
||||
infosoup = self.make_soup(infodata)
|
||||
|
||||
# for a in infosoup.findAll('a',href=re.compile(r"^/Author-\d+")):
|
||||
|
|
@ -328,7 +328,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'storyinnerbody'})
|
||||
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -111,7 +111,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
if self.needToLoginCheck(data):
|
||||
# need to log in for this one.
|
||||
self.performLogin(url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
|
||||
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
|
||||
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
|
@ -208,7 +208,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -226,7 +226,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
# problems with some stories, but only in calibre. I suspect
|
||||
# issues with different SGML parsers in python. This is a
|
||||
# nasty hack, but it works.
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
|
|||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
soup = self.make_soup(self._fetchUrl(self.url + self.METADATA_URL_SUFFIX))
|
||||
soup = self.make_soup(self.get_request(self.url + self.METADATA_URL_SUFFIX))
|
||||
|
||||
# Check if the story is for "Registered Users Only", i.e. has adult
|
||||
# content. Based on the "is_adult" attributes either login or raise an
|
||||
|
|
@ -118,7 +118,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload(error_text)
|
||||
|
||||
url = ''.join([self.url, self.METADATA_URL_SUFFIX, self.AGE_CONSENT_URL_SUFFIX])
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
# If logged in and the skin doesn't match the required skin throw an
|
||||
# error
|
||||
|
|
@ -227,5 +227,5 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
url += self.AGE_CONSENT_URL_SUFFIX
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
return self.utf8FromSoup(url, soup.find('div', id='story'))
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -200,7 +200,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
|
|||
series_url = 'http://'+self.host+'/archive/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -219,7 +219,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
# note: classvar may be useless because of del adapter
|
||||
if WattpadComAdapter.CATEGORY_DEFs is None:
|
||||
try:
|
||||
WattpadComAdapter.CATEGORY_DEFs = json.loads(self._fetchUrl(WattpadComAdapter.API_GETCATEGORIES))
|
||||
WattpadComAdapter.CATEGORY_DEFs = json.loads(self.get_request(WattpadComAdapter.API_GETCATEGORIES))
|
||||
except:
|
||||
logger.warning('API_GETCATEGORIES failed.')
|
||||
WattpadComAdapter.CATEGORY_DEFs = []
|
||||
|
|
@ -91,7 +91,7 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
## %in email.
|
||||
## https://www.wattpad.com/et?c=euc&t=uploaded_story&l=https%3A%2F%2Fwww.wattpad.com%2F997616013-nuestro-destino-ron-weasley-y-tu-cap-11&emid=uploaded_story.295918124.1608687259%2C544769.4a691b8fc2a4607e1c770aa4ebd48cc3aaf39bd599a738d3747d41fdfa37fcda
|
||||
chapterIdInUrl = re.match(r'.*https(://|%3A%2F%2F)www\.wattpad\.com(/|%2F)(?P<chapterId>\d+).*', url)
|
||||
chapterInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_CHAPTERINFO % chapterIdInUrl.group('chapterId')))
|
||||
chapterInfo = json.loads(self.get_request(WattpadComAdapter.API_CHAPTERINFO % chapterIdInUrl.group('chapterId')))
|
||||
groupid = chapterInfo.get('groupId', None)
|
||||
if groupid is None:
|
||||
raise exceptions.StoryDoesNotExist(url)
|
||||
|
|
@ -100,7 +100,7 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
|
||||
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
|
||||
try:
|
||||
storyInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_STORYINFO % self.storyId))
|
||||
storyInfo = json.loads(self.get_request(WattpadComAdapter.API_STORYINFO % self.storyId))
|
||||
# logger.debug('storyInfo: %s' % json.dumps(storyInfo))
|
||||
except Exception:
|
||||
raise exceptions.InvalidStoryURL(self.url, self.getSiteDomain(), self.getSiteExampleURLs())
|
||||
|
|
@ -148,7 +148,7 @@ class WattpadComAdapter(BaseSiteAdapter):
|
|||
def getChapterText(self, url):
|
||||
logger.debug('%s' % url)
|
||||
chapterID = re.search(r'https://www.wattpad.com/(?P<chapterID>\d+).*', url).group('chapterID')
|
||||
return self.utf8FromSoup(url,self.make_soup(self._fetchUrl(WattpadComAdapter.API_STORYTEXT % chapterID)))
|
||||
return self.utf8FromSoup(url,self.make_soup(self.get_request(WattpadComAdapter.API_STORYTEXT % chapterID)))
|
||||
|
||||
# adapter self-dicovery is not implemented in fanficfare (it existed for the previous project)
|
||||
def getClass():
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
url = self.url
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
# logger.debug(data)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
|
|
@ -186,7 +186,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload('csrf token could not be found')
|
||||
|
||||
## get chapters from a json API url.
|
||||
jsondata = json.loads(self._fetchUrl(
|
||||
jsondata = json.loads(self.get_request(
|
||||
"https://" + self.getSiteDomain() + "/apiajax/chapter/GetChapterList?_csrfToken=" + csrf_token + "&bookId=" + self.story.getMetadata(
|
||||
'storyId')))
|
||||
# print json.dumps(jsondata, sort_keys=True,
|
||||
|
|
@ -230,7 +230,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
chapter_id = url.split('/')[-1]
|
||||
content_url = 'https://%s/apiajax/chapter/GetContent?_csrfToken=%s&bookId=%s&chapterId=%s&_=%d' % (
|
||||
self.getSiteDomain(), self._csrf_token, book_id, chapter_id, time.time() * 1000)
|
||||
topdata = json.loads(self._fetchUrl(content_url))
|
||||
topdata = json.loads(self.get_request(content_url))
|
||||
# logger.debug(json.dumps(topdata, sort_keys=True,
|
||||
# indent=2, separators=(',', ':')))
|
||||
chapter_info = topdata['data']['chapterInfo']
|
||||
|
|
@ -239,14 +239,14 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
|
|||
if chapter_info['isVip'] == 1:
|
||||
content_token_url = 'https://%s/apiajax/chapter/GetChapterContentToken?_csrfToken=%s&bookId=%s&chapterId=%s' % (
|
||||
self.getSiteDomain(), self._csrf_token, self.story.getMetadata('storyId'), chapter_id)
|
||||
content_token = json.loads(self._fetchUrl(content_token_url))['data']['token']
|
||||
content_token = json.loads(self.get_request(content_token_url))['data']['token']
|
||||
|
||||
content_by_token_url = 'https://%s/apiajax/chapter/GetChapterContentByToken?_csrfToken=%s&token=%s' % (
|
||||
self.getSiteDomain(), self._csrf_token, content_token)
|
||||
|
||||
# This is actually required or the data/content field will be empty
|
||||
time.sleep(self._GET_VIP_CONTENT_DELAY)
|
||||
contents = json.loads(self._fetchUrl(content_by_token_url))['data']['contents']
|
||||
contents = json.loads(self.get_request(content_by_token_url))['data']['contents']
|
||||
else:
|
||||
contents = chapter_info['contents']
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
try:
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -110,7 +110,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
## author page to find it.
|
||||
|
||||
logger.debug("Author URL: "+self.story.getMetadata('authorUrl'))
|
||||
soup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl'))) # normalize <br> tags to <br />
|
||||
soup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl'))) # normalize <br> tags to <br />
|
||||
# find this story in the list, parse it's metadata based on
|
||||
# lots of assumptions about the html, since there's little
|
||||
# tagging.
|
||||
|
|
@ -193,7 +193,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
series_name = a.string
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
try:
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -236,7 +236,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
|
||||
# hardly a great identifier, I know, but whofic really doesn't
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -181,7 +181,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/wrfa/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
|
||||
i=1
|
||||
|
|
@ -203,7 +203,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
logger.debug("URL: "+url)
|
||||
|
||||
try:
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
except HTTPError as e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
|
|
@ -196,7 +196,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
seriessoup = self.make_soup(self.get_request(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
i=1
|
||||
for a in storyas:
|
||||
|
|
@ -219,7 +219,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = self.make_soup(self._fetchUrl(url))
|
||||
soup = self.make_soup(self.get_request(url))
|
||||
|
||||
div = soup.find('div', {'id' : 'story'})
|
||||
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class WuxiaWorldCoSiteAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
data = self.get_request(self.url)
|
||||
except HTTPError as exception:
|
||||
if exception.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
|
||||
|
|
@ -144,7 +144,7 @@ class WuxiaWorldCoSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s', url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
|
||||
content = soup.select_one('.chapter-entity')
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
|
|||
def extractChapterUrlsAndMetadata(self):
|
||||
logger.debug('URL: %s', self.url)
|
||||
try:
|
||||
data = self._fetchUrl(self.url)
|
||||
data = self.get_request(self.url)
|
||||
except HTTPError as exception:
|
||||
if exception.code == 404:
|
||||
raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
|
||||
|
|
@ -120,7 +120,7 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
|
|||
self.add_chapter(title, url)
|
||||
|
||||
|
||||
last_chapter_data = self._fetchUrl(self.get_chapter(-1,'url'))
|
||||
last_chapter_data = self.get_request(self.get_chapter(-1,'url'))
|
||||
last_chapter_soup = self.make_soup(last_chapter_data)
|
||||
last_chapter_ld = self._parse_linked_data(last_chapter_soup)
|
||||
self.story.setMetadata('dateUpdated', self._parse_date(last_chapter_ld['datePublished']))
|
||||
|
|
@ -130,7 +130,7 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s', url)
|
||||
data = self._fetchUrl(url)
|
||||
data = self.get_request(url)
|
||||
soup = self.make_soup(data)
|
||||
content = soup.select_one('.panel-default .fr-view')
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue