Refactor _fetchUrl() to get_request()

This commit is contained in:
Jim Miller 2021-01-24 14:12:41 -06:00
parent 3ba65f922b
commit 75b1cc23b5
111 changed files with 392 additions and 392 deletions

View file

@ -75,7 +75,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -194,7 +194,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -213,7 +213,7 @@ class AdAstraFanficComSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
# problems with some stories, but only in calibre. I suspect
# issues with different SGML parsers in python. This is a
# nasty hack, but it works.

View file

@ -201,7 +201,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist("Code: 404. {0}".format(url))
@ -267,7 +267,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
logger.debug('Getting the author page: {0}'.format(author_Url))
try:
adata = self._fetchUrl(author_Url)
adata = self.get_request(author_Url)
except HTTPError as e:
if e.code in 404:
raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
@ -305,7 +305,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
author_Url = '{0}&view=story&zone={1}&page={2}'.format(self.story.getMetadata('authorUrl'), self.zone, unicode(page))
logger.debug('Getting the author page: {0}'.format(author_Url))
try:
adata = self._fetchUrl(author_Url)
adata = self.get_request(author_Url)
except HTTPError as e:
if e.code in 404:
raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
@ -398,7 +398,7 @@ class AdultFanFictionOrgAdapter(BaseSiteAdapter):
#Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
chaptertag = soup.find('div',{'class' : 'pagination'}).parent.findNext('td')
if None == chaptertag:
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))

View file

@ -125,7 +125,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -135,7 +135,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# Since the warning text can change by warning level, let's
# look for the warning pass url. ksarchive uses
@ -159,7 +159,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -267,7 +267,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -286,7 +286,7 @@ class AndromedaWebComAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'class' : 'story'})

View file

@ -124,7 +124,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
params['utf8'] = u'\x2713' # utf8 *is* required now. hex code works better than actual character for some reason. u'✓'
# authenticity_token now comes from a completely separate json call.
token_json = json.loads(self._fetchUrl('https://' + self.getSiteDomain() + "/token_dispenser.json"))
token_json = json.loads(self.get_request('https://' + self.getSiteDomain() + "/token_dispenser.json"))
params['authenticity_token'] = token_json['token']
loginUrl = 'https://' + self.getSiteDomain() + '/users/login'
@ -162,8 +162,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
logger.info("metaurl: "+metaurl)
try:
data = self._fetchUrl(url)
meta = self._fetchUrl(metaurl)
data = self.get_request(url)
meta = self.get_request(metaurl)
if "This work could have adult content. If you proceed you have agreed that you are willing to see such content." in meta:
if self.addurl:
@ -172,7 +172,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
metasoup = self.make_soup(meta)
a = metasoup.find('a',text='Proceed')
metaurl = 'https://'+self.host+a['href']
meta = self._fetchUrl(metaurl)
meta = self.get_request(metaurl)
else:
raise exceptions.AdultCheckRequired(self.url)
@ -189,8 +189,8 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data) or \
( self.getConfig("always_login") and 'href="/users/logout"' not in data ):
self.performLogin(url,data)
data = self._fetchUrl(url,usecache=False)
meta = self._fetchUrl(metaurl,usecache=False)
data = self.get_request(url,usecache=False)
meta = self.get_request(metaurl,usecache=False)
# use BeautifulSoup HTML parser to make everything easier to find.
soup = self.make_soup(data)
@ -428,7 +428,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
logger.debug("USE view_full_work")
## Assumed view_adult=true was cookied during metadata
if not self.full_work_soup:
self.full_work_soup = self.make_soup(self._fetchUrl(self.url+"?view_full_work=true"+self.addurl.replace('?','&')))
self.full_work_soup = self.make_soup(self.get_request(self.url+"?view_full_work=true"+self.addurl.replace('?','&')))
## AO3 has had several cases now where chapter numbers
## are missing, breaking the link between
## <div id=chapter-##> and Chapter ##.
@ -445,7 +445,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
if whole_dl_soup:
chapter_dl_soup = self.full_work_chapters[index]
else:
whole_dl_soup = chapter_dl_soup = self.make_soup(self._fetchUrl(url+self.addurl))
whole_dl_soup = chapter_dl_soup = self.make_soup(self.get_request(url+self.addurl))
if None == chapter_dl_soup:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
@ -584,7 +584,7 @@ class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
else:
addurl=""
# just to get an authenticity_token.
data = self._fetchUrl(url+addurl)
data = self.get_request(url+addurl)
# login the session.
self.performLogin(url,data)
# get the list page with logged in session.

View file

@ -80,7 +80,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -104,7 +104,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl','http://'+self.host+'/'+author['href'])
self.story.setMetadata('author',author.string)
authorSoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
authorSoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
chapter=soup.find('select',{'name':'chapter'}).findAll('option')
@ -175,7 +175,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div',{'class':"chapter bordersolid"}).findNext('div').findNext('div')

View file

@ -82,7 +82,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
raise exceptions.AdultCheckRequired(self.url)
try:
data1 = self._fetchUrl(self.url)
data1 = self.get_request(self.url)
soup1 = self.make_soup(data1)
#strip comments from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
@ -148,7 +148,7 @@ class ASexStoriesComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from <%s>' % url)
#logger.info('Getting chapter text from <%s>' % url)
data1 = self._fetchUrl(url)
data1 = self.get_request(url)
soup1 = self.make_soup(data1)
# get story text

View file

@ -115,7 +115,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -125,7 +125,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
@ -141,14 +141,14 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
try:
# in case link points somewhere other than the first chapter
a = soup.findAll('option')[1]['value']
self.story.setMetadata('storyId',a.split('=',)[1])
url = 'http://'+self.host+'/'+a
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
except:
pass
@ -240,7 +240,7 @@ class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data) # some chapters seem to be hanging up on those tags, so it is safer to close them

View file

@ -98,8 +98,8 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
if subHref:
#does not work when using https - 403
subUrl = 'http://' + self.getSiteDomain() + subHref['href']
self._fetchUrl(subUrl)
data = self._fetchUrl(url,usecache=False)
self.get_request(subUrl)
data = self.get_request(url,usecache=False)
soup = self.make_soup(data)
check = soup.find('div',{'class':'click-to-read-full'})
if check:
@ -121,7 +121,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
url = self.url
logger.info("url: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
@ -136,7 +136,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# always login if not already to avoid lots of headaches
self.performLogin(url,data)
# refresh website after logging in
data = self._fetchUrl(url,usecache=False)
data = self.get_request(url,usecache=False)
soup = self.make_soup(data)
# subscription check
@ -201,7 +201,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
# story description
try:
jsonlink = soup.find('script',string=re.compile(r'/api/forewords/[0-9]+/foreword_[0-9a-z]+.json')).get_text().split('"')[1] # grabs url from quotation marks
fore_json = json.loads(self._fetchUrl(jsonlink))
fore_json = json.loads(self.get_request(jsonlink))
content = self.make_soup(fore_json['post']).find('body') # BS4 adds <html><body> if not present.
a = content.find('div', {'id':'story-description'})
except:
@ -261,13 +261,13 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
try:
# <script>var postApi = "https://www.asianfanfics.com/api/chapters/4791923/chapter_46d32e413d1a702a26f7637eabbfb6f3.json";</script>
jsonlink = soup.find('script',string=re.compile(r'/api/chapters/[0-9]+/chapter_[0-9a-z]+.json')).get_text().split('"')[1] # grabs url from quotation marks
chap_json = json.loads(self._fetchUrl(jsonlink))
chap_json = json.loads(self.get_request(jsonlink))
content = self.make_soup(chap_json['post']).find('body') # BS4 adds <html><body> if not present.
content.name='div' # change body to a div.
if self.getConfig('inject_chapter_title'):

View file

@ -111,7 +111,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
raise exceptions.AdultCheckRequired(self.url)
try:
data = self._fetchUrl(self.url)
data = self.get_request(self.url)
soup = self.make_soup(data)
except HTTPError as e:
if e.code == 404:
@ -133,7 +133,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
time.sleep(1)
logger.warning('A problem retrieving the author information. Trying Again')
try:
data = self._fetchUrl(self.url)
data = self.get_request(self.url)
soup = self.make_soup(data)
except HTTPError as e:
if e.code == 404:
@ -187,7 +187,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
#Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
chaptertag = soup.find('div',{'class' : 'storyblock'})
# Some of the stories have the chapters in <pre> sections, so have to check for that

View file

@ -72,7 +72,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
logger.debug("URL: "+self.url)
soup = self.make_soup(self._fetchUrl(self.url))
soup = self.make_soup(self.get_request(self.url))
# Since no 404 error code we have to raise the exception ourselves.
# A title that is just 'by' indicates that there is no author name
@ -99,7 +99,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
# Get the URL to the author's page and find the correct story entry to
# scrape the metadata
author_url = urlparse.urljoin(self.url, soup.find('a', {'class': 'headline'})['href'])
soup = self.make_soup(self._fetchUrl(author_url))
soup = self.make_soup(self.get_request(author_url))
# Ignore first list_box div, it only contains the author information
for list_box in soup('div', {'class': 'list_box'})[1:]:
@ -188,7 +188,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
raise exceptions.AdultCheckRequired(self.url)
def getChapterText(self, url):
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
storytext_div = soup.find('div', {'class': 'tl'})
storytext_div = storytext_div.find('div', {'class': ''})

View file

@ -151,7 +151,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -185,7 +185,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -297,7 +297,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
series_url = 'http://'+self.host+'/fiction/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -316,7 +316,7 @@ class BloodTiesFansComAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -123,7 +123,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -133,7 +133,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# Since the warning text can change by warning level, let's
# look for the warning pass url. ksarchive uses
@ -157,7 +157,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -266,7 +266,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^efiction/viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -285,7 +285,7 @@ class BuffyGilesComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -88,7 +88,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -203,7 +203,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -222,7 +222,7 @@ class ChaosSycophantHexComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -69,7 +69,7 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
logger.debug('URL: %s', self.url)
try:
data = self._fetchUrl(self.url)
data = self.get_request(self.url)
except HTTPError as exception:
if exception.code == 404:
raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
@ -106,7 +106,7 @@ class ChireadsComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
content = soup.select_one('#content')

View file

@ -94,7 +94,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -212,7 +212,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -234,7 +234,7 @@ class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -87,7 +87,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -196,7 +196,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -221,7 +221,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -123,7 +123,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -133,7 +133,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# Since the warning text can change by warning level, let's
# look for the warning pass url. ksarchive uses
@ -157,7 +157,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -265,7 +265,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^fiction/viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -284,7 +284,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -88,7 +88,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -108,7 +108,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -209,7 +209,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -228,7 +228,7 @@ class DestinysGatewayComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -126,7 +126,7 @@ class DokugaComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -139,7 +139,7 @@ class DokugaComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url,soup)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
@ -172,7 +172,7 @@ class DokugaComAdapter(BaseSiteAdapter):
self.add_chapter(chapter,'http://'+self.host+'/'+self.section+'/story/'+self.story.getMetadata('storyId')+'/'+chapter['value'])
asoup = self.make_soup(self._fetchUrl(alink))
asoup = self.make_soup(self.get_request(alink))
if 'fanfiction' in self.section:
asoup=asoup.find('div', {'id' : 'cb_tabid_52'}).find('div')
@ -262,7 +262,7 @@ class DokugaComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'chtext'})

View file

@ -122,7 +122,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -132,7 +132,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
if m != None:
@ -147,7 +147,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -261,7 +261,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -283,7 +283,7 @@ class DracoAndGinnyComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'class' : 'listbox'})

View file

@ -110,7 +110,7 @@ class EFPFanFicNet(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -120,7 +120,7 @@ class EFPFanFicNet(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# if "Access denied. This story has not been validated by the adminstrators of this site." in data:
# raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
@ -184,7 +184,7 @@ class EFPFanFicNet(BaseSiteAdapter):
# Need author page for most of the metadata.
logger.debug("fetching author page: (%s)"%authurl)
authsoup = self.make_soup(self._fetchUrl(authurl))
authsoup = self.make_soup(self.get_request(authurl))
#print("authsoup:%s"%authsoup)
storyas = authsoup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r'&i=1$'))
@ -271,7 +271,7 @@ class EFPFanFicNet(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1'))
i=1
@ -291,7 +291,7 @@ class EFPFanFicNet(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'class' : 'storia'})

View file

@ -88,7 +88,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -108,7 +108,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -219,7 +219,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
@ -240,7 +240,7 @@ class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -90,7 +90,7 @@ class FaerieArchiveComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -195,7 +195,7 @@ class FaerieArchiveComAdapter(BaseSiteAdapter):
# Find Series name from series URL.
series_url = 'http://'+self.host+'/'+series['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -219,7 +219,7 @@ class FaerieArchiveComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -197,7 +197,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl','https://{0}/'.format(self.parsedUrl.netloc))
loginUrl = self.story.getMetadata('authorUrl')+'account/'
loginsoup = self.make_soup(self._fetchUrl(loginUrl))
loginsoup = self.make_soup(self.get_request(loginUrl))
if True:
# if self.performLogin(loginUrl, loginsoup):
# Now go hunting for all the meta data and the chapter list.
@ -266,7 +266,7 @@ class FanficAuthorsNetAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
story = soup.find('div',{'class':'story'})

View file

@ -138,7 +138,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -148,7 +148,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
if m != None:
@ -163,7 +163,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -287,7 +287,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -306,7 +306,7 @@ class FanficCastleTVNetAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -73,7 +73,7 @@ class FanficHuAdapter(BaseSiteAdapter):
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'
def extractChapterUrlsAndMetadata(self):
soup = self.make_soup(self._fetchUrl(self.url + '&i=1'))
soup = self.make_soup(self.get_request(self.url + '&i=1'))
if ensure_text(soup.title.string).strip(u' :') == u'írta':
raise exceptions.StoryDoesNotExist(self.url)
@ -91,7 +91,7 @@ class FanficHuAdapter(BaseSiteAdapter):
self.add_chapter(option.string, url)
author_url = urlparse.urljoin(self.BASE_URL, soup.find('a', href=lambda href: href and href.startswith('viewuser.php?uid='))['href'])
soup = self.make_soup(self._fetchUrl(author_url))
soup = self.make_soup(self.get_request(author_url))
story_id = self.story.getMetadata('storyId')
for table in soup('table', {'class': 'mainnav'}):
@ -180,7 +180,7 @@ class FanficHuAdapter(BaseSiteAdapter):
raise exceptions.AdultCheckRequired(self.url)
def getChapterText(self, url):
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
story_cell = soup.find('form', action='viewstory.php').parent.parent
for div in story_cell('div'):

View file

@ -113,7 +113,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
logger.info("Will now login to URL (%s) as (%s)" % (loginUrl,
params['name']))
## must need a cookie or something.
self._fetchUrl(loginUrl, usecache=False)
self.get_request(loginUrl, usecache=False)
d = self.post_request(loginUrl, params, usecache=False)
if self.needToLoginCheck(d):
@ -138,7 +138,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
logger.info("url: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -170,7 +170,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
if self.story.getMetadata('rating') != 'General' and self.needToLoginCheck(data):
self.performLogin(url)
# reload after login.
data = self._fetchUrl(url,usecache=False)
data = self.get_request(url,usecache=False)
soup = self.make_soup(data)
fichead = soup.find('div',class_='FicHead')
@ -325,7 +325,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
logger.debug("USE view_full_work")
## Assumed view_adult=true was cookied during metadata
if not self.full_work_soup:
self.full_work_soup = self.make_soup(self._fetchUrl(
self.full_work_soup = self.make_soup(self.get_request(
'https://' + self.getSiteDomain() + '/read.php?id='+self.story.getMetadata('storyId')))
whole_dl_soup = self.full_work_soup
@ -334,7 +334,7 @@ class FanFicsMeAdapter(BaseSiteAdapter):
self.use_full_work_soup = False
logger.warning("c%s not found in view_full_work--ending use_view_full_work"%(index))
if chapter_div == None:
whole_dl_soup = self.make_soup(self._fetchUrl(url))
whole_dl_soup = self.make_soup(self.get_request(url))
chapter_div = whole_dl_soup.find('div',{'id':'c%s'%(index)})
if None == chapter_div:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)

View file

@ -103,7 +103,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -197,7 +197,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
series_name = stripHTML(seriesa)
series_url = 'https://'+self.host+'/archive/'+seriesa['href']
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.find_all('a', href=re.compile(r'viewstory.php\?sid=\d+'))
# logger.debug(storyas)
j=1
@ -224,7 +224,7 @@ class FanfictalkComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -147,7 +147,7 @@ class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -157,7 +157,7 @@ class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# The actual text that is used to announce you need to be an
# adult varies from site to site. Again, print data before
@ -256,7 +256,7 @@ class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
series_url = 'http://'+self.host+'/efiction/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -275,7 +275,7 @@ class FanfictionJunkiesDeAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -72,11 +72,11 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
def getSiteURLPattern(self):
return r"https?://(www|m)?\.fanfiction\.net/s/\d+(/\d+)?(/|/[^/]+)?/?$"
def _fetchUrl(self,url,extrasleep=1.0,usecache=True):
def get_request(self,url,extrasleep=1.0,usecache=True):
## ffnet(and, I assume, fpcom) tends to fail more if hit too
## fast. This is in additional to what ever the
## slow_down_sleep_time setting is.
return BaseSiteAdapter._fetchUrl(self,url,
return BaseSiteAdapter.get_request(self,url,
extrasleep=extrasleep,
usecache=usecache)
@ -104,7 +104,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
# use BeautifulSoup HTML parser to make everything easier to find.
try:
data = self._fetchUrl(url)
data = self.get_request(url)
#logger.debug("\n===================\n%s\n===================\n"%data)
soup = self.make_soup(data)
except HTTPError as e:
@ -142,7 +142,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
chapcount+1,
self.urltitle)
logger.debug('=Trying newer chapter: %s' % tryurl)
newdata = self._fetchUrl(tryurl)
newdata = self.get_request(tryurl)
if "not found. Please check to see you are not using an outdated url." not in newdata \
and "This request takes too long to process, it is timed out by the server." not in newdata:
logger.debug('=======Found newer chapter: %s' % tryurl)
@ -177,7 +177,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
self.story.addToList('category',stripHTML(categories[1]))
elif 'Crossover' in categories[0]['href']:
caturl = "https://%s%s"%(self.getSiteDomain(),categories[0]['href'])
catsoup = self.make_soup(self._fetchUrl(caturl))
catsoup = self.make_soup(self.get_request(caturl))
found = False
for a in catsoup.findAll('a',href=re.compile(r"^/crossovers/.+?/\d+/")):
self.story.addToList('category',stripHTML(a))
@ -309,7 +309,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
authimg_url = ""
if cover_url and self.getConfig('skip_author_cover'):
authsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
try:
img = authsoup.select_one('img.lazy.cimage')
authimg_url=img['data-original']
@ -373,7 +373,7 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
## AND explicitly put title URL back on chapter URL for fetch
## *only*--normalized chapter URL does NOT have urltitle
data = self._fetchUrl(url+self.urltitle,
data = self.get_request(url+self.urltitle,
extrasleep=4.0)
if "Please email this error message in full to <a href='mailto:support@fanfiction.com'>support@fanfiction.com</a>" in data:

View file

@ -119,7 +119,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -129,7 +129,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url,usecache=False)
data = self.get_request(url,usecache=False)
if "Uhr ist diese Geschichte nur nach einer" in data:
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Auserhalb der Zeit von 23:00 Uhr bis 04:00 Uhr ist diese Geschichte nur nach einer erfolgreichen Altersverifikation zuganglich.")
@ -192,11 +192,11 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
## Get description from own URL:
## /?a=v&storyid=46ccbef30000616306614050&s=1
descsoup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"/?a=v&storyid="+self.story.getMetadata('storyId')+"&s=1"))
descsoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"/?a=v&storyid="+self.story.getMetadata('storyId')+"&s=1"))
self.setDescription(url,stripHTML(descsoup))
# #find metadata on the author's page
# asoup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
# asoup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+"?a=q&a1=v&t=nickdetailsstories&lbi=stories&ar=0&nick="+self.story.getMetadata('authorId')))
# tr=asoup.findAll('tr')
# for i in range(1,len(tr)):
# a = tr[i].find('a')
@ -217,7 +217,7 @@ class FanFiktionDeAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
time.sleep(0.5) ## ffde has "floodlock" protection
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'storytext'})
for a in div.findAll('script'):

View file

@ -80,7 +80,7 @@ class FastNovelNetAdapter(BaseSiteAdapter):
logger.debug('URL: %s', self.url)
try:
data = self._fetchUrl(self.url)
data = self.get_request(self.url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
@ -139,7 +139,7 @@ class FastNovelNetAdapter(BaseSiteAdapter):
self.add_chapter(title, 'https://' + self.host + a["href"])
def getChapterText(self, url):
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
story = soup.select_one('#chapter-body')

View file

@ -85,7 +85,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
url=self.url
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -176,7 +176,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
## after site change, I don't see word count anywhere.
# pr=soup.find('a', href=re.compile(r'/printfic/\w+'))
# pr='https://'+self.host+pr['href']
# pr = self.make_soup(self._fetchUrl(pr))
# pr = self.make_soup(self.get_request(pr))
# pr=pr.findAll('div', {'class' : 'part_text'})
# i=0
# for part in pr:
@ -241,7 +241,7 @@ class FicBookNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
chapter = soup.find('div', {'id' : 'content'})
if chapter == None: ## still needed?

View file

@ -70,7 +70,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
## posting on list doesn't work, but doesn't hurt, either.
data = self.post_request(url,params)
else:
data = self._fetchUrl(url)
data = self.get_request(url)
return data
def extractChapterUrlsAndMetadata(self):
@ -108,7 +108,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
logger.debug("Normalizing to URL: "+url)
## title's right there...
self.story.setMetadata('title',stripHTML(storya))
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
chapterlinklist = soup.findAll('a',{'class':'chapterlink'})
else:
@ -138,7 +138,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
## Go scrape the rest of the metadata from the author's page.
data = self._fetchUrl(self.story.getMetadata('authorUrl'))
data = self.get_request(self.story.getMetadata('authorUrl'))
soup = self.make_soup(data)
# <dl><dt><a class = "Rid story" href = "http://www.fictionalley.org/authors/aafro_man_ziegod/TMH.html">
@ -193,7 +193,7 @@ class FictionAlleyOrgSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
# find <!-- headerend --> & <!-- footerstart --> and
# replaced with matching div pair for easier parsing.
# Yes, it's an evil kludge, but what can ya do? Using

View file

@ -110,7 +110,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
## need to pull empty login page first to get authenticity_token
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['identifier']))
soup = self.make_soup(self._fetchUrl(loginUrl,usecache=False))
soup = self.make_soup(self.get_request(loginUrl,usecache=False))
params['_token']=soup.find('input', {'name':'_token'})['value']
d = self.post_request(loginUrl, params, usecache=False)
@ -131,11 +131,11 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
url = self.url
try:
data = self._fetchUrl(url)
data = self.get_request(url)
if self.needToLoginCheck(data):
self.performLogin(url)
data = self._fetchUrl(url,usecache=False)
data = self.get_request(url,usecache=False)
soup = self.make_soup(data)
## detect old storyUrl, switch to new storyUrl:
@ -146,7 +146,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
# logger.debug(canonlink)
self._setURL(canonlink)
url = self.url
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
else:
# in case title changed
@ -179,7 +179,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
## find story url, might need to spin through author's pages.
while authpagea and not authstorya:
logger.debug(authpagea)
authsoup = self.make_soup(self._fetchUrl(authpagea['href']))
authsoup = self.make_soup(self.get_request(authpagea['href']))
authpagea = authsoup.find('a',{'class':'page-link','rel':'next'})
# CSS selectors don't allow : or / unquoted, which
# BS4(and dependencies) didn't used to enforce.
@ -233,7 +233,7 @@ class FictionHuntComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)

View file

@ -83,7 +83,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
metadata_url = "https://fiction.live/api/node/{s_id}/"
response = self._fetchUrl(metadata_url.format(s_id = self.story_id))
response = self.get_request(metadata_url.format(s_id = self.story_id))
if not response: # this is how fiction.live responds to nonsense urls -- HTTP200 with empty response
raise exceptions.StoryDoesNotExist("Empty response for " + self.url)
@ -93,7 +93,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
## get metadata for multi route chapters
if 'multiRoute' in data and data['multiRoute'] == True:
route_metadata_url = "https://fiction.live/api/anonkun/routes/{s_id}/"
response = self._fetchUrl(route_metadata_url.format(s_id = self.story_id))
response = self.get_request(route_metadata_url.format(s_id = self.story_id))
if not response: # this is how fiction.live responds to nonsense urls -- HTTP200 with empty response
raise exceptions.StoryDoesNotExist("Empty response for " + self.url)
@ -270,7 +270,7 @@ class FictionLiveAdapter(BaseSiteAdapter):
"chapter" : self.format_chapter
}
response = self._fetchUrl(url)
response = self.get_request(url)
data = json.loads(response)
if data == []:

View file

@ -65,7 +65,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId')
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
keep_summary_html = self.getConfig('keep_summary_html')
for row in soup.find('table')('tr'):
@ -149,7 +149,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
def getChapterText(self, url):
if self.getConfig("download_text_version",False):
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
element = soup.find('pre')
element.name = 'div'
@ -175,7 +175,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
# <div style="margin-left:10ex;margin-right:10ex">
## fetching SWI version now instead of text.
htmlurl = url.replace('readtextstory','readhtmlstory')
soup = self.make_soup(self._fetchUrl(htmlurl))
soup = self.make_soup(self.get_request(htmlurl))
div = soup.find('div',style="margin-left:10ex;margin-right:10ex")
if div:
return self.utf8FromSoup(htmlurl,div)
@ -183,7 +183,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
logger.debug("Story With Images(SWI) not found, falling back to HTML.")
## fetching html version now instead of text.
soup = self.make_soup(self._fetchUrl(url.replace('readtextstory','readxstory')))
soup = self.make_soup(self.get_request(url.replace('readtextstory','readxstory')))
# remove first hr and everything before
remove = soup.find('hr')

View file

@ -95,7 +95,7 @@ class FictionPadSiteAdapter(BaseSiteAdapter):
params['login']))
## need to pull empty login page first to get authenticity_token
soup = self.make_soup(self._fetchUrl(loginUrl))
soup = self.make_soup(self.get_request(loginUrl))
params['authenticity_token']=soup.find('input', {'name':'authenticity_token'})['value']
data = self.post_request(loginUrl, params)
@ -114,10 +114,10 @@ class FictionPadSiteAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
if "This is a mature story. Please sign in to read it." in data:
self.performLogin()
data = self._fetchUrl(url)
data = self.get_request(url)
find = "wordyarn.config.page = "
data = data[data.index(find)+len(find):]
@ -186,7 +186,7 @@ class FictionPadSiteAdapter(BaseSiteAdapter):
if not url:
data = u"<em>This chapter has no text.</em>"
else:
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(u"<div id='story'>"+data+u"</div>")
return self.utf8FromSoup(url,soup)

View file

@ -92,7 +92,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
# use BeautifulSoup HTML parser to make everything easier to find.
try:
data = self._fetchUrl(url)
data = self.get_request(url)
# non-existent/removed story urls get thrown to the front page.
if "<h4>Featured Story</h4>" in data:
raise exceptions.StoryDoesNotExist(self.url)
@ -107,7 +107,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
if soup.find("div",{"class":"blocked"}) or soup.find("li",{"class":"blocked"}):
if self.performLogin(url): # performLogin raises
# FailedToLogin if it fails.
soup = self.make_soup(self._fetchUrl(url,usecache=False))
soup = self.make_soup(self.get_request(url,usecache=False))
divstory = soup.find('div',id='story')
storya = divstory.find('a',href=re.compile(r"^/story/\d+$"))
@ -118,7 +118,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
logger.debug("Normalizing to URL: "+url)
self._setURL(url)
try:
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -129,7 +129,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
if soup.find("div",{"class":"blocked"}) or soup.find("li",{"class":"blocked"}):
if self.performLogin(url): # performLogin raises
# FailedToLogin if it fails.
soup = self.make_soup(self._fetchUrl(url,usecache=False))
soup = self.make_soup(self.get_request(url,usecache=False))
# title - first h4 tag will be title.
titleh4 = soup.find('div',{'class':'storylist'}).find('h4')
@ -222,7 +222,7 @@ class FicwadComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
span = soup.find('div', {'id' : 'storytext'})

View file

@ -122,7 +122,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
try:
# don't use cache if manual is_adult--should only happen
# if it's an adult story and they don't have is_adult in ini.
data = self.do_fix_blockquotes(self._fetchUrl(self.url,
data = self.do_fix_blockquotes(self.get_request(self.url,
usecache=(not self.is_adult)))
soup = self.make_soup(data)
except HTTPError as e:
@ -301,7 +301,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
#groups
groupButton = soup.find('button', {'data-click':'showAll'})
if groupButton != None and groupButton.find('i', {'class':'fa-search-plus'}):
groupResponse = self._fetchUrl("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
groupResponse = self.get_request("https://www.fimfiction.net/ajax/stories/%s/groups" % (self.story.getMetadata("storyId")))
groupData = json.loads(groupResponse)
groupList = self.make_soup(groupData["content"])
else:
@ -373,7 +373,7 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
@ -395,6 +395,6 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
def before_get_urls_from_page(self,url,normalize):
## Unlike most that show the links to 'adult' stories, but protect
## them, FimF doesn't even show them if not logged in.
# data = self._fetchUrl(url)
# data = self.get_request(url)
if self.getConfig("is_adult"):
self.set_adult_cookie()

View file

@ -80,7 +80,7 @@ class FireFlyFansNetSiteAdapter(BaseSiteAdapter):
logger.debug("URL: " + url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)

View file

@ -72,7 +72,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -116,7 +116,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
## used below if total words from site not found
# fetch author page to get story description.
authorsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
authorsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
for story in authorsoup.find_all('article',class_='story-summary'):
storya = story.find('h3').find('a',href=re.compile(r"^/viewstory.php\?psid="+self.story.getMetadata('storyId')))
@ -173,7 +173,7 @@ class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
div = soup.find('div', {'class' : 'storytext-container'})
if None == div:

View file

@ -77,7 +77,7 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter):
url = url+"?enterAgree=1"
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -180,7 +180,7 @@ class HentaiFoundryComSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
div = soup.select_one("section#viewChapter div.boxbody")
if None == div:

View file

@ -78,7 +78,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -115,7 +115,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
self.add_chapter(self.story.getMetadata('title'),url)
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
for list in asoup.findAll('div', {'class' : re.compile('listbox')}):
a = list.find('a')
@ -194,7 +194,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -216,7 +216,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -83,7 +83,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -194,7 +194,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
series_url = self.getProtocol()+self.host+'/stories/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -216,7 +216,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -122,7 +122,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -132,7 +132,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# The actual text that is used to announce you need to be an
# adult varies from site to site. Again, print data before
@ -160,7 +160,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -267,7 +267,7 @@ class IkEternalNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -122,7 +122,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -132,7 +132,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
if m != None:
@ -147,7 +147,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -252,7 +252,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -274,7 +274,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -124,7 +124,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
url = self.url
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist('Error 404: {0}'.format(self.url))
@ -141,7 +141,7 @@ class InkBunnyNetSiteAdapter(BaseSiteAdapter):
if 'Submission blocked' in data:
if self.performLogin(url,soup): # performLogin raises
# FailedToLogin if it fails.
soup = self.make_soup(self._fetchUrl(url,usecache=False))
soup = self.make_soup(self.get_request(url,usecache=False))
# removing all of the scripts
for tag in soup.findAll('script'):

View file

@ -101,7 +101,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -134,7 +134,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -289,7 +289,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
for a in storyas:
@ -310,7 +310,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
div = soup.find('div', {'id' : 'story'})

View file

@ -98,7 +98,7 @@ class LCFanFicComSiteAdapter(BaseSiteAdapter):
url = self.url
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist('Error 404: {0}'.format(self.url))

View file

@ -178,7 +178,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
# get the author page
try:
dataAuth = self._fetchUrl(authorurl)
dataAuth = self.get_request(authorurl)
soupAuth = self.make_soup(dataAuth)
#strip comments from soup
[comment.extract() for comment in soupAuth.findAll(text=lambda text:isinstance(text, Comment))]
@ -356,7 +356,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
raw_page = self._fetchUrl(url)
raw_page = self.get_request(url)
page_soup = self.make_soup(raw_page)
pages = page_soup.find('select', {'name' : 'page'})
page_nums = [page.text for page in pages.findAll('option')] if pages else 0
@ -373,7 +373,7 @@ class LiteroticaSiteAdapter(BaseSiteAdapter):
for page_no in range(2, len(page_nums) + 1):
page_url = url + "?page=%s" % page_no
# logger.debug("page_url= %s" % page_url)
raw_page = self._fetchUrl(page_url)
raw_page = self.get_request(page_url)
fullhtml += self.getPageText(raw_page, url)
# logger.debug(fullhtml)

View file

@ -80,7 +80,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -217,7 +217,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -323,7 +323,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
# problems with some stories, but only in calibre. I suspect
# issues with different SGML parsers in python. This is a
# nasty hack, but it works.

View file

@ -88,7 +88,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -203,7 +203,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -222,7 +222,7 @@ class LumosSycophantHexComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -230,7 +230,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
def _makeChapter(self, url):
"""Creates a chapter object given a URL."""
document = self.make_soup(self._fetchUrl(url))
document = self.make_soup(self.get_request(url))
chapter = Chapter(self._getParsingConfiguration(), url, document)
return chapter

View file

@ -84,7 +84,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
raise exceptions.AdultCheckRequired(self.url)
try:
data1 = self._fetchUrl(self.url)
data1 = self.get_request(self.url)
soup1 = self.make_soup(data1)
#strip comments from soup
[comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
@ -165,7 +165,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
All content is in article#mcstories, with chapter headers in h3
"""
logger.debug('Getting chapter text from <%s>' % url)
data1 = self._fetchUrl(url)
data1 = self.get_request(url)
soup1 = self.make_soup(data1)
#strip comments from soup

View file

@ -117,7 +117,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url) # w/o trailing / gets 'chapter list' page even for one-shots.
data = self.get_request(url) # w/o trailing / gets 'chapter list' page even for one-shots.
except HTTPError as e:
if e.code == 404:
logger.error("404 on %s"%url)
@ -205,7 +205,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
# print("data:%s"%data)

View file

@ -122,7 +122,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -132,7 +132,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
if m != None:
@ -147,7 +147,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -256,7 +256,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -278,7 +278,7 @@ class MerlinFicDtwinsCoUk(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -106,7 +106,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -139,7 +139,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -258,7 +258,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -279,7 +279,7 @@ class MidnightwhispersAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
div = soup.find('div', {'id' : 'story'})

View file

@ -113,7 +113,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist('404 error: {}'.format(url))
@ -192,7 +192,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
self.setDescription(url, cdata)
def getChapterText(self, url):
data = self._fetchUrl(url)
data = self.get_request(url)
if self.getConfig('fix_excess_space', True):
data = fix_excess_space(data)

View file

@ -90,7 +90,7 @@ class NovelTroveComSiteAdapter(BaseSiteAdapter):
url = self.url
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist('Error 404: {0}'.format(self.url))

View file

@ -115,7 +115,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -125,7 +125,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
@ -142,14 +142,14 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
try:
# in case link points somewhere other than the first chapter
a = soup.findAll('option')[1]['value']
self.story.setMetadata('storyId',a.split('=',)[1])
url = 'http://'+self.host+'/'+a
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
except:
pass
@ -246,7 +246,7 @@ class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
data = data.replace('<div align="left"', '<div align="left">')
soup = self.make_soup(data)

View file

@ -112,7 +112,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
try:
if self.getConfig('force_login'):
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -122,7 +122,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# use BeautifulSoup HTML parser to make everything easier to find.
soup = self.make_soup(data)
@ -157,14 +157,14 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
self.add_chapter(chapter,'https://'+self.host+chapter['value'])
if i == 0:
self.story.setMetadata('storyId',chapter['value'].split('/')[3])
head = self.make_soup(self._fetchUrl('https://'+self.host+chapter['value'])).findAll('b')
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
for b in head:
if b.text == "Updated":
date = b.nextSibling.string.split(': ')[1].split(',')
self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
if i == (len(chapters)-1):
head = self.make_soup(self._fetchUrl('https://'+self.host+chapter['value'])).findAll('b')
head = self.make_soup(self.get_request('https://'+self.host+chapter['value'])).findAll('b')
for b in head:
if b.text == "Updated":
date = b.nextSibling.string.split(': ')[1].split(',')
@ -173,7 +173,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
info = asoup.find('a', href=re.compile(r'fanfiction/story/'+self.story.getMetadata('storyId')+"/$"))
while info != None:
@ -209,7 +209,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
chapter=self.make_soup('<div class="story"></div>')
for p in soup.findAll(['p','blockquote']):

View file

@ -92,7 +92,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -113,7 +113,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -226,7 +226,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -248,7 +248,7 @@ class PonyFictionArchiveNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -65,7 +65,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -185,7 +185,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/fanfiction/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -212,7 +212,7 @@ class PotionsAndSnitchesOrgSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -145,7 +145,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -161,7 +161,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url,usecache=False)
data = self.get_request(url,usecache=False)
#set constant meta for this site:
#Set Language = Spanish
@ -268,7 +268,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'cuerpoHistoria'})
if None == div:

View file

@ -122,7 +122,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -132,7 +132,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# Since the warning text can change by warning level, let's
# look for the warning pass url. ksarchive uses
@ -156,7 +156,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -264,7 +264,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -283,7 +283,7 @@ class PotterHeadsAnonymousComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -87,7 +87,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -107,7 +107,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -212,7 +212,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/missingpieces/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -234,7 +234,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story1'})

View file

@ -88,7 +88,7 @@ class QafFicComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -108,7 +108,7 @@ class QafFicComAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -147,7 +147,7 @@ class QafFicComAdapter(BaseSiteAdapter):
self.add_chapter(self.story.getMetadata('title'),url)
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
for list in asoup.findAll('div', {'class' : re.compile('listbox')}):
a = list.find('a')
if ('viewstory.php?sid='+self.story.getMetadata('storyId')) in a['href']:
@ -225,7 +225,7 @@ class QafFicComAdapter(BaseSiteAdapter):
# Find Series name from series URL.
series_url = 'https://'+self.host+'/atp/'+series['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -249,7 +249,7 @@ class QafFicComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -66,7 +66,7 @@ class QuotevComAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
try:
data = self._fetchUrl(self.url)
data = self.get_request(self.url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
@ -143,7 +143,7 @@ class QuotevComAdapter(BaseSiteAdapter):
def getChapterText(self, url):
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
rescontent = soup.find('div', id='rescontent')

View file

@ -151,7 +151,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -242,7 +242,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
# misconfigured and sends more than 100 headers for some stories (probably Set-Cookie). This simply increases
# the maximum header limit to 1000 temporarily. Also see: https://github.com/JimmXinu/FanFicFare/pull/174
with httplib_max_headers(1000):
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div',{'class':"chapter-inner chapter-content"})

View file

@ -151,7 +151,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -184,7 +184,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -307,7 +307,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
series_url = 'http://'+self.host+'/fanfics/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -326,7 +326,7 @@ class SamAndJackNetAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -96,7 +96,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -116,7 +116,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -210,7 +210,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -232,7 +232,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})
@ -244,7 +244,7 @@ class ScarvesAndCoffeeNetAdapter(BaseSiteAdapter):
def get_urls_from_page(self,url,normalize):
from ..geturls import get_urls_from_html
# this way it uses User-Agent or other special settings.
data = self._fetchUrl(url,usecache=False)
data = self.get_request(url,usecache=False)
## I can't find when or why exactly this was added, but it was
## in the old code, so here it remains.
soup = self.make_soup(data)

View file

@ -126,7 +126,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -245,7 +245,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
# Extra metadata from URL + /stats/
# Again we know the storyID is valid from before, so this shouldn't raise an exception, and if it does we might want to know about it..
data = self._fetchUrl(url + 'stats/')
data = self.get_request(url + 'stats/')
soup = self.make_soup(data)
def find_stats_data(element, row, metadata):
@ -268,7 +268,7 @@ class ScribbleHubComAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'chp_raw'})

View file

@ -146,7 +146,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -156,7 +156,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
# The actual text that is used to announce you need to be an
# adult varies from site to site. Again, print data before
@ -282,7 +282,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/fanfics/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -301,7 +301,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -100,7 +100,7 @@ class BFAArchiveShriftwebOrgSiteAdapter(BaseSiteAdapter):
cut down on the size of the file
'''
try:
page_data = self._fetchUrl(page)
page_data = self.get_request(page)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist('404 error: {}'.format(page))

View file

@ -82,7 +82,7 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -126,7 +126,7 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
#logger.debug("Series Url: "+seriesUrl)
# Get Series page and convert to soup
seriesPageSoup = self.make_soup(self._fetchUrl(seriesUrl+"&offset=0"))
seriesPageSoup = self.make_soup(self.get_request(seriesUrl+"&offset=0"))
## &offset=0 is the same as the first page, by adding
## that, the page cache will save us from fetching it
## twice in the loop below.
@ -142,7 +142,7 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
#get urls from all subpages and append to list
i=1
for seriesPagePageUrl in seriesPageUrlList:
seriesPagePageSoup = self.make_soup(self._fetchUrl('https://'+self.host+'/archive/home/'+seriesPagePageUrl['href']))
seriesPagePageSoup = self.make_soup(self.get_request('https://'+self.host+'/archive/home/'+seriesPagePageUrl['href']))
storyHeaders = seriesPagePageSoup.findAll('h5')
## can't just search for story URLs, some story
## descs also contain story URLs. Looks like only
@ -270,7 +270,7 @@ class SilmarillionWritersGuildOrgAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
# No convenient way to get story without the rest of the page, so get whole page and strip unneeded sections

View file

@ -90,7 +90,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -112,7 +112,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
self.story.setMetadata('author',a.string)
# need(or easier) to pull other metadata from the author's list page.
authsoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
authsoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
# remove author profile incase they've put the story URL in their bio.
profile = authsoup.find('div',{'id':'profile'})
@ -224,7 +224,7 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -243,10 +243,10 @@ class SiyeCoUkAdapter(BaseSiteAdapter): # XXX
logger.debug('Getting chapter text from: %s' % url)
# soup = self.make_soup(self._fetchUrl(url))
# soup = self.make_soup(self.get_request(url))
# BeautifulSoup objects to <p> inside <span>, which
# technically isn't allowed.
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
# not the most unique thing in the world, but it appears to be
# the best we can do here.

View file

@ -63,7 +63,7 @@ class SpikeluverComAdapter(BaseSiteAdapter):
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('http','https?') + r'\d+$'
def extractChapterUrlsAndMetadata(self):
soup = self.make_soup(self._fetchUrl(self.url + self.METADATA_URL_SUFFIX))
soup = self.make_soup(self.get_request(self.url + self.METADATA_URL_SUFFIX))
errortext_div = soup.find('div', {'class': 'errortext'})
if errortext_div:
@ -78,7 +78,7 @@ class SpikeluverComAdapter(BaseSiteAdapter):
raise exceptions.AdultCheckRequired(self.url)
url = ''.join([self.url, self.METADATA_URL_SUFFIX, self.AGE_CONSENT_URL_SUFFIX])
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
pagetitle_div = soup.find('div', id='pagetitle')
self.story.setMetadata('title', stripHTML(pagetitle_div.a))
@ -199,5 +199,5 @@ class SpikeluverComAdapter(BaseSiteAdapter):
def getChapterText(self, url):
url += self.AGE_CONSENT_URL_SUFFIX
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
return self.utf8FromSoup(url, soup.find('div', id='story'))

View file

@ -93,14 +93,14 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
data = self._fetchUrl(url)
data = self.get_request(url)
if "fatal MySQL error was encountered" in data:
raise exceptions.FailedToDownload("Site SQL Error--bad story")
@ -117,7 +117,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
self.story.setMetadata('authorUrl','https://'+self.host+'/peja/cgi-bin/'+author['href'])
self.story.setMetadata('author',author.string)
authorSoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
authorSoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
# There are scripts within the metadata sections, so we need to
# take them out [GComyn]
@ -225,7 +225,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/peja/cgi-bin/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -248,7 +248,7 @@ class SquidgeOrgPejaAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
chaptext = soup.find('div',{'id':"story"}).find('span')

View file

@ -77,7 +77,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -97,7 +97,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
self.story.setMetadata('authorId',aut['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+aut['href'])
self.story.setMetadata('author',aut.string)
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')))
a.find('em').extract()
self.story.setMetadata('title',stripHTML(a))
@ -142,7 +142,7 @@ class StoriesOfArdaComAdapter(BaseSiteAdapter):
params = {'confirmAge':'1'}
data = self.post_request(url,params)
else:
data = self._fetchUrl(url)
data = self.get_request(url)
data = data[data.index('<table width="90%" align="center">'):]
data.replace("<body","<notbody").replace("<BODY","<NOTBODY")

View file

@ -194,7 +194,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url+":i")
data = self.get_request(url+":i")
# logger.debug(data)
except HTTPError as e:
if e.code == 404:
@ -208,7 +208,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
# need to log in for this one.
self.performLogin(url)
try:
data = self._fetchUrl(url+":i",usecache=False)
data = self.get_request(url+":i",usecache=False)
except HTTPError as e:
if e.code in (404, 410):
raise exceptions.StoryDoesNotExist("Code: %s: %s"%(e.code,self.url))
@ -333,7 +333,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
while not story_found:
page = page + 1
try:
data = self._fetchUrl(self.story.getList('authorUrl')[0] + "/" + unicode(page))
data = self.get_request(self.story.getList('authorUrl')[0] + "/" + unicode(page))
except HTTPError as e:
if e.code == 404:
raise exceptions.FailedToDownload("Story not found in Author's list--Set Access Level to Full Access and change Listings Theme back to "+self.getTheme())
@ -365,7 +365,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
self.story.setMetadata('seriesUrl',seriesUrl)
series_name = stripHTML(a)
# logger.debug("Series name= %s" % series_name)
series_soup = self.make_soup(self._fetchUrl(seriesUrl))
series_soup = self.make_soup(self.get_request(seriesUrl))
if series_soup:
# logger.debug("Retrieving Series - looking for name")
series_name = stripHTML(series_soup.find('h1', {'id' : 'ptitle'}))
@ -375,7 +375,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
# Check if series is in a universe
if self.has_universes:
universe_url = self.story.getList('authorUrl')[0] + "&type=uni"
universes_soup = self.make_soup(self._fetchUrl(universe_url) )
universes_soup = self.make_soup(self.get_request(universe_url) )
# logger.debug("Universe url='{0}'".format(universe_url))
if universes_soup:
universes = universes_soup.findAll('div', {'class' : 'ser-box'})
@ -409,7 +409,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
universe_name = stripHTML(a)
universeUrl = 'https://'+self.host+a['href']
# logger.debug("Retrieving Universe - about to get page - universeUrl='{0}".format(universeUrl))
universe_soup = self.make_soup(self._fetchUrl(universeUrl))
universe_soup = self.make_soup(self.get_request(universeUrl))
# logger.debug("Retrieving Universe - have page")
if universe_soup:
# logger.debug("Retrieving Universe - looking for name")
@ -512,7 +512,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
# The story text is wrapped in article tags. Most of the page header and
# footer are outside of this.
@ -531,7 +531,7 @@ class StoriesOnlineNetAdapter(BaseSiteAdapter):
pager.extract()
for ur in urls:
soup = self.make_soup(self._fetchUrl("https://"+self.getSiteDomain()+ur['href']))
soup = self.make_soup(self.get_request("https://"+self.getSiteDomain()+ur['href']))
pagetag = soup.find('article')

View file

@ -91,7 +91,7 @@ class SugarQuillNetAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(url)
@ -129,7 +129,7 @@ class SugarQuillNetAdapter(BaseSiteAdapter):
author_Url = self.story.getMetadata('authorUrl').replace('&amp;','&')
logger.debug('Getting the author page: {0}'.format(author_Url))
try:
adata = self._fetchUrl(author_Url)
adata = self.get_request(author_Url)
except HTTPError as e:
if e.code in 404:
raise exceptions.StoryDoesNotExist("Author Page: Code: 404. {0}".format(author_Url))
@ -159,7 +159,7 @@ class SugarQuillNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
chap = soup.find('td',{'class':'content_pane'})

View file

@ -68,7 +68,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
url=self.url
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -149,7 +149,7 @@ class SwiOrgRuAdapter(BaseSiteAdapter):
# grab the text for an individual chapter.
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
chapter = soup.find('div', {'id' : 'content'})
chapter_header = chapter.find('h1', id = re.compile("chapter"))

View file

@ -112,7 +112,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -125,7 +125,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
url = self.url+'&index=1'+addurl
logger.debug("Changing URL: "+url)
self.performLogin(url)
data = self._fetchUrl(url,usecache=False)
data = self.get_request(url,usecache=False)
if "This story contains mature content which may include violence, sexual situations, and coarse language" in data:
raise exceptions.AdultCheckRequired(self.url)
@ -219,7 +219,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -238,7 +238,7 @@ class TenhawkPresentsSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
span = soup.find('div', {'id' : 'story'})

View file

@ -129,7 +129,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -139,7 +139,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
if m != None:
@ -154,7 +154,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -258,7 +258,7 @@ class TheMasqueNetAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -112,7 +112,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -122,7 +122,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
@ -156,7 +156,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
index = 1
found = 0
while found == 0:
asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')+"&page="+unicode(index)))
asoup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl')+"&page="+unicode(index)))
for info in asoup.findAll('td', {'class' : 'highlightcolor1'}):
a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
@ -223,7 +223,7 @@ class ThePetulantPoetessComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.findAll('table')[2].findAll('td')[1]
for a in div.findAll('div'):

View file

@ -81,7 +81,7 @@ class TomParisDormComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(url)
@ -192,7 +192,7 @@ class TomParisDormComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -100,7 +100,7 @@ class TrekFanFictionNetSiteAdapter(BaseSiteAdapter):
on the size of the file
'''
try:
page_data = self._fetchUrl(page)
page_data = self.get_request(page)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist('404 error: {}'.format(page))

View file

@ -126,7 +126,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -136,7 +136,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
if m != None:
@ -151,7 +151,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
logger.debug("URL 2nd try: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -285,7 +285,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/efiction/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -304,7 +304,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -100,7 +100,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# <input type='text' id='urealname' name='urealname' value=''/>
# <input type='password' id='password' name='6bb3fcd148d148629223690bf19733b8'/>
# <input type='submit' value='Login' name='loginsubmit'/>
soup = self.make_soup(self._fetchUrl(loginUrl))
soup = self.make_soup(self.get_request(loginUrl))
## FYI, this will fail if cookiejar is shared, but
## use_pagecache is false.
params['ctkn']=soup.find('input', {'name':'ctkn'})['value']
@ -118,7 +118,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
def setSiteMaxRating(self,url,data=None,soup=None):
if not data:
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
if self.is_adult or self.getConfig("is_adult"):
@ -131,7 +131,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
data = self.post_request("https://"+self.getSiteDomain()+'/setmaxrating.php',params)
# refetch story page.
## XXX - needs cache invalidate? Or at least check that it this needs doing...
data = self._fetchUrl(url,usecache=False)
data = self.get_request(url,usecache=False)
soup = self.make_soup(data)
return (data,soup)
@ -149,7 +149,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# use BeautifulSoup HTML parser to make everything easier to find.
try:
data = self._fetchUrl(url)
data = self.get_request(url)
#print("data:%s"%data)
soup = self.make_soup(data)
except HTTPError as e:
@ -180,7 +180,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
try:
# going to pull part of the meta data from *primary* author list page.
logger.debug("**AUTHOR** URL: "+authorurl)
authordata = self._fetchUrl(authorurl)
authordata = self.get_request(authorurl)
descurl=authorurl
authorsoup = self.make_soup(authordata)
# author can have several pages, scan until we find it.
@ -198,7 +198,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# raise exceptions.AdultCheckRequired(self.url)
nextpage = 'https://'+self.host+nextarrow['href']
logger.debug("**AUTHOR** nextpage URL: "+nextpage)
authordata = self._fetchUrl(nextpage)
authordata = self.get_request(nextpage)
#logger.info("authsoup:%s"%authorsoup)
descurl=nextpage
authorsoup = self.make_soup(authordata)
@ -219,7 +219,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
# going to pull part of the meta data from author list page.
infourl = 'https://'+self.host+ainfo['href']
logger.debug("**StoryInfo** URL: "+infourl)
infodata = self._fetchUrl(infourl)
infodata = self.get_request(infourl)
infosoup = self.make_soup(infodata)
# for a in infosoup.findAll('a',href=re.compile(r"^/Author-\d+")):
@ -328,7 +328,7 @@ class TwistingTheHellmouthSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'storyinnerbody'})

View file

@ -101,7 +101,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -111,7 +111,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
if self.needToLoginCheck(data):
# need to log in for this one.
self.performLogin(url)
data = self._fetchUrl(url)
data = self.get_request(url)
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
@ -208,7 +208,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -226,7 +226,7 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
data = self._fetchUrl(url)
data = self.get_request(url)
# problems with some stories, but only in calibre. I suspect
# issues with different SGML parsers in python. This is a
# nasty hack, but it works.

View file

@ -99,7 +99,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'
def extractChapterUrlsAndMetadata(self):
soup = self.make_soup(self._fetchUrl(self.url + self.METADATA_URL_SUFFIX))
soup = self.make_soup(self.get_request(self.url + self.METADATA_URL_SUFFIX))
# Check if the story is for "Registered Users Only", i.e. has adult
# content. Based on the "is_adult" attributes either login or raise an
@ -118,7 +118,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload(error_text)
url = ''.join([self.url, self.METADATA_URL_SUFFIX, self.AGE_CONSENT_URL_SUFFIX])
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
# If logged in and the skin doesn't match the required skin throw an
# error
@ -227,5 +227,5 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
def getChapterText(self, url):
url += self.AGE_CONSENT_URL_SUFFIX
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
return self.utf8FromSoup(url, soup.find('div', id='story'))

View file

@ -86,7 +86,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -200,7 +200,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
series_url = 'http://'+self.host+'/archive/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -219,7 +219,7 @@ class WalkingThePlankOrgAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -47,7 +47,7 @@ class WattpadComAdapter(BaseSiteAdapter):
# note: classvar may be useless because of del adapter
if WattpadComAdapter.CATEGORY_DEFs is None:
try:
WattpadComAdapter.CATEGORY_DEFs = json.loads(self._fetchUrl(WattpadComAdapter.API_GETCATEGORIES))
WattpadComAdapter.CATEGORY_DEFs = json.loads(self.get_request(WattpadComAdapter.API_GETCATEGORIES))
except:
logger.warning('API_GETCATEGORIES failed.')
WattpadComAdapter.CATEGORY_DEFs = []
@ -91,7 +91,7 @@ class WattpadComAdapter(BaseSiteAdapter):
## %in email.
## https://www.wattpad.com/et?c=euc&t=uploaded_story&l=https%3A%2F%2Fwww.wattpad.com%2F997616013-nuestro-destino-ron-weasley-y-tu-cap-11&emid=uploaded_story.295918124.1608687259%2C544769.4a691b8fc2a4607e1c770aa4ebd48cc3aaf39bd599a738d3747d41fdfa37fcda
chapterIdInUrl = re.match(r'.*https(://|%3A%2F%2F)www\.wattpad\.com(/|%2F)(?P<chapterId>\d+).*', url)
chapterInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_CHAPTERINFO % chapterIdInUrl.group('chapterId')))
chapterInfo = json.loads(self.get_request(WattpadComAdapter.API_CHAPTERINFO % chapterIdInUrl.group('chapterId')))
groupid = chapterInfo.get('groupId', None)
if groupid is None:
raise exceptions.StoryDoesNotExist(url)
@ -100,7 +100,7 @@ class WattpadComAdapter(BaseSiteAdapter):
def doExtractChapterUrlsAndMetadata(self, get_cover=True):
try:
storyInfo = json.loads(self._fetchUrl(WattpadComAdapter.API_STORYINFO % self.storyId))
storyInfo = json.loads(self.get_request(WattpadComAdapter.API_STORYINFO % self.storyId))
# logger.debug('storyInfo: %s' % json.dumps(storyInfo))
except Exception:
raise exceptions.InvalidStoryURL(self.url, self.getSiteDomain(), self.getSiteExampleURLs())
@ -148,7 +148,7 @@ class WattpadComAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('%s' % url)
chapterID = re.search(r'https://www.wattpad.com/(?P<chapterID>\d+).*', url).group('chapterID')
return self.utf8FromSoup(url,self.make_soup(self._fetchUrl(WattpadComAdapter.API_STORYTEXT % chapterID)))
return self.utf8FromSoup(url,self.make_soup(self.get_request(WattpadComAdapter.API_STORYTEXT % chapterID)))
# adapter self-dicovery is not implemented in fanficfare (it existed for the previous project)
def getClass():

View file

@ -116,7 +116,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
url = self.url
try:
data = self._fetchUrl(url)
data = self.get_request(url)
# logger.debug(data)
except HTTPError as e:
if e.code == 404:
@ -186,7 +186,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
raise exceptions.FailedToDownload('csrf token could not be found')
## get chapters from a json API url.
jsondata = json.loads(self._fetchUrl(
jsondata = json.loads(self.get_request(
"https://" + self.getSiteDomain() + "/apiajax/chapter/GetChapterList?_csrfToken=" + csrf_token + "&bookId=" + self.story.getMetadata(
'storyId')))
# print json.dumps(jsondata, sort_keys=True,
@ -230,7 +230,7 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
chapter_id = url.split('/')[-1]
content_url = 'https://%s/apiajax/chapter/GetContent?_csrfToken=%s&bookId=%s&chapterId=%s&_=%d' % (
self.getSiteDomain(), self._csrf_token, book_id, chapter_id, time.time() * 1000)
topdata = json.loads(self._fetchUrl(content_url))
topdata = json.loads(self.get_request(content_url))
# logger.debug(json.dumps(topdata, sort_keys=True,
# indent=2, separators=(',', ':')))
chapter_info = topdata['data']['chapterInfo']
@ -239,14 +239,14 @@ class WWWWebNovelComAdapter(BaseSiteAdapter):
if chapter_info['isVip'] == 1:
content_token_url = 'https://%s/apiajax/chapter/GetChapterContentToken?_csrfToken=%s&bookId=%s&chapterId=%s' % (
self.getSiteDomain(), self._csrf_token, self.story.getMetadata('storyId'), chapter_id)
content_token = json.loads(self._fetchUrl(content_token_url))['data']['token']
content_token = json.loads(self.get_request(content_token_url))['data']['token']
content_by_token_url = 'https://%s/apiajax/chapter/GetChapterContentByToken?_csrfToken=%s&token=%s' % (
self.getSiteDomain(), self._csrf_token, content_token)
# This is actually required or the data/content field will be empty
time.sleep(self._GET_VIP_CONTENT_DELAY)
contents = json.loads(self._fetchUrl(content_by_token_url))['data']['contents']
contents = json.loads(self.get_request(content_by_token_url))['data']['contents']
else:
contents = chapter_info['contents']

View file

@ -70,7 +70,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
# use BeautifulSoup HTML parser to make everything easier to find.
try:
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -110,7 +110,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
## author page to find it.
logger.debug("Author URL: "+self.story.getMetadata('authorUrl'))
soup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl'))) # normalize <br> tags to <br />
soup = self.make_soup(self.get_request(self.story.getMetadata('authorUrl'))) # normalize <br> tags to <br />
# find this story in the list, parse it's metadata based on
# lots of assumptions about the html, since there's little
# tagging.
@ -193,7 +193,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
series_name = a.string
series_url = 'https://'+self.host+'/'+a['href']
try:
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -236,7 +236,7 @@ class WhoficComSiteAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
# hardly a great identifier, I know, but whofic really doesn't

View file

@ -78,7 +78,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -181,7 +181,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/wrfa/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
# can't use ^viewstory...$ in case of higher rated stories with javascript href.
storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
i=1
@ -203,7 +203,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -87,7 +87,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
data = self.get_request(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
@ -196,7 +196,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
series_url = 'https://'+self.host+'/'+a['href']
# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
seriessoup = self.make_soup(self.get_request(series_url))
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
i=1
for a in storyas:
@ -219,7 +219,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
logger.debug('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
soup = self.make_soup(self.get_request(url))
div = soup.find('div', {'id' : 'story'})

View file

@ -72,7 +72,7 @@ class WuxiaWorldCoSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
logger.debug('URL: %s', self.url)
try:
data = self._fetchUrl(self.url)
data = self.get_request(self.url)
except HTTPError as exception:
if exception.code == 404:
raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
@ -144,7 +144,7 @@ class WuxiaWorldCoSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s', url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
content = soup.select_one('.chapter-entity')

View file

@ -80,7 +80,7 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
def extractChapterUrlsAndMetadata(self):
logger.debug('URL: %s', self.url)
try:
data = self._fetchUrl(self.url)
data = self.get_request(self.url)
except HTTPError as exception:
if exception.code == 404:
raise exceptions.StoryDoesNotExist('404 error: {}'.format(self.url))
@ -120,7 +120,7 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
self.add_chapter(title, url)
last_chapter_data = self._fetchUrl(self.get_chapter(-1,'url'))
last_chapter_data = self.get_request(self.get_chapter(-1,'url'))
last_chapter_soup = self.make_soup(last_chapter_data)
last_chapter_ld = self._parse_linked_data(last_chapter_soup)
self.story.setMetadata('dateUpdated', self._parse_date(last_chapter_ld['datePublished']))
@ -130,7 +130,7 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
def getChapterText(self, url):
logger.debug('Getting chapter text from: %s', url)
data = self._fetchUrl(url)
data = self.get_request(url)
soup = self.make_soup(data)
content = soup.select_one('.panel-default .fr-view')

Some files were not shown because too many files have changed in this diff Show more