mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-01-05 07:34:36 +01:00
Change all sites that will work with https to use it all the time.
This commit is contained in:
parent
99ea6d5064
commit
f1b3bc021e
36 changed files with 203 additions and 161 deletions
|
|
@ -47,7 +47,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ash')
|
||||
|
|
@ -67,10 +67,10 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://archive.skyehawke.com/story.php?no=1234 http://www.skyehawke.com/archive/story.php?no=1234 http://skyehawke.com/archive/story.php?no=1234"
|
||||
return "https://archive.skyehawke.com/story.php?no=1234 https://www.skyehawke.com/archive/story.php?no=1234 https://skyehawke.com/archive/story.php?no=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://")+r"(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$"
|
||||
return r"https?://(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
|
@ -100,7 +100,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
author = a.find('a')
|
||||
self.story.setMetadata('authorId',author['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+author['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+author['href'])
|
||||
self.story.setMetadata('author',author.string)
|
||||
|
||||
authorSoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/story/view/'+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/story/view/'+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
|
|
@ -55,7 +55,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/story/view/123456 http://"+cls.getSiteDomain()+"/story/view/123456/story-title-here http://"+cls.getSiteDomain()+"/story/view/123456/1"
|
||||
return "https://"+cls.getSiteDomain()+"/story/view/123456 https://"+cls.getSiteDomain()+"/story/view/123456/story-title-here https://"+cls.getSiteDomain()+"/story/view/123456/1"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain())+r"/story/view/0*(?P<id>\d+)"
|
||||
|
|
@ -89,14 +89,14 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
if self.is_adult or self.getConfig("is_adult"):
|
||||
contentFilter = check.find('a',{'href':'/account/mark_over_18'}) #two different types of adult checks
|
||||
if contentFilter:
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/account/mark_over_18'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/account/mark_over_18'
|
||||
self._fetchUrl(loginUrl)
|
||||
else:
|
||||
params = {}
|
||||
params['csrf_aff_token'] = check.find('input',{'name':'csrf_aff_token'})['value']
|
||||
params['is_of_age'] = '1'
|
||||
params['current_url'] = '/story/view/' + self.story.getMetadata('storyId')
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/account/toggle_age'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/account/toggle_age'
|
||||
self._postUrl(loginUrl,params)
|
||||
|
||||
data = self._fetchUrl(url,usecache=False)
|
||||
|
|
@ -157,7 +157,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
alist = alist.parent.findAll('a', href=re.compile(r"/profile/view/\d+"))
|
||||
for a in alist:
|
||||
self.story.addToList('authorId',a['href'].split('/')[-1])
|
||||
self.story.addToList('authorUrl','http://'+self.host+a['href'])
|
||||
self.story.addToList('authorUrl','https://'+self.host+a['href'])
|
||||
self.story.addToList('author',a.text)
|
||||
|
||||
newestChapter = None
|
||||
|
|
@ -168,7 +168,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('numChapters',len(chapters))
|
||||
for index, chapter in enumerate(chapters):
|
||||
if chapter.text != 'Foreword': # skip the foreword
|
||||
self.chapterUrls.append((stripHTML(chapter.text),'http://' + self.getSiteDomain() + chapter['value'])) # note: AFF cuts off chapter names in list. this gets kind of fixed later on
|
||||
self.chapterUrls.append((stripHTML(chapter.text),'https://' + self.getSiteDomain() + chapter['value'])) # note: AFF cuts off chapter names in list. this gets kind of fixed later on
|
||||
# find timestamp
|
||||
a = soup.find('span', text='Updated')
|
||||
if a == None:
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
# get storyId from url--url validation guarantees query is only storyid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
self._setURL('http://{0}/stories/story.php?storyid={1}'.format(self.getSiteDomain(), self.story.getMetadata('storyId')))
|
||||
self._setURL('https://{0}/stories/story.php?storyid={1}'.format(self.getSiteDomain(), self.story.getMetadata('storyId')))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','bdsmlib')
|
||||
|
|
@ -91,10 +91,10 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/stories/story.php?storyid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/stories/story.php?storyid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
|
|
@ -155,7 +155,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
|
|||
for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+"&chapterid=\d+$")):
|
||||
value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
|
||||
self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat))
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.getSiteDomain()+chapter['href']))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.getSiteDomain()+chapter['href']))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
SITE_ABBREVIATION = 'bvc'
|
||||
SITE_DOMAIN = 'bloodshedverse.com'
|
||||
|
||||
BASE_URL = 'http://' + SITE_DOMAIN + '/'
|
||||
BASE_URL = 'https://' + SITE_DOMAIN + '/'
|
||||
READ_URL_TEMPLATE = BASE_URL + 'stories.php?go=read&no=%s'
|
||||
|
||||
STARTED_DATETIME_FORMAT = '%m/%d/%Y'
|
||||
|
|
@ -65,7 +65,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter):
|
|||
return cls.READ_URL_TEMPLATE % 1234
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape(self.BASE_URL + 'stories.php?go=') + r'(read|chapters)\&(amp;)?no=\d+$'
|
||||
return r'https?://' + re.escape(self.SITE_DOMAIN + '/stories.php?go=') + r'(read|chapters)\&(amp;)?no=\d+$'
|
||||
|
||||
# Override stripURLParameters so the "no" parameter won't get stripped
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','csiforensics')
|
||||
|
|
@ -63,10 +63,10 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
|
@ -117,7 +117,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Rating
|
||||
|
|
@ -128,7 +128,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
|
|||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -193,7 +193,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -22,6 +22,13 @@ from base_efiction_adapter import BaseEfictionAdapter
|
|||
|
||||
class DarkSolaceOrgAdapter(BaseEfictionAdapter):
|
||||
|
||||
@classmethod
|
||||
def getProtocol(self):
|
||||
"""
|
||||
Some, but not all, sites now require https.
|
||||
"""
|
||||
return "https"
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'dark-solace.org'
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fiction part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','dimsn') ## XXX
|
||||
|
|
@ -64,10 +64,10 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"
|
||||
return "https?://"+re.escape(self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -90,7 +90,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/fiction/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
|
|
@ -183,13 +183,13 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
# Find authorid and URL from... author url.
|
||||
a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fiction/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/fiction/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -262,7 +262,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"fiction/viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -21,6 +21,13 @@ from base_efiction_adapter import BaseEfictionAdapter
|
|||
|
||||
class FHSArchiveComAdapter(BaseEfictionAdapter):
|
||||
|
||||
@classmethod
|
||||
def getProtocol(self):
|
||||
"""
|
||||
Some, but not all, sites now require https.
|
||||
"""
|
||||
return "https"
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'fhsarchive.com'
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
SITE_ABBREVIATION = 'fmt'
|
||||
SITE_DOMAIN = 'fictionmania.tv'
|
||||
|
||||
BASE_URL = 'http://' + SITE_DOMAIN + '/stories/'
|
||||
BASE_URL = 'https://' + SITE_DOMAIN + '/stories/'
|
||||
READ_TEXT_STORY_URL_TEMPLATE = BASE_URL + 'readtextstory.html?storyID=%s'
|
||||
DETAILS_URL_TEMPLATE = BASE_URL + 'details.html?storyID=%s'
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter):
|
|||
return cls.READ_TEXT_STORY_URL_TEMPLATE % 1234
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return 'https?' + re.escape(self.BASE_URL[len('http'):]) + '(readtextstory|readxstory|details)\.html\?storyID=\d+$'
|
||||
return r'https?' + re.escape(self.BASE_URL[len('https'):]) + '(readtextstory|readxstory|details)\.html\?storyID=\d+$'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId')
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/archive/' +m.group('cat') +
|
||||
self._setURL('https://' + self.getSiteDomain() + '/archive/' +m.group('cat') +
|
||||
'/' + self.story.getMetadata('storyId') +'.shtml')
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
|
|
@ -82,11 +82,11 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
|||
################################################################################################
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://" + cls.getSiteDomain() + "/archive/999/astoryname.shtml"
|
||||
return "https://" + cls.getSiteDomain() + "/archive/999/astoryname.shtml"
|
||||
|
||||
################################################################################################
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain())+r'/archive/(?P<cat>\d+)/(?P<id>\S+)\.shtml'
|
||||
return r"https?://"+re.escape(self.getSiteDomain())+r'/archive/(?P<cat>\d+)/(?P<id>\S+)\.shtml'
|
||||
|
||||
################################################################################################
|
||||
def get_page(self, page):
|
||||
|
|
@ -159,7 +159,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
|||
# Some stories list multiple authors, but the search engine only uses 1 author, and since
|
||||
# we can't tell how many 'words' are in each name, I'm going to do a work around.
|
||||
author_name = mdata.split(' ')[0].strip()
|
||||
author_url = ('http://'+self.getSiteDomain()+'/cgi-bin/search.cgi?Author={}&SortBy=0'+
|
||||
author_url = ('https://'+self.getSiteDomain()+'/cgi-bin/search.cgi?Author={}&SortBy=0'+
|
||||
'&SortOrder=0&NumToList=0&FastSearch=0&ShortResults=0').format(author_name)
|
||||
story_found = False
|
||||
while not story_found:
|
||||
|
|
@ -167,7 +167,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
|||
adata = self.get_page(author_url)
|
||||
if 'No stories found for your search choices.' in adata:
|
||||
author_name = ' '.join(author_name.split()[:-1])
|
||||
author_url = ('http://'+self.getSiteDomain(
|
||||
author_url = ('https://'+self.getSiteDomain(
|
||||
)+'/cgi-bin/search.cgi?Author={}&SortBy=0'+
|
||||
'&SortOrder=0&NumToList=0&FastSearch=0' +
|
||||
'&ShortResults=0').format(author_name)
|
||||
|
|
@ -206,14 +206,14 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter):
|
|||
if label == 'Series Title:':
|
||||
## there is no way to tell which number of the series the story is, so we won't
|
||||
# put a number
|
||||
series_url = 'http://'+self.getSiteDomain()+'/'+link['href']
|
||||
series_url = 'https://'+self.getSiteDomain()+'/'+link['href']
|
||||
self.story.setMetadata('series', link.get_text())
|
||||
self.story.setMetadata('seriesUrl', series_url)
|
||||
elif label == 'Prequel to:':
|
||||
value = link.string + ' (' + 'http://'+self.getSiteDomain()+link['href'] + ')'
|
||||
value = link.string + ' (' + 'https://'+self.getSiteDomain()+link['href'] + ')'
|
||||
self.story.setMetadata('prequelto', value)
|
||||
elif label == 'Sequel to:':
|
||||
value = link.string + ' (' + 'http://'+self.getSiteDomain()+link['href'] + ')'
|
||||
value = link.string + ' (' + 'https://'+self.getSiteDomain()+link['href'] + ')'
|
||||
self.story.setMetadata('sequelto', value)
|
||||
|
||||
# Some stories have a lot of text in the "summary", and I've tried to keep down on creating
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','hlf')
|
||||
|
|
@ -63,10 +63,10 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
|
@ -98,7 +98,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
|
||||
aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',aut['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+aut['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+aut['href'])
|
||||
self.story.setMetadata('author',aut.string)
|
||||
aut.extract()
|
||||
|
||||
|
|
@ -109,7 +109,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
if chapters != None:
|
||||
for chapter in chapters.findAll('option'):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value']))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value']))
|
||||
else:
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),url))
|
||||
|
||||
|
|
@ -191,7 +191,7 @@ class HLFictionNetAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = list.find('a', href=re.compile(r"series.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','hpffa')
|
||||
|
|
@ -63,7 +63,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?:"+re.escape("//"+self.getSiteDomain()+"/stories/viewstory.php?sid=")+r"\d+$"
|
||||
|
|
@ -103,13 +103,13 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = soup.find('div', id="mainpage").find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/stories/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/stories/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/stories/'+chapter['href']))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/stories/'+chapter['href']))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -186,7 +186,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/stories/'+a['href']
|
||||
series_url = 'https://'+self.host+'/stories/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ime')
|
||||
|
|
@ -63,10 +63,10 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -89,7 +89,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
|
|
@ -171,13 +171,13 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -249,7 +249,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -52,13 +52,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ksa') # XXX
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
# https://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%b/%d/%Y" # XXX
|
||||
|
||||
@classmethod
|
||||
|
|
@ -72,10 +72,10 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return "http://(www.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://(www.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
|
@ -160,13 +160,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
pagetitle = soup.find('div',id='pagetitle')
|
||||
for a in pagetitle.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")):
|
||||
self.story.addToList('authorId',a['href'].split('=')[1])
|
||||
self.story.addToList('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.addToList('author',stripHTML(a))
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -286,7 +286,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = stripHTML(a)
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('storyId', m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL("http://"+self.getSiteDomain()
|
||||
self._setURL("https://"+self.getSiteDomain()
|
||||
+"/novel/"+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
|
|
@ -80,11 +80,11 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://lightnovelgate.com/novel/astoryname"
|
||||
return "https://lightnovelgate.com/novel/astoryname"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
# http://lightnovelgate.com/novel/stellar_transformation
|
||||
return r"http://lightnovelgate\.com/novel/(?P<id>[^/]+)"
|
||||
return r"https?://lightnovelgate\.com/novel/(?P<id>[^/]+)"
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
# fetch the chapter. From that we will get almost all the
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -55,10 +55,10 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
|
|
@ -107,13 +107,13 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = div.find('a', href=re.compile(r"viewuser.php"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in div.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -213,7 +213,7 @@ class LOTRgficComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self_make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
|
|||
self.story.extendList('author', [authorName])
|
||||
authorId = chapter.getAuthorId()
|
||||
if authorId:
|
||||
authorUrl = 'http://%s/index/%s' % (self.getSiteDomain(), authorId)
|
||||
authorUrl = 'https://%s/index/%s' % (self.getSiteDomain(), authorId)
|
||||
else:
|
||||
authorId = u''
|
||||
authorUrl = u''
|
||||
|
|
@ -280,7 +280,7 @@ class MassEffect2InAdapter(BaseSiteAdapter):
|
|||
@classmethod
|
||||
def _makeDocumentUrl(cls, documentId):
|
||||
"""Make a chapter URL given a document ID."""
|
||||
return 'http://%s/publ/%s' % (cls.getSiteDomain(), documentId)
|
||||
return 'https://%s/publ/%s' % (cls.getSiteDomain(), documentId)
|
||||
|
||||
|
||||
class Chapter(object):
|
||||
|
|
@ -573,7 +573,7 @@ class Chapter(object):
|
|||
else:
|
||||
_logger.warning(u"No title found for rating label `%s'!" % label)
|
||||
# TODO: conduct a research on such abnormal URLs.
|
||||
elif source == 'http://www.masseffect2.in/_fr/10/1360399.png':
|
||||
elif '/_fr/10/1360399.png' in source:
|
||||
label = 'Nn'
|
||||
return {
|
||||
'label': 'Nn',
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://mcstories.com/StoryTitle http://mcstories.com/StoryTitle/index.html http://mcstories.com/StoryTitle/StoryTitle1.html"
|
||||
return "https://mcstories.com/StoryTitle https://mcstories.com/StoryTitle/index.html https://mcstories.com/StoryTitle/StoryTitle1.html"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://(www\.)?mcstories\.com/([a-zA-Z0-9_-]+)/"
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
self.getSiteExampleURLs())
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fanfic/s/'+cattitle+'/'+urltitle+'/'+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/fanfic/s/'+cattitle+'/'+urltitle+'/'+self.story.getMetadata('storyId'))
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
|
|
@ -76,20 +76,20 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/fanfic/s/category-name/story-title/123456 http://"+cls.getSiteDomain()+"/fanfic/c/category-name/story-title/123456/987612"
|
||||
return "https://"+cls.getSiteDomain()+"/fanfic/s/category-name/story-title/123456 https://"+cls.getSiteDomain()+"/fanfic/c/category-name/story-title/123456/987612"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
## old urls
|
||||
## http://www.mediaminer.org/fanfic/view_st.php/76882
|
||||
## https://www.mediaminer.org/fanfic/view_st.php/76882
|
||||
## new urls
|
||||
## http://www.mediaminer.org/fanfic/s/ghosts-from-the-past/72
|
||||
## http://www.mediaminer.org/fanfic/c/ghosts-from-the-past/chapter-2/72/174
|
||||
## http://www.mediaminer.org/fanfic/s/robtech-final-missions/61553
|
||||
## http://www.mediaminer.org/fanfic/c/robtech-final-missions/robotech-final-missions-oneshot/61553/189830
|
||||
## https://www.mediaminer.org/fanfic/s/ghosts-from-the-past/72
|
||||
## https://www.mediaminer.org/fanfic/c/ghosts-from-the-past/chapter-2/72/174
|
||||
## https://www.mediaminer.org/fanfic/s/robtech-final-missions/61553
|
||||
## https://www.mediaminer.org/fanfic/c/robtech-final-missions/robotech-final-missions-oneshot/61553/189830
|
||||
## even newer urls
|
||||
## http://www.mediaminer.org/fanfic/s/gundam-wing-fan-fiction/the-preventer-operatives/171000
|
||||
## http://www.mediaminer.org/fanfic/c/gundam-wing-fan-fiction/the-preventer-operatives/171000/608822
|
||||
return re.escape("http://"+self.getSiteDomain())+r"/fanfic/"+\
|
||||
## https://www.mediaminer.org/fanfic/s/gundam-wing-fan-fiction/the-preventer-operatives/171000
|
||||
## https://www.mediaminer.org/fanfic/c/gundam-wing-fan-fiction/the-preventer-operatives/171000/608822
|
||||
return r"https?://"+re.escape(self.getSiteDomain())+r"/fanfic/"+\
|
||||
r"((s/(?P<cattitle4>[^/]+)/(?P<urltitle4>[^/]+)/(?P<id4>\d+))|"+\
|
||||
r"((c/(?P<cattitle5>[^/]+)/(?P<urltitle5>[^/]+)/(?P<id5>\d+))/\d+)|"+\
|
||||
r"(s/(?P<urltitle1>[^/]+)/(?P<id1>\d+))|"+\
|
||||
|
|
@ -138,7 +138,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"/user_info.php/\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('/')[-1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# save date from first for later.
|
||||
|
|
@ -147,7 +147,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter):
|
|||
# Find the chapters - one-shot now have chapter list, too.
|
||||
chap_p = soup.find('p',{'style':'margin-left:10px;'})
|
||||
for (atag,aurl,name) in [ (x,x['href'],stripHTML(x)) for x in chap_p.find_all('a') ]:
|
||||
self.chapterUrls.append((name,'http://'+self.host+aurl))
|
||||
self.chapterUrls.append((name,'https://'+self.host+aurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,13 @@ from base_efiction_adapter import BaseEfictionAdapter
|
|||
|
||||
class NCISFictionComAdapter(BaseEfictionAdapter):
|
||||
|
||||
@classmethod
|
||||
def getProtocol(self):
|
||||
"""
|
||||
Some, but not all site now require https.
|
||||
"""
|
||||
return "https"
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'ncisfiction.com'
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/')
|
||||
self._setURL('https://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/')
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','phs')
|
||||
|
|
@ -62,10 +62,10 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/fanfiction/story/1234/"
|
||||
return "https://"+cls.getSiteDomain()+"/fanfiction/story/1234/"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/fanfiction/story/")+r"\d+/?$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/fanfiction/story/")+r"\d+/?$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -86,7 +86,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
#params['remember'] = '1'
|
||||
params['login'] = 'Login'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/users/processlogin.php'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['txtusername']))
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
|
@ -136,7 +136,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url. /fanfiction/stories.php?psid=125
|
||||
a = b.find('a', href=re.compile(r"/fanfiction/stories.php\?psid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
|
|
@ -152,17 +152,17 @@ class PhoenixSongNetAdapter(BaseSiteAdapter):
|
|||
i = 0
|
||||
chapters = chapters.findAll('option')
|
||||
for chapter in chapters:
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['value']))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+chapter['value']))
|
||||
if i == 0:
|
||||
self.story.setMetadata('storyId',chapter['value'].split('/')[3])
|
||||
head = self.make_soup(self._fetchUrl('http://'+self.host+chapter['value'])).findAll('b')
|
||||
head = self.make_soup(self._fetchUrl('https://'+self.host+chapter['value'])).findAll('b')
|
||||
for b in head:
|
||||
if b.text == "Updated":
|
||||
date = b.nextSibling.string.split(': ')[1].split(',')
|
||||
self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat))
|
||||
|
||||
if i == (len(chapters)-1):
|
||||
head = self.make_soup(self._fetchUrl('http://'+self.host+chapter['value'])).findAll('b')
|
||||
head = self.make_soup(self._fetchUrl('https://'+self.host+chapter['value'])).findAll('b')
|
||||
for b in head:
|
||||
if b.text == "Updated":
|
||||
date = b.nextSibling.string.split(': ')[1].split(',')
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL. gets rid of chapter if there, left with chapter index URL
|
||||
nurl = "http://"+self.getSiteDomain()+"/historias/"+self.story.getMetadata('storyId')
|
||||
nurl = "https://"+self.getSiteDomain()+"/historias/"+self.story.getMetadata('storyId')
|
||||
self._setURL(nurl)
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
|
|
@ -69,15 +69,15 @@ class PotterFicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://www.potterfics.com/historias/12345 http://www.potterfics.com/historias/12345/capitulo-1 "
|
||||
return "https://www.potterfics.com/historias/12345 https://www.potterfics.com/historias/12345/capitulo-1 "
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
#http://www.potterfics.com/historias/127583
|
||||
#http://www.potterfics.com/historias/127583/capitulo-1
|
||||
#http://www.potterfics.com/historias/127583/capitulo-4
|
||||
#http://www.potterfics.com/historias/92810 -> Complete story
|
||||
#http://www.potterfics.com/historias/111194 -> Complete, single chap
|
||||
p = re.escape("http://"+self.getSiteDomain()+"/historias/")+\
|
||||
#https://www.potterfics.com/historias/127583
|
||||
#https://www.potterfics.com/historias/127583/capitulo-1
|
||||
#https://www.potterfics.com/historias/127583/capitulo-4
|
||||
#https://www.potterfics.com/historias/92810 -> Complete story
|
||||
#https://www.potterfics.com/historias/111194 -> Complete, single chap
|
||||
p = r"https?://"+re.escape(self.getSiteDomain()+"/historias/")+\
|
||||
r"(?P<id>\d+)(/capitulo-(?P<ch>\d+))?/?$"
|
||||
return p
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ class PotterFicsComAdapter(BaseSiteAdapter):
|
|||
params['login_password'] = self.getConfig("password")
|
||||
params['login_ck'] = '1'
|
||||
|
||||
loginUrl = 'http://www.potterfics.com/secciones/usuarios/login.php'
|
||||
loginUrl = 'https://www.potterfics.com/secciones/usuarios/login.php'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['login_usuario']))
|
||||
d = self._postUrl(loginUrl,params)
|
||||
|
|
@ -117,10 +117,10 @@ class PotterFicsComAdapter(BaseSiteAdapter):
|
|||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
#this converts '/historias/12345' to 'http://www.potterfics.com/historias/12345'
|
||||
#this converts '/historias/12345' to 'https://www.potterfics.com/historias/12345'
|
||||
def makeAbsoluteURL(url):
|
||||
if url[0] == '/':
|
||||
url = 'http://'+self.getSiteDomain()+url
|
||||
url = 'https://'+self.getSiteDomain()+url
|
||||
return url
|
||||
|
||||
#use this to get month numbers from Spanish months
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','ptdc')
|
||||
|
|
@ -67,10 +67,10 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/missingpieces/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/missingpieces/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/missingpieces/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://(www\.)?"+re.escape(self.getSiteDomain()+"/missingpieces/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
|
@ -135,13 +135,13 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/missingpieces/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/missingpieces/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/missingpieces/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/missingpieces/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -213,7 +213,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/missingpieces/'+a['href']
|
||||
series_url = 'https://'+self.host+'/missingpieces/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','atp')
|
||||
|
|
@ -63,10 +63,10 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/atp/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/atp/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/atp/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/atp/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
|
|
@ -130,7 +130,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
|
||||
aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',aut['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/atp/'+aut['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/atp/'+aut['href'])
|
||||
self.story.setMetadata('author',aut.string)
|
||||
aut.extract()
|
||||
|
||||
|
|
@ -141,7 +141,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
if chapters != None:
|
||||
for chapter in chapters.findAll('option'):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/atp/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value']))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/atp/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value']))
|
||||
else:
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),url))
|
||||
|
||||
|
|
@ -223,7 +223,7 @@ class QafFicComAdapter(BaseSiteAdapter):
|
|||
if list.find('a', href=re.compile(r"series.php")) != None:
|
||||
for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")):
|
||||
# Find Series name from series URL.
|
||||
series_url = 'http://'+self.host+'/atp/'+series['href']
|
||||
series_url = 'https://'+self.host+'/atp/'+series['href']
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from base_adapter import BaseSiteAdapter
|
|||
from ..htmlcleanup import stripHTML
|
||||
|
||||
SITE_DOMAIN = 'quotev.com'
|
||||
STORY_URL_TEMPLATE = 'http://www.quotev.com/story/%s'
|
||||
STORY_URL_TEMPLATE = 'https://www.quotev.com/story/%s'
|
||||
|
||||
|
||||
def getClass():
|
||||
|
|
@ -41,8 +41,8 @@ class QuotevComAdapter(BaseSiteAdapter):
|
|||
|
||||
def getSiteURLPattern(self):
|
||||
pattern = re.escape(STORY_URL_TEMPLATE.rsplit('%', 1)[0]) + r'(.+?)($|&|/)'
|
||||
pattern = pattern.replace(r'http\:', r'https?\:')
|
||||
pattern = pattern.replace(r'https?\:\/\/www\.', r'https?\:\/\/(www\.)?')
|
||||
pattern = pattern.replace(r'https', r'https?')
|
||||
pattern = pattern.replace(r'www\.', r'(www\.)?')
|
||||
return pattern
|
||||
|
||||
def use_pagecache(self):
|
||||
|
|
@ -75,7 +75,7 @@ class QuotevComAdapter(BaseSiteAdapter):
|
|||
self.story.addToList('authorUrl', urlparse.urljoin(self.url, a['href']))
|
||||
if not self.story.getList('author'):
|
||||
self.story.addToList('author','Anonymous')
|
||||
self.story.addToList('authorUrl','http://www.quotev.com')
|
||||
self.story.addToList('authorUrl','https://www.quotev.com')
|
||||
self.story.addToList('authorId','0')
|
||||
|
||||
self.setDescription(self.url, soup.find('div', id='qdesct'))
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fiction/'+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/fiction/'+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','rylrdl')
|
||||
|
|
@ -157,7 +157,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
if author_link:
|
||||
authorId = author_link['href'].rsplit('/', 1)[1]
|
||||
self.story.setMetadata('authorId', authorId)
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/user/profile/'+authorId)
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/user/profile/'+authorId)
|
||||
|
||||
self.story.setMetadata('author',soup.find(attrs=dict(property="books:author"))['content'])
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ class RoyalRoadAdapter(BaseSiteAdapter):
|
|||
chapters = soup.find('table',{'id':'chapters'}).find('tbody')
|
||||
tds = [tr.findAll('td')[0] for tr in chapters.findAll('tr')]
|
||||
for td in tds:
|
||||
chapterUrl = 'http://' + self.getSiteDomain() + td.a['href']
|
||||
chapterUrl = 'https://' + self.getSiteDomain() + td.a['href']
|
||||
self.chapterUrls.append((stripHTML(td.text), chapterUrl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
# normalized story URL.
|
||||
# XXX Most sites don't have the /fanfic part. Replace all to remove it usually.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','swf') # XXX
|
||||
|
|
@ -87,10 +87,10 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(self):
|
||||
return "http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=1234"
|
||||
return "https://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/fanfics/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -113,7 +113,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/fanfics/user.php?action=login'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/fanfics/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
|
|
@ -182,7 +182,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
for a in alist:
|
||||
self.story.addToList('authorId',a['href'].split('=')[1])
|
||||
self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href'])
|
||||
self.story.addToList('authorUrl','https://'+self.host+'/fanfics/'+a['href'])
|
||||
self.story.addToList('author',a.string)
|
||||
|
||||
|
||||
|
|
@ -195,7 +195,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfics/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/fanfics/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -279,7 +279,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/fanfics/'+a['href']
|
||||
series_url = 'https://'+self.host+'/fanfics/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ class SpikeluverComAdapter(BaseSiteAdapter):
|
|||
SITE_ABBREVIATION = 'slc'
|
||||
SITE_DOMAIN = 'spikeluver.com'
|
||||
|
||||
BASE_URL = 'http://' + SITE_DOMAIN + '/SpuffyRealm/'
|
||||
BASE_URL = 'https://' + SITE_DOMAIN + '/SpuffyRealm/'
|
||||
LOGIN_URL = BASE_URL + 'user.php?action=login'
|
||||
VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%d'
|
||||
METADATA_URL_SUFFIX = '&index=1'
|
||||
|
|
@ -69,7 +69,7 @@ class SpikeluverComAdapter(BaseSiteAdapter):
|
|||
return cls.VIEW_STORY_URL_TEMPLATE % 1234
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]) + r'\d+$'
|
||||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
soup = self._customized_fetch_url(self.url + self.METADATA_URL_SUFFIX)
|
||||
|
|
|
|||
|
|
@ -24,6 +24,13 @@ class TheDelphicExpanseComAdapter(BaseEfictionAdapter):
|
|||
''' This adapter will download stories from the
|
||||
'Taste of Poison, the Fanfiction of Arsenic Jade' site '''
|
||||
|
||||
@classmethod
|
||||
def getProtocol(self):
|
||||
"""
|
||||
Some, but not all site now require https.
|
||||
"""
|
||||
return "https"
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'www.thedelphicexpanse.com'
|
||||
|
|
|
|||
|
|
@ -20,6 +20,13 @@ from base_efiction_adapter import BaseEfictionAdapter
|
|||
|
||||
class TheHookupZoneNetAdapter(BaseEfictionAdapter):
|
||||
|
||||
@classmethod
|
||||
def getProtocol(self):
|
||||
"""
|
||||
Some, but not all site now require https.
|
||||
"""
|
||||
return "https"
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'thehookupzone.net'
|
||||
|
|
|
|||
|
|
@ -22,6 +22,13 @@ from base_efiction_adapter import BaseEfictionAdapter
|
|||
|
||||
class ThundercatsFansOrgSiteAdapter(BaseEfictionAdapter):
|
||||
|
||||
@classmethod
|
||||
def getProtocol(self):
|
||||
"""
|
||||
Some, but not all site now require https.
|
||||
"""
|
||||
return "https"
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
return 'www.thundercatsfans.org'
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL("http://"+self.getSiteDomain()\
|
||||
self._setURL("https://"+self.getSiteDomain()\
|
||||
+"/efiction/viewstory.php?sid="+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
|
|
@ -66,10 +66,10 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234 http://efiction."+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234 https://efiction."+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r'(http://trekiverse\.org/efiction/viewstory\.php\?sid=\d+|http://efiction\.trekiverse\.org/viewstory\.php\?sid=\d+)'
|
||||
return r'(https?://trekiverse\.org/efiction/viewstory\.php\?sid=\d+|https?://efiction\.trekiverse\.org/viewstory\.php\?sid=\d+)'
|
||||
|
||||
## Login seems to be reasonably standard across eFiction sites.
|
||||
def needToLoginCheck(self, data):
|
||||
|
|
@ -92,7 +92,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/efiction/user.php?action=login'
|
||||
loginUrl = 'https://' + self.getSiteDomain() + '/efiction/user.php?action=login'
|
||||
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
|
|
@ -171,7 +171,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
a = soup.find('div', {'id' : 'pagetitle'})
|
||||
aut = a.find('a', href=re.compile(r"^viewuser\.php\?uid="))
|
||||
self.story.setMetadata('authorId',aut['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/efiction/'+aut['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/efiction/'+aut['href'])
|
||||
self.story.setMetadata('author',aut.string)
|
||||
|
||||
ttl = a.find('a', href=re.compile(r'^viewstory.php\?sid=%s$'%self.story.getMetadata('storyId')))
|
||||
|
|
@ -185,11 +185,11 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
if len(chapters)==0:
|
||||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: No php/html chapters found.")
|
||||
if len(chapters)==1:
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/efiction/'+chapters[0]['href']))
|
||||
self.chapterUrls.append((self.story.getMetadata('title'),'https://'+self.host+'/efiction/'+chapters[0]['href']))
|
||||
else:
|
||||
for chapter in chapters:
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/efiction/'+chapter['href']))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/efiction/'+chapter['href']))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -282,7 +282,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/efiction/'+a['href']
|
||||
series_url = 'https://'+self.host+'/efiction/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
|
|||
SITE_ABBREVIATION = 'voe'
|
||||
SITE_DOMAIN = 'voracity2.e-fic.com'
|
||||
|
||||
BASE_URL = 'http://' + SITE_DOMAIN + '/'
|
||||
BASE_URL = 'https://' + SITE_DOMAIN + '/'
|
||||
LOGIN_URL = BASE_URL + 'user.php?action=login'
|
||||
VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%d'
|
||||
METADATA_URL_SUFFIX = '&index=1'
|
||||
|
|
@ -98,7 +98,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter):
|
|||
return cls.VIEW_STORY_URL_TEMPLATE % 1234
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]) + r'\d+$'
|
||||
return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$'
|
||||
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
soup = self._customized_fetch_url(self.url + self.METADATA_URL_SUFFIX)
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','wrfa')
|
||||
|
|
@ -62,10 +62,10 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/wrfa/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/wrfa/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/wrfa/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/wrfa/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
|
|
@ -101,7 +101,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
# Find authorid and URL from... author url.
|
||||
a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
self.story.setMetadata('authorId',a['href'].split('=')[1])
|
||||
self.story.setMetadata('authorUrl','http://'+self.host+'/wrfa/'+a['href'])
|
||||
self.story.setMetadata('authorUrl','https://'+self.host+'/wrfa/'+a['href'])
|
||||
self.story.setMetadata('author',a.string)
|
||||
|
||||
rating=pt.text.split('(')[1].split(')')[0]
|
||||
|
|
@ -110,7 +110,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
# Find the chapters:
|
||||
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/wrfa/'+chapter['href']))
|
||||
self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/wrfa/'+chapter['href']))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -178,7 +178,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/wrfa/'+a['href']
|
||||
series_url = 'https://'+self.host+'/wrfa/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
|
||||
|
||||
|
||||
self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','wb')
|
||||
|
|
@ -63,10 +63,10 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
|
@ -114,7 +114,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
alist = pt.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+"))
|
||||
for a in alist:
|
||||
self.story.addToList('authorId',a['href'].split('=')[1])
|
||||
self.story.addToList('authorUrl','http://'+self.host+'/'+a['href'])
|
||||
self.story.addToList('authorUrl','https://'+self.host+'/'+a['href'])
|
||||
self.story.addToList('author',a.string)
|
||||
|
||||
rating=pt.text.split('[')[1].split(']')[0]
|
||||
|
|
@ -133,7 +133,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
else:
|
||||
add = ""
|
||||
# just in case there's tags, like <i> in chapter titles.
|
||||
self.chapterUrls.append((stripHTML(chapter)+add,'http://'+self.host+'/'+chapter['href']+addurl))
|
||||
self.chapterUrls.append((stripHTML(chapter)+add,'https://'+self.host+'/'+chapter['href']+addurl))
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
|
|
@ -193,7 +193,7 @@ class WraithBaitComAdapter(BaseSiteAdapter):
|
|||
# Find Series name from series URL.
|
||||
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
|
||||
series_name = a.string
|
||||
series_url = 'http://'+self.host+'/'+a['href']
|
||||
series_url = 'https://'+self.host+'/'+a['href']
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
seriessoup = self.make_soup(self._fetchUrl(series_url))
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
story_id = match.group('id')
|
||||
self.story.setMetadata('storyId', story_id)
|
||||
self._setURL('http://%s/novel/%s' % (self.getSiteDomain(), story_id))
|
||||
self._setURL('https://%s/novel/%s' % (self.getSiteDomain(), story_id))
|
||||
|
||||
@staticmethod
|
||||
def getSiteDomain():
|
||||
|
|
@ -54,10 +54,10 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return 'http://%s/novel/story-name' % cls.getSiteDomain()
|
||||
return 'https://%s/novel/story-name' % cls.getSiteDomain()
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r'http(s)?://%s/novel/(?P<id>[^/]+)(/)?' % re.escape(self.getSiteDomain())
|
||||
return r'https?://%s/novel/(?P<id>[^/]+)(/)?' % re.escape(self.getSiteDomain())
|
||||
|
||||
def use_pagecache(self):
|
||||
return True
|
||||
|
|
|
|||
Loading…
Reference in a new issue