diff --git a/fanficfare/adapters/adapter_archiveskyehawkecom.py b/fanficfare/adapters/adapter_archiveskyehawkecom.py index 9ed0715a..1d436048 100644 --- a/fanficfare/adapters/adapter_archiveskyehawkecom.py +++ b/fanficfare/adapters/adapter_archiveskyehawkecom.py @@ -47,7 +47,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/story.php?no='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','ash') @@ -67,10 +67,10 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://archive.skyehawke.com/story.php?no=1234 http://www.skyehawke.com/archive/story.php?no=1234 http://skyehawke.com/archive/story.php?no=1234" + return "https://archive.skyehawke.com/story.php?no=1234 https://www.skyehawke.com/archive/story.php?no=1234 https://skyehawke.com/archive/story.php?no=1234" def getSiteURLPattern(self): - return re.escape("http://")+r"(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$" + return r"https?://(archive|www)\.skyehawke\.com/(archive/)?story\.php\?no=\d+$" ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): @@ -100,7 +100,7 @@ class ArchiveSkyeHawkeComAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. 
author = a.find('a') self.story.setMetadata('authorId',author['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+author['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+author['href']) self.story.setMetadata('author',author.string) authorSoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl'))) diff --git a/fanficfare/adapters/adapter_asianfanficscom.py b/fanficfare/adapters/adapter_asianfanficscom.py index 5d40ff6b..3b4bf539 100644 --- a/fanficfare/adapters/adapter_asianfanficscom.py +++ b/fanficfare/adapters/adapter_asianfanficscom.py @@ -35,7 +35,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter): self.story.setMetadata('storyId',m.group('id')) # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/story/view/'+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/story/view/'+self.story.getMetadata('storyId')) else: raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), @@ -55,7 +55,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/story/view/123456 http://"+cls.getSiteDomain()+"/story/view/123456/story-title-here http://"+cls.getSiteDomain()+"/story/view/123456/1" + return "https://"+cls.getSiteDomain()+"/story/view/123456 https://"+cls.getSiteDomain()+"/story/view/123456/story-title-here https://"+cls.getSiteDomain()+"/story/view/123456/1" def getSiteURLPattern(self): return r"https?://"+re.escape(self.getSiteDomain())+r"/story/view/0*(?P\d+)" @@ -89,14 +89,14 @@ class AsianFanFicsComAdapter(BaseSiteAdapter): if self.is_adult or self.getConfig("is_adult"): contentFilter = check.find('a',{'href':'/account/mark_over_18'}) #two different types of adult checks if contentFilter: - loginUrl = 'http://' + self.getSiteDomain() + '/account/mark_over_18' + loginUrl = 'https://' + self.getSiteDomain() + '/account/mark_over_18' self._fetchUrl(loginUrl) else: 
params = {} params['csrf_aff_token'] = check.find('input',{'name':'csrf_aff_token'})['value'] params['is_of_age'] = '1' params['current_url'] = '/story/view/' + self.story.getMetadata('storyId') - loginUrl = 'http://' + self.getSiteDomain() + '/account/toggle_age' + loginUrl = 'https://' + self.getSiteDomain() + '/account/toggle_age' self._postUrl(loginUrl,params) data = self._fetchUrl(url,usecache=False) @@ -157,7 +157,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter): alist = alist.parent.findAll('a', href=re.compile(r"/profile/view/\d+")) for a in alist: self.story.addToList('authorId',a['href'].split('/')[-1]) - self.story.addToList('authorUrl','http://'+self.host+a['href']) + self.story.addToList('authorUrl','https://'+self.host+a['href']) self.story.addToList('author',a.text) newestChapter = None @@ -168,7 +168,7 @@ class AsianFanFicsComAdapter(BaseSiteAdapter): self.story.setMetadata('numChapters',len(chapters)) for index, chapter in enumerate(chapters): if chapter.text != 'Foreword': # skip the foreword - self.chapterUrls.append((stripHTML(chapter.text),'http://' + self.getSiteDomain() + chapter['value'])) # note: AFF cuts off chapter names in list. this gets kind of fixed later on + self.chapterUrls.append((stripHTML(chapter.text),'https://' + self.getSiteDomain() + chapter['value'])) # note: AFF cuts off chapter names in list. 
this gets kind of fixed later on # find timestamp a = soup.find('span', text='Updated') if a == None: diff --git a/fanficfare/adapters/adapter_bdsmlibrarycom.py b/fanficfare/adapters/adapter_bdsmlibrarycom.py index b9de09b4..020a7327 100644 --- a/fanficfare/adapters/adapter_bdsmlibrarycom.py +++ b/fanficfare/adapters/adapter_bdsmlibrarycom.py @@ -75,7 +75,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter): # get storyId from url--url validation guarantees query is only storyid=1234 self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - self._setURL('http://{0}/stories/story.php?storyid={1}'.format(self.getSiteDomain(), self.story.getMetadata('storyId'))) + self._setURL('https://{0}/stories/story.php?storyid={1}'.format(self.getSiteDomain(), self.story.getMetadata('storyId'))) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','bdsmlib') @@ -91,10 +91,10 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/stories/story.php?storyid=1234" + return "https://"+cls.getSiteDomain()+"/stories/story.php?storyid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$" def use_pagecache(self): ''' @@ -155,7 +155,7 @@ class BDSMLibraryComSiteAdapter(BaseSiteAdapter): for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php\?storyid='+self.story.getMetadata('storyId')+"&chapterid=\d+$")): value = chapter.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip() self.story.setMetadata('dateUpdated', makeDate(value, self.dateformat)) - self.chapterUrls.append((stripHTML(chapter),'http://'+self.getSiteDomain()+chapter['href'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.getSiteDomain()+chapter['href'])) 
self.story.setMetadata('numChapters',len(self.chapterUrls)) diff --git a/fanficfare/adapters/adapter_bloodshedversecom.py b/fanficfare/adapters/adapter_bloodshedversecom.py index 9f14a15c..37e5959f 100644 --- a/fanficfare/adapters/adapter_bloodshedversecom.py +++ b/fanficfare/adapters/adapter_bloodshedversecom.py @@ -27,7 +27,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter): SITE_ABBREVIATION = 'bvc' SITE_DOMAIN = 'bloodshedverse.com' - BASE_URL = 'http://' + SITE_DOMAIN + '/' + BASE_URL = 'https://' + SITE_DOMAIN + '/' READ_URL_TEMPLATE = BASE_URL + 'stories.php?go=read&no=%s' STARTED_DATETIME_FORMAT = '%m/%d/%Y' @@ -65,7 +65,7 @@ class BloodshedverseComAdapter(BaseSiteAdapter): return cls.READ_URL_TEMPLATE % 1234 def getSiteURLPattern(self): - return re.escape(self.BASE_URL + 'stories.php?go=') + r'(read|chapters)\&(amp;)?no=\d+$' + return r'https?://' + re.escape(self.SITE_DOMAIN + '/stories.php?go=') + r'(read|chapters)\&(amp;)?no=\d+$' # Override stripURLParameters so the "no" parameter won't get stripped @classmethod diff --git a/fanficfare/adapters/adapter_csiforensicscom.py b/fanficfare/adapters/adapter_csiforensicscom.py index 75275b61..33a50e24 100644 --- a/fanficfare/adapters/adapter_csiforensicscom.py +++ b/fanficfare/adapters/adapter_csiforensicscom.py @@ -47,7 +47,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter): - self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. 
self.story.setMetadata('siteabbrev','csiforensics') @@ -63,10 +63,10 @@ class CSIForensicsComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): @@ -117,7 +117,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href']) self.story.setMetadata('author',a.string) # Rating @@ -128,7 +128,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter): # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -193,7 +193,7 @@ class CSIForensicsComAdapter(BaseSiteAdapter): # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/'+a['href'] + series_url = 'https://'+self.host+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. 
seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_darksolaceorg.py b/fanficfare/adapters/adapter_darksolaceorg.py index e64c1e88..925caa4a 100644 --- a/fanficfare/adapters/adapter_darksolaceorg.py +++ b/fanficfare/adapters/adapter_darksolaceorg.py @@ -22,6 +22,13 @@ from base_efiction_adapter import BaseEfictionAdapter class DarkSolaceOrgAdapter(BaseEfictionAdapter): + @classmethod + def getProtocol(self): + """ + Some, but not all, sites now require https. + """ + return "https" + @staticmethod def getSiteDomain(): return 'dark-solace.org' diff --git a/fanficfare/adapters/adapter_deepinmysoulnet.py b/fanficfare/adapters/adapter_deepinmysoulnet.py index 163bce0e..c4106202 100644 --- a/fanficfare/adapters/adapter_deepinmysoulnet.py +++ b/fanficfare/adapters/adapter_deepinmysoulnet.py @@ -48,7 +48,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX # normalized story URL. # XXX Most sites don't have the /fiction part. Replace all to remove it usually. - self._setURL('http://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/fiction/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','dimsn') ## XXX @@ -64,10 +64,10 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/fiction/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$" + return "https?://"+re.escape(self.getSiteDomain()+"/fiction/viewstory.php?sid=")+r"\d+$" ## Login seems to be reasonably standard across eFiction sites. 
def needToLoginCheck(self, data): @@ -90,7 +90,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX params['cookiecheck'] = '1' params['submit'] = 'Submit' - loginUrl = 'http://' + self.getSiteDomain() + '/fiction/user.php?action=login' + loginUrl = 'https://' + self.getSiteDomain() + '/fiction/user.php?action=login' logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) @@ -183,13 +183,13 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX # Find authorid and URL from... author url. a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href']) self.story.setMetadata('author',a.string) # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fiction/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/fiction/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -262,7 +262,7 @@ class DeepInMySoulNetAdapter(BaseSiteAdapter): # XXX # Find Series name from series URL. a = soup.find('a', href=re.compile(r"fiction/viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/'+a['href'] + series_url = 'https://'+self.host+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. 
seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_fhsarchivecom.py b/fanficfare/adapters/adapter_fhsarchivecom.py index 3bc60b03..d2f9ff3d 100644 --- a/fanficfare/adapters/adapter_fhsarchivecom.py +++ b/fanficfare/adapters/adapter_fhsarchivecom.py @@ -21,6 +21,13 @@ from base_efiction_adapter import BaseEfictionAdapter class FHSArchiveComAdapter(BaseEfictionAdapter): + @classmethod + def getProtocol(self): + """ + Some, but not all, sites now require https. + """ + return "https" + @staticmethod def getSiteDomain(): return 'fhsarchive.com' diff --git a/fanficfare/adapters/adapter_fictionmaniatv.py b/fanficfare/adapters/adapter_fictionmaniatv.py index 25577e09..928e6fbe 100644 --- a/fanficfare/adapters/adapter_fictionmaniatv.py +++ b/fanficfare/adapters/adapter_fictionmaniatv.py @@ -19,7 +19,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter): SITE_ABBREVIATION = 'fmt' SITE_DOMAIN = 'fictionmania.tv' - BASE_URL = 'http://' + SITE_DOMAIN + '/stories/' + BASE_URL = 'https://' + SITE_DOMAIN + '/stories/' READ_TEXT_STORY_URL_TEMPLATE = BASE_URL + 'readtextstory.html?storyID=%s' DETAILS_URL_TEMPLATE = BASE_URL + 'details.html?storyID=%s' @@ -62,7 +62,7 @@ class FictionManiaTVAdapter(BaseSiteAdapter): return cls.READ_TEXT_STORY_URL_TEMPLATE % 1234 def getSiteURLPattern(self): - return 'https?' + re.escape(self.BASE_URL[len('http'):]) + '(readtextstory|readxstory|details)\.html\?storyID=\d+$' + return r'https?' 
+ re.escape(self.BASE_URL[len('https'):]) + '(readtextstory|readxstory|details)\.html\?storyID=\d+$' def extractChapterUrlsAndMetadata(self): url = self.DETAILS_URL_TEMPLATE % self.story.getMetadata('storyId') diff --git a/fanficfare/adapters/adapter_fireflypopulliorg.py b/fanficfare/adapters/adapter_fireflypopulliorg.py index 6f5caf90..feaca090 100644 --- a/fanficfare/adapters/adapter_fireflypopulliorg.py +++ b/fanficfare/adapters/adapter_fireflypopulliorg.py @@ -54,7 +54,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter): self.story.setMetadata('storyId',m.group('id')) # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/archive/' +m.group('cat') + + self._setURL('https://' + self.getSiteDomain() + '/archive/' +m.group('cat') + '/' + self.story.getMetadata('storyId') +'.shtml') else: raise exceptions.InvalidStoryURL(url, @@ -82,11 +82,11 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter): ################################################################################################ @classmethod def getSiteExampleURLs(cls): - return "http://" + cls.getSiteDomain() + "/archive/999/astoryname.shtml" + return "https://" + cls.getSiteDomain() + "/archive/999/astoryname.shtml" ################################################################################################ def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain())+r'/archive/(?P\d+)/(?P\S+)\.shtml' + return r"https?://"+re.escape(self.getSiteDomain())+r'/archive/(?P\d+)/(?P\S+)\.shtml' ################################################################################################ def get_page(self, page): @@ -159,7 +159,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter): # Some stories list multiple authors, but the search engine only uses 1 author, and since # we can't tell how many 'words' are in each name, I'm going to do a work around. 
author_name = mdata.split(' ')[0].strip() - author_url = ('http://'+self.getSiteDomain()+'/cgi-bin/search.cgi?Author={}&SortBy=0'+ + author_url = ('https://'+self.getSiteDomain()+'/cgi-bin/search.cgi?Author={}&SortBy=0'+ '&SortOrder=0&NumToList=0&FastSearch=0&ShortResults=0').format(author_name) story_found = False while not story_found: @@ -167,7 +167,7 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter): adata = self.get_page(author_url) if 'No stories found for your search choices.' in adata: author_name = ' '.join(author_name.split()[:-1]) - author_url = ('http://'+self.getSiteDomain( + author_url = ('https://'+self.getSiteDomain( )+'/cgi-bin/search.cgi?Author={}&SortBy=0'+ '&SortOrder=0&NumToList=0&FastSearch=0' + '&ShortResults=0').format(author_name) @@ -206,14 +206,14 @@ class FireflyPopulliOrgSiteAdapter(BaseSiteAdapter): if label == 'Series Title:': ## there is no way to tell which number of the series the story is, so we won't # put a number - series_url = 'http://'+self.getSiteDomain()+'/'+link['href'] + series_url = 'https://'+self.getSiteDomain()+'/'+link['href'] self.story.setMetadata('series', link.get_text()) self.story.setMetadata('seriesUrl', series_url) elif label == 'Prequel to:': - value = link.string + ' (' + 'http://'+self.getSiteDomain()+link['href'] + ')' + value = link.string + ' (' + 'https://'+self.getSiteDomain()+link['href'] + ')' self.story.setMetadata('prequelto', value) elif label == 'Sequel to:': - value = link.string + ' (' + 'http://'+self.getSiteDomain()+link['href'] + ')' + value = link.string + ' (' + 'https://'+self.getSiteDomain()+link['href'] + ')' self.story.setMetadata('sequelto', value) # Some stories have alot of text in the "summary", and I've tried to keep down on creating diff --git a/fanficfare/adapters/adapter_hlfictionnet.py b/fanficfare/adapters/adapter_hlfictionnet.py index 11192d61..c242af9a 100644 --- a/fanficfare/adapters/adapter_hlfictionnet.py +++ b/fanficfare/adapters/adapter_hlfictionnet.py @@ -47,7 
+47,7 @@ class HLFictionNetAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','hlf') @@ -63,10 +63,10 @@ class HLFictionNetAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): @@ -98,7 +98,7 @@ class HLFictionNetAdapter(BaseSiteAdapter): aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) self.story.setMetadata('authorId',aut['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+aut['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+aut['href']) self.story.setMetadata('author',aut.string) aut.extract() @@ -109,7 +109,7 @@ class HLFictionNetAdapter(BaseSiteAdapter): if chapters != None: for chapter in chapters.findAll('option'): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) else: self.chapterUrls.append((self.story.getMetadata('title'),url)) @@ -191,7 +191,7 @@ class HLFictionNetAdapter(BaseSiteAdapter): # Find Series name from series URL. 
a = list.find('a', href=re.compile(r"series.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/'+a['href'] + series_url = 'https://'+self.host+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_hpfanficarchivecom.py b/fanficfare/adapters/adapter_hpfanficarchivecom.py index 87039e50..121ee13d 100644 --- a/fanficfare/adapters/adapter_hpfanficarchivecom.py +++ b/fanficfare/adapters/adapter_hpfanficarchivecom.py @@ -47,7 +47,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','hpffa') @@ -63,7 +63,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234" def getSiteURLPattern(self): return r"https?:"+re.escape("//"+self.getSiteDomain()+"/stories/viewstory.php?sid=")+r"\d+$" @@ -103,13 +103,13 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. 
a = soup.find('div', id="mainpage").find('a', href=re.compile(r"viewuser.php\?uid=\d+")) self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/stories/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/stories/'+a['href']) self.story.setMetadata('author',a.string) # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/stories/'+chapter['href'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/stories/'+chapter['href'])) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -186,7 +186,7 @@ class HPFanficArchiveComAdapter(BaseSiteAdapter): # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/stories/'+a['href'] + series_url = 'https://'+self.host+'/stories/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_imagineeficcom.py b/fanficfare/adapters/adapter_imagineeficcom.py index a0f60343..f6945ead 100644 --- a/fanficfare/adapters/adapter_imagineeficcom.py +++ b/fanficfare/adapters/adapter_imagineeficcom.py @@ -47,7 +47,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. 
self.story.setMetadata('siteabbrev','ime') @@ -63,10 +63,10 @@ class ImagineEFicComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" ## Login seems to be reasonably standard across eFiction sites. def needToLoginCheck(self, data): @@ -89,7 +89,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter): params['cookiecheck'] = '1' params['submit'] = 'Submit' - loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' + loginUrl = 'https://' + self.getSiteDomain() + '/user.php?action=login' logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) @@ -171,13 +171,13 @@ class ImagineEFicComAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href']) self.story.setMetadata('author',a.string) # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -249,7 +249,7 @@ class ImagineEFicComAdapter(BaseSiteAdapter): # Find Series name from series URL. 
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/'+a['href'] + series_url = 'https://'+self.host+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_ksarchivecom.py b/fanficfare/adapters/adapter_ksarchivecom.py index 635fc5d3..2cb16b88 100644 --- a/fanficfare/adapters/adapter_ksarchivecom.py +++ b/fanficfare/adapters/adapter_ksarchivecom.py @@ -52,13 +52,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX # normalized story URL. # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. - self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','ksa') # XXX # The date format will vary from site to site. - # http://docs.python.org/library/datetime.html#strftime-strptime-behavior + # https://docs.python.org/library/datetime.html#strftime-strptime-behavior self.dateformat = "%b/%d/%Y" # XXX @classmethod @@ -72,10 +72,10 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" def getSiteURLPattern(self): - return "http://(www.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + return r"https?://(www.)?"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" ## Getting the chapter list and the meta data, plus 'is adult' checking. 
def extractChapterUrlsAndMetadata(self): @@ -160,13 +160,13 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX pagetitle = soup.find('div',id='pagetitle') for a in pagetitle.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")): self.story.addToList('authorId',a['href'].split('=')[1]) - self.story.addToList('authorUrl','http://'+self.host+'/'+a['href']) + self.story.addToList('authorUrl','https://'+self.host+'/'+a['href']) self.story.addToList('author',stripHTML(a)) # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -286,7 +286,7 @@ class KSArchiveComAdapter(BaseSiteAdapter): # XXX # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = stripHTML(a) - series_url = 'http://'+self.host+'/'+a['href'] + series_url = 'https://'+self.host+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_lightnovelgatecom.py b/fanficfare/adapters/adapter_lightnovelgatecom.py index 38817f4e..a75579cc 100644 --- a/fanficfare/adapters/adapter_lightnovelgatecom.py +++ b/fanficfare/adapters/adapter_lightnovelgatecom.py @@ -67,7 +67,7 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter): self.story.setMetadata('storyId', m.group('id')) # normalized story URL. 
- self._setURL("http://"+self.getSiteDomain() + self._setURL("https://"+self.getSiteDomain() +"/novel/"+self.story.getMetadata('storyId')) else: raise exceptions.InvalidStoryURL(url, @@ -80,11 +80,11 @@ class LightNovelGateSiteAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://lightnovelgate.com/novel/astoryname" + return "https://lightnovelgate.com/novel/astoryname" def getSiteURLPattern(self): # http://lightnovelgate.com/novel/stellar_transformation - return r"http://lightnovelgate\.com/novel/(?P<id>[^/]+)" + return r"https?://lightnovelgate\.com/novel/(?P<id>[^/]+)" def extractChapterUrlsAndMetadata(self): # fetch the chapter. From that we will get almost all the diff --git a/fanficfare/adapters/adapter_lotrgficcom.py b/fanficfare/adapters/adapter_lotrgficcom.py index e4957580..6d72828e 100644 --- a/fanficfare/adapters/adapter_lotrgficcom.py +++ b/fanficfare/adapters/adapter_lotrgficcom.py @@ -46,7 +46,7 @@ class LOTRgficComAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) @staticmethod @@ -55,10 +55,10 @@ class LOTRgficComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" def use_pagecache(self): ''' @@ -107,13 +107,13 @@ class LOTRgficComAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. 
a = div.find('a', href=re.compile(r"viewuser.php")) self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href']) self.story.setMetadata('author',a.string) # Find the chapters: for chapter in div.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -213,7 +213,7 @@ class LOTRgficComAdapter(BaseSiteAdapter): # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/'+a['href'] + series_url = 'https://'+self.host+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. 
seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_masseffect2in.py b/fanficfare/adapters/adapter_masseffect2in.py index c51f31f0..0380e052 100644 --- a/fanficfare/adapters/adapter_masseffect2in.py +++ b/fanficfare/adapters/adapter_masseffect2in.py @@ -157,7 +157,7 @@ class MassEffect2InAdapter(BaseSiteAdapter): self.story.extendList('author', [authorName]) authorId = chapter.getAuthorId() if authorId: - authorUrl = 'http://%s/index/%s' % (self.getSiteDomain(), authorId) + authorUrl = 'https://%s/index/%s' % (self.getSiteDomain(), authorId) else: authorId = u'' authorUrl = u'' @@ -280,7 +280,7 @@ class MassEffect2InAdapter(BaseSiteAdapter): @classmethod def _makeDocumentUrl(cls, documentId): """Make a chapter URL given a document ID.""" - return 'http://%s/publ/%s' % (cls.getSiteDomain(), documentId) + return 'https://%s/publ/%s' % (cls.getSiteDomain(), documentId) class Chapter(object): @@ -573,7 +573,7 @@ class Chapter(object): else: _logger.warning(u"No title found for rating label `%s'!" % label) # TODO: conduct a research on such abnormal URLs. 
- elif source == 'http://www.masseffect2.in/_fr/10/1360399.png': + elif '/_fr/10/1360399.png' in source: label = 'Nn' return { 'label': 'Nn', diff --git a/fanficfare/adapters/adapter_mcstoriescom.py b/fanficfare/adapters/adapter_mcstoriescom.py index 56b80239..c90bdabc 100644 --- a/fanficfare/adapters/adapter_mcstoriescom.py +++ b/fanficfare/adapters/adapter_mcstoriescom.py @@ -62,7 +62,7 @@ class MCStoriesComSiteAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://mcstories.com/StoryTitle http://mcstories.com/StoryTitle/index.html http://mcstories.com/StoryTitle/StoryTitle1.html" + return "https://mcstories.com/StoryTitle https://mcstories.com/StoryTitle/index.html https://mcstories.com/StoryTitle/StoryTitle1.html" def getSiteURLPattern(self): return r"https?://(www\.)?mcstories\.com/([a-zA-Z0-9_-]+)/" diff --git a/fanficfare/adapters/adapter_mediaminerorg.py b/fanficfare/adapters/adapter_mediaminerorg.py index 72fd2061..28c43134 100644 --- a/fanficfare/adapters/adapter_mediaminerorg.py +++ b/fanficfare/adapters/adapter_mediaminerorg.py @@ -60,7 +60,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): self.getSiteExampleURLs()) # normalized story URL. 
- self._setURL('http://' + self.getSiteDomain() + '/fanfic/s/'+cattitle+'/'+urltitle+'/'+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/fanfic/s/'+cattitle+'/'+urltitle+'/'+self.story.getMetadata('storyId')) else: raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), @@ -76,20 +76,20 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/fanfic/s/category-name/story-title/123456 http://"+cls.getSiteDomain()+"/fanfic/c/category-name/story-title/123456/987612" + return "https://"+cls.getSiteDomain()+"/fanfic/s/category-name/story-title/123456 https://"+cls.getSiteDomain()+"/fanfic/c/category-name/story-title/123456/987612" def getSiteURLPattern(self): ## old urls - ## http://www.mediaminer.org/fanfic/view_st.php/76882 + ## https://www.mediaminer.org/fanfic/view_st.php/76882 ## new urls - ## http://www.mediaminer.org/fanfic/s/ghosts-from-the-past/72 - ## http://www.mediaminer.org/fanfic/c/ghosts-from-the-past/chapter-2/72/174 - ## http://www.mediaminer.org/fanfic/s/robtech-final-missions/61553 - ## http://www.mediaminer.org/fanfic/c/robtech-final-missions/robotech-final-missions-oneshot/61553/189830 + ## https://www.mediaminer.org/fanfic/s/ghosts-from-the-past/72 + ## https://www.mediaminer.org/fanfic/c/ghosts-from-the-past/chapter-2/72/174 + ## https://www.mediaminer.org/fanfic/s/robtech-final-missions/61553 + ## https://www.mediaminer.org/fanfic/c/robtech-final-missions/robotech-final-missions-oneshot/61553/189830 ## even newer urls - ## http://www.mediaminer.org/fanfic/s/gundam-wing-fan-fiction/the-preventer-operatives/171000 - ## http://www.mediaminer.org/fanfic/c/gundam-wing-fan-fiction/the-preventer-operatives/171000/608822 - return re.escape("http://"+self.getSiteDomain())+r"/fanfic/"+\ + ## https://www.mediaminer.org/fanfic/s/gundam-wing-fan-fiction/the-preventer-operatives/171000 + ## 
https://www.mediaminer.org/fanfic/c/gundam-wing-fan-fiction/the-preventer-operatives/171000/608822 + return r"https?://"+re.escape(self.getSiteDomain())+r"/fanfic/"+\ r"((s/(?P[^/]+)/(?P[^/]+)/(?P\d+))|"+\ r"((c/(?P[^/]+)/(?P[^/]+)/(?P\d+))/\d+)|"+\ r"(s/(?P[^/]+)/(?P\d+))|"+\ @@ -138,7 +138,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. a = soup.find('a', href=re.compile(r"/user_info.php/\d+")) self.story.setMetadata('authorId',a['href'].split('/')[-1]) - self.story.setMetadata('authorUrl','http://'+self.host+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+a['href']) self.story.setMetadata('author',a.string) # save date from first for later. @@ -147,7 +147,7 @@ class MediaMinerOrgSiteAdapter(BaseSiteAdapter): # Find the chapters - one-shot now have chapter list, too. chap_p = soup.find('p',{'style':'margin-left:10px;'}) for (atag,aurl,name) in [ (x,x['href'],stripHTML(x)) for x in chap_p.find_all('a') ]: - self.chapterUrls.append((name,'http://'+self.host+aurl)) + self.chapterUrls.append((name,'https://'+self.host+aurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) diff --git a/fanficfare/adapters/adapter_ncisfictioncom.py b/fanficfare/adapters/adapter_ncisfictioncom.py index db06e225..974adbb7 100644 --- a/fanficfare/adapters/adapter_ncisfictioncom.py +++ b/fanficfare/adapters/adapter_ncisfictioncom.py @@ -20,6 +20,13 @@ from base_efiction_adapter import BaseEfictionAdapter class NCISFictionComAdapter(BaseEfictionAdapter): + @classmethod + def getProtocol(self): + """ + Some, but not all site now require https. 
+ """ + return "https" + @staticmethod def getSiteDomain(): return 'ncisfiction.com' diff --git a/fanficfare/adapters/adapter_phoenixsongnet.py b/fanficfare/adapters/adapter_phoenixsongnet.py index 170fd7ec..ad172c3d 100644 --- a/fanficfare/adapters/adapter_phoenixsongnet.py +++ b/fanficfare/adapters/adapter_phoenixsongnet.py @@ -46,7 +46,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/') + self._setURL('https://' + self.getSiteDomain() + '/fanfiction/story/' +self.story.getMetadata('storyId')+'/') # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','phs') @@ -62,10 +62,10 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/fanfiction/story/1234/" + return "https://"+cls.getSiteDomain()+"/fanfiction/story/1234/" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/fanfiction/story/")+r"\d+/?$" + return r"https?://"+re.escape(self.getSiteDomain()+"/fanfiction/story/")+r"\d+/?$" ## Login seems to be reasonably standard across eFiction sites. def needToLoginCheck(self, data): @@ -86,7 +86,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): #params['remember'] = '1' params['login'] = 'Login' - loginUrl = 'http://' + self.getSiteDomain() + '/users/processlogin.php' + loginUrl = 'https://' + self.getSiteDomain() + '/users/processlogin.php' logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['txtusername'])) d = self._fetchUrl(loginUrl, params) @@ -136,7 +136,7 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. 
/fanfiction/stories.php?psid=125 a = b.find('a', href=re.compile(r"/fanfiction/stories.php\?psid=\d+")) self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href']) self.story.setMetadata('author',a.string) # Find the chapters: @@ -152,17 +152,17 @@ class PhoenixSongNetAdapter(BaseSiteAdapter): i = 0 chapters = chapters.findAll('option') for chapter in chapters: - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['value'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+chapter['value'])) if i == 0: self.story.setMetadata('storyId',chapter['value'].split('/')[3]) - head = self.make_soup(self._fetchUrl('http://'+self.host+chapter['value'])).findAll('b') + head = self.make_soup(self._fetchUrl('https://'+self.host+chapter['value'])).findAll('b') for b in head: if b.text == "Updated": date = b.nextSibling.string.split(': ')[1].split(',') self.story.setMetadata('datePublished', makeDate(date[0]+date[1], self.dateformat)) if i == (len(chapters)-1): - head = self.make_soup(self._fetchUrl('http://'+self.host+chapter['value'])).findAll('b') + head = self.make_soup(self._fetchUrl('https://'+self.host+chapter['value'])).findAll('b') for b in head: if b.text == "Updated": date = b.nextSibling.string.split(': ')[1].split(',') diff --git a/fanficfare/adapters/adapter_potterficscom.py b/fanficfare/adapters/adapter_potterficscom.py index e6fe6c3f..70a62970 100644 --- a/fanficfare/adapters/adapter_potterficscom.py +++ b/fanficfare/adapters/adapter_potterficscom.py @@ -51,7 +51,7 @@ class PotterFicsComAdapter(BaseSiteAdapter): self.story.setMetadata('storyId',m.group('id')) # normalized story URL. 
gets rid of chapter if there, left with chapter index URL - nurl = "http://"+self.getSiteDomain()+"/historias/"+self.story.getMetadata('storyId') + nurl = "https://"+self.getSiteDomain()+"/historias/"+self.story.getMetadata('storyId') self._setURL(nurl) else: raise exceptions.InvalidStoryURL(url, @@ -69,15 +69,15 @@ class PotterFicsComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://www.potterfics.com/historias/12345 http://www.potterfics.com/historias/12345/capitulo-1 " + return "https://www.potterfics.com/historias/12345 https://www.potterfics.com/historias/12345/capitulo-1 " def getSiteURLPattern(self): - #http://www.potterfics.com/historias/127583 - #http://www.potterfics.com/historias/127583/capitulo-1 - #http://www.potterfics.com/historias/127583/capitulo-4 - #http://www.potterfics.com/historias/92810 -> Complete story - #http://www.potterfics.com/historias/111194 -> Complete, single chap - p = re.escape("http://"+self.getSiteDomain()+"/historias/")+\ + #https://www.potterfics.com/historias/127583 + #https://www.potterfics.com/historias/127583/capitulo-1 + #https://www.potterfics.com/historias/127583/capitulo-4 + #https://www.potterfics.com/historias/92810 -> Complete story + #https://www.potterfics.com/historias/111194 -> Complete, single chap + p = r"https?://"+re.escape(self.getSiteDomain()+"/historias/")+\ r"(?P\d+)(/capitulo-(?P\d+))?/?$" return p @@ -101,7 +101,7 @@ class PotterFicsComAdapter(BaseSiteAdapter): params['login_password'] = self.getConfig("password") params['login_ck'] = '1' - loginUrl = 'http://www.potterfics.com/secciones/usuarios/login.php' + loginUrl = 'https://www.potterfics.com/secciones/usuarios/login.php' logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['login_usuario'])) d = self._postUrl(loginUrl,params) @@ -117,10 +117,10 @@ class PotterFicsComAdapter(BaseSiteAdapter): def extractChapterUrlsAndMetadata(self): - #this converts '/historias/12345' to 
'http://www.potterfics.com/historias/12345' + #this converts '/historias/12345' to 'https://www.potterfics.com/historias/12345' def makeAbsoluteURL(url): if url[0] == '/': - url = 'http://'+self.getSiteDomain()+url + url = 'https://'+self.getSiteDomain()+url return url #use this to get month numbers from Spanish months diff --git a/fanficfare/adapters/adapter_pretendercentrecom.py b/fanficfare/adapters/adapter_pretendercentrecom.py index 1154236e..7c44c087 100644 --- a/fanficfare/adapters/adapter_pretendercentrecom.py +++ b/fanficfare/adapters/adapter_pretendercentrecom.py @@ -47,7 +47,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/missingpieces/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','ptdc') @@ -67,10 +67,10 @@ class PretenderCenterComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/missingpieces/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/missingpieces/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://")+"(www\.)?"+re.escape(self.getSiteDomain()+"/missingpieces/viewstory.php?sid=")+r"\d+$" + return r"https?://(www\.)?"+re.escape(self.getSiteDomain()+"/missingpieces/viewstory.php?sid=")+r"\d+$" ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): @@ -135,13 +135,13 @@ class PretenderCenterComAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. 
a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/missingpieces/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/missingpieces/'+a['href']) self.story.setMetadata('author',a.string) # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/missingpieces/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/missingpieces/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -213,7 +213,7 @@ class PretenderCenterComAdapter(BaseSiteAdapter): # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/missingpieces/'+a['href'] + series_url = 'https://'+self.host+'/missingpieces/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_qafficcom.py b/fanficfare/adapters/adapter_qafficcom.py index cde60e63..0609da7c 100644 --- a/fanficfare/adapters/adapter_qafficcom.py +++ b/fanficfare/adapters/adapter_qafficcom.py @@ -47,7 +47,7 @@ class QafFicComAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/atp/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. 
self.story.setMetadata('siteabbrev','atp') @@ -63,10 +63,10 @@ class QafFicComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/atp/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/atp/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/atp/viewstory.php?sid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/atp/viewstory.php?sid=")+r"\d+$" ## Getting the chapter list and the meta data, plus 'is adult' checking. @@ -130,7 +130,7 @@ class QafFicComAdapter(BaseSiteAdapter): aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) self.story.setMetadata('authorId',aut['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/atp/'+aut['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/atp/'+aut['href']) self.story.setMetadata('author',aut.string) aut.extract() @@ -141,7 +141,7 @@ class QafFicComAdapter(BaseSiteAdapter): if chapters != None: for chapter in chapters.findAll('option'): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/atp/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/atp/viewstory.php?sid='+self.story.getMetadata('storyId')+'&chapter='+chapter['value'])) else: self.chapterUrls.append((self.story.getMetadata('title'),url)) @@ -223,7 +223,7 @@ class QafFicComAdapter(BaseSiteAdapter): if list.find('a', href=re.compile(r"series.php")) != None: for series in asoup.findAll('a', href=re.compile(r"series.php\?seriesid=\d+")): # Find Series name from series URL. - series_url = 'http://'+self.host+'/atp/'+series['href'] + series_url = 'https://'+self.host+'/atp/'+series['href'] # use BeautifulSoup HTML parser to make everything easier to find. 
seriessoup = self.make_soup(self._fetchUrl(series_url)) storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$')) diff --git a/fanficfare/adapters/adapter_quotevcom.py b/fanficfare/adapters/adapter_quotevcom.py index f5b87aea..5175521a 100644 --- a/fanficfare/adapters/adapter_quotevcom.py +++ b/fanficfare/adapters/adapter_quotevcom.py @@ -10,7 +10,7 @@ from base_adapter import BaseSiteAdapter from ..htmlcleanup import stripHTML SITE_DOMAIN = 'quotev.com' -STORY_URL_TEMPLATE = 'http://www.quotev.com/story/%s' +STORY_URL_TEMPLATE = 'https://www.quotev.com/story/%s' def getClass(): @@ -41,8 +41,8 @@ class QuotevComAdapter(BaseSiteAdapter): def getSiteURLPattern(self): pattern = re.escape(STORY_URL_TEMPLATE.rsplit('%', 1)[0]) + r'(.+?)($|&|/)' - pattern = pattern.replace(r'http\:', r'https?\:') - pattern = pattern.replace(r'https?\:\/\/www\.', r'https?\:\/\/(www\.)?') + pattern = pattern.replace(r'https', r'https?') + pattern = pattern.replace(r'www\.', r'(www\.)?') return pattern def use_pagecache(self): @@ -75,7 +75,7 @@ class QuotevComAdapter(BaseSiteAdapter): self.story.addToList('authorUrl', urlparse.urljoin(self.url, a['href'])) if not self.story.getList('author'): self.story.addToList('author','Anonymous') - self.story.addToList('authorUrl','http://www.quotev.com') + self.story.addToList('authorUrl','https://www.quotev.com') self.story.addToList('authorId','0') self.setDescription(self.url, soup.find('div', id='qdesct')) diff --git a/fanficfare/adapters/adapter_royalroadl.py b/fanficfare/adapters/adapter_royalroadl.py index fa7138ba..cbb2e610 100644 --- a/fanficfare/adapters/adapter_royalroadl.py +++ b/fanficfare/adapters/adapter_royalroadl.py @@ -66,7 +66,7 @@ class RoyalRoadAdapter(BaseSiteAdapter): # normalized story URL. 
- self._setURL('http://' + self.getSiteDomain() + '/fiction/'+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/fiction/'+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','rylrdl') @@ -157,7 +157,7 @@ class RoyalRoadAdapter(BaseSiteAdapter): if author_link: authorId = author_link['href'].rsplit('/', 1)[1] self.story.setMetadata('authorId', authorId) - self.story.setMetadata('authorUrl','http://'+self.host+'/user/profile/'+authorId) + self.story.setMetadata('authorUrl','https://'+self.host+'/user/profile/'+authorId) self.story.setMetadata('author',soup.find(attrs=dict(property="books:author"))['content']) @@ -165,7 +165,7 @@ class RoyalRoadAdapter(BaseSiteAdapter): chapters = soup.find('table',{'id':'chapters'}).find('tbody') tds = [tr.findAll('td')[0] for tr in chapters.findAll('tr')] for td in tds: - chapterUrl = 'http://' + self.getSiteDomain() + td.a['href'] + chapterUrl = 'https://' + self.getSiteDomain() + td.a['href'] self.chapterUrls.append((stripHTML(td.text), chapterUrl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) diff --git a/fanficfare/adapters/adapter_sheppardweircom.py b/fanficfare/adapters/adapter_sheppardweircom.py index 75d168f2..7821cea8 100644 --- a/fanficfare/adapters/adapter_sheppardweircom.py +++ b/fanficfare/adapters/adapter_sheppardweircom.py @@ -71,7 +71,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX # normalized story URL. # XXX Most sites don't have the /fanfic part. Replace all to remove it usually. - self._setURL('http://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/fanfics/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. 
self.story.setMetadata('siteabbrev','swf') # XXX @@ -87,10 +87,10 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX @classmethod def getSiteExampleURLs(self): - return "http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=1234" + return "https://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/fanfics/viewstory.php?sid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/fanfics/viewstory.php?sid=")+r"\d+$" ## Login seems to be reasonably standard across eFiction sites. def needToLoginCheck(self, data): @@ -113,7 +113,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX params['cookiecheck'] = '1' params['submit'] = 'Submit' - loginUrl = 'http://' + self.getSiteDomain() + '/fanfics/user.php?action=login' + loginUrl = 'https://' + self.getSiteDomain() + '/fanfics/user.php?action=login' logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) @@ -182,7 +182,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX alist = soup.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")) for a in alist: self.story.addToList('authorId',a['href'].split('=')[1]) - self.story.addToList('authorUrl','http://'+self.host+'/fanfics/'+a['href']) + self.story.addToList('authorUrl','https://'+self.host+'/fanfics/'+a['href']) self.story.addToList('author',a.string) @@ -195,7 +195,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. 
- self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/fanfics/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/fanfics/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -279,7 +279,7 @@ class SheppardWeirComAdapter(BaseSiteAdapter): # XXX # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/fanfics/'+a['href'] + series_url = 'https://'+self.host+'/fanfics/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_spikeluvercom.py b/fanficfare/adapters/adapter_spikeluvercom.py index e516d24b..b02e60cf 100644 --- a/fanficfare/adapters/adapter_spikeluvercom.py +++ b/fanficfare/adapters/adapter_spikeluvercom.py @@ -28,7 +28,7 @@ class SpikeluverComAdapter(BaseSiteAdapter): SITE_ABBREVIATION = 'slc' SITE_DOMAIN = 'spikeluver.com' - BASE_URL = 'http://' + SITE_DOMAIN + '/SpuffyRealm/' + BASE_URL = 'https://' + SITE_DOMAIN + '/SpuffyRealm/' LOGIN_URL = BASE_URL + 'user.php?action=login' VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%d' METADATA_URL_SUFFIX = '&index=1' @@ -69,7 +69,7 @@ class SpikeluverComAdapter(BaseSiteAdapter): return cls.VIEW_STORY_URL_TEMPLATE % 1234 def getSiteURLPattern(self): - return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]) + r'\d+$' + return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$' def extractChapterUrlsAndMetadata(self): soup = self._customized_fetch_url(self.url + self.METADATA_URL_SUFFIX) diff --git a/fanficfare/adapters/adapter_thedelphicexpansecom.py b/fanficfare/adapters/adapter_thedelphicexpansecom.py index 55093146..4a063019 100644 --- a/fanficfare/adapters/adapter_thedelphicexpansecom.py +++ b/fanficfare/adapters/adapter_thedelphicexpansecom.py @@ -24,6 +24,13 
@@ class TheDelphicExpanseComAdapter(BaseEfictionAdapter): ''' This adapter will download stories from the 'Taste of Poison, the Fanfiction of Arsenic Jade' site ''' + @classmethod + def getProtocol(self): + """ + Some, but not all sites now require https. + """ + return "https" + @staticmethod + def getSiteDomain(): + return 'www.thedelphicexpanse.com' diff --git a/fanficfare/adapters/adapter_thehookupzonenet.py b/fanficfare/adapters/adapter_thehookupzonenet.py index 65cce295..78563fc3 100644 --- a/fanficfare/adapters/adapter_thehookupzonenet.py +++ b/fanficfare/adapters/adapter_thehookupzonenet.py @@ -20,6 +20,13 @@ from base_efiction_adapter import BaseEfictionAdapter class TheHookupZoneNetAdapter(BaseEfictionAdapter): + @classmethod + def getProtocol(self): + """ + Some, but not all sites now require https. + """ + return "https" + @staticmethod + def getSiteDomain(): + return 'thehookupzone.net' diff --git a/fanficfare/adapters/adapter_thundercatsfansorg.py b/fanficfare/adapters/adapter_thundercatsfansorg.py index bb2630b8..3b31a201 100644 --- a/fanficfare/adapters/adapter_thundercatsfansorg.py +++ b/fanficfare/adapters/adapter_thundercatsfansorg.py @@ -22,6 +22,13 @@ from base_efiction_adapter import BaseEfictionAdapter class ThundercatsFansOrgSiteAdapter(BaseEfictionAdapter): + @classmethod + def getProtocol(self): + """ + Some, but not all sites now require https. + """ + return "https" + @staticmethod + def getSiteDomain(): + return 'www.thundercatsfans.org' diff --git a/fanficfare/adapters/adapter_trekiverseorg.py b/fanficfare/adapters/adapter_trekiverseorg.py index 29886486..80bd915c 100644 --- a/fanficfare/adapters/adapter_trekiverseorg.py +++ b/fanficfare/adapters/adapter_trekiverseorg.py @@ -45,7 +45,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter): self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) # normalized story URL.
- self._setURL("http://"+self.getSiteDomain()\ + self._setURL("https://"+self.getSiteDomain()\ +"/efiction/viewstory.php?sid="+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. @@ -66,10 +66,10 @@ class TrekiverseOrgAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234 http://efiction."+cls.getSiteDomain()+"/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/efiction/viewstory.php?sid=1234 https://efiction."+cls.getSiteDomain()+"/viewstory.php?sid=1234" def getSiteURLPattern(self): - return r'(http://trekiverse\.org/efiction/viewstory\.php\?sid=\d+|http://efiction\.trekiverse\.org/viewstory\.php\?sid=\d+)' + return r'(https?://trekiverse\.org/efiction/viewstory\.php\?sid=\d+|https?://efiction\.trekiverse\.org/viewstory\.php\?sid=\d+)' ## Login seems to be reasonably standard across eFiction sites. def needToLoginCheck(self, data): @@ -92,7 +92,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter): params['cookiecheck'] = '1' params['submit'] = 'Submit' - loginUrl = 'http://' + self.getSiteDomain() + '/efiction/user.php?action=login' + loginUrl = 'https://' + self.getSiteDomain() + '/efiction/user.php?action=login' logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['penname'])) @@ -171,7 +171,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter): a = soup.find('div', {'id' : 'pagetitle'}) aut = a.find('a', href=re.compile(r"^viewuser\.php\?uid=")) self.story.setMetadata('authorId',aut['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/efiction/'+aut['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/efiction/'+aut['href']) self.story.setMetadata('author',aut.string) ttl = a.find('a', href=re.compile(r'^viewstory.php\?sid=%s$'%self.story.getMetadata('storyId'))) @@ -185,11 +185,11 @@ class TrekiverseOrgAdapter(BaseSiteAdapter): if len(chapters)==0: raise 
exceptions.FailedToDownload(self.getSiteDomain() +" says: No php/html chapters found.") if len(chapters)==1: - self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/efiction/'+chapters[0]['href'])) + self.chapterUrls.append((self.story.getMetadata('title'),'https://'+self.host+'/efiction/'+chapters[0]['href'])) else: for chapter in chapters: # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/efiction/'+chapter['href'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/efiction/'+chapter['href'])) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -282,7 +282,7 @@ class TrekiverseOrgAdapter(BaseSiteAdapter): # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/efiction/'+a['href'] + series_url = 'https://'+self.host+'/efiction/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. 
seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_voracity2eficcom.py b/fanficfare/adapters/adapter_voracity2eficcom.py index ebf40fab..06e13335 100644 --- a/fanficfare/adapters/adapter_voracity2eficcom.py +++ b/fanficfare/adapters/adapter_voracity2eficcom.py @@ -27,7 +27,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter): SITE_ABBREVIATION = 'voe' SITE_DOMAIN = 'voracity2.e-fic.com' - BASE_URL = 'http://' + SITE_DOMAIN + '/' + BASE_URL = 'https://' + SITE_DOMAIN + '/' LOGIN_URL = BASE_URL + 'user.php?action=login' VIEW_STORY_URL_TEMPLATE = BASE_URL + 'viewstory.php?sid=%d' METADATA_URL_SUFFIX = '&index=1' @@ -98,7 +98,7 @@ class Voracity2EficComAdapter(BaseSiteAdapter): return cls.VIEW_STORY_URL_TEMPLATE % 1234 def getSiteURLPattern(self): - return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]) + r'\d+$' + return re.escape(self.VIEW_STORY_URL_TEMPLATE[:-2]).replace('https','https?') + r'\d+$' def extractChapterUrlsAndMetadata(self): soup = self._customized_fetch_url(self.url + self.METADATA_URL_SUFFIX) diff --git a/fanficfare/adapters/adapter_wolverineandroguecom.py b/fanficfare/adapters/adapter_wolverineandroguecom.py index 9c7ff871..e569ae90 100644 --- a/fanficfare/adapters/adapter_wolverineandroguecom.py +++ b/fanficfare/adapters/adapter_wolverineandroguecom.py @@ -46,7 +46,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/wrfa/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. 
self.story.setMetadata('siteabbrev','wrfa') @@ -62,10 +62,10 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/wrfa/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/wrfa/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/wrfa/viewstory.php?sid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/wrfa/viewstory.php?sid=")+r"\d+$" ## Getting the chapter list and the meta data, plus 'is adult' checking. @@ -101,7 +101,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): # Find authorid and URL from... author url. a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+")) self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','http://'+self.host+'/wrfa/'+a['href']) + self.story.setMetadata('authorUrl','https://'+self.host+'/wrfa/'+a['href']) self.story.setMetadata('author',a.string) rating=pt.text.split('(')[1].split(')')[0] @@ -110,7 +110,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): # Find the chapters: for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")): # just in case there's tags, like in chapter titles. - self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/wrfa/'+chapter['href'])) + self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/wrfa/'+chapter['href'])) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -178,7 +178,7 @@ class WolverineAndRogueComAdapter(BaseSiteAdapter): # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/wrfa/'+a['href'] + series_url = 'https://'+self.host+'/wrfa/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. 
seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_wraithbaitcom.py b/fanficfare/adapters/adapter_wraithbaitcom.py index 9a9ddc94..6884e22c 100644 --- a/fanficfare/adapters/adapter_wraithbaitcom.py +++ b/fanficfare/adapters/adapter_wraithbaitcom.py @@ -47,7 +47,7 @@ class WraithBaitComAdapter(BaseSiteAdapter): - self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) + self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId')) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','wb') @@ -63,10 +63,10 @@ class WraithBaitComAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" + return "https://"+cls.getSiteDomain()+"/viewstory.php?sid=1234" def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" + return r"https?://"+re.escape(self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$" ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): @@ -114,7 +114,7 @@ class WraithBaitComAdapter(BaseSiteAdapter): alist = pt.findAll('a', href=re.compile(r"viewuser.php\?uid=\d+")) for a in alist: self.story.addToList('authorId',a['href'].split('=')[1]) - self.story.addToList('authorUrl','http://'+self.host+'/'+a['href']) + self.story.addToList('authorUrl','https://'+self.host+'/'+a['href']) self.story.addToList('author',a.string) rating=pt.text.split('[')[1].split(']')[0] @@ -133,7 +133,7 @@ class WraithBaitComAdapter(BaseSiteAdapter): else: add = "" # just in case there's tags, like in chapter titles. 
- self.chapterUrls.append((stripHTML(chapter)+add,'http://'+self.host+'/'+chapter['href']+addurl)) + self.chapterUrls.append((stripHTML(chapter)+add,'https://'+self.host+'/'+chapter['href']+addurl)) self.story.setMetadata('numChapters',len(self.chapterUrls)) @@ -193,7 +193,7 @@ class WraithBaitComAdapter(BaseSiteAdapter): # Find Series name from series URL. a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+")) series_name = a.string - series_url = 'http://'+self.host+'/'+a['href'] + series_url = 'https://'+self.host+'/'+a['href'] # use BeautifulSoup HTML parser to make everything easier to find. seriessoup = self.make_soup(self._fetchUrl(series_url)) diff --git a/fanficfare/adapters/adapter_wuxiaworldcom.py b/fanficfare/adapters/adapter_wuxiaworldcom.py index e403f061..61060640 100644 --- a/fanficfare/adapters/adapter_wuxiaworldcom.py +++ b/fanficfare/adapters/adapter_wuxiaworldcom.py @@ -46,7 +46,7 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter): story_id = match.group('id') self.story.setMetadata('storyId', story_id) - self._setURL('http://%s/novel/%s' % (self.getSiteDomain(), story_id)) + self._setURL('https://%s/novel/%s' % (self.getSiteDomain(), story_id)) @staticmethod def getSiteDomain(): @@ -54,10 +54,10 @@ class WuxiaWorldComSiteAdapter(BaseSiteAdapter): @classmethod def getSiteExampleURLs(cls): - return 'http://%s/novel/story-name' % cls.getSiteDomain() + return 'https://%s/novel/story-name' % cls.getSiteDomain() def getSiteURLPattern(self): - return r'http(s)?://%s/novel/(?P[^/]+)(/)?' % re.escape(self.getSiteDomain()) + return r'https?://%s/novel/(?P[^/]+)(/)?' % re.escape(self.getSiteDomain()) def use_pagecache(self): return True