From 3e49134cf6750660f28418f0aa27a502fbfeeb45 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Fri, 4 May 2018 12:08:51 -0500 Subject: [PATCH] Remove defunct sites thealphagate.com and harrypotterfanfiction.com --- calibre-plugin/plugin-defaults.ini | 17 -- fanficfare/adapters/__init__.py | 2 - .../adapter_harrypotterfanfictioncom.py | 202 ----------------- .../adapters/adapter_thealphagatecom.py | 209 ------------------ fanficfare/defaults.ini | 17 -- 5 files changed, 447 deletions(-) delete mode 100644 fanficfare/adapters/adapter_harrypotterfanfictioncom.py delete mode 100644 fanficfare/adapters/adapter_thealphagatecom.py diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index 47bb502a..d81d362b 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -2558,17 +2558,6 @@ add_to_titlepage_entries:,growth, shrink, sizeroles #username:YourName #password:yourpassword -[www.harrypotterfanfiction.com] -## Some sites do not require a login, but do require the user to -## confirm they are adult for adult content. In commandline version, -## this should go in your personal.ini, not defaults.ini. -#is_adult:true - -## Site dedicated to these categories/characters/ships -extracategories:Harry Potter - -website_encodings:Windows-1252,utf8 - [www.hpfanficarchive.com] ## Site dedicated to these categories/characters/ships extracategories:Harry Potter @@ -2843,12 +2832,6 @@ extracategories:Lord of the Rings website_encodings:Windows-1252,utf8 -[www.thealphagate.com] -## Site dedicated to these categories/characters/ships -extracategories:Stargate: SG-1 - -website_encodings:Windows-1252,utf8 - [www.thedelphicexpanse.com] ## Site dedicated to these categories/characters/ships extracategories:Star Trek: Enterprise diff --git a/fanficfare/adapters/__init__.py b/fanficfare/adapters/__init__.py index c740054f..89c53fbc 100644 --- a/fanficfare/adapters/__init__.py +++ b/fanficfare/adapters/__init__.py @@ -34,7 +34,6 @@ import adapter_fictionalleyorg import adapter_fictionpresscom import adapter_ficwadcom import adapter_fimfictionnet -import adapter_harrypotterfanfictioncom import adapter_mediaminerorg import adapter_potionsandsnitches import adapter_tenhawkpresentscom @@ -66,7 +65,6 @@ import adapter_iketernalnet import adapter_storiesofardacom import adapter_destinysgatewaycom import adapter_ncisfictioncom -import adapter_thealphagatecom import adapter_fanfiktionde import adapter_ponyfictionarchivenet import adapter_ncisficcom diff --git a/fanficfare/adapters/adapter_harrypotterfanfictioncom.py b/fanficfare/adapters/adapter_harrypotterfanfictioncom.py deleted file mode 100644 index d0ac0b6a..00000000 --- a/fanficfare/adapters/adapter_harrypotterfanfictioncom.py +++ /dev/null @@ -1,202 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Software: eFiction -import time -import logging -logger = logging.getLogger(__name__) -import re -import urllib -import urllib2 - - -from ..htmlcleanup import stripHTML -from .. import exceptions as exceptions - -from base_adapter import BaseSiteAdapter, makeDate - -class HarryPotterFanFictionComSiteAdapter(BaseSiteAdapter): - - def __init__(self, config, url): - BaseSiteAdapter.__init__(self, config, url) - self.story.setMetadata('siteabbrev','hp') - self.is_adult=False - - # get storyId from url--url validation guarantees query is only psid=1234 - self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - - - # normalized story URL. - self._setURL('https://' + self.getSiteDomain() + '/viewstory.php?psid='+self.story.getMetadata('storyId')) - - - @staticmethod - def getSiteDomain(): - return 'www.harrypotterfanfiction.com' - - @classmethod - def getAcceptDomains(cls): - return ['www.harrypotterfanfiction.com','harrypotterfanfiction.com'] - - @classmethod - def getSiteExampleURLs(cls): - return "http://www.harrypotterfanfiction.com/viewstory.php?psid=1234" - - def getSiteURLPattern(self): - return r"https?"+re.escape("://")+r"(www\.)?"+re.escape("harrypotterfanfiction.com/viewstory.php?psid=")+r"\d+$" - - def needToLoginCheck(self, data): - if 'Registered Users Only' in data \ - or 'There is no such account on our website' in data \ - or "That password doesn't match the one in our database" in data: - return True - else: - return False - - def extractChapterUrlsAndMetadata(self): - - url = self.url+'&index=1' - logger.debug("URL: "+url) - - try: - data = self._fetchUrl(url) - except urllib2.HTTPError, e: - if e.code == 404: - raise exceptions.StoryDoesNotExist(self.url) - else: - raise e - - if "Access denied. This story has not been validated by the adminstrators of this site." in data: - raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") - elif "ERROR locating story meta for psid" in data: - raise exceptions.StoryDoesNotExist(self.url) - - # use BeautifulSoup HTML parser to make everything easier to find. - soup = self.make_soup(data) - - ## Title - a = soup.find('a', href=re.compile(r'\?psid='+self.story.getMetadata('storyId'))) - self.story.setMetadata('title',stripHTML(a)) - ## javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?psid=290995' - if "This story may contain adult themes." in a['href'] and not (self.is_adult or self.getConfig("is_adult")): - raise exceptions.AdultCheckRequired(self.url) - - - # Find authorid and URL from... author url. - a = soup.find('a', href=re.compile(r"viewuser.php\?showuid=\d+")) - self.story.setMetadata('authorId',a['href'].split('=')[1]) - self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href']) - self.story.setMetadata('author',a.string) - - ## hpcom doesn't give us total words--but it does give - ## us words/chapter. I'd rather add than fetch and - ## parse another page. - words=0 - for tr in soup.find('table',{'class':'text'}).findAll('tr'): - tdstr = tr.findAll('td')[2].string - if tdstr and tdstr.isdigit(): - words+=int(tdstr) - self.story.setMetadata('numWords',unicode(words)) - - # Find the chapters: - tablelist = soup.find('table',{'class':'text'}) - for chapter in tablelist.findAll('a', href=re.compile(r'\?chapterid=\d+')): - #javascript:if (confirm('Please note. This story may contain adult themes. By clicking here you are stating that you are over 17. Click cancel if you do not meet this requirement.')) location = '?chapterid=433441&i=1' - # just in case there's tags, like in chapter titles. - chpt=re.sub(r'^.*?(\?chapterid=\d+).*?',r'\1',chapter['href']) - self.chapterUrls.append((stripHTML(chapter),'https://'+self.host+'/viewstory.php'+chpt)) - - self.story.setMetadata('numChapters',len(self.chapterUrls)) - - ## Finding the metadata is a bit of a pain. Desc is the only thing this color. - desctable= soup.find('table',{'bgcolor':'#f0e8e8'}) - self.setDescription(url,desctable) - #self.story.setMetadata('description',stripHTML(desctable)) - - ## Finding the metadata is a bit of a pain. Most of the meta - ## data is in a center.table without a bgcolor. - #for center in soup.findAll('center'): - table = soup.find('table',{'class':'storymaininfo'}) - if table: - metastr = stripHTML(unicode(table)).replace('\n',' ').replace('\t',' ') - - m = re.match(r".*?Status: Completed.*?",metastr) - if m: - self.story.setMetadata('status','Completed') - else: - self.story.setMetadata('status','In-Progress') - - m = re.match(r".*?Rating: (.+?)Story",metastr) - if m: - self.story.setMetadata('rating', m.group(1)) - - m = re.match(r".*?Genre\(s\): (.+?) Era.*?",metastr) - if m: - for g in m.group(1).split(','): - self.story.addToList('genre',g) - - m = re.match(r".*?Characters: (.+?) Genre.*?",metastr) - if m: - for g in m.group(1).split(','): - self.story.addToList('characters',g) - - m = re.match(r".*?Pairings: (.+?) +Status",metastr) - if m: - for g in m.group(1).split(','): - self.story.addToList('ships',g) - - m = re.match(r".*?(Warnings|Advisory): (.+).*?",metastr) - if m: - for w in m.group(2).split(','): - if w != 'Now Warnings': - self.story.addToList('warnings',w) - - m = re.match(r".*?First Published: ([0-9\.]+).*?",metastr) - if m: - self.story.setMetadata('datePublished',makeDate(m.group(1), "%Y.%m.%d")) - - # Updated can have more than one space after it. - m = re.match(r".*?Last Updated: ([0-9\.]+).*?",metastr) - if m: - self.story.setMetadata('dateUpdated',makeDate(m.group(1), "%Y.%m.%d")) - - def getChapterText(self, url): - - logger.debug('Getting chapter text from: %s' % url) - - data = self._fetchUrl(url) - - try: - # remove everything after here--the site's chapters break - # the BS4 parser. - data = data[:data.index('