diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index b32e3f92..6cee3b18 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -1193,14 +1193,41 @@ dislikes_label:Dislikes ## program can prompt you, or you can save it in config. In ## commandline version, this should go in your personal.ini, not ## defaults.ini. -#username:YourName +## finestories.com has started requiring login by email rather than +## pen name. +#username:youremail@yourdomain.dom #password:yourpassword -# shows size as "10 KB", not word count -extra_valid_entries:size +## Clear FanFiction from defaults, site is original fiction. +extratags: -# don't show twitter icon. -cover_exclusion_regexp:/res/css/bir.png +extra_valid_entries:size,universe,universeUrl,universeHTML,sitetags,notice,codes,score +#extra_titlepage_entries:size,universeHTML,sitetags,notice,score +include_in_codes:sitetags + +## adds to include_subject_tags instead of replacing it. +#extra_subject_tags:sitetags + +size_label:Size +universe_label:Universe +universeUrl_label:Universe URL +universeHTML_label:Universe +sitetags_label:Site Tags +notice_label:Notice +score_label:Score + +## Assume entryUrl, apply to "<a class='%slink' href='%s'>%s</a>" to +## make entryHTML. +make_linkhtml_entries:universe + +## storiesonline.net stories can be in a series or a universe, but not +## both. By default, universe will be populated in 'series' with +## index=0 +universe_as_series: true + +## some sites include images that we don't ever want becoming the +## cover image. This lets you exclude them. +cover_exclusion_regexp:/css/bir.png [forums.spacebattles.com] ## see [base_xenforoforum] diff --git a/fanficfare/adapters/adapter_finestoriescom.py b/fanficfare/adapters/adapter_finestoriescom.py index fbb01f94..af04741d 100644 --- a/fanficfare/adapters/adapter_finestoriescom.py +++ b/fanficfare/adapters/adapter_finestoriescom.py @@ -15,279 +15,32 @@ # limitations under the License. 
# -import time import logging logger = logging.getLogger(__name__) -import re -import urllib2 - - -from ..htmlcleanup import stripHTML -from .. import exceptions as exceptions - -from base_adapter import BaseSiteAdapter, makeDate +from adapter_storiesonlinenet import StoriesOnlineNetAdapter def getClass(): return FineStoriesComAdapter # Class name has to be unique. Our convention is camel case the # sitename with Adapter at the end. www is skipped. -class FineStoriesComAdapter(BaseSiteAdapter): +class FineStoriesComAdapter(StoriesOnlineNetAdapter): - def __init__(self, config, url): - BaseSiteAdapter.__init__(self, config, url) - - self.username = "NoneGiven" # if left empty, site doesn't return any message at all. - self.password = "" - self.is_adult=False - - # get storyId from url - self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2].split(':')[0]) - if 'storyInfo' in self.story.getMetadata('storyId'): - self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1]) - - # normalized story URL. - self._setURL('http://' + self.getSiteDomain() + '/s/storyInfo.php?id='+self.story.getMetadata('storyId')) - - # Each adapter needs to have a unique site abbreviation. - self.story.setMetadata('siteabbrev','fnst') - - # The date format will vary from site to site. - # http://docs.python.org/library/datetime.html#strftime-strptime-behavior - self.dateformat = "%Y-%m-%d" + @classmethod + def getSiteAbbrev(self): + return 'fnst' @staticmethod # must be @staticmethod, don't remove it. def getSiteDomain(): # The site domain. Does have www here, if it uses it. 
return 'finestories.com' - @classmethod - def getSiteExampleURLs(cls): - return "http://"+cls.getSiteDomain()+"/s/1234 http://"+cls.getSiteDomain()+"/s/1234:4010 http://"+cls.getSiteDomain()+"/library/storyInfo.php?id=1234" - - def getSiteURLPattern(self): - return re.escape("http://"+self.getSiteDomain())+r"/(s|library)?/(storyInfo.php\?id=)?\d+(:\d+)?(;\d+)?$" - ## Login seems to be reasonably standard across eFiction sites. def needToLoginCheck(self, data): if 'Free Registration' in data \ + or "Log In" in data \ or "Invalid Password!" in data \ or "Invalid User Name!" in data: return True else: return False - - def performLogin(self, url): - params = {} - - if self.password: - params['theusername'] = self.username - params['thepassword'] = self.password - else: - params['theusername'] = self.getConfig("username") - params['thepassword'] = self.getConfig("password") - params['rememberMe'] = '1' - params['page'] = 'http://'+self.getSiteDomain()+'/' - params['submit'] = 'Login' - - loginUrl = 'http://' + self.getSiteDomain() + '/login.php' - logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, - params['theusername'])) - - d = self._fetchUrl(loginUrl, params) - - if "My Account" not in d : #Member Account - logger.info("Failed to login to URL %s as %s" % (loginUrl, - params['theusername'])) - raise exceptions.FailedToLogin(url,params['theusername']) - return False - else: - return True - - ## Getting the chapter list and the meta data, plus 'is adult' checking. - def extractChapterUrlsAndMetadata(self): - - # index=1 makes sure we see the story chapter index. Some - # sites skip that for one-chapter stories. - url = self.url - logger.debug("URL: "+url) - - try: - data = self._fetchUrl(url) - except urllib2.HTTPError, e: - if e.code == 404: - raise exceptions.StoryDoesNotExist(self.url) - else: - raise e - - if self.needToLoginCheck(data): - # need to log in for this one. - self.performLogin(url) - data = self._fetchUrl(url) - - if "Access denied. 
This story has not been validated by the adminstrators of this site." in data: - raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.") - - # use BeautifulSoup HTML parser to make everything easier to find. - soup = self.make_soup(data) - # print data - - # Now go hunting for all the meta data and the chapter list. - - ## Title - a = soup.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId'))) - self.story.setMetadata('title',stripHTML(a)) - - # Find authorid and URL from... author url. - a = soup.find('a', href=re.compile(r"/a/\w+")) - self.story.setMetadata('authorId',a['href'].split('/')[2]) - self.story.setMetadata('authorUrl','http://'+self.host+a['href']) - self.story.setMetadata('author',a.text) - - # Find the chapters: - chapters = soup.findAll('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId')+":\d+$")) - if len(chapters) != 0: - for chapter in chapters: - # just in case there's tags, like in chapter titles. 
- self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+chapter['href'])) - else: - self.chapterUrls.append((self.story.getMetadata('title'),'http://'+self.host+'/s/'+self.story.getMetadata('storyId'))) - - self.story.setMetadata('numChapters',len(self.chapterUrls)) - - # surprisingly, the detailed page does not give enough details, so go to author's page - - skip=0 - i=0 - while i == 0: - asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')+"&skip="+unicode(skip))) - - tds = asoup.findAll('td', {'class' : 'lc2'}) - for lc2 in tds: - if lc2.find('a', href=re.compile(r'/s/'+self.story.getMetadata('storyId'))): - i=1 - break - if tds[len(tds)-1] == lc2: - skip=skip+10 - - for cat in lc2.findAll('div', {'class' : 'typediv'}): - self.story.addToList('category',cat.text) - - self.story.setMetadata('size', lc2.findNext('td', {'class' : 'num'}).text) - - lc4 = lc2.findNext('td', {'class' : 'lc4'}) - - try: - a = lc4.find('a', href=re.compile(r"/library/show_series.php\?id=\d+")) - i = a.parent.text.split('(')[1].split(')')[0] - self.setSeries(a.text, i) - self.story.setMetadata('seriesUrl','http://'+self.host+a['href']) - except: - pass - try: - a = lc4.find('a', href=re.compile(r"/library/universe.php\?id=\d+")) - self.story.addToList("category",a.text) - except: - pass - - for a in lc4.findAll('span', {'class' : 'help'}) + lc4.findAll('script'): - a.extract() - - self.setDescription('http://'+self.host+'/s/'+self.story.getMetadata('storyId'),lc4.text.split('[More Info')[0]) - - for b in lc4.findAll('b'): - label = b.text - value = b.nextSibling - - if 'For Age' in label: - self.story.setMetadata('rating', value) - - if 'Tags' in label: - for genre in value.split(', '): - self.story.addToList('genre',genre) - - ## Site uses a