diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini
index 417cc848..737320a2 100644
--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@@ -1093,6 +1093,21 @@ slow_down_sleep_time:2
## Author'. authorUrl will still point to the Archivist Author's page.
#use_archived_author:false
+[ashwinder.sycophanthex.com]
+## Some sites require login (or login for some rated stories) The
+## program can prompt you, or you can save it in config. In
+## commandline version, this should go in your personal.ini, not
+## defaults.ini.
+#username:YourName
+#password:yourpassword
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+extracharacters:Severus Snape,Hermione Granger
+extraships:Severus Snape/Hermione Granger
+
+website_encodings:Windows-1252,utf8
+
[asr3.slashzone.org]
## Site dedicated to these categories/characters/ships
extracategories:The Sentinel
@@ -1169,6 +1184,18 @@ extracharacters:Buffy,Giles
website_encodings:Windows-1252,utf8
+[chaos.sycophanthex.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## some sites include images that we don't ever want becoming the
+## cover image. This lets you exclude them.
+cover_exclusion_regexp:/images/.*?ribbon.gif
+
+website_encodings:Windows-1252,utf8
+
[chosentwofanfic.com]
extra_valid_entries:disclaimer
disclaimer_label: Disclaimer
@@ -1270,6 +1297,21 @@ extracategories:Lord of the Rings
website_encodings:Windows-1252,utf8
+[erosnsappho.sycophanthex.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+
+## some sites include images that we don't ever want becoming the
+## cover image. This lets you exclude them.
+cover_exclusion_regexp:/images/.*?ribbon.gif
+
+website_encodings:Windows-1252,utf8
+
[fanfic.castletv.net]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@@ -1640,6 +1682,17 @@ readings_label: Readings
## Site dedicated to these categories/characters/ships
extracategories:Lord of the Rings
+[lumos.sycophanthex.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+
+website_encodings:Windows-1252,utf8
+
[mcstories.com]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In commandline version,
@@ -1759,6 +1812,20 @@ extracategories:Star Wars
extracharacters:Obi-Wan Kenobi,Padmé Amidala
extraships:Obi-Wan Kenobi/Padmé Amidala
+[occlumency.sycophanthex.com]
+## Some sites require login (or login for some rated stories) The
+## program can prompt you, or you can save it in config. In
+## commandline version, this should go in your personal.ini, not
+## defaults.ini.
+#username:YourName
+#password:yourpassword
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+extracharacters:Severus Snape
+
+website_encodings:Windows-1252,utf8
+
[ponyfictionarchive.net]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In commandline version,
diff --git a/fanficfare/adapters/__init__.py b/fanficfare/adapters/__init__.py
index a67d7576..826f3c07 100644
--- a/fanficfare/adapters/__init__.py
+++ b/fanficfare/adapters/__init__.py
@@ -54,6 +54,11 @@ import adapter_squidgeorgpeja
import adapter_libraryofmoriacom
import adapter_wraithbaitcom
import adapter_dramioneorg
+import adapter_ashwindersycophanthexcom
+import adapter_chaossycophanthexcom
+import adapter_erosnsapphosycophanthexcom
+import adapter_lumossycophanthexcom
+import adapter_occlumencysycophanthexcom
import adapter_phoenixsongnet
import adapter_walkingtheplankorg
import adapter_dokugacom
diff --git a/fanficfare/adapters/adapter_ashwindersycophanthexcom.py b/fanficfare/adapters/adapter_ashwindersycophanthexcom.py
new file mode 100644
index 00000000..3634cf7b
--- /dev/null
+++ b/fanficfare/adapters/adapter_ashwindersycophanthexcom.py
@@ -0,0 +1,252 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Software: eFiction
+import time
+import logging
+logger = logging.getLogger(__name__)
+import re
+import urllib2
+
+
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, makeDate
+
+def getClass():
+ return AshwinderSycophantHexComAdapter
+
+# Class name has to be unique. Our convention is camel case the
+# sitename with Adapter at the end. www is skipped.
+class AshwinderSycophantHexComAdapter(BaseSiteAdapter):
+
+ def __init__(self, config, url):
+ BaseSiteAdapter.__init__(self, config, url)
+
+ self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+ self.password = ""
+ self.is_adult=False
+
+ # get storyId from url--url validation guarantees query is only sid=1234
+ self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+
+
+
+ # normalized story URL.
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+ # Each adapter needs to have a unique site abbreviation.
+ self.story.setMetadata('siteabbrev','asph')
+
+ # The date format will vary from site to site.
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
+ self.dateformat = "%m/%d/%Y"
+
+ @staticmethod # must be @staticmethod, don't remove it.
+ def getSiteDomain():
+ # The site domain. Does have www here, if it uses it.
+ return 'ashwinder.sycophanthex.com'
+
+ @classmethod
+ def getSiteExampleURLs(cls):
+ return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
+
+ def getSiteURLPattern(self):
+ return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+ ## Login seems to be reasonably standard across eFiction sites.
+ def needToLoginCheck(self, data):
+ if 'This story contains adult content and/or themes.' in data \
+ or "That password doesn't match the one in our database" in data \
+ or "Member Login" in data:
+ return True
+ else:
+ return False
+
+ def performLogin(self, url):
+ params = {}
+
+ if self.password:
+ params['penname'] = self.username
+ params['password'] = self.password
+ else:
+ params['penname'] = self.getConfig("username")
+ params['password'] = self.getConfig("password")
+ params['rememberme'] = '1'
+ params['sid'] = ''
+ params['intent'] = ''
+ params['submit'] = 'Submit'
+
+ loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ params['penname']))
+
+ d = self._fetchUrl(loginUrl, params)
+
+ if "Logout" not in d : #Member Account
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
+ params['penname']))
+ raise exceptions.FailedToLogin(url,params['penname'])
+ return False
+ else:
+ return True
+
+ ## Getting the chapter list and the meta data, plus 'is adult' checking.
+ def extractChapterUrlsAndMetadata(self):
+ # index=1 makes sure we see the story chapter index. Some
+ # sites skip that for one-chapter stories.
+ url = self.url
+ logger.debug("URL: "+url)
+
+ try:
+ data = self._fetchUrl(url)
+ except urllib2.HTTPError, e:
+ if e.code == 404:
+ raise exceptions.StoryDoesNotExist(self.url)
+ else:
+ raise e
+
+ if self.needToLoginCheck(data):
+ # need to log in for this one.
+ self.performLogin(url)
+ data = self._fetchUrl(url)
+
+ if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+ raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ soup = self.make_soup(data)
+ # print data
+
+ # Now go hunting for all the meta data and the chapter list.
+
+ # Find authorid and URL from... author url.
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+ self.story.setMetadata('authorId',a['href'].split('=')[1])
+ self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+ self.story.setMetadata('author',a.string)
+ asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
+
+ try:
+ # in case link points somewhere other than the first chapter
+ a = soup.findAll('option')[1]['value']
+ self.story.setMetadata('storyId',a.split('=',)[1])
+ url = 'http://'+self.host+'/'+a
+ soup = self.make_soup(self._fetchUrl(url))
+ except:
+ pass
+
+ for info in asoup.findAll('table', {'width' : '100%', 'bordercolor' : re.compile(r'#')}):
+ a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+ if a != None:
+ self.story.setMetadata('title',stripHTML(a))
+ break
+
+
+ # Find the chapters:
+ chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
+ if len(chapters) == 0:
+ self.chapterUrls.append((self.story.getMetadata('title'),url))
+ else:
+ for chapter in chapters:
+ # just in case there's tags, like in chapter titles.
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
+
+ self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+ # eFiction sites don't help us out a lot with their meta data
+ # formating, so it's a little ugly.
+
+ # utility method
+ def defaultGetattr(d):
+ try:
+ return d.name
+ except:
+ return ""
+
+ cats = info.findAll('a',href=re.compile('categories.php'))
+ for cat in cats:
+ self.story.addToList('category',cat.string)
+
+ a = info.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId')))
+ val = a.nextSibling
+ svalue = ""
+ while not defaultGetattr(val) == 'br':
+ val = val.nextSibling
+ val = val.nextSibling
+ while not defaultGetattr(val) == 'table':
+ svalue += unicode(val)
+ val = val.nextSibling
+ self.setDescription(url,svalue)
+
+        ## <b>Published:</b> 04/08/2007 |
+
+ ## one story had Updated... in the description. Restrict to sub-table
+ labels = info.find('table').findAll('b')
+ for labelspan in labels:
+ value = labelspan.nextSibling
+ label = stripHTML(labelspan)
+
+ if 'Rating' in label:
+ self.story.setMetadata('rating', value)
+
+ if 'Word Count' in label:
+ self.story.setMetadata('numWords', value)
+
+ if 'Genres' in label:
+ genres = value.string.split(', ')
+ for genre in genres:
+ if genre != 'none':
+ self.story.addToList('genre',genre)
+
+ if 'Warnings' in label:
+ warnings = value.string.split(', ')
+ for warning in warnings:
+ if warning != ' none':
+ self.story.addToList('warnings',warning)
+
+ if 'Completed' in label:
+ if 'Yes' in value:
+ self.story.setMetadata('status', 'Completed')
+ else:
+ self.story.setMetadata('status', 'In-Progress')
+
+ if 'Published' in label:
+ self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
+
+ if 'Updated' in label:
+ # there's a stray [ at the end.
+ #value = value[0:-1]
+ self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
+
+
+ # grab the text for an individual chapter.
+ def getChapterText(self, url):
+
+ logger.debug('Getting chapter text from: %s' % url)
+
+ data = self._fetchUrl(url)
+
+ soup = self.make_soup(data) # some chapters seem to be hanging up on those tags, so it is safer to close them
+
+ story = soup.find('div', {"align" : "left"})
+
+ if None == story:
+ raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+ return self.utf8FromSoup(url,story)
diff --git a/fanficfare/adapters/adapter_chaossycophanthexcom.py b/fanficfare/adapters/adapter_chaossycophanthexcom.py
new file mode 100644
index 00000000..cdc110bc
--- /dev/null
+++ b/fanficfare/adapters/adapter_chaossycophanthexcom.py
@@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Software: eFiction
+import time
+import logging
+logger = logging.getLogger(__name__)
+import re
+import urllib2
+
+
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, makeDate
+
+def getClass():
+ return ChaosSycophantHexComAdapter
+
+# Class name has to be unique. Our convention is camel case the
+# sitename with Adapter at the end. www is skipped.
+class ChaosSycophantHexComAdapter(BaseSiteAdapter):
+
+ def __init__(self, config, url):
+ BaseSiteAdapter.__init__(self, config, url)
+
+ self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+ self.password = ""
+ self.is_adult=False
+
+ # get storyId from url--url validation guarantees query is only sid=1234
+ self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+
+
+
+ # normalized story URL.
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+ # Each adapter needs to have a unique site abbreviation.
+ self.story.setMetadata('siteabbrev','csph')
+
+ # The date format will vary from site to site.
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
+ self.dateformat = "%m/%d/%Y"
+
+ @staticmethod # must be @staticmethod, don't remove it.
+ def getSiteDomain():
+ # The site domain. Does have www here, if it uses it.
+ return 'chaos.sycophanthex.com'
+
+ @classmethod
+ def getSiteExampleURLs(cls):
+ return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
+
+ def getSiteURLPattern(self):
+ return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+ ## Getting the chapter list and the meta data, plus 'is adult' checking.
+ def extractChapterUrlsAndMetadata(self):
+
+ if self.is_adult or self.getConfig("is_adult"):
+ # Weirdly, different sites use different warning numbers.
+ # If the title search below fails, there's a good chance
+ # you need a different number. print data at that point
+ # and see what the 'click here to continue' url says.
+ addurl = "&ageconsent=ok&warning=19"
+ else:
+ addurl=""
+
+ # index=1 makes sure we see the story chapter index. Some
+ # sites skip that for one-chapter stories.
+ url = self.url+'&index=1'+addurl
+ logger.debug("URL: "+url)
+
+ try:
+ data = self._fetchUrl(url)
+ except urllib2.HTTPError, e:
+ if e.code == 404:
+ raise exceptions.StoryDoesNotExist(self.url)
+ else:
+ raise e
+
+ # The actual text that is used to announce you need to be an
+ # adult varies from site to site. Again, print data before
+ # the title search to troubleshoot.
+ if "Age Consent Required" in data:
+ raise exceptions.AdultCheckRequired(self.url)
+
+ if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+ raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ soup = self.make_soup(data)
+ # print data
+
+ # Now go hunting for all the meta data and the chapter list.
+
+ ## Title
+ pt = soup.find('div', {'id' : 'pagetitle'})
+ a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+ self.story.setMetadata('title',stripHTML(a))
+
+ # Find authorid and URL from... author url.
+ a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+ self.story.setMetadata('authorId',a['href'].split('=')[1])
+ self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+ self.story.setMetadata('author',a.string)
+
+ rating=pt.text.split('(')[1].split(')')[0]
+ self.story.setMetadata('rating', rating)
+
+ # Find the chapters:
+ for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+ # just in case there's tags, like in chapter titles.
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
+
+ self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+ # eFiction sites don't help us out a lot with their meta data
+ # formating, so it's a little ugly.
+
+ # utility method
+ def defaultGetattr(d,k):
+ try:
+ return d[k]
+ except:
+ return ""
+
+
+        # <span class="label">Rated:</span> NC-17<br /> etc
+
+ labels = soup.findAll('span',{'class':'label'})
+
+ value = labels[0].previousSibling
+ svalue = ""
+ while value != None:
+ val = value
+ value = value.previousSibling
+ while 'label' not in defaultGetattr(val,'class'):
+ svalue += unicode(val)
+ val = val.nextSibling
+ self.setDescription(url,svalue)
+
+ for labelspan in labels:
+ value = labelspan.nextSibling
+ label = labelspan.string
+
+ if 'Word count' in label:
+ self.story.setMetadata('numWords', value.split(' -')[0])
+
+ if 'Categories' in label:
+ cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+ for cat in cats:
+ self.story.addToList('category',cat.string)
+
+ if 'Characters' in label:
+ chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+ for char in chars:
+ self.story.addToList('characters',char.string)
+
+ if 'Genre' in label:
+ genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+ for genre in genres:
+ self.story.addToList('genre',genre.string)
+
+ if 'Warnings' in label:
+ warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+ for warning in warnings:
+ self.story.addToList('warnings',warning.string)
+
+ if 'Complete' in label:
+ if 'Yes' in value:
+ self.story.setMetadata('status', 'Completed')
+ else:
+ self.story.setMetadata('status', 'In-Progress')
+
+ if 'Published' in label:
+ self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' -')[0]), self.dateformat))
+
+ if 'Updated' in label:
+ # there's a stray [ at the end.
+ #value = value[0:-1]
+ self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
+
+ try:
+ # Find Series name from series URL.
+ a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
+ series_name = a.string
+ series_url = 'http://'+self.host+'/'+a['href']
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ seriessoup = self.make_soup(self._fetchUrl(series_url))
+ storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+ i=1
+ for a in storyas:
+ if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
+ self.setSeries(series_name, i)
+ self.story.setMetadata('seriesUrl',series_url)
+ break
+ i+=1
+
+ except:
+ # I find it hard to care if the series parsing fails
+ pass
+
+ # grab the text for an individual chapter.
+ def getChapterText(self, url):
+
+ logger.debug('Getting chapter text from: %s' % url)
+
+ soup = self.make_soup(self._fetchUrl(url))
+
+ div = soup.find('div', {'id' : 'story'})
+
+ if None == div:
+ raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+ return self.utf8FromSoup(url,div)
diff --git a/fanficfare/adapters/adapter_erosnsapphosycophanthexcom.py b/fanficfare/adapters/adapter_erosnsapphosycophanthexcom.py
new file mode 100644
index 00000000..d59dfa57
--- /dev/null
+++ b/fanficfare/adapters/adapter_erosnsapphosycophanthexcom.py
@@ -0,0 +1,250 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Software: eFiction
+import time
+import logging
+logger = logging.getLogger(__name__)
+import re
+import urllib2
+
+
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, makeDate
+
+def getClass():
+ return ErosnSapphoSycophantHexComAdapter
+
+# Class name has to be unique. Our convention is camel case the
+# sitename with Adapter at the end. www is skipped.
+class ErosnSapphoSycophantHexComAdapter(BaseSiteAdapter):
+
+ def __init__(self, config, url):
+ BaseSiteAdapter.__init__(self, config, url)
+
+ self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+ self.password = ""
+ self.is_adult=False
+
+ # get storyId from url--url validation guarantees query is only sid=1234
+ self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+
+
+
+ # normalized story URL.
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+ # Each adapter needs to have a unique site abbreviation.
+ self.story.setMetadata('siteabbrev','essph')
+
+ # The date format will vary from site to site.
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
+ self.dateformat = "%d/%m/%y"
+
+ @staticmethod # must be @staticmethod, don't remove it.
+ def getSiteDomain():
+ # The site domain. Does have www here, if it uses it.
+ return 'erosnsappho.sycophanthex.com'
+
+ @classmethod
+ def getSiteExampleURLs(cls):
+ return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
+
+ def getSiteURLPattern(self):
+ return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+ ## Getting the chapter list and the meta data, plus 'is adult' checking.
+ def extractChapterUrlsAndMetadata(self):
+
+ if self.is_adult or self.getConfig("is_adult"):
+ # Weirdly, different sites use different warning numbers.
+ # If the title search below fails, there's a good chance
+ # you need a different number. print data at that point
+ # and see what the 'click here to continue' url says.
+ addurl = "&ageconsent=ok&warning=18"
+ else:
+ addurl=""
+
+ # index=1 makes sure we see the story chapter index. Some
+ # sites skip that for one-chapter stories.
+ url = self.url+'&index=1'+addurl
+ logger.debug("URL: "+url)
+
+ try:
+ data = self._fetchUrl(url)
+ except urllib2.HTTPError, e:
+ if e.code == 404:
+ raise exceptions.StoryDoesNotExist(self.url)
+ else:
+ raise e
+
+        m = re.search(r"'viewstory.php\?sid=\d+((?:&amp;ageconsent=ok)?&amp;warning=\d+)'",data)
+ if m != None:
+ if self.is_adult or self.getConfig("is_adult"):
+ # We tried the default and still got a warning, so
+ # let's pull the warning number from the 'continue'
+ # link and reload data.
+ addurl = m.group(1)
+                # correct stupid &amp; error in url.
+                addurl = addurl.replace("&amp;","&")
+ url = self.url+'&index=1'+addurl
+ logger.debug("URL 2nd try: "+url)
+
+ try:
+ data = self._fetchUrl(url)
+ except urllib2.HTTPError, e:
+ if e.code == 404:
+ raise exceptions.StoryDoesNotExist(self.url)
+ else:
+ raise e
+ else:
+ raise exceptions.AdultCheckRequired(self.url)
+
+ if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+ raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ soup = self.make_soup(data)
+ # print data
+
+ # Now go hunting for all the meta data and the chapter list.
+
+ ## Title
+ pt = soup.find('div', {'id' : 'pagetitle'})
+ a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+ self.story.setMetadata('title',stripHTML(a))
+
+ # Find authorid and URL from... author url.
+ a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+ self.story.setMetadata('authorId',a['href'].split('=')[1])
+ self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+ self.story.setMetadata('author',a.string)
+
+ rating=pt.text.split('(')[1].split(')')[0]
+ self.story.setMetadata('rating', rating)
+
+ # Find the chapters:
+ for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+ # just in case there's tags, like in chapter titles.
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
+
+ self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+ # eFiction sites don't help us out a lot with their meta data
+ # formating, so it's a little ugly.
+
+ # utility method
+ def defaultGetattr(d,k):
+ try:
+ return d[k]
+ except:
+ return ""
+
+
+        # <span class="label">Rated:</span> NC-17<br /> etc
+
+ labels = soup.findAll('span',{'class':'label'})
+
+ value = labels[0].previousSibling
+ svalue = ""
+ while value != None:
+ val = value
+ value = value.previousSibling
+ while 'label' not in defaultGetattr(val,'class'):
+ svalue += unicode(val)
+ val = val.nextSibling
+ self.setDescription(url,svalue)
+
+ for labelspan in labels:
+ value = labelspan.nextSibling
+ label = labelspan.string
+
+ if 'Word count' in label:
+ self.story.setMetadata('numWords', value.split(' -')[0])
+
+ if 'Categories' in label:
+ cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+ for cat in cats:
+ self.story.addToList('category',cat.string)
+
+ if 'Characters' in label:
+ chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+ for char in chars:
+ self.story.addToList('characters',char.string)
+
+ if 'Genre' in label:
+ genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+ for genre in genres:
+ self.story.addToList('genre',genre.string)
+
+ if 'Warnings' in label:
+ warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+ for warning in warnings:
+ self.story.addToList('warnings',warning.string)
+
+ if 'Complete' in label:
+ if 'Yes' in value:
+ self.story.setMetadata('status', 'Completed')
+ else:
+ self.story.setMetadata('status', 'In-Progress')
+
+ if 'Published' in label:
+ self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' -')[0]), self.dateformat))
+
+ if 'Updated' in label:
+ # there's a stray [ at the end.
+ #value = value[0:-1]
+ self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
+
+ try:
+ # Find Series name from series URL.
+ a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
+ series_name = a.string
+ series_url = 'http://'+self.host+'/'+a['href']
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ seriessoup = self.make_soup(self._fetchUrl(series_url))
+ storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
+ i=1
+ for a in storyas:
+ # skip 'report this' and 'TOC' links
+ if 'contact.php' not in a['href'] and 'index' not in a['href']:
+ if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
+ self.setSeries(series_name, i)
+ self.story.setMetadata('seriesUrl',series_url)
+ break
+ i+=1
+
+ except:
+ # I find it hard to care if the series parsing fails
+ pass
+
+ # grab the text for an individual chapter.
+ def getChapterText(self, url):
+
+ logger.debug('Getting chapter text from: %s' % url)
+
+ soup = self.make_soup(self._fetchUrl(url))
+
+ div = soup.find('div', {'id' : 'story'})
+
+ if None == div:
+ raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+ return self.utf8FromSoup(url,div)
diff --git a/fanficfare/adapters/adapter_lumossycophanthexcom.py b/fanficfare/adapters/adapter_lumossycophanthexcom.py
new file mode 100644
index 00000000..11d4cfbb
--- /dev/null
+++ b/fanficfare/adapters/adapter_lumossycophanthexcom.py
@@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Software: eFiction
+import time
+import logging
+logger = logging.getLogger(__name__)
+import re
+import urllib2
+
+
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, makeDate
+
+def getClass():
+ return LumosSycophantHexComAdapter
+
+# Class name has to be unique. Our convention is camel case the
+# sitename with Adapter at the end. www is skipped.
+class LumosSycophantHexComAdapter(BaseSiteAdapter):
+
+ def __init__(self, config, url):
+ BaseSiteAdapter.__init__(self, config, url)
+
+ self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+ self.password = ""
+ self.is_adult=False
+
+ # get storyId from url--url validation guarantees query is only sid=1234
+ self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+
+
+
+ # normalized story URL.
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+ # Each adapter needs to have a unique site abbreviation.
+ self.story.setMetadata('siteabbrev','lsph')
+
+ # The date format will vary from site to site.
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
+ self.dateformat = "%m/%d/%Y"
+
+ @staticmethod # must be @staticmethod, don't remove it.
+ def getSiteDomain():
+ # The site domain. Does have www here, if it uses it.
+ return 'lumos.sycophanthex.com'
+
+ @classmethod
+ def getSiteExampleURLs(cls):
+ return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
+
+ def getSiteURLPattern(self):
+ return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+ ## Getting the chapter list and the meta data, plus 'is adult' checking.
+ def extractChapterUrlsAndMetadata(self):
+
+ if self.is_adult or self.getConfig("is_adult"):
+ # Weirdly, different sites use different warning numbers.
+ # If the title search below fails, there's a good chance
+ # you need a different number. print data at that point
+ # and see what the 'click here to continue' url says.
+ addurl = "&ageconsent=ok&warning=19"
+ else:
+ addurl=""
+
+ # index=1 makes sure we see the story chapter index. Some
+ # sites skip that for one-chapter stories.
+ url = self.url+'&index=1'+addurl
+ logger.debug("URL: "+url)
+
+ try:
+ data = self._fetchUrl(url)
+ except urllib2.HTTPError, e:
+ if e.code == 404:
+ raise exceptions.StoryDoesNotExist(self.url)
+ else:
+ raise e
+
+ # The actual text that is used to announce you need to be an
+ # adult varies from site to site. Again, print data before
+ # the title search to troubleshoot.
+ if "Age Consent Required" in data:
+ raise exceptions.AdultCheckRequired(self.url)
+
+ if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+ raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ soup = self.make_soup(data)
+ # print data
+
+ # Now go hunting for all the meta data and the chapter list.
+
+ ## Title
+ pt = soup.find('div', {'id' : 'pagetitle'})
+ a = pt.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+ self.story.setMetadata('title',stripHTML(a))
+
+ # Find authorid and URL from... author url.
+ a = pt.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+ self.story.setMetadata('authorId',a['href'].split('=')[1])
+ self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+ self.story.setMetadata('author',a.string)
+
+ rating=pt.text.split('(')[1].split(')')[0]
+ self.story.setMetadata('rating', rating)
+
+ # Find the chapters:
+ for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
+ # just in case there's tags, like in chapter titles.
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']+addurl))
+
+ self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+ # eFiction sites don't help us out a lot with their meta data
+ # formating, so it's a little ugly.
+
+ # utility method
+ def defaultGetattr(d,k):
+ try:
+ return d[k]
+ except:
+ return ""
+
+
+ # Rated: NC-17 etc
+
+ labels = soup.findAll('span',{'class':'label'})
+
+ value = labels[0].previousSibling
+ svalue = ""
+ while value != None:
+ val = value
+ value = value.previousSibling
+ while 'label' not in defaultGetattr(val,'class'):
+ svalue += unicode(val)
+ val = val.nextSibling
+ self.setDescription(url,svalue)
+
+ for labelspan in labels:
+ value = labelspan.nextSibling
+ label = labelspan.string
+
+ if 'Word count' in label:
+ self.story.setMetadata('numWords', value.split(' -')[0])
+
+ if 'Categories' in label:
+ cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
+ for cat in cats:
+ self.story.addToList('category',cat.string)
+
+ if 'Characters' in label:
+ chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
+ for char in chars:
+ self.story.addToList('characters',char.string)
+
+ if 'Genre' in label:
+ genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1'))
+ for genre in genres:
+ self.story.addToList('genre',genre.string)
+
+ if 'Warnings' in label:
+ warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2'))
+ for warning in warnings:
+ self.story.addToList('warnings',warning.string)
+
+ if 'Complete' in label:
+ if 'Yes' in value:
+ self.story.setMetadata('status', 'Completed')
+ else:
+ self.story.setMetadata('status', 'In-Progress')
+
+ if 'Published' in label:
+ self.story.setMetadata('datePublished', makeDate(stripHTML(value.split(' -')[0]), self.dateformat))
+
+ if 'Updated' in label:
+ # there's a stray [ at the end.
+ #value = value[0:-1]
+ self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
+
+ try:
+ # Find Series name from series URL.
+ a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
+ series_name = a.string
+ series_url = 'http://'+self.host+'/'+a['href']
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ seriessoup = self.make_soup(self._fetchUrl(series_url))
+ storyas = seriessoup.findAll('a', href=re.compile(r'^viewstory.php\?sid=\d+$'))
+ i=1
+ for a in storyas:
+ if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
+ self.setSeries(series_name, i)
+ self.story.setMetadata('seriesUrl',series_url)
+ break
+ i+=1
+
+ except:
+ # I find it hard to care if the series parsing fails
+ pass
+
+ # grab the text for an individual chapter.
+ def getChapterText(self, url):
+
+ logger.debug('Getting chapter text from: %s' % url)
+
+ soup = self.make_soup(self._fetchUrl(url))
+
+ div = soup.find('div', {'id' : 'story'})
+
+ if None == div:
+ raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+ return self.utf8FromSoup(url,div)
diff --git a/fanficfare/adapters/adapter_occlumencysycophanthexcom.py b/fanficfare/adapters/adapter_occlumencysycophanthexcom.py
new file mode 100644
index 00000000..4fe3ad81
--- /dev/null
+++ b/fanficfare/adapters/adapter_occlumencysycophanthexcom.py
@@ -0,0 +1,258 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Software: eFiction
+import time
+import logging
+logger = logging.getLogger(__name__)
+import re
+import urllib2
+
+
+from ..htmlcleanup import stripHTML
+from .. import exceptions as exceptions
+
+from base_adapter import BaseSiteAdapter, makeDate
+
+def getClass():
+ return OcclumencySycophantHexComAdapter
+
+# Class name has to be unique. Our convention is camel case the
+# sitename with Adapter at the end. www is skipped.
+class OcclumencySycophantHexComAdapter(BaseSiteAdapter):
+
+ def __init__(self, config, url):
+ BaseSiteAdapter.__init__(self, config, url)
+
+ self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
+ self.password = ""
+ self.is_adult=False
+
+ # get storyId from url--url validation guarantees query is only sid=1234
+ self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
+
+
+
+ # normalized story URL.
+ self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))
+
+ # Each adapter needs to have a unique site abbreviation.
+ self.story.setMetadata('siteabbrev','osph')
+
+ # The date format will vary from site to site.
+ # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
+ self.dateformat = "%m/%d/%Y"
+
+ @staticmethod # must be @staticmethod, don't remove it.
+ def getSiteDomain():
+ # The site domain. Does have www here, if it uses it.
+ return 'occlumency.sycophanthex.com'
+
+ @classmethod
+ def getSiteExampleURLs(cls):
+ return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"
+
+ def getSiteURLPattern(self):
+ return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"
+
+ ## Login seems to be reasonably standard across eFiction sites.
+ def needToLoginCheck(self, data):
+ if 'This story contains adult content and/or themes.' in data \
+ or "That password doesn't match the one in our database" in data:
+ return True
+ else:
+ return False
+
+ def performLogin(self, url):
+ params = {}
+
+ if self.password:
+ params['penname'] = self.username
+ params['password'] = self.password
+ else:
+ params['penname'] = self.getConfig("username")
+ params['password'] = self.getConfig("password")
+ params['rememberme'] = '1'
+ params['sid'] = ''
+ params['intent'] = ''
+ params['submit'] = 'Submit'
+
+ loginUrl = 'http://' + self.getSiteDomain() + '/user.php'
+ logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
+ params['penname']))
+
+ d = self._fetchUrl(loginUrl, params)
+
+ if "Logout" not in d : #Member Account
+ logger.info("Failed to login to URL %s as %s" % (loginUrl,
+ params['penname']))
+ raise exceptions.FailedToLogin(url,params['penname'])
+ return False
+ else:
+ return True
+
+ ## Getting the chapter list and the meta data, plus 'is adult' checking.
+ def extractChapterUrlsAndMetadata(self):
+ # index=1 makes sure we see the story chapter index. Some
+ # sites skip that for one-chapter stories.
+ url = self.url
+ logger.debug("URL: "+url)
+
+ try:
+ data = self._fetchUrl(url)
+ except urllib2.HTTPError, e:
+ if e.code == 404:
+ raise exceptions.StoryDoesNotExist(self.url)
+ else:
+ raise e
+
+ if self.needToLoginCheck(data):
+ # need to log in for this one.
+ self.performLogin(url)
+ data = self._fetchUrl(url)
+
+ if "Access denied. This story has not been validated by the adminstrators of this site." in data:
+ raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
+
+ # use BeautifulSoup HTML parser to make everything easier to find.
+ soup = self.make_soup(data)
+ # print data
+
+ # Now go hunting for all the meta data and the chapter list.
+
+
+ # Find authorid and URL from... author url.
+ a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+ self.story.setMetadata('authorId',a['href'].split('=')[1])
+ self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
+ self.story.setMetadata('author',a.string)
+ asoup = self.make_soup(self._fetchUrl(self.story.getMetadata('authorUrl')))
+
+ try:
+ # in case link points somewhere other than the first chapter
+ a = soup.findAll('option')[1]['value']
+ self.story.setMetadata('storyId',a.split('=',)[1])
+ url = 'http://'+self.host+'/'+a
+ soup = self.make_soup(self._fetchUrl(url))
+ except:
+ pass
+
+ for info in asoup.findAll('table', {'class' : 'border'}):
+ a = info.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+ if a != None:
+ self.story.setMetadata('title',stripHTML(a))
+ break
+
+
+ # Find the chapters:
+ chapters=soup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+&i=1$'))
+ if len(chapters) == 0:
+ self.chapterUrls.append((self.story.getMetadata('title'),url))
+ else:
+ for chapter in chapters:
+ # just in case there's tags, like in chapter titles.
+ self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
+
+ self.story.setMetadata('numChapters',len(self.chapterUrls))
+
+ # eFiction sites don't help us out a lot with their meta data
+ # formating, so it's a little ugly.
+
+ # utility method
+ def defaultGetattr(d):
+ try:
+ return d.name
+ except:
+ return ""
+
+ cats = info.findAll('a',href=re.compile('categories.php'))
+ for cat in cats:
+ self.story.addToList('category',cat.string)
+
+
+ a = info.find('a', href=re.compile(r'reviews.php\?sid='+self.story.getMetadata('storyId')))
+ val = a.nextSibling
+ svalue = ""
+ while not defaultGetattr(val) == 'br':
+ val = val.nextSibling
+ val = val.nextSibling
+ while not defaultGetattr(val) == 'table':
+ svalue += unicode(val)
+ val = val.nextSibling
+ self.setDescription(url,svalue)
+
+ # Rated: NC-17 etc
+ labels = info.findAll('b')
+ for labelspan in labels:
+ value = labelspan.nextSibling
+ label = stripHTML(labelspan)
+
+ if 'Rating' in label:
+ self.story.setMetadata('rating', value)
+
+ if 'Word Count' in label:
+ self.story.setMetadata('numWords', value)
+
+ if 'Genres' in label:
+ genres = value.string.split(', ')
+ for genre in genres:
+ if genre != 'none':
+ self.story.addToList('genre',genre)
+
+ if 'Characters' in label:
+ chars = value.string.split(', ')
+ for char in chars:
+ if char != 'none':
+ self.story.addToList('characters',char)
+
+ if 'Warnings' in label:
+ warnings = value.string.split(', ')
+ for warning in warnings:
+ if warning != ' none':
+ self.story.addToList('warnings',warning)
+
+ if 'Completed' in label:
+ if 'Yes' in value:
+ self.story.setMetadata('status', 'Completed')
+ else:
+ self.story.setMetadata('status', 'In-Progress')
+
+ if 'Published' in label:
+ self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
+
+ if 'Updated' in label:
+ # there's a stray [ at the end.
+ #value = value[0:-1]
+ self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
+
+
+ # grab the text for an individual chapter.
+ def getChapterText(self, url):
+
+ logger.debug('Getting chapter text from: %s' % url)
+
+ data = self._fetchUrl(url)
+ data = data.replace('')
+
+ soup = self.make_soup(data)
+
+ story = soup.find('div', {"align" : "left"})
+
+ if None == story:
+ raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
+
+ return self.utf8FromSoup(url,story)
diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini
index 05eb364d..8fa9264f 100644
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@@ -1115,6 +1115,21 @@ slow_down_sleep_time:2
## Author'. authorUrl will still point to the Archivist Author's page.
#use_archived_author:false
+[ashwinder.sycophanthex.com]
+## Some sites require login (or login for some rated stories) The
+## program can prompt you, or you can save it in config. In
+## commandline version, this should go in your personal.ini, not
+## defaults.ini.
+#username:YourName
+#password:yourpassword
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+extracharacters:Severus Snape,Hermione Granger
+extraships:Severus Snape/Hermione Granger
+
+website_encodings:Windows-1252,utf8
+
[asr3.slashzone.org]
## Site dedicated to these categories/characters/ships
extracategories:The Sentinel
@@ -1191,6 +1206,18 @@ extracharacters:Buffy,Giles
website_encodings:Windows-1252,utf8
+[chaos.sycophanthex.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## some sites include images that we don't ever want becoming the
+## cover image. This lets you exclude them.
+cover_exclusion_regexp:/images/.*?ribbon.gif
+
+website_encodings:Windows-1252,utf8
+
[chosentwofanfic.com]
extra_valid_entries:disclaimer
disclaimer_label: Disclaimer
@@ -1292,6 +1319,21 @@ extracategories:Lord of the Rings
website_encodings:Windows-1252,utf8
+[erosnsappho.sycophanthex.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+
+## some sites include images that we don't ever want becoming the
+## cover image. This lets you exclude them.
+cover_exclusion_regexp:/images/.*?ribbon.gif
+
+website_encodings:Windows-1252,utf8
+
[fanfic.castletv.net]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@@ -1662,6 +1704,17 @@ readings_label: Readings
## Site dedicated to these categories/characters/ships
extracategories:Lord of the Rings
+[lumos.sycophanthex.com]
+## Some sites do not require a login, but do require the user to
+## confirm they are adult for adult content. In commandline version,
+## this should go in your personal.ini, not defaults.ini.
+#is_adult:true
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+
+website_encodings:Windows-1252,utf8
+
[mcstories.com]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In commandline version,
@@ -1781,6 +1834,20 @@ extracategories:Star Wars
extracharacters:Obi-Wan Kenobi,Padmé Amidala
extraships:Obi-Wan Kenobi/Padmé Amidala
+[occlumency.sycophanthex.com]
+## Some sites require login (or login for some rated stories) The
+## program can prompt you, or you can save it in config. In
+## commandline version, this should go in your personal.ini, not
+## defaults.ini.
+#username:YourName
+#password:yourpassword
+
+## Site dedicated to these categories/characters/ships
+extracategories:Harry Potter
+extracharacters:Severus Snape
+
+website_encodings:Windows-1252,utf8
+
[ponyfictionarchive.net]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In commandline version,