Adding sites chosentwofanfic.com www.asexstories.com www.bdsmlibrary.com www.ficsite.com -- Thanks GComyn!

This commit is contained in:
Jim Miller 2016-10-30 10:55:34 -05:00
parent 1e0cc699c2
commit 9961c59402
7 changed files with 999 additions and 0 deletions

View file

@ -936,6 +936,11 @@ strip_text_links:true
## Site dedicated to these categories/characters/ships
extracategories:Blood Ties
[chosentwofanfic.com]
extra_valid_entries:disclaimer
disclaimer_label: Disclaimer
add_to_titlepage_entries:,disclaimer
[fanfic.castletv.net]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -1790,6 +1795,27 @@ readings_label:Readings
## Site dedicated to these categories/characters/ships
extracategories:Star Trek
[www.asexstories.com]
## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
## This site has links to a video site embedded in the text. They are
## not needed, and will be removed if the below property is set to True
strip_text_links:true
[www.bdsmlibrary.com]
## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
extra_valid_entries:eroticatags,size,comments
size_label: Size
comments_label: Comments
eroticatags_label:Erotica Tags
extra_titlepage_entries:size,comments,eroticatags
keep_style_attr: false
[www.dracoandginny.com]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -1917,6 +1943,11 @@ check_next_chapter:false
## this should go in your personal.ini, not defaults.ini.
#is_adult:true
[www.ficsite.com]
## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
[www.fictionalley.org]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In commandline version,

View file

@ -145,6 +145,10 @@ import adapter_fanfictionlucifaelcom
import adapter_adultfanfictionorg
import adapter_fictionhuntcom
import adapter_royalroadl
import adapter_chosentwofanficcom
import adapter_bdsmlibrarycom
import adapter_ficsitecom
import adapter_asexstoriescom
## This bit of complexity allows adapters to be added by just adding
## importing. It eliminates the long if/else clauses we used to need

View file

@ -0,0 +1,174 @@
# -*- coding: utf-8 -*-
# Copyright 2013 Fanficdownloader team, 2016 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import urlparse
import time
import os
from bs4.element import Comment
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
import sys
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Entry point used by the adapter loader: return this module's adapter class."""
    return ASexStoriesComAdapter
class ASexStoriesComAdapter(BaseSiteAdapter):
    """Adapter for www.asexstories.com.

    The site exposes almost no structured metadata on the story page
    itself, so the description is synthesized from the first 150
    characters of the story text and the update date is a fixed
    placeholder.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.decode = ["utf8",
                       "Windows-1252",
                       "iso-8859-1"]
        # 1252 is a superset of iso-8859-1.
        # Most sites that claim to be
        # iso-8859-1 (and some that claim to be
        # utf8) are really windows-1252.

        # Each adapter needs a unique site abbreviation.
        self.story.setMetadata('siteabbrev','asscom')

        # Extract story ID from base URL, http://www.asexstories.com/Halloween-party-with-the-phantom/
        storyId = self.parsedUrl.path.split('/',)[1]
        self.story.setMetadata('storyId', storyId)

        ## set url
        self._setURL(url)

    @staticmethod
    def getSiteDomain():
        """Domain this adapter handles."""
        return 'www.asexstories.com'

    @classmethod
    def getAcceptDomains(cls):
        """All domains accepted for incoming story URLs."""
        return ['www.asexstories.com']

    @classmethod
    def getSiteExampleURLs(cls):
        """Example story URL shown to users."""
        return "http://www.asexstories.com/StoryTitle/"

    def getSiteURLPattern(self):
        """Regex a story URL must match for this adapter to claim it."""
        return r"https?://(www\.)?asexstories\.com/([a-zA-Z0-9_-]+)/"

    def extractChapterUrlsAndMetadata(self):
        """
        Chapters are located at /StoryName/ (for single-chapter
        stories), or //StoryName/index#.html for multiple chapters (# is a
        non-padded incrementing number, like StoryName1, StoryName2.html, ...,
        StoryName10.html)

        This site doesn't have much in the way of metadata, except on the
        Category and Tags index pages. so we will get what we can.

        Also, as this is an Adult site, the is_adult check is mandatory.

        Raises AdultCheckRequired when is_adult isn't set, and
        StoryDoesNotExist on 404 or the site's "Page Not Found." text.
        """
        if not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)

        try:
            data1 = self._fetchUrl(self.url)
            soup1 = self.make_soup(data1)
            #strip comments from soup
            [comment.extract() for comment in soup1.find_all(text=lambda text:isinstance(text, Comment))]
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # Site returns a normal page with this text for missing stories.
        if 'Page Not Found.' in data1:
            raise exceptions.StoryDoesNotExist(self.url)

        url = self.url

        # Extract metadata

        # Title
        title = soup1.find('div',{'class':'story-top-block'}).find('h1')
        self.story.setMetadata('title', title.string)

        # Author -- second 'story-info-bl' div holds the author link.
        author = soup1.find('div',{'class':'story-info'}).findAll('div',{'class':'story-info-bl'})[1].find('a')
        authorurl = author['href']
        self.story.setMetadata('author', author.string)
        self.story.setMetadata('authorUrl', authorurl)
        # authorId is the author page's filename without extension.
        authorid = os.path.splitext(os.path.basename(authorurl))[0]
        self.story.setMetadata('authorId', authorid)

        # Description
        ### The only way to get the Description (summary) is to
        ### parse through the Category and/or Tags index pages.
        ### To get a summary, I've taken the first 150 characters
        ### from the story.
        description = soup1.find('div',{'class':'story-block'}).get_text(strip=True)
        # Truncate on the encoded bytes, then decode back; 'ignore' drops
        # any multi-byte character cut in half at the 150-byte boundary.
        description = description.encode('utf-8','ignore').strip()[0:150].decode('utf-8','ignore')
        self.setDescription(url,'Excerpt from beginning of story: '+description+'...')

        # Get chapter URLs
        self.chapterUrls = []

        ### The first 'chapter' is not listed in the links, so we have to
        ### add it before the rest of the pages, if any
        self.chapterUrls.append(('1', self.url))

        # NOTE(review): findAll() returns a (possibly empty) list, never
        # None, so the guard below always takes the loop branch.
        chapterTable = soup1.find('div',{'class':'pages'}).findAll('a')
        if chapterTable is not None:
            # Multi-chapter story
            for page in chapterTable:
                chapterTitle = page.string
                chapterUrl = urlparse.urljoin(self.url, page['href'])
                self.chapterUrls.append((chapterTitle, chapterUrl))

        self.story.setMetadata('numChapters', len(self.chapterUrls))

        # Rating comes from the title attribute of the rating image.
        rated = soup1.find('div',{'class':'story-info'}).findAll('div',{'story-info-bl5'})[0].find('img')['title'].replace('- Rate','').strip()
        self.story.setMetadata('rating',rated)

        # Placeholder date -- presumably the site doesn't expose one; verify.
        self.story.setMetadata('dateUpdated', makeDate('01/01/2001', '%m/%d/%Y'))

        logger.debug("Story: <%s>", self.story)
        return

    def getChapterText(self, url):
        """Fetch one chapter page and return its cleaned story text."""
        logger.debug('Getting chapter text from <%s>' % url)
        #logger.info('Getting chapter text from <%s>' % url)

        data1 = self._fetchUrl(url)
        soup1 = self.make_soup(data1)

        # get story text
        story1 = soup1.find('div', {'class':'story-block'})

        ### This site has links embeded in the text that lead
        ### to either a video site, or to a tags index page
        ### the default is to remove them, but you can set the
        ### strip_text_links to false to keep them in the text
        if self.getConfig('strip_text_links'):
            for anchor in story1('a', {'target': '_blank'}):
                anchor.replaceWith(anchor.string)

        return self.utf8FromSoup(url, story1)

View file

@ -0,0 +1,226 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
###########################################################################
### written by GComyn - 10/06/2016
### updated by GComyn = 10/24/2016
###########################################################################
'''
This works, but some of the stories have abysmal formatting, so it would
probably need to be edited for reading.
I've seen one story that downloaded at 25M, but after editing is only 201K
after the formatting was corrected.
Right now it is written to download each chapter separately, but I may change
that to get the whole story. It will still have formatting problems, but should
be able to get the longer stories this way.
Also, the site is notorious for lagging, so some of the longer stories will
probably not be downloadable, since this program doesn't wait long enough
for the site to catch up.
'''
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib
import urllib2
import sys
import urlparse
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Entry point used by the adapter loader: return this module's adapter class."""
    return BDSMLibraryComSiteAdapter
class BDSMLibraryComSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
self.decode = ["utf8",
"Windows-1252",
"iso-8859-1"] # 1252 is a superset of iso-8859-1.
# Most sites that claim to be
# iso-8859-1 (and some that claim to be
# utf8) are really windows-1252.
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
self.is_adult=False
# get storyId from url--url validation guarantees query is only storyid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])
self._setURL('http://{0}/stories/story.php?storyid={1}'.format(self.getSiteDomain(), self.story.getMetadata('storyId')))
# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','bdsmlib')
# The date format will vary from site to site.
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
self.dateformat = "%b %d, %Y"
@staticmethod # must be @staticmethod, don't remove it.
def getSiteDomain():
# The site domain. Does have www here, if it uses it.
return 'www.bdsmlibrary.com'
@classmethod
def getSiteExampleURLs(cls):
return "http://"+cls.getSiteDomain()+"/stories/story.php?storyid=1234"
def getSiteURLPattern(self):
return re.escape("http://"+self.getSiteDomain()+"/stories/story.php?storyid=")+r"\d+$"
def use_pagecache(self):
'''
adapters that will work with the page cache need to implement
this and change it to True.
'''
return True
def extractChapterUrlsAndMetadata(self):
if not (self.is_adult or self.getConfig("is_adult")):
raise exceptions.AdultCheckRequired(self.url)
try:
data = self._fetchUrl(self.url)
soup = self.make_soup(data)
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
if 'The story does not exist' in data:
raise exceptions.StoryDoesNotExist(self.url)
# Extract metadata
title=soup.title.text.replace('BDSM Library - Story: ','')
self.story.setMetadata('title', title)
# Author
author = soup.find('a', href=re.compile(r"/stories/author.php\?authorid=\d+"))
i = 0
while author == None:
time.sleep(1)
logger.warning('A problem retrieving the author information. Trying Again')
try:
data = self._fetchUrl(self.url)
soup = self.make_soup(data)
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
author = soup.find('a', href=re.compile(r"/stories/author.php\?authorid=\d+"))
print author
i += 1
if i == 20:
logger.info('Too Many cycles... exiting')
sys.exit()
authorurl = urlparse.urljoin(self.url, author['href'])
self.story.setMetadata('author', author.text)
self.story.setMetadata('authorUrl', authorurl)
authorid = author['href'].split('=')[1]
self.story.setMetadata('authorId', authorid)
# Find the chapters:
# The update date is with the chapter links... so we will update it here as well
for a in soup.findAll('a'):
if '/stories/chapter.php?storyid='+self.story.getMetadata('storyId')+"&chapterid=" in a['href']:
value = a.findNext('td').findNext('td').string.replace('(added on','').replace(')','').strip()
self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))
self.chapterUrls.append((stripHTML(a),'http://'+self.getSiteDomain()+a['href']))
# I can't seem to get the re.compile to work for this. so I'm commenting it out
#for chapter in soup.findAll('a', href=re.compile(r'/stories/chapter.php?storyid='+self.story.getMetadata('storyId')+"&chapterid=\d+$")):
# # just in case there's tags, like <i> in chapter titles.
# self.chapterUrls.append((stripHTML(chapter),'http://'+self.getSiteDomain()+chapter['href']+addurl))
self.story.setMetadata('numChapters',len(self.chapterUrls))
# Get the MetaData
# Erotia Tags
tags = soup.findAll('a',href=re.compile(r'/stories/search.php\?selectedcode'))
for tag in tags:
self.story.addToList('eroticatags',tag.text)
# Published Date
tds = soup.findAll('td')
for td in tds:
if len(td.text)>0:
if 'Added on:' in td.text and '<table' not in unicode(td):
value = td.text.replace('Added on:','').strip()
self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))
elif 'Synopsis:' in td.text and '<table' not in unicode(td):
value = td.text.replace('\n','').replace('Synopsis:','').strip()
self.setDescription(self.url,stripHTML(value))
elif 'Size:' in td.text and '<table' not in unicode(td):
value = td.text.replace('\n','').replace('Size:','').strip()
self.story.setMetadata('size',stripHTML(value))
elif 'Comments:' in td.text and '<table' not in unicode(td):
value = td.text.replace('\n','').replace('Comments:','').strip()
self.story.setMetadata('comments',stripHTML(value))
# grab the text for an individual chapter.
def getChapterText(self, url):
#Since each chapter is on 1 page, we don't need to do anything special, just get the content of the page.
logger.debug('Getting chapter text from: %s' % url)
logger.info('Getting chapter text from: %s' % url)
soup = self.make_soup(self._fetchUrl(url))
chaptertag = soup.find('div',{'class' : 'storyblock'})
# Some of the stories have the chapters in <pre> sections, so have to check for that
if chaptertag == None:
chaptertag = soup.find('pre')
try:
# BDSM Library basically wraps it's own html around the document,
# so we will be removing the script, title and meta content from the
# storyblock
scripts = chaptertag.findAll('style')
if scripts != None:
for script in scripts:
script.extract()
titles = chaptertag.findAll('title')
if titles !=None:
for title in titles:
title.extract()
metas = chaptertag.findAll('meta')
if metas !=None:
for meta in metas:
meta.extract()
except:
pass
if None == chaptertag:
raise exceptions.FailedToDownload("Error downloading Chapter: {0}! Missing required element!".format(url))
return self.utf8FromSoup(url,chaptertag)

View file

@ -0,0 +1,241 @@
# -*- coding: utf-8 -*-
# Copyright 2012 Fanficdownloader team, 2015 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import sys
from bs4.element import Comment
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Entry point used by the adapter loader: return this module's adapter class."""
    return ChosenTwoFanFicArchiveAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class ChosenTwoFanFicArchiveAdapter(BaseSiteAdapter):
    """Adapter for chosentwofanfic.com, a standard eFiction site.

    Metadata is scraped by walking the siblings of each
    <span class="label"> until the next label span is reached.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.decode = ["Windows-1252",
                       "utf8",
                       "iso-8859-1"] # 1252 is a superset of iso-8859-1.
                                     # Most sites that claim to be
                                     # iso-8859-1 (and some that claim to be
                                     # utf8) are really windows-1252.

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','chosen2')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'chosentwofanfic.com'

    @classmethod
    def getSiteExampleURLs(cls):
        """Example story URL shown to users."""
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        """Regex a story URL must match for this adapter to claim it."""
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Fetch the story index page and collect chapter URLs and metadata.

        Raises AdultCheckRequired when the site's NC-17 interstitial is
        detected and is_adult isn't set; AccessDenied for unvalidated
        stories; StoryDoesNotExist on 404.
        """
        # checking to see if the is_adult is set to true
        if self.is_adult or self.getConfig("is_adult"):
            # ageconsent/warning params bypass the site's adult interstitial.
            addURL = "&ageconsent=ok&warning=3"
        else:
            addURL = ""

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = '{0}&index=1{1}'.format(self.url,addURL)
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
            raise exceptions.AdultCheckRequired(self.url)

        # NOTE: 'adminstrators' matches the site's own (misspelled) text.
        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied("{0} says: Access denied. This story has not been validated by the adminstrators of this site.".format(self.getSiteDomain()))

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)

        # Now go hunting for all the meta data and the chapter list.

        ## Title
        ## Some stories have a banner that has it's own a tag before the actual text title...
        ## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
        a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        # This site lists the newest member to the site before the div that has the story info
        # so I'm checking the pagetitle div for this as well
        a = soup.find('div',{'id':'pagetitle'}).find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            # addURL is carried onto each chapter URL so chapter fetches
            # also bypass the adult interstitial.
            #self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))
            self.chapterUrls.append((stripHTML(chapter),'http://{0}/{1}{2}'.format(self.host, chapter['href'],addURL)))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: tag attribute lookup that returns "" instead of
        # raising when the attribute is missing.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        # Walk each label's following siblings, accumulating their text
        # until the next label span starts.
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            val = labelspan.nextSibling
            value = unicode('')
            while val and not 'label' in defaultGetattr(val,'class'):
                # print("val:%s"%val)
                if not isinstance(val,Comment):
                    value += unicode(val)
                val = val.nextSibling
            label = labelspan.string
            # print("label:%s\nvalue:%s"%(label,value))
            # NOTE(review): if a label span has no direct string, label is
            # None and the 'in' tests below would raise -- presumably every
            # label on this site has plain text; verify.

            if 'Summary' in label:
                self.setDescription(url,value)

            if 'Rated' in label:
                self.story.setMetadata('rating', stripHTML(value))

            if 'Word count' in label:
                self.story.setMetadata('numWords', stripHTML(value))

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Pairing' in label:
                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                for ship in ships:
                    self.story.addToList('ships',ship.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in stripHTML(value):
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

            if 'Disclaimer' in label:
                self.story.setMetadata('disclaimer', stripHTML(value))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1
        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return the story div's content.

        Raises FailedToDownload when the story div is missing.
        """
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,div)

View file

@ -0,0 +1,292 @@
# -*- coding: utf-8 -*-
# Copyright 2012 Fanficdownloader team, 2015 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Software: eFiction
import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import sys
from bs4.element import Comment
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
from base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Entry point used by the adapter loader: return this module's adapter class."""
    return HPFanficArchiveComAdapter
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class HPFanficArchiveComAdapter(BaseSiteAdapter):
    """Adapter for www.ficsite.com, an eFiction site with login support
    and several different adult-content warning interstitials.

    NOTE(review): the class name doesn't match the domain returned by
    getSiteDomain() -- it was apparently carried over from the adapter
    this file was copied from.  Renaming it would also require changing
    getClass() above.
    """

    def __init__(self, config, url):
        BaseSiteAdapter.__init__(self, config, url)

        self.decode = ["Windows-1252",
                       "utf8", "iso-8859-1"]
        # 1252 is a superset of iso-8859-1.
        # Most sites that claim to be
        # iso-8859-1 (and some that claim to be
        # utf8) are really windows-1252.

        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees query is only sid=1234
        self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/viewstory.php?sid='+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','ficsite')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%Y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain. Does have www here, if it uses it.
        return 'www.ficsite.com'

    @classmethod
    def getSiteExampleURLs(cls):
        """Example story URL shown to users."""
        return "http://"+cls.getSiteDomain()+"/viewstory.php?sid=1234"

    def getSiteURLPattern(self):
        """Regex a story URL must match for this adapter to claim it."""
        return re.escape("http://"+self.getSiteDomain()+"/viewstory.php?sid=")+r"\d+$"

    ## Login seems to be reasonably standard across eFiction sites.
    def needToLoginCheck(self, data):
        """Return True when the fetched page text indicates a login is required."""
        if 'Registered Users Only' in data \
                or 'There is no such account on our website' in data \
                or "That password doesn't match the one in our database" in data:
            return True
        else:
            return False

    def performLogin(self, url):
        """Log in to the site; raises FailedToLogin on failure, returns True on success."""
        params = {}

        # Adapter-level credentials (if set) take priority over config.
        if self.password:
            params['penname'] = self.username
            params['password'] = self.password
        else:
            params['penname'] = self.getConfig("username")
            params['password'] = self.getConfig("password")
        params['cookiecheck'] = '1'
        params['submit'] = 'Submit'

        loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
        logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                             params['penname']))

        d = self._fetchUrl(loginUrl, params)

        # A successful login lands on a page containing "Member Account".
        if "Member Account" not in d : #Member Account
            logger.info("Failed to login to URL %s as %s" % (loginUrl,
                                                             params['penname']))
            raise exceptions.FailedToLogin(url,params['penname'])
            return False  # unreachable after the raise
        else:
            return True

    # I've added this because there are several warnings
    # that are used by this site.
    def getWarning(self, data):
        """Map a warning interstitial's text to the query params that bypass it.

        Returns the param string, or False when no warning text was found.
        """
        if "This story contains adult subject matter that may include coarse language, violence, and mild sexual content of a graphical nature. Reader discretion is requested. Thank you." in data:
            return '&ageconsent=ok&warning=5'
        elif "This story contains graphical material of an adult nature and a same sex primary relationship. Please do not read if this is not to your taste. Thank you." in data:
            return '&warning=7'
        elif "This story contains graphical material of an adult nature. Reader discretion is requested. Thank you." in data:
            return '&warning=6'
        else:
            return False

    ## Getting the chapter list and the meta data, plus 'is adult' checking.
    def extractChapterUrlsAndMetadata(self):
        """Fetch the story index page and collect chapter URLs and metadata.

        Handles login, the site's several adult-content warnings, and
        raises AccessDenied / StoryDoesNotExist as appropriate.
        """
        if (self.is_adult or self.getConfig("is_adult")):
            addurl = '&index=1&ageconsent=ok&warning=5'
        else:
            addurl='&index=1'

        # index=1 makes sure we see the story chapter index. Some
        # sites skip that for one-chapter stories.
        url = self.url+addurl
        logger.debug("URL: "+url)

        try:
            data = self._fetchUrl(url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        if self.needToLoginCheck(data):
            # need to log in for this one.
            self.performLogin(url)
            data = self._fetchUrl(url)

        # Refetch with bypass params if a warning interstitial came back.
        warning = self.getWarning(data)
        if warning != False:
            data = self._fetchUrl(url+warning)

        # NOTE: 'adminstrators' matches the site's own (misspelled) text.
        if "Access denied. This story has not been validated by the adminstrators of this site." in data:
            raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
        elif "This story contains adult subject matter that may include coarse language, violence, and mild sexual content of a graphical nature. Reader discretion is requested. Thank you." in data:
            raise exceptions.AccessDenied(self.getSiteDomain()+" says: This story contains adult subject matter that may include coarse language, violence, and mild sexual content of a graphical nature. Reader discretion is requested. Thank you.")
        elif "This story contains graphical material of an adult nature and a same sex primary relationship. Please do not read if this is not to your taste. Thank you." in data:
            raise exceptions.AccessDenied(self.getSiteDomain()+" says: This story contains graphical material of an adult nature and a same sex primary relationship. Please do not read if this is not to your taste. Thank you.")

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self.make_soup(data)
        # print data

        # Now go hunting for all the meta data and the chapter list.

        ## Title and Author Div
        div = soup.find('div',{'id':'pagetitle'})

        ## Title
        a = div.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
        self.story.setMetadata('title',stripHTML(a))

        # Find authorid and URL from... author url.
        a = div.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
        self.story.setMetadata('authorId',a['href'].split('=')[1])
        self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
        self.story.setMetadata('author',a.string)

        # Find the chapters:
        for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
            # just in case there's tags, like <i> in chapter titles.
            self.chapterUrls.append((stripHTML(chapter),'http://'+self.host+'/'+chapter['href']))

        self.story.setMetadata('numChapters',len(self.chapterUrls))

        # eFiction sites don't help us out a lot with their meta data
        # formating, so it's a little ugly.

        # utility method: tag attribute lookup that returns "" instead of
        # raising when the attribute is missing.
        def defaultGetattr(d,k):
            try:
                return d[k]
            except:
                return ""

        # <span class="label">Rated:</span> NC-17<br /> etc
        # Walk each label's following siblings, accumulating their text
        # until the next label span starts.
        labels = soup.findAll('span',{'class':'label'})
        for labelspan in labels:
            val = labelspan.nextSibling
            value = unicode('')
            while val and not 'label' in defaultGetattr(val,'class'):
                # print("val:%s"%val)
                if not isinstance(val,Comment):
                    value += unicode(val)
                val = val.nextSibling
            label = labelspan.string
            # print("label:%s\nvalue:%s"%(label,value))

            if 'Summary' in label:
                self.setDescription(url,value)

            if 'Rated' in label:
                self.story.setMetadata('rating', stripHTML(value))

            if 'Word count' in label:
                self.story.setMetadata('numWords', stripHTML(value))

            if 'Categories' in label:
                cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
                for cat in cats:
                    self.story.addToList('category',cat.string)

            if 'Characters' in label:
                chars = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=characters'))
                for char in chars:
                    self.story.addToList('characters',char.string)

            if 'Genre' in label:
                genres = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=1')) # XXX
                for genre in genres:
                    self.story.addToList('genre',genre.string)

            if 'Pairing' in label:
                ships = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=4'))
                for ship in ships:
                    self.story.addToList('ships',ship.string)

            if 'Warnings' in label:
                warnings = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=class&type_id=2')) # XXX
                for warning in warnings:
                    self.story.addToList('warnings',warning.string)

            if 'Completed' in label:
                if 'Yes' in stripHTML(value):
                    self.story.setMetadata('status', 'Completed')
                else:
                    self.story.setMetadata('status', 'In-Progress')

            if 'Published' in label:
                self.story.setMetadata('datePublished', makeDate(stripHTML(value), self.dateformat))

            if 'Updated' in label:
                self.story.setMetadata('dateUpdated', makeDate(stripHTML(value), self.dateformat))

        try:
            # Find Series name from series URL.
            a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
            series_name = a.string
            series_url = 'http://'+self.host+'/'+a['href']

            # use BeautifulSoup HTML parser to make everything easier to find.
            seriessoup = self.make_soup(self._fetchUrl(series_url))
            # can't use ^viewstory...$ in case of higher rated stories with javascript href.
            storyas = seriessoup.findAll('a', href=re.compile(r'viewstory.php\?sid=\d+'))
            i=1
            for a in storyas:
                # skip 'report this' and 'TOC' links
                if 'contact.php' not in a['href'] and 'index' not in a['href']:
                    if a['href'] == ('viewstory.php?sid='+self.story.getMetadata('storyId')):
                        self.setSeries(series_name, i)
                        self.story.setMetadata('seriesUrl',series_url)
                        break
                    i+=1
        except:
            # I find it hard to care if the series parsing fails
            pass

    # grab the text for an individual chapter.
    def getChapterText(self, url):
        """Fetch one chapter page and return the story div's content.

        Raises FailedToDownload when the story div is missing.
        """
        logger.debug('Getting chapter text from: %s' % url)

        soup = self.make_soup(self._fetchUrl(url))

        div = soup.find('div', {'id' : 'story'})

        if None == div:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return self.utf8FromSoup(url,div)

View file

@ -982,6 +982,11 @@ strip_text_links:true
## Site dedicated to these categories/characters/ships
extracategories:Blood Ties
[chosentwofanfic.com]
extra_valid_entries:disclaimer
disclaimer_label: Disclaimer
add_to_titlepage_entries:,disclaimer
[fanfic.castletv.net]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -1809,6 +1814,27 @@ readings_label:Readings
## Site dedicated to these categories/characters/ships
extracategories:Star Trek
[www.asexstories.com]
## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
## This site has links to a video site embedded in the text. They are
## not needed, and will be removed if the below property is set to True
strip_text_links:true
[www.bdsmlibrary.com]
## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
extra_valid_entries:eroticatags,size,comments
size_label: Size
comments_label: Comments
eroticatags_label:Erotica Tags
extra_titlepage_entries:size,comments,eroticatags
keep_style_attr: false
[www.dracoandginny.com]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
@ -1927,6 +1953,11 @@ check_next_chapter:false
## this should go in your personal.ini, not defaults.ini.
#is_adult:true
[www.ficsite.com]
## Some sites also require the user to confirm they are adult for
## adult content. Uncomment by removing '#' in front of is_adult.
#is_adult:true
[www.fictionalley.org]
## Some sites do not require a login, but do require the user to
## confirm they are adult for adult content. In commandline version,