Add [base_efiction] and [base_xenforoforum] sections, add base_xenforoforum_adapter, document new options.

2025-12-06 08:52:55 +01:00 · 2015-06-29 13:07:21 -05:00 · 2015-06-29 13:07:21 -05:00 · 4dcfd6e4be
commit 4dcfd6e4be
parent b2c1d485b9
13 changed files with 203 additions and 379 deletions
--- a/calibre-plugin/config.py
+++ b/calibre-plugin/config.py
@ -89,7 +89,7 @@ from calibre_plugins.fanficfare_plugin.dialogs \
            EditTextDialog, IniTextDialog, RejectUrlEntry)
    
 from calibre_plugins.fanficfare_plugin.fanficfare.adapters \
-    import getConfigSections
+    import getSiteSections

 from calibre_plugins.fanficfare_plugin.common_utils \
    import ( KeyboardConfigDialog, PrefsViewerDialog )
@ -922,7 +922,7 @@ class CalibreCoverTab(QWidget):
        
        self.gc_dropdowns = {}

-        sitelist = getConfigSections()
+        sitelist = getSiteSections()
        sitelist.sort()
        sitelist.insert(0,_("Default"))
        for site in sitelist:
--- a/calibre-plugin/fff_util.py
+++ b/calibre-plugin/fff_util.py
@ -23,12 +23,12 @@ def get_fff_personalini():
 def get_fff_config(url,fileform="epub",personalini=None):
    if not personalini:
        personalini = get_fff_personalini()
-    site='unknown'
+    sections=['unknown']
    try:
-        site = adapters.getConfigSectionFor(url)
+        sections = adapters.getConfigSectionsFor(url)
    except Exception as e:
-        logger.debug("Failed trying to get ini config for url(%s): %s, using section [%s] instead"%(url,e,site))
-    configuration = Configuration(site,fileform)
+        logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
+    configuration = Configuration(sections,fileform)
    configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
    configuration.readfp(StringIO(personalini))

--- a/calibre-plugin/plugin-defaults.ini
+++ b/calibre-plugin/plugin-defaults.ini
@ -299,6 +299,13 @@ chapter_title_strip_pattern:^[0-9]+[\.: -]+
 ## "The Beginning" => "1. The Beginning" 
 chapter_title_add_pattern:${index}. ${title}

+## If true, when updating an epub that already has old chapters, new
+## chapters will be marked in the TOC and chapter header by prepending
+## '(new) ' to the chapter title.  So 'The Big Fight' will become
+## '4. (new) The Big Fight' if both mark_new_chapters and
+## add_chapter_numbers are set true.
+mark_new_chapters:false
+
 ## Uses a python template substitution.  The ${title} is the default
 ## title of a new anthology, <series name> in the case of a series, or
 ## the first book title otherwise.  This is only applied to new
@ -330,12 +337,74 @@ sort_ships:false
 ## User-agent
 user_agent:FFF/2.X

+## Added for [base_xenforoforum], but can be used with other sites,
+## too.  Limit the 'description' to the first X *characters*
+## collected.  Character count includes HTML tags, so it can be
+## non-intuitive.
+#description_limit:1000
+
+[base_efiction]
 ## At the time of writing, eFiction Base adapters allow downloading
 ## the whole story in bulk using the 'Print' feature. If 'bulk_load'
 ## is set to 'true', both metadata and chapters can be loaded in one
 ## step
 bulk_load:true

+[base_xenforoforum]
+## Currently only forums.spacebattles.com and forums.sufficientvelocity.com
+
+cover_exclusion_regexp:/clear.png
+
+## I saw lots of chapters named simply '1.1' etc during testing.
+strip_chapter_numbers:false
+
+## Copy title to tagsfromtitle for parsing tags.
+add_to_extra_valid_entries:,tagsfromtitle
+
+## '.NOREPL' tells the system to *not* apply title's
+## in/exclude/replace_metadata -- Only works on include_in_ lines.
+include_in_tagsfromtitle:title.NOREPL
+
+tagsfromtitle_label:Tags from Title
+
+## might want to do this, maybe not.  Will often include category, but
+## also often include non-category stuff.
+# include_in_category:tagsfromtitle 
+
+include_metadata_pre:
+# only keep tagsfromtitle with ( or [ in.
+ tagsfromtitle=~[\[\(]
+ 
+replace_metadata:
+# remove anything outside () or []
+ tagsfromtitle=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
+# remove () []
+ tagsfromtitle=>[\(\)\[\]]=>
+# change (spaces)slash(spaces) to comma
+ tagsfromtitle=> */ *=>,
+ tagsfromtitle=> x =>,
+ 
+# remove [] or () blocks and leading/trailing spaces
+ title=> *[\(\[]([^\]\)]+)[\)\]] *=>
+# remove 'Thread' and the next word, usually "Thread 2", "Thread
+# four", "Thread iv", etc
+ title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
+
+extra_titlepage_entries: tagsfromtitle
+
+## '.SPLIT' tells the system to split by ','
+add_to_include_subject_tags:,tagsfromtitle.SPLIT
+
+## base_xenforoforum reads Published and Updated datetimes from
+## Threadmarks if used, or from the posted & updated times of the
+## 'first' post if no threadmarks.
+datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
+dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
+
+## Only take the first X characters of the 'first' post to use as
+## the description.
+description_limit:500
+
 ## Each output format has a section that overrides [defaults]
 [html]

@ -1041,91 +1110,11 @@ extra_valid_entries:size
 # don't show twitter icon.
 cover_exclusion_regexp:/res/css/bir.png

-[forums.sufficientvelocity.com]
-
-cover_exclusion_regexp:/clear.png
-
-strip_chapter_numbers:false
-
-add_to_extra_valid_entries:,titletags
-# '.NOREPL' tells the system to *not* apply title's
-# in/exclude/replace_metadata -- Only works on include_in_ lines.
-include_in_titletags:title.NOREPL
-
-## might want to do this, maybe not.  Will often include category, but
-## also often include non-category stuff.
-# include_in_category:titletags 
-
-include_metadata_pre:
-# only keep titletags with ( or [ in.
- titletags=~[\[\(]
- 
-replace_metadata:
-# remove anything outside () or []
- titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
-# remove () []
- titletags=>[\(\)\[\]]=>
-# change (spaces)slash(spaces) to comma
- titletags=> */ *=>,
- titletags=> x =>,
-# remove [] or () blocks and leading/trailing spaces
- title=> *[\(\[]([^\]\)]+)[\)\]] *=>
-# remove 'Thread' and the next word, usually "Thread 2", "Thread
-# four", "Thread iv", etc
- title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
-
-extra_titlepage_entries: titletags
-
-## '.SPLIT' tells the system to split by ','
-add_to_include_subject_tags:,titletags.SPLIT
-
-datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
-dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
-
-description_limit:500
-
 [forums.spacebattles.com]
+## see [base_xenforoforum]

-cover_exclusion_regexp:/clear.png
-
-strip_chapter_numbers:false
-
-add_to_extra_valid_entries:,titletags
-# '.NOREPL' tells the system to *not* apply title's
-# in/exclude/replace_metadata -- Only works on include_in_ lines.
-include_in_titletags:title.NOREPL
-
-## might want to do this, maybe not.  Will often include category, but
-## also often include non-category stuff.
-# include_in_category:titletags 
-
-include_metadata_pre:
-# only keep titletags with ( or [ in.
- titletags=~[\[\(]
- 
-replace_metadata:
-# remove anything outside () or []
- titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
-# remove () []
- titletags=>[\(\)\[\]]=>
-# change (spaces)slash(spaces) to comma
- titletags=> */ *=>,
- titletags=> x =>,
-# remove [] or () blocks and leading/trailing spaces
- title=> *[\(\[]([^\]\)]+)[\)\]] *=>
-# remove 'Thread' and the next word, usually "Thread 2", "Thread
-# four", "Thread iv", etc
- title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
-
-extra_titlepage_entries: titletags
-
-## '.SPLIT' tells the system to split by ','
-add_to_include_subject_tags:,titletags.SPLIT
-
-datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
-dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
-
-description_limit:500
+[forums.sufficientvelocity.com]
+## see [base_xenforoforum]

 [grangerenchanted.com]
 ## Some sites require login (or login for some rated stories) The
--- a/fanficfare/adapters/init.py
+++ b/fanficfare/adapters/init.py
@ -196,14 +196,24 @@ def getAdapter(config,url,anyurl=False):
    # No adapter found.
    raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )

-def getConfigSections():
+def getSiteSections():
+    # doesn't include base sections. Sections rather than site domain names because of squidge/peja
    return [cls.getConfigSection() for cls in __class_list]

+def getConfigSections():
+    # does include base sections.
+    sections = set()
+    for cls in __class_list:
+        sections.update(cls.getConfigSections())
+    return sections
+
 def get_bulk_load_sites():
    # for now, all eFiction Base adapters are assumed to allow bulk_load.
-    return [cls.getConfigSection().replace('www.','') for cls in
-            filter( lambda x : issubclass(x,base_efiction_adapter.BaseEfictionAdapter),
-                    __class_list)]
+    sections = set()
+    for cls in filter( lambda x : issubclass(x,base_efiction_adapter.BaseEfictionAdapter),
+                       __class_list):
+        sections.update( [ x.replace('www.','') for x in cls.getConfigSections() ] )
+    return sections

 def getSiteExamples():
    l=[]
@ -211,10 +221,10 @@ def getSiteExamples():
        l.append((cls.getConfigSection(),cls.getSiteExampleURLs().split()))
    return l

-def getConfigSectionFor(url):
+def getConfigSectionsFor(url):
    (cls,fixedurl) = getClassFor(url)
    if cls:
-        return cls.getConfigSection()
+        return cls.getConfigSections()

    # No adapter found.
    raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
--- a/fanficfare/adapters/adapter_forumsspacebattlescom.py
+++ b/fanficfare/adapters/adapter_forumsspacebattlescom.py
@ -24,47 +24,18 @@ import urllib2
 from ..htmlcleanup import stripHTML
 from .. import exceptions as exceptions

-from base_adapter import BaseSiteAdapter,  makeDate
+from base_xenforoforum_adapter import BaseXenForoForumAdapter

 def getClass():
    return ForumsSpacebattlesComAdapter

-logger = logging.getLogger(__name__)
-
-class ForumsSpacebattlesComAdapter(BaseSiteAdapter):
+class ForumsSpacebattlesComAdapter(BaseXenForoForumAdapter):

    def __init__(self, config, url):
-        BaseSiteAdapter.__init__(self, config, url)
-
-        self.decode = ["utf8",
-                       "Windows-1252"] # 1252 is a superset of iso-8859-1.
-                               # Most sites that claim to be
-                               # iso-8859-1 (and some that claim to be
-                               # utf8) are really windows-1252.
-							   
-							   
-        # get storyId from url--url validation guarantees query is only sid=1234
-        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
-        
-        
-        # get storyId from url--url validation guarantees query correct
-        m = re.match(self.getSiteURLPattern(),url)
-        if m:
-            self.story.setMetadata('storyId',m.group('id'))
-
-            # normalized story URL.
-            self._setURL(self.getURLPrefix() + '/'+m.group('tp')+'/'+self.story.getMetadata('storyId')+'/')
-        else:
-            raise exceptions.InvalidStoryURL(url,
-                                             self.getSiteDomain(),
-                                             self.getSiteExampleURLs())
+        BaseXenForoForumAdapter.__init__(self, config, url)
        
        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','fsb')
-
-        # The date format will vary from site to site.
-        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
-        self.dateformat = "%b %d, %Y at %I:%M %p"
            
    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
@ -73,155 +44,5 @@ class ForumsSpacebattlesComAdapter(BaseSiteAdapter):

    @classmethod
    def getURLPrefix(cls):
-        # The site domain.  Does have www here, if it uses it.
        return 'https://' + cls.getSiteDomain() 

-    @classmethod
-    def getSiteExampleURLs(cls):
-        return cls.getURLPrefix()+"/threads/some-story-name.123456/"
-
-    def getSiteURLPattern(self):
-        return r"https?://"+re.escape(self.getSiteDomain())+r"/(?P<tp>threads|posts)/(.+\.)?(?P<id>\d+)/"
-        
-    def use_pagecache(self):
-        '''
-        adapters that will work with the page cache need to implement
-        this and change it to True.
-        '''
-        return True
-
-    ## Getting the chapter list and the meta data, plus 'is adult' checking.
-    def extractChapterUrlsAndMetadata(self):
-
-        useurl = self.url
-        logger.info("url: "+useurl)
-
-        try:
-            (data,opened) = self._fetchUrlOpened(useurl)
-            useurl = opened.geturl()
-            logger.info("use useurl: "+useurl)
-        except urllib2.HTTPError, e:
-            if e.code == 404:
-                raise exceptions.StoryDoesNotExist(self.url)
-            else:
-                raise e
-
-        # use BeautifulSoup HTML parser to make everything easier to find.
-        soup = self.make_soup(data)
-
-        a = soup.find('h3',{'class':'userText'}).find('a')
-        self.story.addToList('authorId',a['href'].split('/')[1])
-        self.story.addToList('authorUrl',self.getURLPrefix()+'/'+a['href'])
-        self.story.addToList('author',a.text)
-
-        h1 = soup.find('div',{'class':'titleBar'}).h1
-        self.story.setMetadata('title',stripHTML(h1))
-        
-        if '#' in useurl:
-            anchorid = useurl.split('#')[1]
-            soup = soup.find('li',id=anchorid)
-        else:
-            # try threadmarks if no '#' in , require at least 2.
-            threadmarksa = soup.find('a',{'class':'threadmarksTrigger'})
-            if threadmarksa:
-                soupmarks = self.make_soup(self._fetchUrl(self.getURLPrefix()+'/'+threadmarksa['href']))
-                markas = soupmarks.find('ol',{'class':'overlayScroll'}).find_all('a')
-                if len(markas) > 1:
-                    for (atag,url,name) in [ (x,x['href'],stripHTML(x)) for x in markas ]:
-                        date = self.make_date(atag.find_next_sibling('div',{'class':'extra'}))
-                        if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'):
-                            self.story.setMetadata('datePublished', date)
-                        if not self.story.getMetadataRaw('dateUpdated') or date > self.story.getMetadataRaw('dateUpdated'):
-                            self.story.setMetadata('dateUpdated', date)
-                            
-                        self.chapterUrls.append((name,self.getURLPrefix()+'/'+url))
-                        
-            soup = soup.find('li',{'class':'message'}) # limit first post for date stuff below. ('#' posts above)
-                
-        # Now go hunting for the 'chapter list'.
-        bq = soup.find('blockquote') # assume first posting contains TOC urls.
-        
-        bq.name='div'
-
-        for iframe in bq.find_all('iframe'):
-            iframe.extract() # calibre book reader & editor don't like iframes to youtube.
-
-        for qdiv in bq.find_all('div',{'class':'quoteExpand'}):
-            qdiv.extract() # Remove <div class="quoteExpand">click to expand</div>
-            
-        self.setDescription(useurl,bq)
-
-        # otherwise, use first post links--include first post since that's 
-        if not self.chapterUrls:
-            self.chapterUrls.append(("First Post",useurl))
-            for (url,name) in [ (x['href'],stripHTML(x)) for x in bq.find_all('a') ]:
-                logger.debug("found chapurl:%s"%url)
-                if not url.startswith('http'):
-                    url = self.getURLPrefix()+'/'+url
-    
-                if (url.startswith(self.getURLPrefix()) or url.startswith('http://'+self.getSiteDomain())) and ('/posts/' in url or '/threads/' in url):
-                    # brute force way to deal with SB's http->https change when hardcoded http urls.
-                    url = url.replace('http://'+self.getSiteDomain(),self.getURLPrefix())
-                    logger.debug("used chapurl:%s"%(url))
-                    self.chapterUrls.append((name,url))
-                    if url == useurl and 'First Post' == self.chapterUrls[0][0]:
-                        # remove "First Post" if included in list.
-                        logger.debug("delete dup 'First Post' chapter: %s %s"%self.chapterUrls[0])
-                        del self.chapterUrls[0]
-                        
-            # Didn't use threadmarks, so take created/updated dates
-            # from the 'first' posting created and updated.
-            date = self.make_date(soup.find('a',{'class':'datePermalink'}))
-            if date:
-                self.story.setMetadata('datePublished', date)
-                self.story.setMetadata('dateUpdated', date) # updated overwritten below if found.
-        
-            date = self.make_date(soup.find('div',{'class':'editDate'}))
-            if date:
-                self.story.setMetadata('dateUpdated', date) 
-            
-        self.story.setMetadata('numChapters',len(self.chapterUrls))
-
-    def make_date(self,parenttag): # forums use a BS thing where dates
-                                  # can appear different if recent.
-        datestr=None
-        try:
-            datetag = parenttag.find('span',{'class':'DateTime'})
-            if datetag:
-                datestr = datetag['title']
-            else:
-                datetag = parenttag.find('abbr',{'class':'DateTime'})
-                if datetag:
-                    datestr="%s at %s"%(datetag['data-datestring'],datetag['data-timestring'])
-            # Apr 24, 2015 at 4:39 AM
-            # May 1, 2015 at 5:47 AM
-            datestr = re.sub(r' (\d[^\d])',r' 0\1',datestr) # add leading 0 for single digit day & hours.
-            return makeDate(datestr, self.dateformat)
-        except:
-            logger.debug('No date found in %s'%parenttag)
-            return None
-        
-    # grab the text for an individual chapter.
-    def getChapterText(self, url):
-        logger.debug('Getting chapter text from: %s' % url)
-
-        (data,opened) = self._fetchUrlOpened(url)
-        url = opened.geturl()
-        logger.debug("chapter URL redirected to: %s"%url)
-
-        soup = self.make_soup(data)
-
-        if '#' in url:
-            anchorid = url.split('#')[1]
-            soup = soup.find('li',id=anchorid)
-        bq = soup.find('blockquote')
-
-        bq.name='div'
-
-        for iframe in bq.find_all('iframe'):
-            iframe.extract() # calibre book reader & editor don't like iframes to youtube.
-
-        for qdiv in bq.find_all('div',{'class':'quoteExpand'}):
-            qdiv.extract() # Remove <div class="quoteExpand">click to expand</div>
-
-        return self.utf8FromSoup(url,bq)
--- a/fanficfare/adapters/adapter_forumssufficientvelocitycom.py
+++ b/fanficfare/adapters/adapter_forumssufficientvelocitycom.py
@ -15,15 +15,15 @@
 # limitations under the License.
 #

-from adapter_forumsspacebattlescom import ForumsSpacebattlesComAdapter
+from base_xenforoforum_adapter import BaseXenForoForumAdapter

 def getClass():
    return ForumsSufficientVelocityComAdapter

-class ForumsSufficientVelocityComAdapter(ForumsSpacebattlesComAdapter):
+class ForumsSufficientVelocityComAdapter(BaseXenForoForumAdapter):

    def __init__(self, config, url):
-        ForumsSpacebattlesComAdapter.__init__(self, config, url)
+        BaseXenForoForumAdapter.__init__(self, config, url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','fsv')
@ -35,5 +35,4 @@ class ForumsSufficientVelocityComAdapter(ForumsSpacebattlesComAdapter):

    @classmethod
    def getURLPrefix(cls):
-        # The site domain.  Does have www here, if it uses it.
        return 'http://' + cls.getSiteDomain()
--- a/fanficfare/adapters/base_adapter.py
+++ b/fanficfare/adapters/base_adapter.py
@ -452,6 +452,11 @@ class BaseSiteAdapter(Configurable):
        "Only needs to be overriden if != site domain."
        return cls.getSiteDomain()
    
+    @classmethod
+    def getConfigSections(cls):
+        "Only needs to be overridden if it has additional ini sections."
+        return [cls.getConfigSection()]
+    
    @classmethod
    def stripURLParameters(cls,url):
        "Only needs to be overriden if URL contains more than one parameter"
--- a/fanficfare/adapters/base_efiction_adapter.py
+++ b/fanficfare/adapters/base_efiction_adapter.py
@ -70,6 +70,11 @@ class BaseEfictionAdapter(BaseSiteAdapter):
        self.triedAcceptWarnings = False
        self.username = "NoneGiven" # if left empty, site doesn't return any message at all.

+    @classmethod
+    def getConfigSections(cls):
+        "Adds the shared [base_efiction] ini section ahead of the site's own section."
+        return ['base_efiction',cls.getConfigSection()]
+    
    @classmethod
    def getAcceptDomains(cls):
        return [cls.getSiteDomain(),'www.' + cls.getSiteDomain()]
--- a/fanficfare/cli.py
+++ b/fanficfare/cli.py
@ -184,7 +184,7 @@ def do_download(arg,
        url = arg
        
    try:
-        configuration = Configuration(adapters.getConfigSectionFor(url), options.format)
+        configuration = Configuration(adapters.getConfigSectionsFor(url), options.format)
    except exceptions.UnknownSite, e:
        if options.list or options.normalize:
            # list for page doesn't have to be a supported site.
--- a/fanficfare/configurable.py
+++ b/fanficfare/configurable.py
@ -77,10 +77,12 @@ formatsections = ['html','txt','epub','mobi']
 othersections = ['defaults','overrides']

 def get_valid_sections():
-    sites = adapters.getConfigSections()
+    sites = adapters.getConfigSections() 
    sitesections = list(othersections)
    for section in sites:
        sitesections.append(section)
+        # also allows [www.base_efiction] and [www.base_xenforoforum]. Not
+        # likely to matter.
        if section.startswith('www.'):
            # add w/o www if has www
            sitesections.append(section[4:])
@ -130,6 +132,7 @@ def get_valid_set_options():
               'replace_hr':(None,None,boollist),
               'sort_ships':(None,None,boollist),
               'strip_chapter_numbers':(None,None,boollist),
+               'mark_new_chapters':(None,None,boollist),
               'titlepage_use_table':(None,None,boollist),
               
               'use_ssl_unverified_context':(None,None,boollist),
@ -212,6 +215,7 @@ def get_valid_keywords():
                 'chapter_start',
                 'chapter_title_add_pattern',
                 'chapter_title_strip_pattern',
+                 'mark_new_chapters',
                 'check_next_chapter',
                 'skip_author_cover',
                 'collect_series',
@ -224,6 +228,7 @@ def get_valid_keywords():
                 'datePublished_format',
                 'dateUpdated_format',
                 'default_cover_image',
+                 'description_limit',
                 'do_update_hook',
                 'exclude_notes',
                 'extra_logpage_entries',
@ -331,7 +336,8 @@ def make_generate_cover_settings(param):

 class Configuration(ConfigParser.SafeConfigParser):

-    def __init__(self, site, fileform):
+    def __init__(self, sections, fileform):
+        site = sections[-1] # last section is site DN.
        ConfigParser.SafeConfigParser.__init__(self)

        self.linenos=dict() # key by section or section,key -> lineno
@ -339,6 +345,11 @@ class Configuration(ConfigParser.SafeConfigParser):
        ## [injected] section has even less priority than [defaults]
        self.sectionslist = ['defaults','injected']

+        ## add other sections (not including site DN) after defaults,
+        ## but before site-specific.
+        for section in sections[:-1]:
+            self.addConfigSection(section)
+        
        if site.startswith("www."):
            sitewith = site
            sitewithout = site.replace("www.","")
@ -348,8 +359,13 @@ class Configuration(ConfigParser.SafeConfigParser):
        
        self.addConfigSection(sitewith)
        self.addConfigSection(sitewithout)
+        
        if fileform:
            self.addConfigSection(fileform)
+            ## add other sections:fileform (not including site DN)
+            ## after fileform, but before site-specific:fileform.
+            for section in sections[:-1]:
+                self.addConfigSection(section+":"+fileform)
            self.addConfigSection(sitewith+":"+fileform)
            self.addConfigSection(sitewithout+":"+fileform)
        self.addConfigSection("overrides")
--- a/fanficfare/defaults.ini
+++ b/fanficfare/defaults.ini
@ -306,6 +306,13 @@ chapter_title_strip_pattern:^[0-9]+[\.: -]+
 ## "The Beginning" => "1. The Beginning" 
 chapter_title_add_pattern:${index}. ${title}

+## If true, when updating an epub that already has old chapters, new
+## chapters will be marked in the TOC and chapter header by prepending
+## '(new) ' to the chapter title.  So 'The Big Fight' will become
+## '4. (new) The Big Fight' if both mark_new_chapters and
+## add_chapter_numbers are set true.
+mark_new_chapters:false
+
 ## Reorder ships so b/a and c/b/a become a/b and a/b/c. Only separates
 ## on '/', so use replace_metadata to change separator first if
 ## needed. Something like: ships=>[ ]*(/|&amp;|&)[ ]*=>/ You can use
@ -327,12 +334,74 @@ sort_ships:false
 ## User-agent
 user_agent:FFF/2.X

+## Added for [base_xenforoforum], but can be used with other sites,
+## too.  Limit the 'description' to the first X *characters*
+## collected.  Character count includes HTML tags, so it can be
+## non-intuitive.
+#description_limit:1000
+
+[base_efiction]
 ## At the time of writing, eFiction Base adapters allow downloading
 ## the whole story in bulk using the 'Print' feature. If 'bulk_load'
 ## is set to 'true', both metadata and chapters can be loaded in one
 ## step
 bulk_load:true

+[base_xenforoforum]
+## Currently only forums.spacebattles.com and forums.sufficientvelocity.com
+
+cover_exclusion_regexp:/clear.png
+
+## I saw lots of chapters named simply '1.1' etc during testing.
+strip_chapter_numbers:false
+
+## Copy title to tagsfromtitle for parsing tags.
+add_to_extra_valid_entries:,tagsfromtitle
+
+## '.NOREPL' tells the system to *not* apply title's
+## in/exclude/replace_metadata -- Only works on include_in_ lines.
+include_in_tagsfromtitle:title.NOREPL
+
+tagsfromtitle_label:Tags from Title
+
+## might want to do this, maybe not.  Will often include category, but
+## also often include non-category stuff.
+# include_in_category:tagsfromtitle 
+
+include_metadata_pre:
+# only keep tagsfromtitle with ( or [ in.
+ tagsfromtitle=~[\[\(]
+ 
+replace_metadata:
+# remove anything outside () or []
+ tagsfromtitle=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
+# remove () []
+ tagsfromtitle=>[\(\)\[\]]=>
+# change (spaces)slash(spaces) to comma
+ tagsfromtitle=> */ *=>,
+ tagsfromtitle=> x =>,
+ 
+# remove [] or () blocks and leading/trailing spaces
+ title=> *[\(\[]([^\]\)]+)[\)\]] *=>
+# remove 'Thread' and the next word, usually "Thread 2", "Thread
+# four", "Thread iv", etc
+ title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
+
+extra_titlepage_entries: tagsfromtitle
+
+## '.SPLIT' tells the system to split by ','
+add_to_include_subject_tags:,tagsfromtitle.SPLIT
+
+## base_xenforoforum reads Published and Updated datetimes from
+## Threadmarks if used, or from the posted & updated times of the
+## 'first' post if no threadmarks.
+datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
+dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
+
+## Only take the first X characters of the 'first' post to use as
+## the description.
+description_limit:500
+
 ## Each output format has a section that overrides [defaults]
 [html]

@ -1027,101 +1096,11 @@ extra_valid_entries:size
 # don't show twitter icon.
 cover_exclusion_regexp:/res/css/bir.png

-[forums.sufficientvelocity.com]
-
-cover_exclusion_regexp:/clear.png
-
-add_to_extratags:,ForumFic
-
-strip_chapter_numbers:false
-
-# true, false, threadmarksonly
-add_chapter_dates:false
-
-add_to_extra_valid_entries:,titletags
-# '.NOREPL' tells the system to *not* apply title's
-# in/exclude/replace_metadata -- Only works on include_in_ lines.
-include_in_titletags:title.NOREPL
-
-## might want to do this, maybe not.  Will often include category, but
-## also often include non-category stuff.
-# include_in_category:titletags 
-
-include_metadata_pre:
-# only keep titletags with ( or [ in.
- titletags=~[\[\(]
- 
-replace_metadata:
-# remove 'Thread' and the next word, usually "Thread 2", "Thread
-# four", "Thread iv", etc
- title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
-# remove anything outside () or []
- titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
-# remove () []
- titletags=>[\(\)\[\]]=>
-# change (spaces)slash(spaces) to comma
- titletags=> */ *=>,
- titletags=> x =>,
-# remove [] or () blocks and leading/trailing spaces
- title=> *[\(\[]([^\]\)]+)[\)\]] *=>
-
-extra_titlepage_entries: titletags
-
-## '.SPLIT' tells the system to split by ','
-add_to_include_subject_tags:,titletags.SPLIT
-
-datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
-dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
-
-description_limit:500
-
 [forums.spacebattles.com]
+## see [base_xenforoforum]

-cover_exclusion_regexp:/clear.png
-
-add_to_extratags:,ForumFic
-
-# true, false, threadmarksonly
-add_chapter_dates:false
-
-strip_chapter_numbers:false
-
-add_to_extra_valid_entries:,titletags
-# '.NOREPL' tells the system to *not* apply title's
-# in/exclude/replace_metadata -- Only works on include_in_ lines.
-include_in_titletags:title.NOREPL
-
-## might want to do this, maybe not.  Will often include category, but
-## also often include non-category stuff.
-# include_in_category:titletags 
-
-include_metadata_pre:
-# only keep titletags with ( or [ in.
- titletags=~[\[\(]
- 
-replace_metadata:
-# remove 'Thread' and the next word, usually "Thread 2", "Thread
-# four", "Thread iv", etc
- title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
-# remove anything outside () or []
- titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
-# remove () []
- titletags=>[\(\)\[\]]=>
-# change (spaces)slash(spaces) to comma
- titletags=> */ *=>,
- titletags=> x =>,
-# remove [] or () blocks and leading/trailing spaces
- title=> *[\(\[]([^\]\)]+)[\)\]] *=>
-
-extra_titlepage_entries: titletags
-
-## '.SPLIT' tells the system to split by ','
-add_to_include_subject_tags:,titletags.SPLIT
-
-datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
-dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
-
-description_limit:500
+[forums.sufficientvelocity.com]
+## see [base_xenforoforum]

 [grangerenchanted.com]
 ## Some sites require login (or login for some rated stories) The
--- a/fanficfare/story.py
+++ b/fanficfare/story.py
@ -412,7 +412,7 @@ class Story(Configurable):
        except:
            self.metadata = {'version':'4.4'}
        self.in_ex_cludes = {}
-        self.chapters = [] # chapters will be tuples of (title,html)
+        self.chapters = [] # chapters will be tuples of (url,title,html)
        self.imgurls = []
        self.imgtuples = []

--- a/webservice/main.py
+++ b/webservice/main.py
@ -62,7 +62,7 @@ class UserConfigServer(webapp2.RequestHandler):
    
    def getUserConfig(self,user,url,fileformat):

-        configuration = Configuration(adapters.getConfigSectionFor(url),fileformat)
+        configuration = Configuration(adapters.getConfigSectionsFor(url),fileformat)
        
        logging.debug('reading defaults.ini config file')
        configuration.read('fanficfare/defaults.ini')