diff --git a/calibre-plugin/config.py b/calibre-plugin/config.py index 937c2a5a..1686d075 100644 --- a/calibre-plugin/config.py +++ b/calibre-plugin/config.py @@ -89,7 +89,7 @@ from calibre_plugins.fanficfare_plugin.dialogs \ EditTextDialog, IniTextDialog, RejectUrlEntry) from calibre_plugins.fanficfare_plugin.fanficfare.adapters \ - import getConfigSections + import getSiteSections from calibre_plugins.fanficfare_plugin.common_utils \ import ( KeyboardConfigDialog, PrefsViewerDialog ) @@ -922,7 +922,7 @@ class CalibreCoverTab(QWidget): self.gc_dropdowns = {} - sitelist = getConfigSections() + sitelist = getSiteSections() sitelist.sort() sitelist.insert(0,_("Default")) for site in sitelist: diff --git a/calibre-plugin/fff_util.py b/calibre-plugin/fff_util.py index b1e8a4dc..0ba7227d 100644 --- a/calibre-plugin/fff_util.py +++ b/calibre-plugin/fff_util.py @@ -23,12 +23,12 @@ def get_fff_personalini(): def get_fff_config(url,fileform="epub",personalini=None): if not personalini: personalini = get_fff_personalini() - site='unknown' + sections=['unknown'] try: - site = adapters.getConfigSectionFor(url) + sections = adapters.getConfigSectionsFor(url) except Exception as e: - logger.debug("Failed trying to get ini config for url(%s): %s, using section [%s] instead"%(url,e,site)) - configuration = Configuration(site,fileform) + logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections)) + configuration = Configuration(sections,fileform) configuration.readfp(StringIO(get_resources("plugin-defaults.ini"))) configuration.readfp(StringIO(personalini)) diff --git a/calibre-plugin/plugin-defaults.ini b/calibre-plugin/plugin-defaults.ini index d542eed4..329eac53 100644 --- a/calibre-plugin/plugin-defaults.ini +++ b/calibre-plugin/plugin-defaults.ini @@ -299,6 +299,13 @@ chapter_title_strip_pattern:^[0-9]+[\.: -]+ ## "The Beginning" => "1. The Beginning" chapter_title_add_pattern:${index}. 
${title} +## If true, when updating an epub that already has old chapters, new +## chapters will be marked in the TOC and chapter header by prepending +## '(new) ' to the chapter title. So 'The Big Fight' will become +## '4. (new) The Big Fight' if both mark_new_chapters and +## add_chapter_numbers are set true. +mark_new_chapters:false + ## Uses a python template substitution. The ${title} is the default ## title of a new anthology, in the case of a series, or ## the first book title otherwise. This is only applied to new @@ -330,12 +337,74 @@ sort_ships:false ## User-agent user_agent:FFF/2.X +## Added for [base_xenforoforum], but can be used with other sites, +## too. Limit the 'description' to the first X *characters* +## collected. Character count includes HTML tags, so it can be +## non-intuitive. +#description_limit:1000 + +[base_efiction] ## At the time of writing, eFiction Base adapters allow downloading ## the whole story in bulk using the 'Print' feature. If 'bulk_load' ## is set to 'true', both metadata and chapters can be loaded in one ## step bulk_load:true +[base_xenforoforum] +## Currently only forums.spacebattles.com and forums.sufficientvelocity.com + +cover_exclusion_regexp:/clear.png + +## I saw lots of chapters name simply '1.1' etc during testing. +strip_chapter_numbers:false + +## Copy title to tagsfromtitle for parsing tags. +add_to_extra_valid_entries:,tagsfromtitle + +## '.NOREPL' tells the system to *not* apply title's +## in/exclude/replace_metadata -- Only works on include_in_ lines. +include_in_tagsfromtitle:title.NOREPL + +tagsfromtitle_label:Tags from Title + +## might want to do this, maybe not. Will often include category, but +## also often include non-category stuff. +# include_in_category:tagsfromtitle + +include_metadata_pre: +# only keep tagsfromtitle with ( or [ in. 
+ tagsfromtitle=~[\[\(] + +replace_metadata: +# remove anything outside () or [] + tagsfromtitle=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1 +# remove () [] + tagsfromtitle=>[\(\)\[\]]=> +# change (spaces)slash(spaces) to comma + tagsfromtitle=> */ *=>, + tagsfromtitle=> x =>, + +# remove [] or () blocks and leading/trailing spaces + title=> *[\(\[]([^\]\)]+)[\)\]] *=> +# remove 'Thread' and the next word, usually "Thread 2", "Thread +# four", "Thread iv", etc + title=>[-: ]*[Tt]hread [^ ]+[-: ]*=> + +extra_titlepage_entries: tagsfromtitle + +## '.SPLIT' tells the system to split by ',' +add_to_include_subject_tags:,tagsfromtitle.SPLIT + +## base_xenforoforum reads Published and Updated datetimes from +## Threadmarks if used, or from the posted & updated times of the +## 'first' post if no threadmarks. +datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S +dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S + +## Only take the first X characters of the 'first' post to use as +## the description. +description_limit:500 + ## Each output format has a section that overrides [defaults] [html] @@ -1041,91 +1110,11 @@ extra_valid_entries:size # don't show twitter icon. cover_exclusion_regexp:/res/css/bir.png -[forums.sufficientvelocity.com] - -cover_exclusion_regexp:/clear.png - -strip_chapter_numbers:false - -add_to_extra_valid_entries:,titletags -# '.NOREPL' tells the system to *not* apply title's -# in/exclude/replace_metadata -- Only works on include_in_ lines. -include_in_titletags:title.NOREPL - -## might want to do this, maybe not. Will often include category, but -## also often include non-category stuff. -# include_in_category:titletags - -include_metadata_pre: -# only keep titletags with ( or [ in. 
- titletags=~[\[\(] - -replace_metadata: -# remove anything outside () or [] - titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1 -# remove () [] - titletags=>[\(\)\[\]]=> -# change (spaces)slash(spaces) to comma - titletags=> */ *=>, - titletags=> x =>, -# remove [] or () blocks and leading/trailing spaces - title=> *[\(\[]([^\]\)]+)[\)\]] *=> -# remove 'Thread' and the next word, usually "Thread 2", "Thread -# four", "Thread iv", etc - title=>[-: ]*[Tt]hread [^ ]+[-: ]*=> - -extra_titlepage_entries: titletags - -## '.SPLIT' tells the system to split by ',' -add_to_include_subject_tags:,titletags.SPLIT - -datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S -dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S - -description_limit:500 - [forums.spacebattles.com] +## see [base_xenforoforum] -cover_exclusion_regexp:/clear.png - -strip_chapter_numbers:false - -add_to_extra_valid_entries:,titletags -# '.NOREPL' tells the system to *not* apply title's -# in/exclude/replace_metadata -- Only works on include_in_ lines. -include_in_titletags:title.NOREPL - -## might want to do this, maybe not. Will often include category, but -## also often include non-category stuff. -# include_in_category:titletags - -include_metadata_pre: -# only keep titletags with ( or [ in. 
- titletags=~[\[\(] - -replace_metadata: -# remove anything outside () or [] - titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1 -# remove () [] - titletags=>[\(\)\[\]]=> -# change (spaces)slash(spaces) to comma - titletags=> */ *=>, - titletags=> x =>, -# remove [] or () blocks and leading/trailing spaces - title=> *[\(\[]([^\]\)]+)[\)\]] *=> -# remove 'Thread' and the next word, usually "Thread 2", "Thread -# four", "Thread iv", etc - title=>[-: ]*[Tt]hread [^ ]+[-: ]*=> - -extra_titlepage_entries: titletags - -## '.SPLIT' tells the system to split by ',' -add_to_include_subject_tags:,titletags.SPLIT - -datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S -dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S - -description_limit:500 +[forums.sufficientvelocity.com] +## see [base_xenforoforum] [grangerenchanted.com] ## Some sites require login (or login for some rated stories) The diff --git a/fanficfare/adapters/__init__.py b/fanficfare/adapters/__init__.py index 301cd9c1..15110b65 100644 --- a/fanficfare/adapters/__init__.py +++ b/fanficfare/adapters/__init__.py @@ -196,14 +196,24 @@ def getAdapter(config,url,anyurl=False): # No adapter found. raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] ) -def getConfigSections(): +def getSiteSections(): + # doesn't include base sections. Sections rather than site DNS because of squidge/peja return [cls.getConfigSection() for cls in __class_list] +def getConfigSections(): + # does include base sections. + sections = set() + for cls in __class_list: + sections.update(cls.getConfigSections()) + return sections + def get_bulk_load_sites(): # for now, all eFiction Base adapters are assumed to allow bulk_load. 
- return [cls.getConfigSection().replace('www.','') for cls in - filter( lambda x : issubclass(x,base_efiction_adapter.BaseEfictionAdapter), - __class_list)] + sections = set() + for cls in filter( lambda x : issubclass(x,base_efiction_adapter.BaseEfictionAdapter), + __class_list): + sections.update( [ x.replace('www.','') for x in cls.getConfigSections() ] ) + return sections def getSiteExamples(): l=[] @@ -211,10 +221,10 @@ def getSiteExamples(): l.append((cls.getConfigSection(),cls.getSiteExampleURLs().split())) return l -def getConfigSectionFor(url): +def getConfigSectionsFor(url): (cls,fixedurl) = getClassFor(url) if cls: - return cls.getConfigSection() + return cls.getConfigSections() # No adapter found. raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] ) diff --git a/fanficfare/adapters/adapter_forumsspacebattlescom.py b/fanficfare/adapters/adapter_forumsspacebattlescom.py index 839c2cd5..0a58c65d 100644 --- a/fanficfare/adapters/adapter_forumsspacebattlescom.py +++ b/fanficfare/adapters/adapter_forumsspacebattlescom.py @@ -24,47 +24,18 @@ import urllib2 from ..htmlcleanup import stripHTML from .. import exceptions as exceptions -from base_adapter import BaseSiteAdapter, makeDate +from base_xenforoforum_adapter import BaseXenForoForumAdapter def getClass(): return ForumsSpacebattlesComAdapter -logger = logging.getLogger(__name__) - -class ForumsSpacebattlesComAdapter(BaseSiteAdapter): +class ForumsSpacebattlesComAdapter(BaseXenForoForumAdapter): def __init__(self, config, url): - BaseSiteAdapter.__init__(self, config, url) - - self.decode = ["utf8", - "Windows-1252"] # 1252 is a superset of iso-8859-1. - # Most sites that claim to be - # iso-8859-1 (and some that claim to be - # utf8) are really windows-1252. 
- - - # get storyId from url--url validation guarantees query is only sid=1234 - self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2]) - - - # get storyId from url--url validation guarantees query correct - m = re.match(self.getSiteURLPattern(),url) - if m: - self.story.setMetadata('storyId',m.group('id')) - - # normalized story URL. - self._setURL(self.getURLPrefix() + '/'+m.group('tp')+'/'+self.story.getMetadata('storyId')+'/') - else: - raise exceptions.InvalidStoryURL(url, - self.getSiteDomain(), - self.getSiteExampleURLs()) + BaseXenForoForumAdapter.__init__(self, config, url) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','fsb') - - # The date format will vary from site to site. - # http://docs.python.org/library/datetime.html#strftime-strptime-behavior - self.dateformat = "%b %d, %Y at %I:%M %p" @staticmethod # must be @staticmethod, don't remove it. def getSiteDomain(): @@ -73,155 +44,5 @@ class ForumsSpacebattlesComAdapter(BaseSiteAdapter): @classmethod def getURLPrefix(cls): - # The site domain. Does have www here, if it uses it. return 'https://' + cls.getSiteDomain() - @classmethod - def getSiteExampleURLs(cls): - return cls.getURLPrefix()+"/threads/some-story-name.123456/" - - def getSiteURLPattern(self): - return r"https?://"+re.escape(self.getSiteDomain())+r"/(?P<tp>threads|posts)/(.+\.)?(?P<id>\d+)/" - - def use_pagecache(self): - ''' - adapters that will work with the page cache need to implement - this and change it to True. - ''' - return True - - ## Getting the chapter list and the meta data, plus 'is adult' checking. 
- def extractChapterUrlsAndMetadata(self): - - useurl = self.url - logger.info("url: "+useurl) - - try: - (data,opened) = self._fetchUrlOpened(useurl) - useurl = opened.geturl() - logger.info("use useurl: "+useurl) - except urllib2.HTTPError, e: - if e.code == 404: - raise exceptions.StoryDoesNotExist(self.url) - else: - raise e - - # use BeautifulSoup HTML parser to make everything easier to find. - soup = self.make_soup(data) - - a = soup.find('h3',{'class':'userText'}).find('a') - self.story.addToList('authorId',a['href'].split('/')[1]) - self.story.addToList('authorUrl',self.getURLPrefix()+'/'+a['href']) - self.story.addToList('author',a.text) - - h1 = soup.find('div',{'class':'titleBar'}).h1 - self.story.setMetadata('title',stripHTML(h1)) - - if '#' in useurl: - anchorid = useurl.split('#')[1] - soup = soup.find('li',id=anchorid) - else: - # try threadmarks if no '#' in , require at least 2. - threadmarksa = soup.find('a',{'class':'threadmarksTrigger'}) - if threadmarksa: - soupmarks = self.make_soup(self._fetchUrl(self.getURLPrefix()+'/'+threadmarksa['href'])) - markas = soupmarks.find('ol',{'class':'overlayScroll'}).find_all('a') - if len(markas) > 1: - for (atag,url,name) in [ (x,x['href'],stripHTML(x)) for x in markas ]: - date = self.make_date(atag.find_next_sibling('div',{'class':'extra'})) - if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'): - self.story.setMetadata('datePublished', date) - if not self.story.getMetadataRaw('dateUpdated') or date > self.story.getMetadataRaw('dateUpdated'): - self.story.setMetadata('dateUpdated', date) - - self.chapterUrls.append((name,self.getURLPrefix()+'/'+url)) - - soup = soup.find('li',{'class':'message'}) # limit first post for date stuff below. ('#' posts above) - - # Now go hunting for the 'chapter list'. - bq = soup.find('blockquote') # assume first posting contains TOC urls. 
- - bq.name='div' - - for iframe in bq.find_all('iframe'): - iframe.extract() # calibre book reader & editor don't like iframes to youtube. - - for qdiv in bq.find_all('div',{'class':'quoteExpand'}): - qdiv.extract() # Remove
<div class="quoteExpand">click to expand</div>
- - self.setDescription(useurl,bq) - - # otherwise, use first post links--include first post since that's - if not self.chapterUrls: - self.chapterUrls.append(("First Post",useurl)) - for (url,name) in [ (x['href'],stripHTML(x)) for x in bq.find_all('a') ]: - logger.debug("found chapurl:%s"%url) - if not url.startswith('http'): - url = self.getURLPrefix()+'/'+url - - if (url.startswith(self.getURLPrefix()) or url.startswith('http://'+self.getSiteDomain())) and ('/posts/' in url or '/threads/' in url): - # brute force way to deal with SB's http->https change when hardcoded http urls. - url = url.replace('http://'+self.getSiteDomain(),self.getURLPrefix()) - logger.debug("used chapurl:%s"%(url)) - self.chapterUrls.append((name,url)) - if url == useurl and 'First Post' == self.chapterUrls[0][0]: - # remove "First Post" if included in list. - logger.debug("delete dup 'First Post' chapter: %s %s"%self.chapterUrls[0]) - del self.chapterUrls[0] - - # Didn't use threadmarks, so take created/updated dates - # from the 'first' posting created and updated. - date = self.make_date(soup.find('a',{'class':'datePermalink'})) - if date: - self.story.setMetadata('datePublished', date) - self.story.setMetadata('dateUpdated', date) # updated overwritten below if found. - - date = self.make_date(soup.find('div',{'class':'editDate'})) - if date: - self.story.setMetadata('dateUpdated', date) - - self.story.setMetadata('numChapters',len(self.chapterUrls)) - - def make_date(self,parenttag): # forums use a BS thing where dates - # can appear different if recent. - datestr=None - try: - datetag = parenttag.find('span',{'class':'DateTime'}) - if datetag: - datestr = datetag['title'] - else: - datetag = parenttag.find('abbr',{'class':'DateTime'}) - if datetag: - datestr="%s at %s"%(datetag['data-datestring'],datetag['data-timestring']) - # Apr 24, 2015 at 4:39 AM - # May 1, 2015 at 5:47 AM - datestr = re.sub(r' (\d[^\d])',r' 0\1',datestr) # add leading 0 for single digit day & hours. 
- return makeDate(datestr, self.dateformat) - except: - logger.debug('No date found in %s'%parenttag) - return None - - # grab the text for an individual chapter. - def getChapterText(self, url): - logger.debug('Getting chapter text from: %s' % url) - - (data,opened) = self._fetchUrlOpened(url) - url = opened.geturl() - logger.debug("chapter URL redirected to: %s"%url) - - soup = self.make_soup(data) - - if '#' in url: - anchorid = url.split('#')[1] - soup = soup.find('li',id=anchorid) - bq = soup.find('blockquote') - - bq.name='div' - - for iframe in bq.find_all('iframe'): - iframe.extract() # calibre book reader & editor don't like iframes to youtube. - - for qdiv in bq.find_all('div',{'class':'quoteExpand'}): - qdiv.extract() # Remove
<div class="quoteExpand">click to expand</div>
- - return self.utf8FromSoup(url,bq) diff --git a/fanficfare/adapters/adapter_forumssufficientvelocitycom.py b/fanficfare/adapters/adapter_forumssufficientvelocitycom.py index f16785ee..883dc749 100644 --- a/fanficfare/adapters/adapter_forumssufficientvelocitycom.py +++ b/fanficfare/adapters/adapter_forumssufficientvelocitycom.py @@ -15,15 +15,15 @@ # limitations under the License. # -from adapter_forumsspacebattlescom import ForumsSpacebattlesComAdapter +from base_xenforoforum_adapter import BaseXenForoForumAdapter def getClass(): return ForumsSufficientVelocityComAdapter -class ForumsSufficientVelocityComAdapter(ForumsSpacebattlesComAdapter): +class ForumsSufficientVelocityComAdapter(BaseXenForoForumAdapter): def __init__(self, config, url): - ForumsSpacebattlesComAdapter.__init__(self, config, url) + BaseXenForoForumAdapter.__init__(self, config, url) # Each adapter needs to have a unique site abbreviation. self.story.setMetadata('siteabbrev','fsv') @@ -35,5 +35,4 @@ class ForumsSufficientVelocityComAdapter(ForumsSpacebattlesComAdapter): @classmethod def getURLPrefix(cls): - # The site domain. Does have www here, if it uses it. return 'http://' + cls.getSiteDomain() diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py index efc774c3..aebf330e 100644 --- a/fanficfare/adapters/base_adapter.py +++ b/fanficfare/adapters/base_adapter.py @@ -452,6 +452,11 @@ class BaseSiteAdapter(Configurable): "Only needs to be overriden if != site domain." return cls.getSiteDomain() + @classmethod + def getConfigSections(cls): + "Only needs to be overriden if has additional ini sections." 
+ return [cls.getConfigSection()] + @classmethod def stripURLParameters(cls,url): "Only needs to be overriden if URL contains more than one parameter" diff --git a/fanficfare/adapters/base_efiction_adapter.py b/fanficfare/adapters/base_efiction_adapter.py index 354072f1..7d0fa230 100644 --- a/fanficfare/adapters/base_efiction_adapter.py +++ b/fanficfare/adapters/base_efiction_adapter.py @@ -70,6 +70,11 @@ class BaseEfictionAdapter(BaseSiteAdapter): self.triedAcceptWarnings = False self.username = "NoneGiven" # if left empty, site doesn't return any message at all. + @classmethod + def getConfigSections(cls): + "Only needs to be overriden if has additional ini sections." + return ['base_efiction',cls.getConfigSection()] + @classmethod def getAcceptDomains(cls): return [cls.getSiteDomain(),'www.' + cls.getSiteDomain()] diff --git a/fanficfare/cli.py b/fanficfare/cli.py index 0856ce7d..32ae1025 100644 --- a/fanficfare/cli.py +++ b/fanficfare/cli.py @@ -184,7 +184,7 @@ def do_download(arg, url = arg try: - configuration = Configuration(adapters.getConfigSectionFor(url), options.format) + configuration = Configuration(adapters.getConfigSectionsFor(url), options.format) except exceptions.UnknownSite, e: if options.list or options.normalize: # list for page doesn't have to be a supported site. diff --git a/fanficfare/configurable.py b/fanficfare/configurable.py index e6f6ee80..f38f474d 100644 --- a/fanficfare/configurable.py +++ b/fanficfare/configurable.py @@ -77,10 +77,12 @@ formatsections = ['html','txt','epub','mobi'] othersections = ['defaults','overrides'] def get_valid_sections(): - sites = adapters.getConfigSections() + sites = adapters.getConfigSections() sitesections = list(othersections) for section in sites: sitesections.append(section) + # also allows [www.base_efiction] and [www.base_forum]. Not + # likely to matter. 
if section.startswith('www.'): # add w/o www if has www sitesections.append(section[4:]) @@ -130,6 +132,7 @@ def get_valid_set_options(): 'replace_hr':(None,None,boollist), 'sort_ships':(None,None,boollist), 'strip_chapter_numbers':(None,None,boollist), + 'mark_new_chapters':(None,None,boollist), 'titlepage_use_table':(None,None,boollist), 'use_ssl_unverified_context':(None,None,boollist), @@ -212,6 +215,7 @@ def get_valid_keywords(): 'chapter_start', 'chapter_title_add_pattern', 'chapter_title_strip_pattern', + 'mark_new_chapters', 'check_next_chapter', 'skip_author_cover', 'collect_series', @@ -224,6 +228,7 @@ def get_valid_keywords(): 'datePublished_format', 'dateUpdated_format', 'default_cover_image', + 'description_limit', 'do_update_hook', 'exclude_notes', 'extra_logpage_entries', @@ -331,7 +336,8 @@ def make_generate_cover_settings(param): class Configuration(ConfigParser.SafeConfigParser): - def __init__(self, site, fileform): + def __init__(self, sections, fileform): + site = sections[-1] # last section is site DN. ConfigParser.SafeConfigParser.__init__(self) self.linenos=dict() # key by section or section,key -> lineno @@ -339,6 +345,11 @@ class Configuration(ConfigParser.SafeConfigParser): ## [injected] section has even less priority than [defaults] self.sectionslist = ['defaults','injected'] + ## add other sections (not including site DN) after defaults, + ## but before site-specific. + for section in sections[:-1]: + self.addConfigSection(section) + if site.startswith("www."): sitewith = site sitewithout = site.replace("www.","") @@ -348,8 +359,13 @@ class Configuration(ConfigParser.SafeConfigParser): self.addConfigSection(sitewith) self.addConfigSection(sitewithout) + if fileform: self.addConfigSection(fileform) + ## add other sections:fileform (not including site DN) + ## after fileform, but before site-specific:fileform. 
+ for section in sections[:-1]: + self.addConfigSection(section+":"+fileform) self.addConfigSection(sitewith+":"+fileform) self.addConfigSection(sitewithout+":"+fileform) self.addConfigSection("overrides") diff --git a/fanficfare/defaults.ini b/fanficfare/defaults.ini index 07d62ba6..26de893d 100644 --- a/fanficfare/defaults.ini +++ b/fanficfare/defaults.ini @@ -306,6 +306,13 @@ chapter_title_strip_pattern:^[0-9]+[\.: -]+ ## "The Beginning" => "1. The Beginning" chapter_title_add_pattern:${index}. ${title} +## If true, when updating an epub that already has old chapters, new +## chapters will be marked in the TOC and chapter header by prepending +## '(new) ' to the chapter title. So 'The Big Fight' will become +## '4. (new) The Big Fight' if both mark_new_chapters and +## add_chapter_numbers are set true. +mark_new_chapters:false + ## Reorder ships so b/a and c/b/a become a/b and a/b/c. Only separates ## on '/', so use replace_metadata to change separator first if ## needed. Something like: ships=>[ ]*(/|&amp;|&)[ ]*=>/ You can use @@ -327,12 +334,74 @@ sort_ships:false ## User-agent user_agent:FFF/2.X +## Added for [base_xenforoforum], but can be used with other sites, +## too. Limit the 'description' to the first X *characters* +## collected. Character count includes HTML tags, so it can be +## non-intuitive. +#description_limit:1000 + +[base_efiction] ## At the time of writing, eFiction Base adapters allow downloading ## the whole story in bulk using the 'Print' feature. If 'bulk_load' ## is set to 'true', both metadata and chapters can be loaded in one ## step bulk_load:true +[base_xenforoforum] +## Currently only forums.spacebattles.com and forums.sufficientvelocity.com + +cover_exclusion_regexp:/clear.png + +## I saw lots of chapters name simply '1.1' etc during testing. +strip_chapter_numbers:false + +## Copy title to tagsfromtitle for parsing tags. 
+add_to_extra_valid_entries:,tagsfromtitle + +## '.NOREPL' tells the system to *not* apply title's +## in/exclude/replace_metadata -- Only works on include_in_ lines. +include_in_tagsfromtitle:title.NOREPL + +tagsfromtitle_label:Tags from Title + +## might want to do this, maybe not. Will often include category, but +## also often include non-category stuff. +# include_in_category:tagsfromtitle + +include_metadata_pre: +# only keep tagsfromtitle with ( or [ in. + tagsfromtitle=~[\[\(] + +replace_metadata: +# remove anything outside () or [] + tagsfromtitle=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1 +# remove () [] + tagsfromtitle=>[\(\)\[\]]=> +# change (spaces)slash(spaces) to comma + tagsfromtitle=> */ *=>, + tagsfromtitle=> x =>, + +# remove [] or () blocks and leading/trailing spaces + title=> *[\(\[]([^\]\)]+)[\)\]] *=> +# remove 'Thread' and the next word, usually "Thread 2", "Thread +# four", "Thread iv", etc + title=>[-: ]*[Tt]hread [^ ]+[-: ]*=> + +extra_titlepage_entries: tagsfromtitle + +## '.SPLIT' tells the system to split by ',' +add_to_include_subject_tags:,tagsfromtitle.SPLIT + +## base_xenforoforum reads Published and Updated datetimes from +## Threadmarks if used, or from the posted & updated times of the +## 'first' post if no threadmarks. +datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S +dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S + +## Only take the first X characters of the 'first' post to use as +## the description. +description_limit:500 + ## Each output format has a section that overrides [defaults] [html] @@ -1027,101 +1096,11 @@ extra_valid_entries:size # don't show twitter icon. 
cover_exclusion_regexp:/res/css/bir.png -[forums.sufficientvelocity.com] - -cover_exclusion_regexp:/clear.png - -add_to_extratags:,ForumFic - -strip_chapter_numbers:false - -# true, false, threadmarksonly -add_chapter_dates:false - -add_to_extra_valid_entries:,titletags -# '.NOREPL' tells the system to *not* apply title's -# in/exclude/replace_metadata -- Only works on include_in_ lines. -include_in_titletags:title.NOREPL - -## might want to do this, maybe not. Will often include category, but -## also often include non-category stuff. -# include_in_category:titletags - -include_metadata_pre: -# only keep titletags with ( or [ in. - titletags=~[\[\(] - -replace_metadata: -# remove 'Thread' and the next word, usually "Thread 2", "Thread -# four", "Thread iv", etc - title=>[-: ]*[Tt]hread [^ ]+[-: ]*=> -# remove anything outside () or [] - titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1 -# remove () [] - titletags=>[\(\)\[\]]=> -# change (spaces)slash(spaces) to comma - titletags=> */ *=>, - titletags=> x =>, -# remove [] or () blocks and leading/trailing spaces - title=> *[\(\[]([^\]\)]+)[\)\]] *=> - -extra_titlepage_entries: titletags - -## '.SPLIT' tells the system to split by ',' -add_to_include_subject_tags:,titletags.SPLIT - -datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S -dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S - -description_limit:500 - [forums.spacebattles.com] +## see [base_xenforoforum] -cover_exclusion_regexp:/clear.png - -add_to_extratags:,ForumFic - -# true, false, threadmarksonly -add_chapter_dates:false - -strip_chapter_numbers:false - -add_to_extra_valid_entries:,titletags -# '.NOREPL' tells the system to *not* apply title's -# in/exclude/replace_metadata -- Only works on include_in_ lines. -include_in_titletags:title.NOREPL - -## might want to do this, maybe not. Will often include category, but -## also often include non-category stuff. -# include_in_category:titletags - -include_metadata_pre: -# only keep titletags with ( or [ in. 
- titletags=~[\[\(] - -replace_metadata: -# remove 'Thread' and the next word, usually "Thread 2", "Thread -# four", "Thread iv", etc - title=>[-: ]*[Tt]hread [^ ]+[-: ]*=> -# remove anything outside () or [] - titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1 -# remove () [] - titletags=>[\(\)\[\]]=> -# change (spaces)slash(spaces) to comma - titletags=> */ *=>, - titletags=> x =>, -# remove [] or () blocks and leading/trailing spaces - title=> *[\(\[]([^\]\)]+)[\)\]] *=> - -extra_titlepage_entries: titletags - -## '.SPLIT' tells the system to split by ',' -add_to_include_subject_tags:,titletags.SPLIT - -datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S -dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S - -description_limit:500 +[forums.sufficientvelocity.com] +## see [base_xenforoforum] [grangerenchanted.com] ## Some sites require login (or login for some rated stories) The diff --git a/fanficfare/story.py b/fanficfare/story.py index b9abdd87..dd578cee 100644 --- a/fanficfare/story.py +++ b/fanficfare/story.py @@ -412,7 +412,7 @@ class Story(Configurable): except: self.metadata = {'version':'4.4'} self.in_ex_cludes = {} - self.chapters = [] # chapters will be tuples of (title,html) + self.chapters = [] # chapters will be tuples of (url,title,html) self.imgurls = [] self.imgtuples = [] diff --git a/webservice/main.py b/webservice/main.py index b480265d..443fb3a7 100644 --- a/webservice/main.py +++ b/webservice/main.py @@ -62,7 +62,7 @@ class UserConfigServer(webapp2.RequestHandler): def getUserConfig(self,user,url,fileformat): - configuration = Configuration(adapters.getConfigSectionFor(url),fileformat) + configuration = Configuration(adapters.getConfigSectionsFor(url),fileformat) logging.debug('reading defaults.ini config file') configuration.read('fanficfare/defaults.ini')