mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
Add [base_efiction] and [base_xenforoforum] sections, add base_xenforoforum_adapter, document new options.
This commit is contained in:
parent
b2c1d485b9
commit
4dcfd6e4be
13 changed files with 203 additions and 379 deletions
|
|
@ -89,7 +89,7 @@ from calibre_plugins.fanficfare_plugin.dialogs \
|
|||
EditTextDialog, IniTextDialog, RejectUrlEntry)
|
||||
|
||||
from calibre_plugins.fanficfare_plugin.fanficfare.adapters \
|
||||
import getConfigSections
|
||||
import getSiteSections
|
||||
|
||||
from calibre_plugins.fanficfare_plugin.common_utils \
|
||||
import ( KeyboardConfigDialog, PrefsViewerDialog )
|
||||
|
|
@ -922,7 +922,7 @@ class CalibreCoverTab(QWidget):
|
|||
|
||||
self.gc_dropdowns = {}
|
||||
|
||||
sitelist = getConfigSections()
|
||||
sitelist = getSiteSections()
|
||||
sitelist.sort()
|
||||
sitelist.insert(0,_("Default"))
|
||||
for site in sitelist:
|
||||
|
|
|
|||
|
|
@ -23,12 +23,12 @@ def get_fff_personalini():
|
|||
def get_fff_config(url,fileform="epub",personalini=None):
|
||||
if not personalini:
|
||||
personalini = get_fff_personalini()
|
||||
site='unknown'
|
||||
sections=['unknown']
|
||||
try:
|
||||
site = adapters.getConfigSectionFor(url)
|
||||
sections = adapters.getConfigSectionsFor(url)
|
||||
except Exception as e:
|
||||
logger.debug("Failed trying to get ini config for url(%s): %s, using section [%s] instead"%(url,e,site))
|
||||
configuration = Configuration(site,fileform)
|
||||
logger.debug("Failed trying to get ini config for url(%s): %s, using section %s instead"%(url,e,sections))
|
||||
configuration = Configuration(sections,fileform)
|
||||
configuration.readfp(StringIO(get_resources("plugin-defaults.ini")))
|
||||
configuration.readfp(StringIO(personalini))
|
||||
|
||||
|
|
|
|||
|
|
@ -299,6 +299,13 @@ chapter_title_strip_pattern:^[0-9]+[\.: -]+
|
|||
## "The Beginning" => "1. The Beginning"
|
||||
chapter_title_add_pattern:${index}. ${title}
|
||||
|
||||
## If true, when updating an epub that already has old chapters, new
|
||||
## chapters will be marked in the TOC and chapter header by prepending
|
||||
## '(new) ' to the chapter title. So 'The Big Fight' will become
|
||||
## '4. (new) The Big Fight' if both mark_new_chapters and
|
||||
## add_chapter_numbers are set true.
|
||||
mark_new_chapters:false
|
||||
|
||||
## Uses a python template substitution. The ${title} is the default
|
||||
## title of a new anthology, <series name> in the case of a series, or
|
||||
## the first book title otherwise. This is only applied to new
|
||||
|
|
@ -330,12 +337,74 @@ sort_ships:false
|
|||
## User-agent
|
||||
user_agent:FFF/2.X
|
||||
|
||||
## Added for [base_xenforoforum], but can be used with other sites,
|
||||
## too. Limit the 'description' to the first X *characters*
|
||||
## collected. Character count includes HTML tags, so it can be
|
||||
## non-intuitive.
|
||||
#description_limit:1000
|
||||
|
||||
[base_efiction]
|
||||
## At the time of writing, eFiction Base adapters allow downloading
|
||||
## the whole story in bulk using the 'Print' feature. If 'bulk_load'
|
||||
## is set to 'true', both metadata and chapters can be loaded in one
|
||||
## step
|
||||
bulk_load:true
|
||||
|
||||
[base_xenforoforum]
|
||||
## Currently used only by forums.spacebattles.com and forums.sufficientvelocity.com
|
||||
|
||||
cover_exclusion_regexp:/clear.png
|
||||
|
||||
## I saw lots of chapters named simply '1.1' etc. during testing.
|
||||
strip_chapter_numbers:false
|
||||
|
||||
## Copy title to tagsfromtitle for parsing tags.
|
||||
add_to_extra_valid_entries:,tagsfromtitle
|
||||
|
||||
## '.NOREPL' tells the system to *not* apply title's
|
||||
## in/exclude/replace_metadata -- Only works on include_in_ lines.
|
||||
include_in_tagsfromtitle:title.NOREPL
|
||||
|
||||
tagsfromtitle_label:Tags from Title
|
||||
|
||||
## might want to do this, maybe not. Will often include category, but
|
||||
## also often include non-category stuff.
|
||||
# include_in_category:tagsfromtitle
|
||||
|
||||
include_metadata_pre:
|
||||
# only keep tagsfromtitle with ( or [ in.
|
||||
tagsfromtitle=~[\[\(]
|
||||
|
||||
replace_metadata:
|
||||
# remove anything outside () or []
|
||||
tagsfromtitle=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
|
||||
# remove () []
|
||||
tagsfromtitle=>[\(\)\[\]]=>
|
||||
# change (spaces)slash(spaces) to comma
|
||||
tagsfromtitle=> */ *=>,
|
||||
tagsfromtitle=> x =>,
|
||||
|
||||
# remove [] or () blocks and leading/trailing spaces
|
||||
title=> *[\(\[]([^\]\)]+)[\)\]] *=>
|
||||
# remove 'Thread' and the next word, usually "Thread 2", "Thread
|
||||
# four", "Thread iv", etc
|
||||
title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
|
||||
|
||||
extra_titlepage_entries: tagsfromtitle
|
||||
|
||||
## '.SPLIT' tells the system to split by ','
|
||||
add_to_include_subject_tags:,tagsfromtitle.SPLIT
|
||||
|
||||
## base_xenforoforum reads Published and Updated datetimes from
|
||||
## Threadmarks if used, or from the posted & updated times of the
|
||||
## 'first' post if no threadmarks.
|
||||
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
## Only take the first X characters of the 'first' post to use as
|
||||
## the description.
|
||||
description_limit:500
|
||||
|
||||
## Each output format has a section that overrides [defaults]
|
||||
[html]
|
||||
|
||||
|
|
@ -1041,91 +1110,11 @@ extra_valid_entries:size
|
|||
# don't show twitter icon.
|
||||
cover_exclusion_regexp:/res/css/bir.png
|
||||
|
||||
[forums.sufficientvelocity.com]
|
||||
|
||||
cover_exclusion_regexp:/clear.png
|
||||
|
||||
strip_chapter_numbers:false
|
||||
|
||||
add_to_extra_valid_entries:,titletags
|
||||
# '.NOREPL' tells the system to *not* apply title's
|
||||
# in/exclude/replace_metadata -- Only works on include_in_ lines.
|
||||
include_in_titletags:title.NOREPL
|
||||
|
||||
## might want to do this, maybe not. Will often include category, but
|
||||
## also often include non-category stuff.
|
||||
# include_in_category:titletags
|
||||
|
||||
include_metadata_pre:
|
||||
# only keep titletags with ( or [ in.
|
||||
titletags=~[\[\(]
|
||||
|
||||
replace_metadata:
|
||||
# remove anything outside () or []
|
||||
titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
|
||||
# remove () []
|
||||
titletags=>[\(\)\[\]]=>
|
||||
# change (spaces)slash(spaces) to comma
|
||||
titletags=> */ *=>,
|
||||
titletags=> x =>,
|
||||
# remove [] or () blocks and leading/trailing spaces
|
||||
title=> *[\(\[]([^\]\)]+)[\)\]] *=>
|
||||
# remove 'Thread' and the next word, usually "Thread 2", "Thread
|
||||
# four", "Thread iv", etc
|
||||
title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
|
||||
|
||||
extra_titlepage_entries: titletags
|
||||
|
||||
## '.SPLIT' tells the system to split by ','
|
||||
add_to_include_subject_tags:,titletags.SPLIT
|
||||
|
||||
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
description_limit:500
|
||||
|
||||
[forums.spacebattles.com]
|
||||
## see [base_xenforoforum]
|
||||
|
||||
cover_exclusion_regexp:/clear.png
|
||||
|
||||
strip_chapter_numbers:false
|
||||
|
||||
add_to_extra_valid_entries:,titletags
|
||||
# '.NOREPL' tells the system to *not* apply title's
|
||||
# in/exclude/replace_metadata -- Only works on include_in_ lines.
|
||||
include_in_titletags:title.NOREPL
|
||||
|
||||
## might want to do this, maybe not. Will often include category, but
|
||||
## also often include non-category stuff.
|
||||
# include_in_category:titletags
|
||||
|
||||
include_metadata_pre:
|
||||
# only keep titletags with ( or [ in.
|
||||
titletags=~[\[\(]
|
||||
|
||||
replace_metadata:
|
||||
# remove anything outside () or []
|
||||
titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
|
||||
# remove () []
|
||||
titletags=>[\(\)\[\]]=>
|
||||
# change (spaces)slash(spaces) to comma
|
||||
titletags=> */ *=>,
|
||||
titletags=> x =>,
|
||||
# remove [] or () blocks and leading/trailing spaces
|
||||
title=> *[\(\[]([^\]\)]+)[\)\]] *=>
|
||||
# remove 'Thread' and the next word, usually "Thread 2", "Thread
|
||||
# four", "Thread iv", etc
|
||||
title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
|
||||
|
||||
extra_titlepage_entries: titletags
|
||||
|
||||
## '.SPLIT' tells the system to split by ','
|
||||
add_to_include_subject_tags:,titletags.SPLIT
|
||||
|
||||
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
description_limit:500
|
||||
[forums.sufficientvelocity.com]
|
||||
## see [base_xenforoforum]
|
||||
|
||||
[grangerenchanted.com]
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
|
|
|
|||
|
|
@ -196,14 +196,24 @@ def getAdapter(config,url,anyurl=False):
|
|||
# No adapter found.
|
||||
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
|
||||
|
||||
def getConfigSections():
|
||||
def getSiteSections():
    """Return the primary ini section name of every class in __class_list.

    Base sections are not included.  Section names are returned rather
    than site DNS names because of squidge/peja.
    """
    site_sections = []
    for adapter_cls in __class_list:
        site_sections.append(adapter_cls.getConfigSection())
    return site_sections
|
||||
|
||||
def getConfigSections():
    """Return the set of all ini section names used by the adapter classes.

    Collects whatever each class in __class_list reports from its own
    getConfigSections(), so base sections are included.
    """
    return {section_name
            for adapter_cls in __class_list
            for section_name in adapter_cls.getConfigSections()}
|
||||
|
||||
def get_bulk_load_sites():
|
||||
# for now, all eFiction Base adapters are assumed to allow bulk_load.
|
||||
return [cls.getConfigSection().replace('www.','') for cls in
|
||||
filter( lambda x : issubclass(x,base_efiction_adapter.BaseEfictionAdapter),
|
||||
__class_list)]
|
||||
sections = set()
|
||||
for cls in filter( lambda x : issubclass(x,base_efiction_adapter.BaseEfictionAdapter),
|
||||
__class_list):
|
||||
sections.update( [ x.replace('www.','') for x in cls.getConfigSections() ] )
|
||||
return sections
|
||||
|
||||
def getSiteExamples():
|
||||
l=[]
|
||||
|
|
@ -211,10 +221,10 @@ def getSiteExamples():
|
|||
l.append((cls.getConfigSection(),cls.getSiteExampleURLs().split()))
|
||||
return l
|
||||
|
||||
def getConfigSectionFor(url):
|
||||
def getConfigSectionsFor(url):
|
||||
(cls,fixedurl) = getClassFor(url)
|
||||
if cls:
|
||||
return cls.getConfigSection()
|
||||
return cls.getConfigSections()
|
||||
|
||||
# No adapter found.
|
||||
raise exceptions.UnknownSite( url, [cls.getSiteDomain() for cls in __class_list] )
|
||||
|
|
|
|||
|
|
@ -24,47 +24,18 @@ import urllib2
|
|||
from ..htmlcleanup import stripHTML
|
||||
from .. import exceptions as exceptions
|
||||
|
||||
from base_adapter import BaseSiteAdapter, makeDate
|
||||
from base_xenforoforum_adapter import BaseXenForoForumAdapter
|
||||
|
||||
def getClass():
|
||||
return ForumsSpacebattlesComAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ForumsSpacebattlesComAdapter(BaseSiteAdapter):
|
||||
class ForumsSpacebattlesComAdapter(BaseXenForoForumAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
BaseSiteAdapter.__init__(self, config, url)
|
||||
|
||||
self.decode = ["utf8",
|
||||
"Windows-1252"] # 1252 is a superset of iso-8859-1.
|
||||
# Most sites that claim to be
|
||||
# iso-8859-1 (and some that claim to be
|
||||
# utf8) are really windows-1252.
|
||||
|
||||
|
||||
# get storyId from url--url validation guarantees query is only sid=1234
|
||||
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
|
||||
|
||||
|
||||
# get storyId from url--url validation guarantees query correct
|
||||
m = re.match(self.getSiteURLPattern(),url)
|
||||
if m:
|
||||
self.story.setMetadata('storyId',m.group('id'))
|
||||
|
||||
# normalized story URL.
|
||||
self._setURL(self.getURLPrefix() + '/'+m.group('tp')+'/'+self.story.getMetadata('storyId')+'/')
|
||||
else:
|
||||
raise exceptions.InvalidStoryURL(url,
|
||||
self.getSiteDomain(),
|
||||
self.getSiteExampleURLs())
|
||||
BaseXenForoForumAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','fsb')
|
||||
|
||||
# The date format will vary from site to site.
|
||||
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
|
||||
self.dateformat = "%b %d, %Y at %I:%M %p"
|
||||
|
||||
@staticmethod # must be @staticmethod, don't remove it.
|
||||
def getSiteDomain():
|
||||
|
|
@ -73,155 +44,5 @@ class ForumsSpacebattlesComAdapter(BaseSiteAdapter):
|
|||
|
||||
@classmethod
|
||||
def getURLPrefix(cls):
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'https://' + cls.getSiteDomain()
|
||||
|
||||
@classmethod
|
||||
def getSiteExampleURLs(cls):
|
||||
return cls.getURLPrefix()+"/threads/some-story-name.123456/"
|
||||
|
||||
def getSiteURLPattern(self):
|
||||
return r"https?://"+re.escape(self.getSiteDomain())+r"/(?P<tp>threads|posts)/(.+\.)?(?P<id>\d+)/"
|
||||
|
||||
def use_pagecache(self):
|
||||
'''
|
||||
adapters that will work with the page cache need to implement
|
||||
this and change it to True.
|
||||
'''
|
||||
return True
|
||||
|
||||
## Getting the chapter list and the meta data, plus 'is adult' checking.
|
||||
def extractChapterUrlsAndMetadata(self):
|
||||
|
||||
useurl = self.url
|
||||
logger.info("url: "+useurl)
|
||||
|
||||
try:
|
||||
(data,opened) = self._fetchUrlOpened(useurl)
|
||||
useurl = opened.geturl()
|
||||
logger.info("use useurl: "+useurl)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 404:
|
||||
raise exceptions.StoryDoesNotExist(self.url)
|
||||
else:
|
||||
raise e
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
soup = self.make_soup(data)
|
||||
|
||||
a = soup.find('h3',{'class':'userText'}).find('a')
|
||||
self.story.addToList('authorId',a['href'].split('/')[1])
|
||||
self.story.addToList('authorUrl',self.getURLPrefix()+'/'+a['href'])
|
||||
self.story.addToList('author',a.text)
|
||||
|
||||
h1 = soup.find('div',{'class':'titleBar'}).h1
|
||||
self.story.setMetadata('title',stripHTML(h1))
|
||||
|
||||
if '#' in useurl:
|
||||
anchorid = useurl.split('#')[1]
|
||||
soup = soup.find('li',id=anchorid)
|
||||
else:
|
||||
# try threadmarks if no '#' in url, require at least 2.
|
||||
threadmarksa = soup.find('a',{'class':'threadmarksTrigger'})
|
||||
if threadmarksa:
|
||||
soupmarks = self.make_soup(self._fetchUrl(self.getURLPrefix()+'/'+threadmarksa['href']))
|
||||
markas = soupmarks.find('ol',{'class':'overlayScroll'}).find_all('a')
|
||||
if len(markas) > 1:
|
||||
for (atag,url,name) in [ (x,x['href'],stripHTML(x)) for x in markas ]:
|
||||
date = self.make_date(atag.find_next_sibling('div',{'class':'extra'}))
|
||||
if not self.story.getMetadataRaw('datePublished') or date < self.story.getMetadataRaw('datePublished'):
|
||||
self.story.setMetadata('datePublished', date)
|
||||
if not self.story.getMetadataRaw('dateUpdated') or date > self.story.getMetadataRaw('dateUpdated'):
|
||||
self.story.setMetadata('dateUpdated', date)
|
||||
|
||||
self.chapterUrls.append((name,self.getURLPrefix()+'/'+url))
|
||||
|
||||
soup = soup.find('li',{'class':'message'}) # limit first post for date stuff below. ('#' posts above)
|
||||
|
||||
# Now go hunting for the 'chapter list'.
|
||||
bq = soup.find('blockquote') # assume first posting contains TOC urls.
|
||||
|
||||
bq.name='div'
|
||||
|
||||
for iframe in bq.find_all('iframe'):
|
||||
iframe.extract() # calibre book reader & editor don't like iframes to youtube.
|
||||
|
||||
for qdiv in bq.find_all('div',{'class':'quoteExpand'}):
|
||||
qdiv.extract() # Remove <div class="quoteExpand">click to expand</div>
|
||||
|
||||
self.setDescription(useurl,bq)
|
||||
|
||||
# otherwise, use first post links--include first post since that's
|
||||
if not self.chapterUrls:
|
||||
self.chapterUrls.append(("First Post",useurl))
|
||||
for (url,name) in [ (x['href'],stripHTML(x)) for x in bq.find_all('a') ]:
|
||||
logger.debug("found chapurl:%s"%url)
|
||||
if not url.startswith('http'):
|
||||
url = self.getURLPrefix()+'/'+url
|
||||
|
||||
if (url.startswith(self.getURLPrefix()) or url.startswith('http://'+self.getSiteDomain())) and ('/posts/' in url or '/threads/' in url):
|
||||
# brute force way to deal with SB's http->https change when hardcoded http urls.
|
||||
url = url.replace('http://'+self.getSiteDomain(),self.getURLPrefix())
|
||||
logger.debug("used chapurl:%s"%(url))
|
||||
self.chapterUrls.append((name,url))
|
||||
if url == useurl and 'First Post' == self.chapterUrls[0][0]:
|
||||
# remove "First Post" if included in list.
|
||||
logger.debug("delete dup 'First Post' chapter: %s %s"%self.chapterUrls[0])
|
||||
del self.chapterUrls[0]
|
||||
|
||||
# Didn't use threadmarks, so take created/updated dates
|
||||
# from the 'first' posting created and updated.
|
||||
date = self.make_date(soup.find('a',{'class':'datePermalink'}))
|
||||
if date:
|
||||
self.story.setMetadata('datePublished', date)
|
||||
self.story.setMetadata('dateUpdated', date) # updated overwritten below if found.
|
||||
|
||||
date = self.make_date(soup.find('div',{'class':'editDate'}))
|
||||
if date:
|
||||
self.story.setMetadata('dateUpdated', date)
|
||||
|
||||
self.story.setMetadata('numChapters',len(self.chapterUrls))
|
||||
|
||||
def make_date(self,parenttag): # forums use a BS thing where dates
|
||||
# can appear different if recent.
|
||||
datestr=None
|
||||
try:
|
||||
datetag = parenttag.find('span',{'class':'DateTime'})
|
||||
if datetag:
|
||||
datestr = datetag['title']
|
||||
else:
|
||||
datetag = parenttag.find('abbr',{'class':'DateTime'})
|
||||
if datetag:
|
||||
datestr="%s at %s"%(datetag['data-datestring'],datetag['data-timestring'])
|
||||
# Apr 24, 2015 at 4:39 AM
|
||||
# May 1, 2015 at 5:47 AM
|
||||
datestr = re.sub(r' (\d[^\d])',r' 0\1',datestr) # add leading 0 for single digit day & hours.
|
||||
return makeDate(datestr, self.dateformat)
|
||||
except:
|
||||
logger.debug('No date found in %s'%parenttag)
|
||||
return None
|
||||
|
||||
# grab the text for an individual chapter.
|
||||
def getChapterText(self, url):
|
||||
logger.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
(data,opened) = self._fetchUrlOpened(url)
|
||||
url = opened.geturl()
|
||||
logger.debug("chapter URL redirected to: %s"%url)
|
||||
|
||||
soup = self.make_soup(data)
|
||||
|
||||
if '#' in url:
|
||||
anchorid = url.split('#')[1]
|
||||
soup = soup.find('li',id=anchorid)
|
||||
bq = soup.find('blockquote')
|
||||
|
||||
bq.name='div'
|
||||
|
||||
for iframe in bq.find_all('iframe'):
|
||||
iframe.extract() # calibre book reader & editor don't like iframes to youtube.
|
||||
|
||||
for qdiv in bq.find_all('div',{'class':'quoteExpand'}):
|
||||
qdiv.extract() # Remove <div class="quoteExpand">click to expand</div>
|
||||
|
||||
return self.utf8FromSoup(url,bq)
|
||||
|
|
|
|||
|
|
@ -15,15 +15,15 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from adapter_forumsspacebattlescom import ForumsSpacebattlesComAdapter
|
||||
from base_xenforoforum_adapter import BaseXenForoForumAdapter
|
||||
|
||||
def getClass():
|
||||
return ForumsSufficientVelocityComAdapter
|
||||
|
||||
class ForumsSufficientVelocityComAdapter(ForumsSpacebattlesComAdapter):
|
||||
class ForumsSufficientVelocityComAdapter(BaseXenForoForumAdapter):
|
||||
|
||||
def __init__(self, config, url):
|
||||
ForumsSpacebattlesComAdapter.__init__(self, config, url)
|
||||
BaseXenForoForumAdapter.__init__(self, config, url)
|
||||
|
||||
# Each adapter needs to have a unique site abbreviation.
|
||||
self.story.setMetadata('siteabbrev','fsv')
|
||||
|
|
@ -35,5 +35,4 @@ class ForumsSufficientVelocityComAdapter(ForumsSpacebattlesComAdapter):
|
|||
|
||||
@classmethod
|
||||
def getURLPrefix(cls):
|
||||
# The site domain. Does have www here, if it uses it.
|
||||
return 'http://' + cls.getSiteDomain()
|
||||
|
|
|
|||
|
|
@ -452,6 +452,11 @@ class BaseSiteAdapter(Configurable):
|
|||
"Only needs to be overriden if != site domain."
|
||||
return cls.getSiteDomain()
|
||||
|
||||
@classmethod
def getConfigSections(cls):
    """Return the list of ini section names that apply to this adapter.

    By default this is a one-element list holding getConfigSection().
    Only needs to be overridden if the adapter has additional ini sections.
    """
    primary_section = cls.getConfigSection()
    return [primary_section]
|
||||
|
||||
@classmethod
|
||||
def stripURLParameters(cls,url):
|
||||
"Only needs to be overriden if URL contains more than one parameter"
|
||||
|
|
|
|||
|
|
@ -70,6 +70,11 @@ class BaseEfictionAdapter(BaseSiteAdapter):
|
|||
self.triedAcceptWarnings = False
|
||||
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
|
||||
|
||||
@classmethod
def getConfigSections(cls):
    """Return the ini section names for an eFiction-based adapter.

    The shared 'base_efiction' section comes first, followed by the
    adapter's own section from getConfigSection().
    """
    own_section = cls.getConfigSection()
    return ['base_efiction', own_section]
|
||||
|
||||
@classmethod
|
||||
def getAcceptDomains(cls):
|
||||
return [cls.getSiteDomain(),'www.' + cls.getSiteDomain()]
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ def do_download(arg,
|
|||
url = arg
|
||||
|
||||
try:
|
||||
configuration = Configuration(adapters.getConfigSectionFor(url), options.format)
|
||||
configuration = Configuration(adapters.getConfigSectionsFor(url), options.format)
|
||||
except exceptions.UnknownSite, e:
|
||||
if options.list or options.normalize:
|
||||
# list for page doesn't have to be a supported site.
|
||||
|
|
|
|||
|
|
@ -77,10 +77,12 @@ formatsections = ['html','txt','epub','mobi']
|
|||
othersections = ['defaults','overrides']
|
||||
|
||||
def get_valid_sections():
|
||||
sites = adapters.getConfigSections()
|
||||
sites = adapters.getConfigSections()
|
||||
sitesections = list(othersections)
|
||||
for section in sites:
|
||||
sitesections.append(section)
|
||||
# also allows [www.base_efiction] and [www.base_xenforoforum]. Not
|
||||
# likely to matter.
|
||||
if section.startswith('www.'):
|
||||
# add w/o www if has www
|
||||
sitesections.append(section[4:])
|
||||
|
|
@ -130,6 +132,7 @@ def get_valid_set_options():
|
|||
'replace_hr':(None,None,boollist),
|
||||
'sort_ships':(None,None,boollist),
|
||||
'strip_chapter_numbers':(None,None,boollist),
|
||||
'mark_new_chapters':(None,None,boollist),
|
||||
'titlepage_use_table':(None,None,boollist),
|
||||
|
||||
'use_ssl_unverified_context':(None,None,boollist),
|
||||
|
|
@ -212,6 +215,7 @@ def get_valid_keywords():
|
|||
'chapter_start',
|
||||
'chapter_title_add_pattern',
|
||||
'chapter_title_strip_pattern',
|
||||
'mark_new_chapters',
|
||||
'check_next_chapter',
|
||||
'skip_author_cover',
|
||||
'collect_series',
|
||||
|
|
@ -224,6 +228,7 @@ def get_valid_keywords():
|
|||
'datePublished_format',
|
||||
'dateUpdated_format',
|
||||
'default_cover_image',
|
||||
'description_limit',
|
||||
'do_update_hook',
|
||||
'exclude_notes',
|
||||
'extra_logpage_entries',
|
||||
|
|
@ -331,7 +336,8 @@ def make_generate_cover_settings(param):
|
|||
|
||||
class Configuration(ConfigParser.SafeConfigParser):
|
||||
|
||||
def __init__(self, site, fileform):
|
||||
def __init__(self, sections, fileform):
|
||||
site = sections[-1] # last section is site DN.
|
||||
ConfigParser.SafeConfigParser.__init__(self)
|
||||
|
||||
self.linenos=dict() # key by section or section,key -> lineno
|
||||
|
|
@ -339,6 +345,11 @@ class Configuration(ConfigParser.SafeConfigParser):
|
|||
## [injected] section has even less priority than [defaults]
|
||||
self.sectionslist = ['defaults','injected']
|
||||
|
||||
## add other sections (not including site DN) after defaults,
|
||||
## but before site-specific.
|
||||
for section in sections[:-1]:
|
||||
self.addConfigSection(section)
|
||||
|
||||
if site.startswith("www."):
|
||||
sitewith = site
|
||||
sitewithout = site.replace("www.","")
|
||||
|
|
@ -348,8 +359,13 @@ class Configuration(ConfigParser.SafeConfigParser):
|
|||
|
||||
self.addConfigSection(sitewith)
|
||||
self.addConfigSection(sitewithout)
|
||||
|
||||
if fileform:
|
||||
self.addConfigSection(fileform)
|
||||
## add other sections:fileform (not including site DN)
|
||||
## after fileform, but before site-specific:fileform.
|
||||
for section in sections[:-1]:
|
||||
self.addConfigSection(section+":"+fileform)
|
||||
self.addConfigSection(sitewith+":"+fileform)
|
||||
self.addConfigSection(sitewithout+":"+fileform)
|
||||
self.addConfigSection("overrides")
|
||||
|
|
|
|||
|
|
@ -306,6 +306,13 @@ chapter_title_strip_pattern:^[0-9]+[\.: -]+
|
|||
## "The Beginning" => "1. The Beginning"
|
||||
chapter_title_add_pattern:${index}. ${title}
|
||||
|
||||
## If true, when updating an epub that already has old chapters, new
|
||||
## chapters will be marked in the TOC and chapter header by prepending
|
||||
## '(new) ' to the chapter title. So 'The Big Fight' will become
|
||||
## '4. (new) The Big Fight' if both mark_new_chapters and
|
||||
## add_chapter_numbers are set true.
|
||||
mark_new_chapters:false
|
||||
|
||||
## Reorder ships so b/a and c/b/a become a/b and a/b/c. Only separates
|
||||
## on '/', so use replace_metadata to change separator first if
|
||||
## needed. Something like: ships=>[ ]*(/|&|&)[ ]*=>/ You can use
|
||||
|
|
@ -327,12 +334,74 @@ sort_ships:false
|
|||
## User-agent
|
||||
user_agent:FFF/2.X
|
||||
|
||||
## Added for [base_xenforoforum], but can be used with other sites,
|
||||
## too. Limit the 'description' to the first X *characters*
|
||||
## collected. Character count includes HTML tags, so it can be
|
||||
## non-intuitive.
|
||||
#description_limit:1000
|
||||
|
||||
[base_efiction]
|
||||
## At the time of writing, eFiction Base adapters allow downloading
|
||||
## the whole story in bulk using the 'Print' feature. If 'bulk_load'
|
||||
## is set to 'true', both metadata and chapters can be loaded in one
|
||||
## step
|
||||
bulk_load:true
|
||||
|
||||
[base_xenforoforum]
|
||||
## Currently only forums.spacebattles.com and forums.sufficientvelocity.com
|
||||
|
||||
cover_exclusion_regexp:/clear.png
|
||||
|
||||
## I saw lots of chapters name simply '1.1' etc during testing.
|
||||
strip_chapter_numbers:false
|
||||
|
||||
## Copy title to tagsfromtitle for parsing tags.
|
||||
add_to_extra_valid_entries:,tagsfromtitle
|
||||
|
||||
## '.NOREPL' tells the system to *not* apply title's
|
||||
## in/exclude/replace_metadata -- Only works on include_in_ lines.
|
||||
include_in_tagsfromtitle:title.NOREPL
|
||||
|
||||
tagsfromtitle_label:Tags from Title
|
||||
|
||||
## might want to do this, maybe not. Will often include category, but
|
||||
## also often include non-category stuff.
|
||||
# include_in_category:tagsfromtitle
|
||||
|
||||
include_metadata_pre:
|
||||
# only keep tagsfromtitle with ( or [ in.
|
||||
tagsfromtitle=~[\[\(]
|
||||
|
||||
replace_metadata:
|
||||
# remove anything outside () or []
|
||||
tagsfromtitle=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
|
||||
# remove () []
|
||||
tagsfromtitle=>[\(\)\[\]]=>
|
||||
# change (spaces)slash(spaces) to comma
|
||||
tagsfromtitle=> */ *=>,
|
||||
tagsfromtitle=> x =>,
|
||||
|
||||
# remove [] or () blocks and leading/trailing spaces
|
||||
title=> *[\(\[]([^\]\)]+)[\)\]] *=>
|
||||
# remove 'Thread' and the next word, usually "Thread 2", "Thread
|
||||
# four", "Thread iv", etc
|
||||
title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
|
||||
|
||||
extra_titlepage_entries: tagsfromtitle
|
||||
|
||||
## '.SPLIT' tells the system to split by ','
|
||||
add_to_include_subject_tags:,tagsfromtitle.SPLIT
|
||||
|
||||
## base_xenforoforum reads Published and Updated datetimes from
|
||||
## Threadmarks if used, or from the posted & updated times of the
|
||||
## 'first' post if no threadmarks.
|
||||
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
## Only take the first X characters of the 'first' post to use as
|
||||
## the description.
|
||||
description_limit:500
|
||||
|
||||
## Each output format has a section that overrides [defaults]
|
||||
[html]
|
||||
|
||||
|
|
@ -1027,101 +1096,11 @@ extra_valid_entries:size
|
|||
# don't show twitter icon.
|
||||
cover_exclusion_regexp:/res/css/bir.png
|
||||
|
||||
[forums.sufficientvelocity.com]
|
||||
|
||||
cover_exclusion_regexp:/clear.png
|
||||
|
||||
add_to_extratags:,ForumFic
|
||||
|
||||
strip_chapter_numbers:false
|
||||
|
||||
# true, false, threadmarksonly
|
||||
add_chapter_dates:false
|
||||
|
||||
add_to_extra_valid_entries:,titletags
|
||||
# '.NOREPL' tells the system to *not* apply title's
|
||||
# in/exclude/replace_metadata -- Only works on include_in_ lines.
|
||||
include_in_titletags:title.NOREPL
|
||||
|
||||
## might want to do this, maybe not. Will often include category, but
|
||||
## also often include non-category stuff.
|
||||
# include_in_category:titletags
|
||||
|
||||
include_metadata_pre:
|
||||
# only keep titletags with ( or [ in.
|
||||
titletags=~[\[\(]
|
||||
|
||||
replace_metadata:
|
||||
# remove 'Thread' and the next word, usually "Thread 2", "Thread
|
||||
# four", "Thread iv", etc
|
||||
title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
|
||||
# remove anything outside () or []
|
||||
titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
|
||||
# remove () []
|
||||
titletags=>[\(\)\[\]]=>
|
||||
# change (spaces)slash(spaces) to comma
|
||||
titletags=> */ *=>,
|
||||
titletags=> x =>,
|
||||
# remove [] or () blocks and leading/trailing spaces
|
||||
title=> *[\(\[]([^\]\)]+)[\)\]] *=>
|
||||
|
||||
extra_titlepage_entries: titletags
|
||||
|
||||
## '.SPLIT' tells the system to split by ','
|
||||
add_to_include_subject_tags:,titletags.SPLIT
|
||||
|
||||
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
description_limit:500
|
||||
|
||||
[forums.spacebattles.com]
|
||||
## see [base_xenforoforum]
|
||||
|
||||
cover_exclusion_regexp:/clear.png
|
||||
|
||||
add_to_extratags:,ForumFic
|
||||
|
||||
# true, false, threadmarksonly
|
||||
add_chapter_dates:false
|
||||
|
||||
strip_chapter_numbers:false
|
||||
|
||||
add_to_extra_valid_entries:,titletags
|
||||
# '.NOREPL' tells the system to *not* apply title's
|
||||
# in/exclude/replace_metadata -- Only works on include_in_ lines.
|
||||
include_in_titletags:title.NOREPL
|
||||
|
||||
## might want to do this, maybe not. Will often include category, but
|
||||
## also often include non-category stuff.
|
||||
# include_in_category:titletags
|
||||
|
||||
include_metadata_pre:
|
||||
# only keep titletags with ( or [ in.
|
||||
titletags=~[\[\(]
|
||||
|
||||
replace_metadata:
|
||||
# remove 'Thread' and the next word, usually "Thread 2", "Thread
|
||||
# four", "Thread iv", etc
|
||||
title=>[-: ]*[Tt]hread [^ ]+[-: ]*=>
|
||||
# remove anything outside () or []
|
||||
titletags=>^.*?([\(\[]([^\]\)]+)[\)\]]).*?$=>\1
|
||||
# remove () []
|
||||
titletags=>[\(\)\[\]]=>
|
||||
# change (spaces)slash(spaces) to comma
|
||||
titletags=> */ *=>,
|
||||
titletags=> x =>,
|
||||
# remove [] or () blocks and leading/trailing spaces
|
||||
title=> *[\(\[]([^\]\)]+)[\)\]] *=>
|
||||
|
||||
extra_titlepage_entries: titletags
|
||||
|
||||
## '.SPLIT' tells the system to split by ','
|
||||
add_to_include_subject_tags:,titletags.SPLIT
|
||||
|
||||
datePublished_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
dateUpdated_format:%%Y-%%m-%%d %%H:%%M:%%S
|
||||
|
||||
description_limit:500
|
||||
[forums.sufficientvelocity.com]
|
||||
## see [base_xenforoforum]
|
||||
|
||||
[grangerenchanted.com]
|
||||
## Some sites require login (or login for some rated stories) The
|
||||
|
|
|
|||
|
|
@ -412,7 +412,7 @@ class Story(Configurable):
|
|||
except:
|
||||
self.metadata = {'version':'4.4'}
|
||||
self.in_ex_cludes = {}
|
||||
self.chapters = [] # chapters will be tuples of (title,html)
|
||||
self.chapters = [] # chapters will be tuples of (url,title,html)
|
||||
self.imgurls = []
|
||||
self.imgtuples = []
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ class UserConfigServer(webapp2.RequestHandler):
|
|||
|
||||
def getUserConfig(self,user,url,fileformat):
|
||||
|
||||
configuration = Configuration(adapters.getConfigSectionFor(url),fileformat)
|
||||
configuration = Configuration(adapters.getConfigSectionsFor(url),fileformat)
|
||||
|
||||
logging.debug('reading defaults.ini config file')
|
||||
configuration.read('fanficfare/defaults.ini')
|
||||
|
|
|
|||
Loading…
Reference in a new issue