\d+)"
## Login
def needToLoginCheck(self, data):
diff --git a/fanficdownloader/adapters/adapter_castlefansorg.py b/fanficdownloader/adapters/adapter_castlefansorg.py
index a78f76e9..1985f4cd 100644
--- a/fanficdownloader/adapters/adapter_castlefansorg.py
+++ b/fanficdownloader/adapters/adapter_castlefansorg.py
@@ -174,13 +174,14 @@ class CastleFansOrgAdapter(BaseSiteAdapter): # XXX
# print data
# Now go hunting for all the meta data and the chapter list.
-
+
+ pagetitle = soup.find('div',{'id':'pagetitle'})
## Title
- a = soup.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
+ a = pagetitle.find('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))
self.story.setMetadata('title',a.string)
# Find authorid and URL from... author url.
- a = soup.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
+ a = pagetitle.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl','http://'+self.host+'/'+a['href'])
self.story.setMetadata('author',a.string)
diff --git a/fanficdownloader/adapters/adapter_dramioneorg.py b/fanficdownloader/adapters/adapter_dramioneorg.py
index ef05e993..6ca73b2b 100644
--- a/fanficdownloader/adapters/adapter_dramioneorg.py
+++ b/fanficdownloader/adapters/adapter_dramioneorg.py
@@ -161,20 +161,18 @@ class DramioneOrgAdapter(BaseSiteAdapter):
self.story.setMetadata('author',a.string)
# Use banner as cover if found
- if self.getConfig('include_images'):
- coverurl = ''
- img = soup.find('img',{'class':'banner'})
- if img:
- coverurl = img['src']
- #print "Cover: "+coverurl
- a = soup.find(text="This story has a banner; click to view.")
- if a:
- #print "A: "+ ', '.join("(%s, %s)" %tup for tup in a.parent.attrs)
- coverurl = a.parent['href']
- #print "Cover: "+coverurl
- if coverurl:
- self.story.addImgUrl(self,url,coverurl,self._fetchUrlRaw,cover=True)
-
+ coverurl = ''
+ img = soup.find('img',{'class':'banner'})
+ if img:
+ coverurl = img['src']
+ #print "Cover: "+coverurl
+ a = soup.find(text="This story has a banner; click to view.")
+ if a:
+ #print "A: "+ ', '.join("(%s, %s)" %tup for tup in a.parent.attrs)
+ coverurl = a.parent['href']
+ #print "Cover: "+coverurl
+ if coverurl:
+ self.setCoverImage(url,coverurl)
# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"&chapter=\d+$")):
@@ -194,6 +192,22 @@ class DramioneOrgAdapter(BaseSiteAdapter):
for warning in warnings:
self.story.addToList('warnings',warning.string)
+ themes=soup.findAll('a', {'class' : "tag-3"})
+ for theme in themes:
+ self.story.addToList('themes',theme.string)
+
+ hermiones=soup.findAll('a', {'class' : "tag-4"})
+ for hermione in hermiones:
+ self.story.addToList('hermiones',hermione.string)
+
+ dracos=soup.findAll('a', {'class' : "tag-5"})
+ for draco in dracos:
+ self.story.addToList('dracos',draco.string)
+
+ timelines=soup.findAll('a', {'class' : "tag-6"})
+ for timeline in timelines:
+ self.story.addToList('timeline',timeline.string)
+
# utility method
def defaultGetattr(d,k):
try:
diff --git a/fanficdownloader/adapters/adapter_fanfictionnet.py b/fanficdownloader/adapters/adapter_fanfictionnet.py
index cd33f2f5..a4bb1211 100644
--- a/fanficdownloader/adapters/adapter_fanfictionnet.py
+++ b/fanficdownloader/adapters/adapter_fanfictionnet.py
@@ -186,8 +186,14 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
donechars = False
while len(metalist) > 0:
- if metalist[0].startswith('Reviews') or metalist[0].startswith('Chapters') or metalist[0].startswith('Status') or metalist[0].startswith('id:') or metalist[0].startswith('Favs:') or metalist[0].startswith('Follows:'):
+ if metalist[0].startswith('Chapters') or metalist[0].startswith('Status') or metalist[0].startswith('id:'):
pass
+ elif metalist[0].startswith('Reviews'):
+ self.story.setMetadata('reviews',metalist[0].split(':')[1].strip())
+ elif metalist[0].startswith('Favs:'):
+ self.story.setMetadata('favs',metalist[0].split(':')[1].strip())
+ elif metalist[0].startswith('Follows:'):
+ self.story.setMetadata('follows',metalist[0].split(':')[1].strip())
elif metalist[0].startswith('Updated'):
self.story.setMetadata('dateUpdated',makeDate(metalist[0].split(':')[1].strip(), '%m-%d-%y'))
elif metalist[0].startswith('Published'):
@@ -208,10 +214,9 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
else:
self.story.setMetadata('status', 'In-Progress')
- if self.getConfig('include_images'):
- img = soup.find('img',{'class':'cimage'})
- if img:
- self.story.addImgUrl(self,url,img['src'],self._fetchUrlRaw,cover=True)
+ img = soup.find('img',{'class':'cimage'})
+ if img:
+ self.setCoverImage(url,img['src'])
# Find the chapter selector
select = soup.find('select', { 'name' : 'chapter' } )
diff --git a/fanficdownloader/adapters/adapter_fimfictionnet.py b/fanficdownloader/adapters/adapter_fimfictionnet.py
index 77d90642..e1c6deda 100644
--- a/fanficdownloader/adapters/adapter_fimfictionnet.py
+++ b/fanficdownloader/adapters/adapter_fimfictionnet.py
@@ -138,14 +138,14 @@ class FimFictionNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata("numWords", str(storyMetadata["words"]))
# fimfic is the first site with an explicit cover image.
- if self.getConfig('include_images') and "image" in storyMetadata.keys():
+ if "image" in storyMetadata.keys():
if "full_image" in storyMetadata:
coverurl = storyMetadata["full_image"]
else:
coverurl = storyMetadata["image"]
if coverurl.startswith('//static.fimfiction.net'): # fix for img urls missing 'http:'
coverurl = "http:"+coverurl
- self.story.addImgUrl(self,self.url,coverurl,self._fetchUrlRaw,cover=True)
+ self.setCoverImage(self.url,coverurl)
# the fimfic API gives bbcode for desc, not html.
diff --git a/fanficdownloader/adapters/base_adapter.py b/fanficdownloader/adapters/base_adapter.py
index 9ed89eb5..7ca9686f 100644
--- a/fanficdownloader/adapters/base_adapter.py
+++ b/fanficdownloader/adapters/base_adapter.py
@@ -67,10 +67,8 @@ class BaseSiteAdapter(Configurable):
def validateURL(self):
return re.match(self.getSiteURLPattern(), self.url)
- def __init__(self, config, url):
- self.config = config
- Configurable.__init__(self, config)
- self.setSectionOrder(self.getConfigSection())
+ def __init__(self, configuration, url):
+ Configurable.__init__(self, configuration)
self.username = "NoneGiven" # if left empty, site doesn't return any message at all.
self.password = ""
@@ -79,7 +77,7 @@ class BaseSiteAdapter(Configurable):
self.opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
self.storyDone = False
self.metadataDone = False
- self.story = Story()
+ self.story = Story(configuration)
self.story.setMetadata('site',self.getConfigSection())
self.story.setMetadata('dateCreated',datetime.datetime.now())
self.chapterUrls = [] # tuples of (chapter title,chapter url)
@@ -138,7 +136,7 @@ class BaseSiteAdapter(Configurable):
logging.debug("try code:"+code)
return data.decode(code)
except:
- logging.info("code failed:"+code)
+ logging.debug("code failed:"+code)
pass
logging.info("Could not decode story, tried:%s Stripping non-ASCII."%decode)
return "".join([x for x in data if ord(x) < 128])
@@ -199,8 +197,7 @@ class BaseSiteAdapter(Configurable):
if (self.chapterFirst!=None and index < self.chapterFirst) or \
(self.chapterLast!=None and index > self.chapterLast):
self.story.addChapter(removeEntities(title),
- None,
- self)
+ None)
else:
if self.oldchapters and index < len(self.oldchapters):
data = self.utf8FromSoup(None,
@@ -209,16 +206,14 @@ class BaseSiteAdapter(Configurable):
else:
data = self.getChapterText(url)
self.story.addChapter(removeEntities(title),
- removeEntities(data),
- self)
+ removeEntities(data))
self.storyDone = True
# include image, but no cover from story, add default_cover_image cover.
if self.getConfig('include_images') and \
not self.story.cover and \
self.getConfig('default_cover_image'):
- self.story.addImgUrl(self,
- None,
+ self.story.addImgUrl(None,
#self.getConfig('default_cover_image'),
self.story.formatFileName(self.getConfig('default_cover_image'),
self.getConfig('allow_unsafe_filename')),
@@ -298,6 +293,10 @@ class BaseSiteAdapter(Configurable):
self.story.setMetadata('description',stripHTML(svalue))
#print("\n\ndescription:\n"+self.story.getMetadata('description')+"\n\n")
+    def setCoverImage(self,storyurl,imgurl):
+        # Single entry point adapters use to register a cover image; does
+        # nothing unless the include_images setting is enabled.
+        if not self.getConfig('include_images'):
+            return
+        self.story.addImgUrl(storyurl,imgurl,self._fetchUrlRaw,cover=True)
+
# This gives us a unicode object, not just a string containing bytes.
# (I gave soup a unicode string, you'd think it could give it back...)
# Now also does a bunch of other common processing for us.
@@ -313,7 +312,7 @@ class BaseSiteAdapter(Configurable):
# some pre-existing epubs have img tags that had src stripped off.
if img.has_key('src'):
img['longdesc']=img['src']
- img['src']=self.story.addImgUrl(self,url,img['src'],fetch,
+ img['src']=self.story.addImgUrl(url,img['src'],fetch,
coverexclusion=self.getConfig('cover_exclusion_regexp'))
for attr in soup._getAttrMap().keys():
diff --git a/fanficdownloader/configurable.py b/fanficdownloader/configurable.py
index 2435ccf2..b76d9ed1 100644
--- a/fanficdownloader/configurable.py
+++ b/fanficdownloader/configurable.py
@@ -21,10 +21,6 @@ import ConfigParser
# inherit from Configurable. The config file(s) uses ini format:
# [sections] with key:value settings.
#
-# writer does [defaults], [www.whofic.com], [epub], [www.whofic.com:epub], [overrides]
-#
-# Until a write is created, the adapter only has [defaults], [www.whofic.com], [overrides]
-#
# [defaults]
# titlepage_entries: category,genre, status
# [www.whofic.com]
@@ -36,28 +32,67 @@ import ConfigParser
# [overrides]
# titlepage_entries: category
+class Configuration(ConfigParser.SafeConfigParser):
-class Configurable(object):
-
- def __init__(self, config):
- self.config = config
- self.sectionslist = ['defaults']
-
- def setSectionOrder(self,site,fileform=None):
+ def __init__(self, site, fileform):
+ ConfigParser.SafeConfigParser.__init__(self)
self.sectionslist = ['defaults']
self.addConfigSection(site)
if fileform:
self.addConfigSection(fileform)
self.addConfigSection(site+":"+fileform)
self.addConfigSection("overrides")
-
+
+ self.validEntries = [
+ 'category',
+ 'genre',
+ 'language',
+ 'characters',
+ 'ships',
+ 'series',
+ 'status',
+ 'datePublished',
+ 'dateUpdated',
+ 'dateCreated',
+ 'rating',
+ 'warnings',
+ 'numChapters',
+ 'numWords',
+ 'site',
+ 'storyId',
+ 'authorId',
+ 'extratags',
+ 'title',
+ 'storyUrl',
+ 'description',
+ 'author',
+ 'authorUrl',
+ 'formatname',
+ 'formatext',
+ 'siteabbrev',
+ 'version',
+ # internal stuff.
+ 'langcode',
+ 'output_css',
+ 'authorHTML'
+ ]
+
def addConfigSection(self,section):
self.sectionslist.insert(0,section)
+    def isValidMetaEntry(self, key):
+        'True when key is a built-in or extra_valid_entries metadata name.'
+        allowed = self.getValidMetaList()
+        return key in allowed
+
+    def getValidMetaList(self):
+        'Return a new list: built-in validEntries followed by the extra_valid_entries setting.'
+        extras = self.getConfigList("extra_valid_entries")
+        return list(self.validEntries) + list(extras)
+
def hasConfig(self, key):
for section in self.sectionslist:
try:
- self.config.get(section,key)
+ self.get(section,key)
#print("found %s in section [%s]"%(key,section))
return True
except:
@@ -65,11 +100,11 @@ class Configurable(object):
return False
- def getConfig(self, key):
- val = ""
+ def getConfig(self, key, default=""):
+ val = default
for section in self.sectionslist:
try:
- val = self.config.get(section,key)
+ val = self.get(section,key)
if val and val.lower() == "false":
val = False
#print "getConfig(%s)=[%s]%s" % (key,section,val)
@@ -84,5 +119,26 @@ class Configurable(object):
vlist = self.getConfig(key).split(',')
vlist = [ v.strip() for v in vlist ]
#print "vlist("+key+"):"+str(vlist)
- return vlist
+ return vlist
+
+# extended by adapter, writer and story for ease of calling configuration.
+class Configurable(object):
+    'Thin delegate: every lookup is forwarded to the wrapped configuration object.'
+
+    def __init__(self, configuration):
+        'Keep a reference to the configuration object that all lookups go through.'
+        self.configuration = configuration
+
+    def isValidMetaEntry(self, key):
+        'Forward to configuration.isValidMetaEntry(key).'
+        return self.configuration.isValidMetaEntry(key)
+
+    def getValidMetaList(self):
+        'Forward to configuration.getValidMetaList().'
+        return self.configuration.getValidMetaList()
+
+    def hasConfig(self, key):
+        'Forward to configuration.hasConfig(key).'
+        return self.configuration.hasConfig(key)
+
+    def getConfig(self, key, default=""):
+        'Forward to configuration.getConfig, passing the caller-supplied default through.'
+        # The default must be forwarded: callers such as
+        # getConfig(key+"_format","%Y-%m-%d") rely on it when the key is
+        # unset, and previously got "" back instead.
+        return self.configuration.getConfig(key, default)
+
+    def getConfigList(self, key):
+        'Forward to configuration.getConfigList(key).'
+        return self.configuration.getConfigList(key)
diff --git a/fanficdownloader/geturls.py b/fanficdownloader/geturls.py
index 5492fd9f..08d7a252 100644
--- a/fanficdownloader/geturls.py
+++ b/fanficdownloader/geturls.py
@@ -18,19 +18,19 @@
import re
import urlparse
import urllib2 as u2
-import ConfigParser
from BeautifulSoup import BeautifulSoup
from gziphttp import GZipProcessor
import adapters
+from configurable import Configuration
-def get_urls_from_page(url,config=None):
+def get_urls_from_page(url,configuration=None):
normalized = set() # normalized url
retlist = [] # orig urls.
- if not config:
- config = ConfigParser.SafeConfigParser()
+ if not configuration:
+ configuration = Configuration("test1.com","EPUB")
data = None
@@ -39,7 +39,7 @@ def get_urls_from_page(url,config=None):
# them, AO3 doesn't even show them if not logged in. Only works
# with saved user/pass--not going to prompt for list.
if 'archiveofourown.org' in url:
- ao3adapter = adapters.getAdapter(config,"http://www.archiveofourown.org/works/0","EPUB")
+ ao3adapter = adapters.getAdapter(configuration,"http://www.archiveofourown.org/works/0","EPUB")
if ao3adapter.getConfig("username"):
if ao3adapter.getConfig("is_adult"):
addurl = "?view_adult=true"
@@ -72,7 +72,7 @@ def get_urls_from_page(url,config=None):
try:
href = href.replace('&index=1','')
- adapter = adapters.getAdapter(config,href,"EPUB")
+ adapter = adapters.getAdapter(configuration,href,"EPUB")
if adapter.story.getMetadata('storyUrl') not in normalized:
normalized.add(adapter.story.getMetadata('storyUrl'))
retlist.append(href)
diff --git a/fanficdownloader/story.py b/fanficdownloader/story.py
index 73008008..a4ea20f8 100644
--- a/fanficdownloader/story.py
+++ b/fanficdownloader/story.py
@@ -22,6 +22,7 @@ from math import floor
import exceptions
from htmlcleanup import conditionalRemoveEntities, removeAllEntities
+from configurable import Configurable
# Create convert_image method depending on which graphics lib we can
# load. Preferred: calibre, PIL, none
@@ -186,9 +187,10 @@ langs = {
"Devanagari":"hi",
}
-class Story:
+class Story(Configurable):
- def __init__(self):
+ def __init__(self, configuration):
+ Configurable.__init__(self, configuration)
try:
self.metadata = {'version':os.environ['CURRENT_VERSION_ID']}
except:
@@ -197,35 +199,50 @@ class Story:
self.chapters = [] # chapters will be tuples of (title,html)
self.imgurls = []
self.imgtuples = []
- self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.
+
self.cover=None # *href* of new cover image--need to create html.
self.oldcover=None # (oldcoverhtmlhref,oldcoverhtmltype,oldcoverhtmldata,oldcoverimghref,oldcoverimgtype,oldcoverimgdata)
self.calibrebookmark=None # cheesy way to carry calibre bookmark file forward across update.
self.logfile=None # cheesy way to carry log file forward across update.
- def setMetadata(self, key, value):
+ self.setReplace(self.getConfig('replace_metadata'))
+
+ def setMetadata(self, key, value, condremoveentities=True):
## still keeps < < and &
- self.metadata[key]=conditionalRemoveEntities(value)
+ if condremoveentities:
+ self.metadata[key]=conditionalRemoveEntities(value)
+ else:
+ self.metadata[key]=value
if key == "language":
try:
self.metadata['langcode'] = langs[self.metadata[key]]
except:
self.metadata['langcode'] = 'en'
+ if key == 'dateUpdated':
+ # Last Update tags for Bill.
+ self.addToList('lastupdate',value.strftime("Last Update Year/Month: %Y/%m"))
+ self.addToList('lastupdate',value.strftime("Last Update: %Y/%m/%d"))
+
def getMetadataRaw(self,key):
- if self.metadata.has_key(key):
+ if self.isValidMetaEntry(key) and self.metadata.has_key(key):
return self.metadata[key]
def doReplacments(self,value):
for (p,v) in self.replacements:
- if (isinstance(value,str) or isinstance(value,unicode)) and re.match(p,value):
+ if (isinstance(value,basestring)) and re.match(p,value):
value = re.sub(p,v,value)
return value
- def getMetadata(self, key, removeallentities=False, doreplacements=True):
+ def getMetadata(self, key,
+ removeallentities=False,
+ doreplacements=True):
value = None
+ if not self.isValidMetaEntry(key):
+ return value
+
if self.isList(key):
- value = ', '.join(self.getList(key, removeallentities))
+ value = u', '.join(self.getList(key, removeallentities, doreplacements=True))
elif self.metadata.has_key(key):
value = self.metadata[key]
if value:
@@ -233,10 +250,8 @@ class Story:
value = commaGroups(value)
if key == "numChapters":
value = commaGroups("%d"%value)
- if key == "dateCreated":
- value = value.strftime("%Y-%m-%d %H:%M:%S")
- if key == "datePublished" or key == "dateUpdated":
- value = value.strftime("%Y-%m-%d")
+ if key in ("dateCreated","datePublished","dateUpdated"):
+ value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))
if doreplacements:
value=self.doReplacments(value)
@@ -244,19 +259,22 @@ class Story:
return removeAllEntities(value)
else:
return value
-
- def getAllMetadata(self, removeallentities=False, doreplacements=True):
+
+ def getAllMetadata(self,
+ removeallentities=False,
+ doreplacements=True,
+ keeplists=False):
'''
- All single value *and* list value metadata as strings.
+ All single value *and* list value metadata as strings (unless keeplists=True, then keep lists).
'''
allmetadata = {}
# special handling for authors/authorUrls
authlinkhtml="%s"
- if 'author' in self.listables.keys(): # more than one author, assume multiple authorUrl too.
+ if self.isList('author'): # more than one author, assume multiple authorUrl too.
htmllist=[]
- for i, v in enumerate(self.listables['author']):
- aurl = self.listables['authorUrl'][i]
+ for i, v in enumerate(self.getList('author')):
+ aurl = self.getList('authorUrl')[i]
auth = v
# make sure doreplacements & removeallentities are honored.
if doreplacements:
@@ -271,11 +289,12 @@ class Story:
else:
self.setMetadata('authorHTML',authlinkhtml%(self.getMetadata('authorUrl', removeallentities, doreplacements),
self.getMetadata('author', removeallentities, doreplacements)))
-
- for k in self.metadata.keys():
- allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)
- for l in self.listables.keys():
- allmetadata[l] = self.getMetadata(l, removeallentities, doreplacements)
+
+ for k in self.getValidMetaList():
+ if self.isList(k) and keeplists:
+ allmetadata[k] = self.getList(k, removeallentities, doreplacements)
+ else:
+ allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)
return allmetadata
@@ -288,52 +307,80 @@ class Story:
if value==None:
return
value = conditionalRemoveEntities(value)
- if not self.isList(listname):
- self.listables[listname]=[]
+ if not self.isList(listname) or not listname in self.metadata:
+ # Calling addToList to a non-list meta will overwrite it.
+ self.metadata[listname]=[]
# prevent duplicates.
- if not value in self.listables[listname]:
- self.listables[listname].append(value)
+ if not value in self.metadata[listname]:
+ self.metadata[listname].append(value)
- def getList(self,listname, removeallentities=False, doreplacements=True):
+    def isList(self,listname):
+        'Anything with an include_in_* setting counts as a list, as does valid metadata stored as a list.'
+        if self.hasConfig("include_in_"+listname):
+            return True
+        if not self.isValidMetaEntry(listname):
+            return False
+        # A missing key yields None, which is not a list.
+        return isinstance(self.metadata.get(listname),list)
+
+ def getList(self,listname,
+ removeallentities=False,
+ doreplacements=True,
+ doincludein=True):
retlist = []
- if not self.isList(listname):
- retlist = [self.getMetadata(listname,removeallentities=removeallentities)]
+ if not self.isValidMetaEntry(listname):
+ return retlist
+
+ # doincludein prevents recursion of include_in_'s
+ if doincludein and self.hasConfig("include_in_"+listname):
+ for k in self.getConfigList("include_in_"+listname):
+ retlist.extend(self.getList(k,removeallentities,doreplacements,doincludein=False))
else:
- retlist = self.listables[listname]
+
+ if not self.isList(listname):
+ retlist = [self.getMetadata(listname,removeallentities, doreplacements)]
+ else:
+ retlist = self.getMetadataRaw(listname)
- if doreplacements:
- retlist = filter( lambda x : x!=None and x!='' ,
- map(self.doReplacments,retlist) )
- if removeallentities:
- retlist = filter( lambda x : x!=None and x!='' ,
- map(removeAllEntities,retlist) )
+ if doreplacements:
+ retlist = filter( lambda x : x!=None and x!='' ,
+ map(self.doReplacments,retlist) )
+ if removeallentities:
+ retlist = filter( lambda x : x!=None and x!='' ,
+ map(removeAllEntities,retlist) )
return retlist
- def getLists(self, removeallentities=False):
- lsts = {}
- for ln in self.listables.keys():
- lsts[ln] = self.getList(ln, removeallentities)
- return lsts
+ def getSubjectTags(self, removeallentities=False):
+ # set to avoid duplicates subject tags.
+ subjectset = set()
+
+ tags_list = self.getConfigList("include_subject_tags") + self.getConfigList("extra_subject_tags")
+
+ # metadata all go into dc:subject tags, but only if they are configured.
+ for (name,value) in self.getAllMetadata(removeallentities=removeallentities,keeplists=True).iteritems():
+ if name in tags_list:
+ if isinstance(value,list):
+ for tag in value:
+ subjectset.add(tag)
+ else:
+ subjectset.add(value)
- def isList(self,listname):
- return self.listables.has_key(listname)
-
- def addChapter(self, title, html, configurable=None):
- if configurable and \
- configurable.getConfig('strip_chapter_numbers') and \
- configurable.getConfig('chapter_title_strip_pattern'):
- title = re.sub(configurable.getConfig('chapter_title_strip_pattern'),"",title)
+ if None in subjectset:
+ subjectset.remove(None)
+
+ return list(subjectset)
+
+ def addChapter(self, title, html):
+ if self.getConfig('strip_chapter_numbers') and \
+ self.getConfig('chapter_title_strip_pattern'):
+ title = re.sub(self.getConfig('chapter_title_strip_pattern'),"",title)
self.chapters.append( (title,html) )
- def getChapters(self, configurable=None):
+ def getChapters(self):
"Chapters will be tuples of (title,html)"
retval = []
- if configurable and \
- configurable.getConfig('add_chapter_numbers') and \
- configurable.getConfig('chapter_title_add_pattern'):
+ if self.getConfig('add_chapter_numbers') and \
+ self.getConfig('chapter_title_add_pattern'):
for index, (title,html) in enumerate(self.chapters):
- retval.append( (string.Template(configurable.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
+ retval.append( (string.Template(self.getConfig('chapter_title_add_pattern')).substitute({'index':index+1,'title':title}),html) )
else:
retval = self.chapters
@@ -355,11 +402,11 @@ class Story:
# pass fetch in from adapter in case we need the cookies collected
# as well as it's a base_story class method.
- def addImgUrl(self,configurable,parenturl,url,fetch,cover=False,coverexclusion=None):
+ def addImgUrl(self,parenturl,url,fetch,cover=False,coverexclusion=None):
# otherwise it saves the image in the epub even though it
# isn't used anywhere.
- if cover and configurable.getConfig('never_make_cover'):
+ if cover and self.getConfig('never_make_cover'):
return
url = url.strip() # ran across an image with a space in the
@@ -397,20 +444,20 @@ class Story:
if imgurl not in self.imgurls:
parsedUrl = urlparse.urlparse(imgurl)
try:
- sizes = [ int(x) for x in configurable.getConfigList('image_max_size') ]
+ sizes = [ int(x) for x in self.getConfigList('image_max_size') ]
except Exception, e:
- raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(configurable.getConfigList('image_max_size'),e))
+ raise exceptions.FailedToDownload("Failed to parse image_max_size from personal.ini:%s\nException: %s"%(self.getConfigList('image_max_size'),e))
try:
(data,ext,mime) = convert_image(imgurl,
fetch(imgurl),
sizes,
- configurable.getConfig('grayscale_images'))
+ self.getConfig('grayscale_images'))
except Exception, e:
print("Failed to load or convert image, skipping:\n%s\nException: %s"%(imgurl,e))
return "failedtoload"
# explicit cover, make the first image.
- if cover and not configurable.getConfig('never_make_cover'):
+ if cover and not self.getConfig('never_make_cover'):
if len(self.imgtuples) > 0 and 'cover' in self.imgtuples[0]['newsrc']:
# remove existing cover, if there is one.
del self.imgurls[0]
@@ -427,8 +474,8 @@ class Story:
# NOT never_make_cover AND
# either no coverexclusion OR coverexclusion doesn't match
if self.cover == None and \
- configurable.getConfig('make_firstimage_cover') and \
- not configurable.getConfig('never_make_cover') and \
+ self.getConfig('make_firstimage_cover') and \
+ not self.getConfig('never_make_cover') and \
(not coverexclusion or not re.search(coverexclusion,imgurl)):
newsrc = "images/cover.%s"%ext
self.cover=newsrc
@@ -457,7 +504,7 @@ class Story:
return retlist
def __str__(self):
- return "Metadata: " +str(self.metadata) + "\nListables: " +str(self.listables) #+ "\nChapters: "+str(self.chapters)
+ return "Metadata: " +str(self.metadata)
def setReplace(self,replace):
for line in replace.splitlines():
diff --git a/fanficdownloader/writers/base_writer.py b/fanficdownloader/writers/base_writer.py
index 25cca2fe..013d6e9e 100644
--- a/fanficdownloader/writers/base_writer.py
+++ b/fanficdownloader/writers/base_writer.py
@@ -36,44 +36,12 @@ class BaseStoryWriter(Configurable):
def getFormatExt():
return '.bse'
- def __init__(self, config, adapter):
- Configurable.__init__(self, config)
- self.setSectionOrder(adapter.getConfigSection(),self.getFormatName())
+ def __init__(self, configuration, adapter):
+ Configurable.__init__(self, configuration)
self.adapter = adapter
self.story = adapter.getStoryMetadataOnly() # only cache the metadata initially.
- self.story.setReplace(self.getConfig('replace_metadata'))
-
- self.validEntries = [
- 'category',
- 'genre',
- 'language',
- 'characters',
- 'ships',
- 'series',
- 'status',
- 'datePublished',
- 'dateUpdated',
- 'dateCreated',
- 'rating',
- 'warnings',
- 'numChapters',
- 'numWords',
- 'site',
- 'storyId',
- 'authorId',
- 'extratags',
- 'title',
- 'storyUrl',
- 'description',
- 'author',
- 'authorUrl',
- 'formatname',
- 'formatext',
- 'siteabbrev',
- 'version']
-
# fall back labels.
self.titleLabels = {
'category':'Category',
@@ -148,11 +116,11 @@ class BaseStoryWriter(Configurable):
if WIDE_ENTRY==None:
WIDE_ENTRY=ENTRY
- titleEntriesList = self.getConfigList("titlepage_entries")
+ titleEntriesList = self.getConfigList("titlepage_entries") + self.getConfigList("extra_titlepage_entries")
wideTitleEntriesList = self.getConfigList("wide_titlepage_entries")
for entry in titleEntriesList:
- if entry in self.validEntries:
+ if self.isValidMetaEntry(entry):
if self.story.getMetadata(entry):
if entry in wideTitleEntriesList:
TEMPLATE=WIDE_ENTRY
@@ -161,9 +129,12 @@ class BaseStoryWriter(Configurable):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
- else:
- print("Using fallback label for %s_label"%entry)
+ elif entry in self.titleLabels:
+ logging.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
+ else:
+ label="%s"%entry.title()
+ logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
# If the label for the title entry is empty, use the
# 'no title' option if there is one.
@@ -184,10 +155,10 @@ class BaseStoryWriter(Configurable):
names as Story.metadata, but ENTRY should use index and chapter.
"""
# Only do TOC if there's more than one chapter and it's configured.
- if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
+ if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
self._write(out,START.substitute(self.story.getAllMetadata()))
- for index, (title,html) in enumerate(self.story.getChapters(self)):
+ for index, (title,html) in enumerate(self.story.getChapters()):
if html:
self._write(out,ENTRY.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
@@ -202,9 +173,11 @@ class BaseStoryWriter(Configurable):
# minor cheat, tucking css into metadata.
if self.getConfig("output_css"):
- self.story.metadata["output_css"] = self.getConfig("output_css")
+ self.story.setMetadata("output_css",
+ self.getConfig("output_css"),
+ condremoveentities=False)
else:
- self.story.metadata["output_css"] = ''
+ self.story.setMetadata("output_css",'')
if not outstream:
close=True
@@ -261,29 +234,6 @@ class BaseStoryWriter(Configurable):
if close:
outstream.close()
- def getTags(self, removeallentities=False):
- # set to avoid duplicates subject tags.
- subjectset = set()
-
- if self.story.getMetadataRaw('dateUpdated'):
- # Last Update tags for Bill.
- self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update Year/Month: %Y/%m"))
- self.story.addToList('lastupdate',self.story.getMetadataRaw('dateUpdated').strftime("Last Update: %Y/%m/%d"))
-
- for entry in self.validEntries:
- if entry in self.getConfigList("include_subject_tags") and \
- entry not in self.story.getLists() and \
- self.story.getMetadata(entry):
- subjectset.add(self.getMetadata(entry, removeallentities))
-
- # listables all go into dc:subject tags, but only if they are configured.
- for (name,lst) in self.story.getLists(removeallentities).iteritems():
- if name in self.getConfigList("include_subject_tags"):
- for tag in lst:
- subjectset.add(tag)
-
- return list(subjectset)
-
def writeStoryImpl(self, out):
"Must be overriden by sub classes."
pass
diff --git a/fanficdownloader/writers/writer_epub.py b/fanficdownloader/writers/writer_epub.py
index 312c937b..4f92643f 100644
--- a/fanficdownloader/writers/writer_epub.py
+++ b/fanficdownloader/writers/writer_epub.py
@@ -193,31 +193,33 @@ ${value}
Switch rindex to index to search from top instead of bottom.
"""
values = {}
- for entry in self.getConfigList("logpage_entries"):
- if entry in self.validEntries:
- try:
- # 1975-04-15
- span = ''%entry
- idx = logfile.rindex(span)+len(span)
- values[entry] = logfile[idx:logfile.index('',idx)]
- except Exception, e:
- #print("e:%s"%e)
- pass
+ for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
+ try:
+ # 1975-04-15
+ span = ''%entry
+ idx = logfile.rindex(span)+len(span)
+ values[entry] = logfile[idx:logfile.index('',idx)]
+ except Exception, e:
+ #print("e:%s"%e)
+ pass
return values
def _makeLogEntry(self, oldvalues={}):
retval = ""
- for entry in self.getConfigList("logpage_entries"):
- if entry in self.validEntries:
+ for entry in self.getConfigList("logpage_entries") + self.getConfigList("extra_logpage_entries"):
+ if self.isValidMetaEntry(entry):
val = self.story.getMetadata(entry)
if val and ( entry not in oldvalues or val != oldvalues[entry] ):
if self.hasConfig(entry+"_label"):
label=self.getConfig(entry+"_label")
- else:
- print("Using fallback label for %s_label"%entry)
+ elif entry in self.titleLabels:
+ logging.debug("Using fallback label for %s_label"%entry)
label=self.titleLabels[entry]
+ else:
+ label="%s"%entry.title()
+ logging.debug("No known label for %s, fallback to '%s'"%(entry,label))
retval = retval + self.EPUB_LOG_ENTRY.substitute({'id':entry,
'label':label,
@@ -342,7 +344,7 @@ ${value}
metadata.appendChild(newTag(contentdom,"dc:description",text=
self.getMetadata('description')))
- for subject in self.getTags():
+ for subject in self.story.getSubjectTags():
metadata.appendChild(newTag(contentdom,"dc:subject",text=subject))
@@ -441,7 +443,7 @@ div { margin: 0pt; padding: 0pt; }
if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
itemrefs.append("title_page")
- if len(self.story.getChapters(self)) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
+ if len(self.story.getChapters()) > 1 and self.getConfig("include_tocpage") and not self.metaonly :
items.append(("toc_page","OEBPS/toc_page.xhtml","application/xhtml+xml","Table of Contents"))
itemrefs.append("toc_page")
@@ -449,7 +451,7 @@ div { margin: 0pt; padding: 0pt; }
items.append(("log_page","OEBPS/log_page.xhtml","application/xhtml+xml","Update Log"))
itemrefs.append("log_page")
- for index, (title,html) in enumerate(self.story.getChapters(self)):
+ for index, (title,html) in enumerate(self.story.getChapters()):
if html:
i=index+1
items.append(("file%04d"%i,
@@ -587,7 +589,7 @@ div { margin: 0pt; padding: 0pt; }
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
logpageIO.close()
- for index, (title,html) in enumerate(self.story.getChapters(self)):
+ for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.EPUB_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.EPUB_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
diff --git a/fanficdownloader/writers/writer_html.py b/fanficdownloader/writers/writer_html.py
index 5caf1bca..85ba1fa5 100644
--- a/fanficdownloader/writers/writer_html.py
+++ b/fanficdownloader/writers/writer_html.py
@@ -94,7 +94,7 @@ ${output_css}
self.HTML_TOC_ENTRY,
self.HTML_TOC_PAGE_END)
- for index, (title,html) in enumerate(self.story.getChapters(self)):
+ for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.HTML_CHAPTER_START.substitute({'chapter':title, 'index':"%04d"%(index+1)}))
diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py
index 07d8704c..8a443b88 100644
--- a/fanficdownloader/writers/writer_mobi.py
+++ b/fanficdownloader/writers/writer_mobi.py
@@ -169,7 +169,7 @@ ${value}
# files.append(tocpageIO.getvalue())
# tocpageIO.close()
- for index, (title,html) in enumerate(self.story.getChapters(self)):
+ for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
diff --git a/fanficdownloader/writers/writer_txt.py b/fanficdownloader/writers/writer_txt.py
index 47545402..6b9f35b0 100644
--- a/fanficdownloader/writers/writer_txt.py
+++ b/fanficdownloader/writers/writer_txt.py
@@ -133,7 +133,7 @@ End file.
self._write(out,self.lineends(self.wraplines(towrap)))
- for index, (title,html) in enumerate(self.story.getChapters(self)):
+ for index, (title,html) in enumerate(self.story.getChapters()):
if html:
logging.debug('Writing chapter text for: %s' % title)
self._write(out,self.lineends(self.wraplines(removeAllEntities(self.TEXT_CHAPTER_START.substitute({'chapter':title, 'index':index+1})))))
diff --git a/index.html b/index.html
index 6821a2ab..321379a2 100644
--- a/index.html
+++ b/index.html
@@ -58,7 +58,23 @@
New sites bloodties-fans.com and thehookupzone.net, thanks Marillapm!
+ New Features and Fixes
+ Added extra metadata feature for certain sites. This version
+ contains some metadata features that have been requested,
+ plus quite a bit of internal refactoring to allow them.
+ This is primarily intended for calibre plugin users, so
+ it's documented in the plugin forum.
+
+
Also added cover images for dramione.org, thanks
+ ilovejedd.
+
+
+ Fixes for: squidge.org/peja Rating including '[' when
+ there's no stars; loosen url check on AO3 to allow more
+ copy/paste URLs; fix author on castlefans.org due to
+ site/skin changes.
+
Questions? Check out our
FAQs.
@@ -67,7 +83,7 @@
If you have any problems with this application, please
report them in
the FanFictionDownLoader Google Group. The
- Previous Version is also available for you to use if necessary.
+ Previous Version is also available for you to use if necessary.
{{ error_message }}
diff --git a/main.py b/main.py
index ae3b9757..d0108126 100644
--- a/main.py
+++ b/main.py
@@ -29,7 +29,6 @@ import datetime
import traceback
from StringIO import StringIO
-import ConfigParser
## Just to shut up the appengine warning about "You are using the
## default Django version (0.96). The default Django version will
@@ -57,22 +56,25 @@ from google.appengine.runtime import DeadlineExceededError
from ffstorage import *
from fanficdownloader import adapters, writers, exceptions
+from fanficdownloader.configurable import Configuration
class UserConfigServer(webapp2.RequestHandler):
- def getUserConfig(self,user):
- config = ConfigParser.SafeConfigParser()
+
+ def getUserConfig(self,user,url,fileformat):
+ configuration = Configuration(adapters.getConfigSectionFor(url),fileformat)
+
logging.debug('reading defaults.ini config file')
- config.read('defaults.ini')
+ configuration.read('defaults.ini')
## Pull user's config record.
l = UserConfig.all().filter('user =', user).fetch(1)
if l and l[0].config:
uconfig=l[0]
#logging.debug('reading config from UserConfig(%s)'%uconfig.config)
- config.readfp(StringIO(uconfig.config))
+ configuration.readfp(StringIO(uconfig.config))
- return config
+ return configuration
class MainHandler(webapp2.RequestHandler):
def get(self):
@@ -137,7 +139,8 @@ class EditConfigServer(UserConfigServer):
uconfig.config = self.request.get('config').encode('utf8')[:10000] ## just in case.
uconfig.put()
try:
- config = self.getUserConfig(user)
+ # just getting config for testing purposes.
+ configuration = self.getUserConfig(user,"test1.com","epub")
self.redirect("/?error=configsaved")
except Exception, e:
logging.info("Saved Config Failed:%s"%e)
@@ -367,12 +370,12 @@ class FanfictionDownloader(UserConfigServer):
adapter = None
try:
try:
- config = self.getUserConfig(user)
+ configuration = self.getUserConfig(user,url,format)
except Exception, e:
self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e)))
return
- adapter = adapters.getAdapter(config,url,format)
+ adapter = adapters.getAdapter(configuration,url)
logging.info('Created an adaper: %s' % adapter)
if len(login) > 1:
@@ -474,8 +477,8 @@ class FanfictionDownloaderTask(UserConfigServer):
logging.info('Creating adapter...')
try:
- config = self.getUserConfig(user)
- adapter = adapters.getAdapter(config,url,format)
+ configuration = self.getUserConfig(user,url,format)
+ adapter = adapters.getAdapter(configuration,url)
logging.info('Created an adapter: %s' % adapter)
@@ -488,7 +491,7 @@ class FanfictionDownloaderTask(UserConfigServer):
# adapter.getStoryMetadataOnly() only fetches enough to
# get metadata. writer.writeStory() will call
# adapter.getStory(), too.
- writer = writers.getWriter(format,config,adapter)
+ writer = writers.getWriter(format,configuration,adapter)
download.name = writer.getOutputFileName()
#logging.debug('output_filename:'+writer.getConfig('output_filename'))
logging.debug('getOutputFileName:'+writer.getOutputFileName())
diff --git a/plugin-defaults.ini b/plugin-defaults.ini
index 406d4afb..9792280f 100644
--- a/plugin-defaults.ini
+++ b/plugin-defaults.ini
@@ -67,9 +67,14 @@ authorId_label:Author ID
## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags
## The version of fanficdownloader
-##
version_label:FFDL Version
+## Date formats used by FFDL. Published and Updated don't have a time.
+## Note that ini format requires % to be escaped as %%.
+dateCreated_format:%%Y-%%m-%%d %%H:%%M:%%S
+datePublished_format:%%Y-%%m-%%d
+dateUpdated_format:%%Y-%%m-%%d
+
## items to include in the title page
## Empty metadata entries will *not* appear, even if in the list.
## You can include extra text or HTML that will be included as-is in
@@ -463,6 +468,55 @@ extraships:Draco Malfoy/Hermione Granger
## personal.ini, not defaults.ini.
#is_adult:true
+## Some adapters collect additional meta information beyond the
+## standard ones. They need to be defined in extra_valid_entries to
+## tell the rest of the FFDL system about them. They can be used in
+## include_subject_tags, titlepage_entries, extra_titlepage_entries,
+## logpage_entries, extra_logpage_entries, and include_in_* config
+## items. You can also add additional entries here to build up
+## composite metadata entries. dramione.org, for example, adds
+## 'cliches' and then defines it as the composite of hermiones,dracos in
+## include_in_cliches.
+extra_valid_entries:themes,hermiones,dracos,timeline,cliches
+include_in_cliches:hermiones,dracos
+
+## For another example, you could, by uncommenting this line, include
+## themes in with genre metadata.
+#include_in_genre:genre, themes
+
+## You can give each new valid entry a specific label for use on
+## titlepage and logpage. If not defined, it will simply be the entry name in title case.
+themes_label:Themes
+hermiones_label:Hermiones
+dracos_label:Dracos
+timeline_label:Timeline
+cliches_label:Character Cliches
+
+## extra_titlepage_entries (and extra_logpage_entries) *add* to
+## titlepage_entries (and logpage_entries) so you can add site
+## specific entries to titlepage/logpage without having to copy the
+## entire titlepage_entries line. (But if you want them higher than
+## the end, you will need to copy titlepage_entries.)
+#extra_titlepage_entries: themes,timeline,cliches
+#extra_logpage_entries: themes,timeline,cliches
+#extra_subject_tags: themes,timeline,cliches
+
+## (Plugin Only) - You can also populate calibre custom columns with
+## the site specific metadata using custom_columns_settings (but only
+## if 'Allow custom_columns_settings from personal.ini' is checked in
+## the plugin GUI config.) There are three parts, the entry name,
+## then the label of the calibre custom column, then (optionally) a
+## 'mode'. 'r' to Replace any existing values, 'a' to Add to existing
+## value (use with tag-like columns), and 'n' for setting on New books
+## only.
+## Make sure to keep at least one space at the start of each
+## continuation line under custom_columns_settings.
+
+#custom_columns_settings:
+# cliches=>#acolumn,r
+# themes=>#bcolumn,a
+# timeline=>#ccolumn,n
+
[erosnsappho.sycophanthex.com]
## Site dedicated to these categories/characters/ships
extracategories:Harry Potter
@@ -774,9 +828,13 @@ extraships:Sesshoumaru/Kagome
[www.fanfiction.net]
## fanfiction.net's 'cover' images are really just tiny thumbnails.
-## Comment this out or change it to false to use them anyway.
+## Change this to false to use them anyway.
never_make_cover: true
+## Extra metadata that this adapter knows about. See [dramione.org]
+## for examples of how to use them.
+extra_valid_entries:reviews,favs,follows
+
[www.fanfiktion.de]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
diff --git a/plugin-example.ini b/plugin-example.ini
index 11839b22..928835a7 100644
--- a/plugin-example.ini
+++ b/plugin-example.ini
@@ -42,7 +42,8 @@
## Most common, I expect will be using this to save username/passwords
-## for different sites. Here are a few examples.
+## for different sites. Here are a few examples. See defaults.ini
+## for the full list.
[www.twilighted.net]
#username:YourPenname