Support for ficwad.com. Further tweaks/improvements, especially to user config.

(I'm getting a bit carried away with making things configurable, honestly.)
This commit is contained in:
Jim Miller 2011-05-08 21:53:06 -05:00
parent ecd1690b70
commit d90c9ebced
15 changed files with 443 additions and 97 deletions

View file

@ -1,43 +1,64 @@
[defaults] [defaults]
## [defaults] section applies to all formats and sites but may be ## [defaults] section applies to all formats and sites but may be
## overridden. ## overridden at several levels
# All available titlepage_entries: ## All available titlepage_entries and the label used for them:
# category ## <entryname>_label:<label>
# genre ## Labels may be customized.
# status title_label:Title
# datePublished storyUrl_label:Story URL
# dateUpdated description_label:Summary
# dateCreated author_label:Author
# rating authorUrl_label:Author URL
# warnings ## epub, txt, html
# numChapters formatname_label:File Format
# numWords ## .epub, .txt, .html
# site formatext_label:File Extension
# siteabbrev ## Category and Genre have overlap, depending on the site.
# author ## Sometimes Harry Potter is a category and Fantasy a genre. (fanfiction.net)
# authorId ## Sometimes Fantasy is category *and* a genre (fictionpress.com)
# authorURL ## Sometimes there are multiple categories and/or genres.
# title category_label:Category
# storyId genre_label:Genre
# storyUrl ## Completed/In-Progress
# extratags status_label:Status
# description ## Dates story first published, last updated, and downloaded(last with time).
# formatname datePublished_label:Published
# formatext dateUpdated_label:Updated
dateCreated_label:Packaged
## Rating depends on the site. Some use K,T,M,etc, and some PG,R,NC-17
rating_label:Rating
## Also depends on the site.
warnings_label:Warnings
numChapters_label:Chapters
numWords_label:Words
## www.fanfiction.net, fictionalley.com, etc.
site_label:Publisher
## ffnet, fpcom, etc.
siteabbrev_label:Site Abbrev
## The site's unique story/author identifier. Usually a number.
storyId_label:Story ID
authorId_label:Author ID
## Primarily to put specific values in dc:subject tags for epub. Will
## show up in Calibre as tags. Also carried into mobi when converted.
extratags_label:Extra Tags
## items to include in title page ## items to include in the title page
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description ## Empty entries will *not* appear, even if in the list.
## All current formats already include title and author.
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,extratags,description
## include title page as first page. ## include title page as first page.
include_titlepage: true include_titlepage: true
## include TOC page immediately after title page. ## include a TOC page before the story text
include_tocpage: true include_tocpage: true
## python string Template, string with ${title}, ${author} etc, same as titlepage_entries ## python string Template, string with ${title}, ${author} etc, same as titlepage_entries
## Can include directories. ${formatext} will be added if not in name somewhere. ## Can include directories. ${formatext} will be added if not in filename somewhere.
#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}
#output_filename: books/${formatname}/${siteabbrev}/${authorId}/${title}-${siteabbrev}_${storyId}${formatext}
output_filename: ${title}-${siteabbrev}_${storyId}${formatext} output_filename: ${title}-${siteabbrev}_${storyId}${formatext}
## Make directories as needed. ## Make directories as needed.
make_directories: true make_directories: true
@ -47,25 +68,27 @@ zip_output: false
## Can include directories. .zip will be added if not in name somewhere ## Can include directories. .zip will be added if not in name somewhere
zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip
## try to make the output file name 'safe'--remove invalid filename chars. ## Normally, try to make the output file name 'safe' by removing
## applies to both output_filename & zip_filename ## invalid filename chars. Applies to both output_filename &
safe_filename: true ## zip_filename.
allow_unsafe_filename: false
## extra tags (comma separated) to include, primarily for epub. ## extra tags (comma separated) to include, primarily for epub.
extratags: FanFiction extratags: FanFiction
## number of seconds to sleep between calls to the story site. ## number of seconds to sleep between calls to the story site. May be
## useful if pulling large numbers of stories or if the site is slow.
## Primarily for commandline.
#slow_down_sleep_time:0.5 #slow_down_sleep_time:0.5
## Each output format has a section that overrides [defaults] ## Each output format has a section that overrides [defaults]
[html] [html]
[txt] [txt]
## Add URLs since there aren't links. ## Add URLs since there aren't links.
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,extratags,storyUrl, author URL, description titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,extratags,storyUrl, author URL, description
# use \r\n for line endings, the windows convention. txt output only. ## use \r\n for line endings, the windows convention. text output only.
windows_eol: true windows_eol: true
[epub] [epub]
@ -73,43 +96,62 @@ windows_eol: true
## epub is already a zip file. ## epub is already a zip file.
zip_output: false zip_output: false
# entries tags to make epub subject tags ## entries to make epub subject tags
# lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d" ## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d"
include_subject_tags: extratags, genre, category, lastupdate, status include_subject_tags: extratags, genre, category, lastupdate, status
#include_tocpage: false #include_tocpage: false
# epub->mobi conversions typically don't like tables. ## epub->mobi conversions typically don't like tables.
titlepage_use_table: false titlepage_use_table: false
## When using tables, make these span both columns. ## When using tables, make these span both columns.
wide_titlepage_entries: description, storyUrl, author URL wide_titlepage_entries: description, storyUrl, author URL
## Each site has a section that overrides [defaults] *and* the format section ## Each site has a section that overrides [defaults] *and* the format
## sections. test1.com specifically is not a real story site. Instead,
## it is a fake site for testing configuration and output. It uses
## URLs like: http://test1.com?sid=12345
[test1.com] [test1.com]
#titlepage_entries: title,description,category,genre, status,dateCreated,rating,numChapters,numWords,extratags,description,storyUrl,extratags
extratags: FanFiction,Testing extratags: FanFiction,Testing
## If necessary, you can define [<site>:<format>] sections to customize ## If necessary, you can define [<site>:<format>] sections to
## the formats differently for the same site. Overrides defaults, format and site. ## customize the formats differently for the same site. Overrides
## defaults, format and site.
[test1.com:txt] [test1.com:txt]
extratags: FanFiction,Testing,Text extratags: FanFiction,Testing,Text
[test1.com:html] [test1.com:html]
extratags: FanFiction,Testing,HTML extratags: FanFiction,Testing,HTML
[www.whofic.com]
[www.fanfiction.net] [www.fanfiction.net]
[www.twilighted.net]
## Some sites require login (or login for some rated stories)
## The program can prompt you, or you can save it in config.
## This should go in your personal.ini, not defaults.ini.
#username:YourName
#password:yourpassword
[www.fictionpress.com] [www.fictionpress.com]
## Clear FanFiction from defaults, fictionpress.com is original fiction. ## Clear FanFiction from defaults, fictionpress.com is original fiction.
extratags: extratags:
[www.ficwad.com]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#username:YourName
#password:yourpassword
[www.twilighted.net]
## Some sites require login (or login for some rated stories) The
## program can prompt you, or you can save it in config. In
## commandline version, this should go in your personal.ini, not
## defaults.ini.
#username:YourName
#password:yourpassword
[www.whofic.com]
[overrides]
## It may sometimes be useful to override all of the specific format,
## site and site:format sections in your private configuration. For
## example, this extratags param here would override all of the
## extratags params in all other sections. Only commandline options
## beat overrides.
#extratags:fanficdownloader

View file

@ -45,10 +45,9 @@
<h3>Edit Config</h3> <h3>Edit Config</h3>
<div id='logpassword'> <div id='logpassword'>
Editing configuration for {{ nickname }}. Editing configuration for {{ nickname }}.
{% if default %} Default values are shown. {% else %} Empty this box and Save to go back to use the default values. {% endif %}
</div> </div>
<div class='fieldandlabel'> <div class='fieldandlabel'>
<textarea name="config" style="width: 100%; height: 500px;" wrap='off'>{{ config }}</textarea> <textarea name="config" style="width: 100%; height: 200px;" wrap='off'>{{ config }}</textarea>
</div> </div>
</div> </div>
@ -56,6 +55,13 @@
<input type="submit" value="Save"> <input type="submit" value="Save">
</div> </div>
</form> </form>
<div>
<h3>Default System configuration</h3>
<pre>
{{ defaultsini }}
</pre>
</div>
<div style='text-align: center'> <div style='text-align: center'>
<img src="http://code.google.com/appengine/images/appengine-silver-120x30.gif" <img src="http://code.google.com/appengine/images/appengine-silver-120x30.gif"

29
example.ini Normal file
View file

@ -0,0 +1,29 @@
## This is an example of what your personal configuration might look
## like.
## Most commonly, I expect this will be used to save
## usernames/passwords for different sites.
[www.twilighted.net]
#username:YourPenname
#password:YourPassword
[www.ficwad.com]
#username:YourUsername
#password:YourPassword
## The [defaults] section here will override the system [defaults],
## but not format, site, or site:format sections.
[defaults]
## Directories only useful in commandline or zip files.
#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}
#output_filename: books/${site}/${authorId}/${title}-${storyId}${formatext}
## For example, zip_output here will turn on zip for html and txt, but
## not epub because the system [epub] section explicitly says
## zip_output: false (epubs *are* specially formatted zip files.)
#zip_output: true
#zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip
## This section will override anything in the system defaults or other
## sections here.
[overrides]

View file

@ -156,12 +156,14 @@ class FictionPressComSiteAdapter(BaseSiteAdapter):
soup = bs.BeautifulStoneSoup(self._fetchUrl(url), soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags. selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
span = soup.find('div', {'id' : 'storytext'}) div = soup.find('div', {'id' : 'storytext'})
## fp puts a padding style on the div that we don't want.
del div['style']
if None == span: if None == div:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url) raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span) return utf8FromSoup(div)
def getClass(): def getClass():
return FictionPressComSiteAdapter return FictionPressComSiteAdapter

View file

@ -0,0 +1,206 @@
# -*- coding: utf-8 -*-
import time
import datetime
import logging
import re
import urllib2
import time
import httplib, urllib
import fanficdownloader.BeautifulSoup as bs
import fanficdownloader.exceptions as exceptions
from fanficdownloader.htmlcleanup import stripHTML
from base_adapter import BaseSiteAdapter, utf8FromSoup
class FicwadComSiteAdapter(BaseSiteAdapter):
def __init__(self, config, url):
BaseSiteAdapter.__init__(self, config, url)
self.story.setMetadata('siteabbrev','fw')
# get storyId from url--url validation guarantees second part is storyId
self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])
self.username = "NoneGiven"
self.password = ""
@staticmethod
def getSiteDomain():
return 'www.ficwad.com'
@classmethod
def getAcceptDomains(cls):
return ['www.ficwad.com']
def getSiteExampleURLs(self):
return "http://www.ficwad.com/story/137169"
def getSiteURLPattern(self):
return re.escape(r"http://"+self.getSiteDomain())+"/story/\d+?$"
def performLogin(self,url):
params = {}
if self.password:
params['username'] = self.username
params['password'] = self.password
else:
params['username'] = self.getConfig("username")
params['password'] = self.getConfig("password")
loginUrl = 'http://' + self.getSiteDomain() + '/account/login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['username']))
d = self._postUrl(loginUrl,params)
if "Login attempt failed..." in d:
logging.info("Failed to login to URL %s as %s" % (loginUrl,
params['username']))
raise exceptions.FailedToLogin(url,params['username'])
return False
else:
return True
def extractChapterUrlsAndMetadata(self):
# fetch the chapter. From that we will get almost all the
# metadata and chapter list
url = self.url
logging.debug("URL: "+url)
# use BeautifulSoup HTML parser to make everything easier to find.
try:
soup = bs.BeautifulSoup(self._fetchUrl(url))
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
h3 = soup.find('h3')
storya = h3.find('a',href=re.compile("^/story/\d+$"))
if storya : # if there's a story link in the h3 header, this is a chapter page.
# normalize story URL on chapter list.
self.story.setMetadata('storyId',storya['href'].split('/',)[2])
url = "http://"+self.getSiteDomain()+storya['href']
logging.debug("Normalizing to URL: "+url)
self._setURL(url)
try:
soup = bs.BeautifulSoup(self._fetchUrl(url))
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
# if blocked, attempt login.
if soup.find("li",{"class":"blocked"}):
if self.performLogin(url): # performLogin raises
# FailedToLogin if it fails.
soup = bs.BeautifulSoup(self._fetchUrl(url))
# title - first h4 tag will be title.
titleh4 = soup.find('h4')
self.story.setMetadata('title', titleh4.a.string)
# Find authorid and URL from... author url.
a = soup.find('a', href=re.compile(r"^/author/\d+"))
self.story.setMetadata('authorId',a['href'].split('/')[2])
self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
self.story.setMetadata('author',a.string)
# description
storydiv = soup.find("div",{"id":"story"})
self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)
# most of the meta data is here:
metap = storydiv.find("p",{"class":"meta"})
self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)
# warnings
# <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
spanreq = metap.find("span",{"class":"req"})
for a in spanreq.findAll("a"):
self.story.addToList('warnings',a['title'])
## perhaps not the most efficient way to parse this, using
## regexps for each rather than something more complex, but
## IMO, it's more readable and amenable to change.
metapstr = stripHTML(str(metap)).replace('\n',' ').replace('\t','')
#print "metap: (%s)"%metapstr
m = re.match(r".*?Rating: (.+?) -.*?",metapstr)
if m:
self.story.setMetadata('rating', m.group(1))
m = re.match(r".*?Genres: (.+?) -.*?",metapstr)
if m:
for g in m.group(1).split(','):
self.story.addToList('genre',g)
m = re.match(r".*?Published: ([0-9/]+?) -.*?",metapstr)
if m:
self.story.setMetadata('datePublished',
datetime.datetime.fromtimestamp(\
time.mktime(time.strptime(m.group(1), "%Y/%m/%d"))))
# Updated can have more than one space after it. <shrug>
m = re.match(r".*?Updated: ([0-9/]+?) +-.*?",metapstr)
if m:
self.story.setMetadata('dateUpdated',
datetime.datetime.fromtimestamp(\
time.mktime(time.strptime(m.group(1), "%Y/%m/%d"))))
m = re.match(r".*? - ([0-9/]+?) words.*?",metapstr)
if m:
self.story.setMetadata('numWords',m.group(1))
if metapstr.endswith("Complete"):
self.story.setMetadata('status', 'Completed')
else:
self.story.setMetadata('status', 'In-Progress')
# get the chapter list first this time because that's how we
# detect the need to login.
storylistul = soup.find('ul',{'id':'storylist'})
if not storylistul:
# no list found, so it's a one-chapter story.
self.chapterUrls.append((self.story.getMetadata('title'),url))
else:
chapterlistlis = storylistul.findAll('li')
for chapterli in chapterlistlis:
if "blocked" in chapterli['class']:
# paranoia check. We should already be logged in by now.
raise exceptions.FailedToLogin(url,self.username)
else:
#print "chapterli.h4.a (%s)"%chapterli.h4.a
self.chapterUrls.append((chapterli.h4.a.string,
u'http://%s%s'%(self.getSiteDomain(),
chapterli.h4.a['href'])))
#print "self.chapterUrls:%s"%self.chapterUrls
self.story.setMetadata('numChapters',len(self.chapterUrls))
return
def getChapterText(self, url):
logging.debug('Getting chapter text from: %s' % url)
time.sleep(0.5) ## ffnet tends to fail more if hit too fast.
## This is in additional to what ever the
## slow_down_sleep_time setting is.
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
span = soup.find('div', {'id' : 'storytext'})
if None == span:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return utf8FromSoup(span)
def getClass():
    """Return the adapter class that this module provides."""
    adapter_class = FicwadComSiteAdapter
    return adapter_class

View file

@ -54,28 +54,27 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
return False return False
def performLogin(self, url): def performLogin(self, url):
data = {} params = {}
if self.password: if self.password:
data['penname'] = self.username params['penname'] = self.username
data['password'] = self.password params['password'] = self.password
else: else:
data['penname'] = self.getConfig("username") params['penname'] = self.getConfig("username")
data['password'] = self.getConfig("password") params['password'] = self.getConfig("password")
data['cookiecheck'] = '1' params['cookiecheck'] = '1'
data['submit'] = 'Submit' params['submit'] = 'Submit'
urlvals = urllib.urlencode(data)
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login' loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl, logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
data['penname'])) params['penname']))
d = self._fetchUrl(loginUrl, urlvals) d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account if "Member Account" not in d : #Member Account
logging.info("Failed to login to URL %s as %s" % (loginUrl, logging.info("Failed to login to URL %s as %s" % (loginUrl,
data['penname'])) params['penname']))
raise exceptions.FailedToLogin(url,data['penname']) raise exceptions.FailedToLogin(url,params['penname'])
return False return False
else: else:
return True return True
@ -150,10 +149,10 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
self.story.setMetadata('description',stripHTML(svalue)) self.story.setMetadata('description',stripHTML(svalue))
if 'Rated' in label: if 'Rated' in label:
self.story.setMetadata('rating', value.strip()) self.story.setMetadata('rating', value)
if 'Word count' in label: if 'Word count' in label:
self.story.setMetadata('numWords', value.strip()) self.story.setMetadata('numWords', value)
if 'Categories' in label: if 'Categories' in label:
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories')) cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))

View file

@ -3,6 +3,7 @@
import re import re
import datetime import datetime
import time import time
import urllib
import urllib2 as u2 import urllib2 as u2
import urlparse as up import urlparse as up
@ -27,7 +28,7 @@ class BaseSiteAdapter(Configurable):
def __init__(self, config, url): def __init__(self, config, url):
Configurable.__init__(self, config) Configurable.__init__(self, config)
self.addConfigSection(self.getSiteDomain()) self.addConfigSection(self.getSiteDomain())
self.addConfigSection("commandline") self.addConfigSection("overrides")
self.opener = u2.build_opener(u2.HTTPCookieProcessor()) self.opener = u2.build_opener(u2.HTTPCookieProcessor())
self.storyDone = False self.storyDone = False
@ -49,12 +50,30 @@ class BaseSiteAdapter(Configurable):
self.host = self.parsedUrl.netloc self.host = self.parsedUrl.netloc
self.path = self.parsedUrl.path self.path = self.parsedUrl.path
self.story.setMetadata('storyUrl',self.url) self.story.setMetadata('storyUrl',self.url)
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
def _postUrl(self, url, parameters=None, headers=None):
    """POST form-encoded parameters to url and return the decoded body.

    parameters -- dict of form fields; urlencoded into the request body.
    headers    -- optional dict of extra request headers.  Content-type
                  and Accept are filled in when not supplied.

    Sleeps for 'slow_down_sleep_time' seconds first when that config
    value is set.  Returns the response body decoded with self.decode.
    """
    if self.getConfig('slow_down_sleep_time'):
        time.sleep(float(self.getConfig('slow_down_sleep_time')))

    # Work on private copies: the previous mutable {} defaults were
    # shared between calls, and a caller-supplied headers dict was
    # modified in place.
    if parameters is None:
        parameters = {}
    if headers is None:
        headers = {}
    else:
        headers = dict(headers)

    ## u2.Request assumes POST when data!=None.  Also assumes data
    ## is application/x-www-form-urlencoded.
    if 'Content-type' not in headers:
        headers['Content-type']='application/x-www-form-urlencoded'
    if 'Accept' not in headers:
        headers['Accept']="text/html,*/*"
    req = u2.Request(url,
                     data=urllib.urlencode(parameters),
                     headers=headers)
    return self.opener.open(req).read().decode(self.decode)
# parameters is a dict()
def _fetchUrl(self, url, parameters=None): def _fetchUrl(self, url, parameters=None):
if self.getConfig('slow_down_sleep_time'): if self.getConfig('slow_down_sleep_time'):
time.sleep(float(self.getConfig('slow_down_sleep_time'))) time.sleep(float(self.getConfig('slow_down_sleep_time')))
if parameters: if parameters:
return self.opener.open(url,parameters).read().decode(self.decode) return self.opener.open(url,urllib.urlencode(parameters))\
.read().decode(self.decode)
else: else:
return self.opener.open(url).read().decode(self.decode) return self.opener.open(url).read().decode(self.decode)

View file

@ -26,7 +26,7 @@ def stripHTML(soup):
def conditionalRemoveEntities(value): def conditionalRemoveEntities(value):
if isinstance(value,str) or isinstance(value,unicode) : if isinstance(value,str) or isinstance(value,unicode) :
return removeEntities(value.strip()) return removeEntities(value).strip()
else: else:
return value return value

View file

@ -3,6 +3,8 @@
## This could (should?) use a dynamic loader like adapters, but for ## This could (should?) use a dynamic loader like adapters, but for
## now, it's static, since there's so few of them. ## now, it's static, since there's so few of them.
from fanficdownloader.exceptions import FailedToDownload
from writer_html import HTMLWriter from writer_html import HTMLWriter
from writer_txt import TextWriter from writer_txt import TextWriter
from writer_epub import EpubWriter from writer_epub import EpubWriter
@ -14,3 +16,5 @@ def getWriter(type,config,story):
return TextWriter(config,story) return TextWriter(config,story)
if type == "epub": if type == "epub":
return EpubWriter(config,story) return EpubWriter(config,story)
raise FailedToDownload("(%s) is not a supported download format."%type)

View file

@ -27,6 +27,31 @@ class BaseStoryWriter(Configurable):
## Pass adapter instead, to check date before fetching all? ## Pass adapter instead, to check date before fetching all?
## Or add 'check update' method to writer? ## Or add 'check update' method to writer?
self.story = story self.story = story
self.validEntries = [
'category',
'genre',
'status',
'datePublished',
'dateUpdated',
'dateCreated',
'rating',
'warnings',
'numChapters',
'numWords',
'site',
'storyId',
'authorId',
'extratags',
'title',
'storyUrl',
'description',
'author',
'authorUrl',
'formatname',
'formatext',
'siteabbrev']
# fall back labels.
self.titleLabels = { self.titleLabels = {
'category':'Category', 'category':'Category',
'genre':'Genre', 'genre':'Genre',
@ -38,7 +63,7 @@ class BaseStoryWriter(Configurable):
'warnings':'Warnings', 'warnings':'Warnings',
'numChapters':'Chapters', 'numChapters':'Chapters',
'numWords':'Words', 'numWords':'Words',
'site':'Publisher', 'site':'Site',
'storyId':'Story ID', 'storyId':'Story ID',
'authorId':'Author ID', 'authorId':'Author ID',
'extratags':'Extra Tags', 'extratags':'Extra Tags',
@ -49,6 +74,7 @@ class BaseStoryWriter(Configurable):
'authorUrl':'Author URL', 'authorUrl':'Author URL',
'formatname':'File Format', 'formatname':'File Format',
'formatext':'File Extension', 'formatext':'File Extension',
'siteabbrev':'Site Abbrev'
} }
self.story.setMetadata('formatname',self.getFormatName()) self.story.setMetadata('formatname',self.getFormatName())
self.story.setMetadata('formatext',self.getFormatExt()) self.story.setMetadata('formatext',self.getFormatExt())
@ -61,17 +87,15 @@ class BaseStoryWriter(Configurable):
def getFileName(self,template,extension="${formatext}"): def getFileName(self,template,extension="${formatext}"):
values = self.story.metadata values = self.story.metadata
fallback=False
# fall back default: # fall back default:
if not template: if not template:
template="${title}-${siteabbrev}_${storyId}${formatext}" template="${title}-${siteabbrev}_${storyId}${formatext}"
fallback=True
# Add extension if not already included. # Add extension if not already included.
if extension not in template: if extension not in template:
template+=extension template+=extension
if fallback or self.getConfig('safe_filename'): if not self.getConfig('allow_unsafe_filename'):
values={} values={}
pattern = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+") pattern = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+")
for k in self.story.metadata.keys(): for k in self.story.metadata.keys():
@ -99,13 +123,17 @@ class BaseStoryWriter(Configurable):
wideTitleEntriesList = self.getConfigList("wide_titlepage_entries") wideTitleEntriesList = self.getConfigList("wide_titlepage_entries")
for entry in titleEntriesList: for entry in titleEntriesList:
if entry in self.titleLabels: if entry in self.validEntries:
if self.story.getMetadata(entry): if self.story.getMetadata(entry):
if entry in wideTitleEntriesList: if entry in wideTitleEntriesList:
TEMPLATE=WIDE_ENTRY TEMPLATE=WIDE_ENTRY
else: else:
TEMPLATE=ENTRY TEMPLATE=ENTRY
self._write(out,TEMPLATE.substitute({'label':self.titleLabels[entry], if self.getConfigList(entry):
label=self.getConfig(entry+"_label")
else:
label=self.titleLabels[entry]
self._write(out,TEMPLATE.substitute({'label':label,
'value':self.story.getMetadata(entry)})) 'value':self.story.getMetadata(entry)}))
self._write(out,END.substitute(self.story.metadata)) self._write(out,END.substitute(self.story.metadata))
@ -129,7 +157,7 @@ class BaseStoryWriter(Configurable):
def writeStory(self,outstream=None): def writeStory(self,outstream=None):
self.addConfigSection(self.story.getMetadata('site')) self.addConfigSection(self.story.getMetadata('site'))
self.addConfigSection(self.story.getMetadata('site')+":"+self.getFormatName()) self.addConfigSection(self.story.getMetadata('site')+":"+self.getFormatName())
self.addConfigSection("commandline") self.addConfigSection("overrides")
for tag in self.getConfigList("extratags"): for tag in self.getConfigList("extratags"):
self.story.addToList("extratags",tag) self.story.addToList("extratags",tag)

View file

@ -242,7 +242,7 @@ h6 { text-align: center; }
# set to avoid duplicates subject tags. # set to avoid duplicates subject tags.
subjectset = set() subjectset = set()
for entry in self.titleLabels.keys(): for entry in self.validEntries:
if entry in self.getConfigList("include_subject_tags") and \ if entry in self.getConfigList("include_subject_tags") and \
entry not in self.story.getLists() and \ entry not in self.story.getLists() and \
self.story.getMetadata(entry): self.story.getMetadata(entry):

View file

@ -20,4 +20,4 @@ class DownloadData(db.Model):
class UserConfig(db.Model): class UserConfig(db.Model):
user = db.UserProperty() user = db.UserProperty()
config = db.TextProperty() config = db.BlobProperty()

View file

@ -56,10 +56,10 @@
This version is a new re-org/re-write of the code. This version is a new re-org/re-write of the code.
</p> </p>
<p> <p>
So far, only a few sites are supported: fanfiction.net, twilighted.net and whofic.com. So far, the only sites supported are: fanfiction.net, fictionalley.com, ficwad.com, twilighted.net and whofic.com.
</p> </p>
<p> <p>
Login/Password is only asked for when required now. Login/Password is asked for when required now.
</p> </p>
<p> <p>
Mobi support (for Kindle) is only via EPub conversion in this version. Mobi support (for Kindle) is only via EPub conversion in this version.
@ -77,7 +77,7 @@
<div id='typeoptions'> <div id='typeoptions'>
<input type='radio' name='format' value='epub' checked>EPub</input> <input type='radio' name='format' value='epub' checked>EPub</input>
<input type='radio' name='format' value='html'>HTML</input> <input type='radio' name='format' value='html'>HTML</input>
<input type='radio' name='format' value='text'>Plain Text</input> <input type='radio' name='format' value='txt'>Plain Text</input>
<p><i>For Mobi (Kindle) select EPub and use the Convert link when it's finished.</i></p> <p><i>For Mobi (Kindle) select EPub and use the Convert link when it's finished.</i></p>
</div> </div>
<div> <div>
@ -133,8 +133,9 @@
</dd> </dd>
<dt>ficwad.com</dt> <dt>ficwad.com</dt>
<dd> <dd>
Use the URL of any story chapter, such as Use the URL of the story's chapter list, such as
<br /><a href="http://www.ficwad.com/story/75246">http://www.ficwad.com/story/75246</a>. <br /><a href="http://www.ficwad.com/story/74884">http://www.ficwad.com/story/74884</a>.
<br />Note that this is changed from the previous version. The system will still accept chapter URLs, however.
</dd> </dd>
<dt>harrypotterfanfiction.com</dt> <dt>harrypotterfanfiction.com</dt>
<dd> <dd>

15
main.py
View file

@ -115,11 +115,16 @@ class EditConfigServer(webapp.RequestHandler):
if uconfig is not None and uconfig.config: if uconfig is not None and uconfig.config:
config = uconfig.config config = uconfig.config
else: else:
template_values['default'] = True configfile = open("example.ini","rb")
configfile = open("defaults.ini","rb")
config = configfile.read() config = configfile.read()
configfile.close() configfile.close()
template_values['config'] = config template_values['config'] = config
configfile = open("defaults.ini","rb")
config = configfile.read()
configfile.close()
template_values['defaultsini'] = config
path = os.path.join(os.path.dirname(__file__), 'editconfig.html') path = os.path.join(os.path.dirname(__file__), 'editconfig.html')
self.response.headers['Content-Type'] = 'text/html' self.response.headers['Content-Type'] = 'text/html'
self.response.out.write(template.render(path, template_values)) self.response.out.write(template.render(path, template_values))
@ -227,6 +232,9 @@ class UserConfigServer(webapp.RequestHandler):
def getUserConfig(self,user): def getUserConfig(self,user):
config = ConfigParser.SafeConfigParser() config = ConfigParser.SafeConfigParser()
logging.debug('reading defaults.ini config file')
config.read('defaults.ini')
## Pull user's config record. ## Pull user's config record.
l = UserConfig.all().filter('user =', user).fetch(1) l = UserConfig.all().filter('user =', user).fetch(1)
## TEST THIS ## TEST THIS
@ -234,9 +242,6 @@ class UserConfigServer(webapp.RequestHandler):
uconfig=l[0] uconfig=l[0]
logging.debug('reading config from UserConfig(%s)'%uconfig.config) logging.debug('reading config from UserConfig(%s)'%uconfig.config)
config.readfp(StringIO.StringIO(uconfig.config)) config.readfp(StringIO.StringIO(uconfig.config))
else:
logging.debug('reading defaults.ini config file')
config.read('defaults.ini')
return config return config

View file

@ -41,12 +41,15 @@ def main():
config.read('defaults.ini') config.read('defaults.ini')
logging.debug('reading personal.ini config file, if present') logging.debug('reading personal.ini config file, if present')
config.read('personal.ini') config.read('personal.ini')
config.add_section("commandline") try:
config.add_section("overrides")
except ConfigParser.DuplicateSectionError:
pass
if options.options: if options.options:
for opt in options.options: for opt in options.options:
(var,val) = opt.split('=') (var,val) = opt.split('=')
config.set("commandline",var,val) config.set("overrides",var,val)
try: try:
adapter = adapters.getAdapter(config,args[0]) adapter = adapters.getAdapter(config,args[0])
@ -62,9 +65,11 @@ def main():
adapter.getStoryMetadataOnly() adapter.getStoryMetadataOnly()
if options.metaonly: if options.metaonly:
adapter.getStoryMetadataOnly() print adapter.getStoryMetadataOnly()
return return
## XXX Use format. ## XXX Use format.
## XXX Doing all three formats actually causes some interesting
## XXX config issues with format-specific sections.
print "format: %s" % options.format print "format: %s" % options.format
writeStory(config,adapter,"epub") writeStory(config,adapter,"epub")
writeStory(config,adapter,"html") writeStory(config,adapter,"html")