mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
Support for ficwad.com. Further tweaks/improvements, especially to user config.
(I'm getting a bit carried away with making things configurable, honestly.)
This commit is contained in:
parent
ecd1690b70
commit
d90c9ebced
15 changed files with 443 additions and 97 deletions
144
defaults.ini
144
defaults.ini
|
|
@ -1,43 +1,64 @@
|
|||
[defaults]
|
||||
|
||||
## [defaults] section applies to all formats and sites but may be
|
||||
## overridden.
|
||||
## overridden at several levels
|
||||
|
||||
# All available titlepage_entries:
|
||||
# category
|
||||
# genre
|
||||
# status
|
||||
# datePublished
|
||||
# dateUpdated
|
||||
# dateCreated
|
||||
# rating
|
||||
# warnings
|
||||
# numChapters
|
||||
# numWords
|
||||
# site
|
||||
# siteabbrev
|
||||
# author
|
||||
# authorId
|
||||
# authorURL
|
||||
# title
|
||||
# storyId
|
||||
# storyUrl
|
||||
# extratags
|
||||
# description
|
||||
# formatname
|
||||
# formatext
|
||||
## All available titlepage_entries and the label used for them:
|
||||
## <entryname>_label:<label>
|
||||
## Labels may be customized.
|
||||
title_label:Title
|
||||
storyUrl_label:Story URL
|
||||
description_label:Summary
|
||||
author_label:Author
|
||||
authorUrl_label:Author URL
|
||||
## epub, txt, html
|
||||
formatname_label:File Format
|
||||
## .epub, .txt, .html
|
||||
formatext_label:File Extension
|
||||
## Category and Genre have overlap, depending on the site.
|
||||
## Sometimes Harry Potter is a category and Fantasy a genre. (fanfiction.net)
|
||||
## Sometimes Fantasy is category *and* a genre (fictionpress.com)
|
||||
## Sometimes there are multiple categories and/or genres.
|
||||
category_label:Category
|
||||
genre_label:Genre
|
||||
## Completed/In-Progress
|
||||
status_label:Status
|
||||
## Dates story first published, last updated, and downloaded(last with time).
|
||||
datePublished_label:Published
|
||||
dateUpdated_label:Updated
|
||||
dateCreated_label:Packaged
|
||||
## Rating depends on the site. Some use K,T,M,etc, and some PG,R,NC-17
|
||||
rating_label:Rating
|
||||
## Also depends on the site.
|
||||
warnings_label:Warnings
|
||||
numChapters_label:Chapters
|
||||
numWords_label:Words
|
||||
## www.fanfiction.net, fictionalley.com, etc.
|
||||
site_label:Publisher
|
||||
## ffnet, fpcom, etc.
|
||||
siteabbrev_label:Site Abbrev
|
||||
## The site's unique story/author identifier. Usually a number.
|
||||
storyId_label:Story ID
|
||||
authorId_label:Author ID
|
||||
## Primarily to put specific values in dc:subject tags for epub. Will
|
||||
## show up in Calibre as tags. Also carried into mobi when converted.
|
||||
extratags_label:Extra Tags
|
||||
|
||||
## items to include in title page
|
||||
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,description
|
||||
## items to include in the title page
|
||||
## Empty entries will *not* appear, even if in the list.
|
||||
## All current formats already include title and author.
|
||||
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,extratags,description
|
||||
|
||||
## include title page as first page.
|
||||
include_titlepage: true
|
||||
|
||||
## include TOC page immediately after title page.
|
||||
## include a TOC page before the story text
|
||||
include_tocpage: true
|
||||
|
||||
## python string Template, string with ${title}, ${author} etc, same as titlepage_entries
|
||||
## Can include directories. ${formatext} will be added if not in name somewhere.
|
||||
## Can include directories. ${formatext} will be added if not in filename somewhere.
|
||||
#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}
|
||||
#output_filename: books/${formatname}/${siteabbrev}/${authorId}/${title}-${siteabbrev}_${storyId}${formatext}
|
||||
output_filename: ${title}-${siteabbrev}_${storyId}${formatext}
|
||||
## Make directories as needed.
|
||||
make_directories: true
|
||||
|
|
@ -47,25 +68,27 @@ zip_output: false
|
|||
## Can include directories. .zip will be added if not in name somewhere
|
||||
zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip
|
||||
|
||||
## try to make the output file name 'safe'--remove invalid filename chars.
|
||||
## applies to both output_filename & zip_filename
|
||||
safe_filename: true
|
||||
## Normally, try to make the output file name 'safe' by removing
|
||||
## invalid filename chars. Applies to both output_filename &
|
||||
## zip_filename.
|
||||
allow_unsafe_filename: false
|
||||
|
||||
## extra tags (comma separated) to include, primarily for epub.
|
||||
extratags: FanFiction
|
||||
|
||||
## number of seconds to sleep between calls to the story site.
|
||||
## number of seconds to sleep between calls to the story site. May be
|
||||
## useful if pulling large numbers of stories or if the site is slow.
|
||||
## Primarily for commandline.
|
||||
#slow_down_sleep_time:0.5
|
||||
|
||||
## Each output format has a section that overrides [defaults]
|
||||
|
||||
[html]
|
||||
|
||||
[txt]
|
||||
## Add URLs since there aren't links.
|
||||
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,extratags,storyUrl, author URL, description
|
||||
|
||||
# use \r\n for line endings, the windows convention. txt output only.
|
||||
## use \r\n for line endings, the windows convention. text output only.
|
||||
windows_eol: true
|
||||
|
||||
[epub]
|
||||
|
|
@ -73,43 +96,62 @@ windows_eol: true
|
|||
## epub is already a zip file.
|
||||
zip_output: false
|
||||
|
||||
# entries tags to make epub subject tags
|
||||
# lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d"
|
||||
## entries to make epub subject tags
|
||||
## lastupdate creates two tags: "Last Update Year/Month: %Y/%m" and "Last Update: %Y/%m/%d"
|
||||
include_subject_tags: extratags, genre, category, lastupdate, status
|
||||
#include_tocpage: false
|
||||
|
||||
# epub->mobi conversions typically don't like tables.
|
||||
## epub->mobi conversions typically don't like tables.
|
||||
titlepage_use_table: false
|
||||
|
||||
## When using tables, make these span both columns.
|
||||
wide_titlepage_entries: description, storyUrl, author URL
|
||||
|
||||
|
||||
## Each site has a section that overrides [defaults] *and* the format section
|
||||
## Each site has a section that overrides [defaults] *and* the format
|
||||
## sections. test1.com specifically is not a real story site. Instead,
|
||||
## it is a fake site for testing configuration and output. It uses
|
||||
## URLs like: http://test1.com?sid=12345
|
||||
[test1.com]
|
||||
#titlepage_entries: title,description,category,genre, status,dateCreated,rating,numChapters,numWords,extratags,description,storyUrl,extratags
|
||||
extratags: FanFiction,Testing
|
||||
|
||||
## If necessary, you can define [<site>:<format>] sections to customize
|
||||
## the formats differently for the same site. Overrides defaults, format and site.
|
||||
## If necessary, you can define [<site>:<format>] sections to
|
||||
## customize the formats differently for the same site. Overrides
|
||||
## defaults, format and site.
|
||||
[test1.com:txt]
|
||||
extratags: FanFiction,Testing,Text
|
||||
|
||||
[test1.com:html]
|
||||
extratags: FanFiction,Testing,HTML
|
||||
|
||||
[www.whofic.com]
|
||||
|
||||
[www.fanfiction.net]
|
||||
|
||||
[www.twilighted.net]
|
||||
## Some sites require login (or login for some rated stories)
|
||||
## The program can prompt you, or you can save it in config.
|
||||
## This should go in your personal.ini, not defaults.ini.
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[www.fictionpress.com]
|
||||
## Clear FanFiction from defaults, fictionpress.com is original fiction.
|
||||
extratags:
|
||||
extratags:
|
||||
|
||||
[www.ficwad.com]
|
||||
## Some sites require login (or login for some rated stories). The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
## defaults.ini.
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[www.twilighted.net]
|
||||
## Some sites require login (or login for some rated stories). The
|
||||
## program can prompt you, or you can save it in config. In
|
||||
## commandline version, this should go in your personal.ini, not
|
||||
## defaults.ini.
|
||||
#username:YourName
|
||||
#password:yourpassword
|
||||
|
||||
[www.whofic.com]
|
||||
|
||||
[overrides]
|
||||
## It may sometimes be useful to override all of the specific format,
|
||||
## site and site:format sections in your private configuration. For
|
||||
## example, this extratags param here would override all of the
|
||||
## extratags params in all other sections. Only commandline options
|
||||
## beat overrides.
|
||||
#extratags:fanficdownloader
|
||||
|
|
|
|||
|
|
@ -45,10 +45,9 @@
|
|||
<h3>Edit Config</h3>
|
||||
<div id='logpassword'>
|
||||
Editing configuration for {{ nickname }}.
|
||||
{% if default %} Default values are shown. {% else %} Empty this box and Save to go back to use the default values. {% endif %}
|
||||
</div>
|
||||
<div class='fieldandlabel'>
|
||||
<textarea name="config" style="width: 100%; height: 500px;" wrap='off'>{{ config }}</textarea>
|
||||
<textarea name="config" style="width: 100%; height: 200px;" wrap='off'>{{ config }}</textarea>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
|
@ -56,6 +55,13 @@
|
|||
<input type="submit" value="Save">
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<div>
|
||||
<h3>Default System configuration</h3>
|
||||
<pre>
|
||||
{{ defaultsini }}
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
<div style='text-align: center'>
|
||||
<img src="http://code.google.com/appengine/images/appengine-silver-120x30.gif"
|
||||
|
|
|
|||
29
example.ini
Normal file
29
example.ini
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
## This is an example of what your personal configuration might look
|
||||
## like.
|
||||
|
||||
## Most commonly, I expect, this will be used to save usernames/passwords
|
||||
## for different sites.
|
||||
[www.twilighted.net]
|
||||
#username:YourPenname
|
||||
#password:YourPassword
|
||||
|
||||
[www.ficwad.com]
|
||||
#username:YourUsername
|
||||
#password:YourPassword
|
||||
|
||||
## The [defaults] section here will override the system [defaults],
|
||||
## but not format, site or site:format sections.
|
||||
[defaults]
|
||||
## Directories only useful in commandline or zip files.
|
||||
#output_filename: books/${title}-${siteabbrev}_${storyId}${formatext}
|
||||
#output_filename: books/${site}/${authorId}/${title}-${storyId}${formatext}
|
||||
|
||||
## For example, zip_output here will turn on zip for html and txt, but
|
||||
## not epub because the system [epub] section explicitly says
|
||||
## zip_output: false (epubs *are* specially formatted zip files.)
|
||||
#zip_output: true
|
||||
#zip_filename: ${title}-${siteabbrev}_${storyId}${formatext}.zip
|
||||
|
||||
## This section will override anything in the system defaults or other
|
||||
## sections here.
|
||||
[overrides]
|
||||
|
|
@ -156,12 +156,14 @@ class FictionPressComSiteAdapter(BaseSiteAdapter):
|
|||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
||||
span = soup.find('div', {'id' : 'storytext'})
|
||||
div = soup.find('div', {'id' : 'storytext'})
|
||||
## fp puts a padding style on the div that we don't want.
|
||||
del div['style']
|
||||
|
||||
if None == span:
|
||||
if None == div:
|
||||
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
|
||||
|
||||
return utf8FromSoup(span)
|
||||
return utf8FromSoup(div)
|
||||
|
||||
def getClass():
|
||||
return FictionPressComSiteAdapter
|
||||
|
|
|
|||
206
fanficdownloader/adapters/adapter_ficwadcom.py
Normal file
206
fanficdownloader/adapters/adapter_ficwadcom.py
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import time
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
import urllib2
|
||||
import time
|
||||
import httplib, urllib
|
||||
|
||||
import fanficdownloader.BeautifulSoup as bs
|
||||
import fanficdownloader.exceptions as exceptions
|
||||
from fanficdownloader.htmlcleanup import stripHTML
|
||||
|
||||
from base_adapter import BaseSiteAdapter, utf8FromSoup
|
||||
|
||||
class FicwadComSiteAdapter(BaseSiteAdapter):
    """Adapter that downloads stories from www.ficwad.com.

    Accepted URLs look like http://www.ficwad.com/story/<digits>.  The
    URL may point at a story's chapter list or at a single chapter;
    extractChapterUrlsAndMetadata() normalizes a chapter URL to the
    story URL found in the page's h3 header.
    """

    def __init__(self, config, url):
        """Initialize the base adapter and record siteabbrev and storyId."""
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev','fw')

        # get storyId from url--url validation guarantees second part is storyId
        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])

        # Placeholder credentials.  performLogin() only uses these when
        # self.password is non-empty; otherwise it reads the
        # "username"/"password" config values.
        self.username = "NoneGiven"
        self.password = ""

    @staticmethod
    def getSiteDomain():
        """Return the site's domain name."""
        return 'www.ficwad.com'

    @classmethod
    def getAcceptDomains(cls):
        """Return the list of domains this adapter accepts."""
        return ['www.ficwad.com']

    def getSiteExampleURLs(self):
        """Return an example story URL for this site."""
        return "http://www.ficwad.com/story/137169"

    def getSiteURLPattern(self):
        """Return the regular expression that valid story URLs must match."""
        # Only the regex part is a raw string so that \d is never
        # treated as a string escape; the resulting pattern is unchanged.
        return re.escape("http://"+self.getSiteDomain())+r"/story/\d+?$"

    def performLogin(self,url):
        """Post the login form to the site.

        Uses self.username/self.password when a password has been set
        on the adapter, otherwise the configured "username" and
        "password" values.

        url -- the story URL being fetched; only used in the error.

        Returns True on success.  Raises exceptions.FailedToLogin when
        the response contains the site's failure message.
        """
        params = {}

        if self.password:
            params['username'] = self.username
            params['password'] = self.password
        else:
            params['username'] = self.getConfig("username")
            params['password'] = self.getConfig("password")

        loginUrl = 'http://' + self.getSiteDomain() + '/account/login'
        logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
                                                              params['username']))
        d = self._postUrl(loginUrl,params)

        if "Login attempt failed..." in d:
            logging.info("Failed to login to URL %s as %s" % (loginUrl,
                                                              params['username']))
            raise exceptions.FailedToLogin(url,params['username'])

        return True

    def _fetchSoup(self, url):
        """Fetch url and return it parsed by BeautifulSoup.

        An HTTP 404 is turned into exceptions.StoryDoesNotExist for the
        adapter's current self.url; any other HTTPError propagates
        unchanged.
        """
        try:
            return bs.BeautifulSoup(self._fetchUrl(url))
        except urllib2.HTTPError:
            # sys.exc_info() instead of binding the exception in the
            # except clause keeps the syntax valid on every Python
            # version; the bare raise preserves the original traceback.
            import sys
            if sys.exc_info()[1].code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            raise

    def extractChapterUrlsAndMetadata(self):
        """Fetch the story page and fill in metadata and self.chapterUrls.

        Normalizes a chapter URL to the story URL, logs in if the
        chapter list contains blocked entries, then sets title, author,
        description, category, warnings, rating, genre, dates, word
        count, status and numChapters on self.story.

        Raises exceptions.StoryDoesNotExist on HTTP 404 and
        exceptions.FailedToLogin if login fails or chapters are still
        blocked afterwards.
        """
        # fetch the chapter.  From that we will get almost all the
        # metadata and chapter list
        url = self.url
        logging.debug("URL: "+url)

        # use BeautifulSoup HTML parser to make everything easier to find.
        soup = self._fetchSoup(url)

        h3 = soup.find('h3')
        storya = h3.find('a',href=re.compile(r"^/story/\d+$"))
        if storya : # if there's a story link in the h3 header, this is a chapter page.
            # normalize story URL on chapter list.
            self.story.setMetadata('storyId',storya['href'].split('/',)[2])
            url = "http://"+self.getSiteDomain()+storya['href']
            logging.debug("Normalizing to URL: "+url)
            self._setURL(url)
            soup = self._fetchSoup(url)

        # if blocked, attempt login.
        if soup.find("li",{"class":"blocked"}):
            if self.performLogin(url): # performLogin raises
                                       # FailedToLogin if it fails.
                soup = bs.BeautifulSoup(self._fetchUrl(url))

        # title - first h4 tag will be title.
        titleh4 = soup.find('h4')
        self.story.setMetadata('title', titleh4.a.string)

        # Find authorid and URL from... author url.
        a = soup.find('a', href=re.compile(r"^/author/\d+"))
        self.story.setMetadata('authorId',a['href'].split('/')[2])
        self.story.setMetadata('authorUrl','http://'+self.host+a['href'])
        self.story.setMetadata('author',a.string)

        # description
        storydiv = soup.find("div",{"id":"story"})
        self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)

        # most of the meta data is here:
        metap = storydiv.find("p",{"class":"meta"})
        self.story.addToList('category',metap.find("a",href=re.compile(r"^/category/\d+")).string)

        # warnings
        # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
        spanreq = metap.find("span",{"class":"req"})
        # find() returns None when the span is absent; skip warnings
        # instead of failing on None.findAll.
        if spanreq is not None:
            for a in spanreq.findAll("a"):
                self.story.addToList('warnings',a['title'])

        ## perhaps not the most efficient way to parse this, using
        ## regexps for each rather than something more complex, but
        ## IMO, it's more readable and amenable to change.
        metapstr = stripHTML(str(metap)).replace('\n',' ').replace('\t','')
        #print "metap: (%s)"%metapstr

        m = re.match(r".*?Rating: (.+?) -.*?",metapstr)
        if m:
            self.story.setMetadata('rating', m.group(1))

        m = re.match(r".*?Genres: (.+?) -.*?",metapstr)
        if m:
            for g in m.group(1).split(','):
                self.story.addToList('genre',g)

        m = re.match(r".*?Published: ([0-9/]+?) -.*?",metapstr)
        if m:
            self.story.setMetadata('datePublished',
                                   datetime.datetime.fromtimestamp(\
                    time.mktime(time.strptime(m.group(1), "%Y/%m/%d"))))

        # Updated can have more than one space after it. <shrug>
        m = re.match(r".*?Updated: ([0-9/]+?) +-.*?",metapstr)
        if m:
            self.story.setMetadata('dateUpdated',
                                   datetime.datetime.fromtimestamp(\
                    time.mktime(time.strptime(m.group(1), "%Y/%m/%d"))))

        m = re.match(r".*? - ([0-9/]+?) words.*?",metapstr)
        if m:
            self.story.setMetadata('numWords',m.group(1))

        if metapstr.endswith("Complete"):
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        # get the chapter list first this time because that's how we
        # detect the need to login.
        storylistul = soup.find('ul',{'id':'storylist'})
        if not storylistul:
            # no list found, so it's a one-chapter story.
            self.chapterUrls.append((self.story.getMetadata('title'),url))
        else:
            chapterlistlis = storylistul.findAll('li')
            for chapterli in chapterlistlis:
                if "blocked" in chapterli['class']:
                    # paranoia check.  We should already be logged in by now.
                    raise exceptions.FailedToLogin(url,self.username)
                else:
                    #print "chapterli.h4.a (%s)"%chapterli.h4.a
                    self.chapterUrls.append((chapterli.h4.a.string,
                                             u'http://%s%s'%(self.getSiteDomain(),
                                                             chapterli.h4.a['href'])))
        #print "self.chapterUrls:%s"%self.chapterUrls
        self.story.setMetadata('numChapters',len(self.chapterUrls))

        return

    def getChapterText(self, url):
        """Download one chapter and return its text.

        url -- the chapter URL, as stored in self.chapterUrls.

        Returns utf8FromSoup() of the div with id "storytext".  Raises
        exceptions.FailedToDownload when that div is missing.
        """
        logging.debug('Getting chapter text from: %s' % url)
        time.sleep(0.5) ## pause so the site isn't hit too fast.
                        ## This is in addition to whatever the
                        ## slow_down_sleep_time setting is.
        soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.

        storytext = soup.find('div', {'id' : 'storytext'})

        if storytext is None:
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        return utf8FromSoup(storytext)
|
||||
|
||||
def getClass():
    """Return the site adapter class defined in this module."""
    adapter_class = FicwadComSiteAdapter
    return adapter_class
|
||||
|
||||
|
|
@ -54,28 +54,27 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
return False
|
||||
|
||||
def performLogin(self, url):
|
||||
data = {}
|
||||
params = {}
|
||||
|
||||
if self.password:
|
||||
data['penname'] = self.username
|
||||
data['password'] = self.password
|
||||
params['penname'] = self.username
|
||||
params['password'] = self.password
|
||||
else:
|
||||
data['penname'] = self.getConfig("username")
|
||||
data['password'] = self.getConfig("password")
|
||||
data['cookiecheck'] = '1'
|
||||
data['submit'] = 'Submit'
|
||||
params['penname'] = self.getConfig("username")
|
||||
params['password'] = self.getConfig("password")
|
||||
params['cookiecheck'] = '1'
|
||||
params['submit'] = 'Submit'
|
||||
|
||||
urlvals = urllib.urlencode(data)
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
data['penname']))
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, urlvals)
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
||||
if "Member Account" not in d : #Member Account
|
||||
logging.info("Failed to login to URL %s as %s" % (loginUrl,
|
||||
data['penname']))
|
||||
raise exceptions.FailedToLogin(url,data['penname'])
|
||||
params['penname']))
|
||||
raise exceptions.FailedToLogin(url,params['penname'])
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
|
@ -150,10 +149,10 @@ class TwilightedNetSiteAdapter(BaseSiteAdapter):
|
|||
self.story.setMetadata('description',stripHTML(svalue))
|
||||
|
||||
if 'Rated' in label:
|
||||
self.story.setMetadata('rating', value.strip())
|
||||
self.story.setMetadata('rating', value)
|
||||
|
||||
if 'Word count' in label:
|
||||
self.story.setMetadata('numWords', value.strip())
|
||||
self.story.setMetadata('numWords', value)
|
||||
|
||||
if 'Categories' in label:
|
||||
cats = labelspan.parent.findAll('a',href=re.compile(r'browse.php\?type=categories'))
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
import re
|
||||
import datetime
|
||||
import time
|
||||
import urllib
|
||||
import urllib2 as u2
|
||||
import urlparse as up
|
||||
|
||||
|
|
@ -27,7 +28,7 @@ class BaseSiteAdapter(Configurable):
|
|||
def __init__(self, config, url):
|
||||
Configurable.__init__(self, config)
|
||||
self.addConfigSection(self.getSiteDomain())
|
||||
self.addConfigSection("commandline")
|
||||
self.addConfigSection("overrides")
|
||||
|
||||
self.opener = u2.build_opener(u2.HTTPCookieProcessor())
|
||||
self.storyDone = False
|
||||
|
|
@ -49,12 +50,30 @@ class BaseSiteAdapter(Configurable):
|
|||
self.host = self.parsedUrl.netloc
|
||||
self.path = self.parsedUrl.path
|
||||
self.story.setMetadata('storyUrl',self.url)
|
||||
|
||||
|
||||
# Assumes application/x-www-form-urlencoded. parameters, headers are dict()s
|
||||
def _postUrl(self, url, parameters=None, headers=None):
    """POST form data to url and return the decoded response body.

    url        -- address to post to.
    parameters -- dict of form fields; urlencoded into the request
                  body.  None is treated as an empty dict.
    headers    -- dict of extra request headers.  Content-type and
                  Accept get defaults when not supplied.  The dict
                  passed in is not modified.

    Sleeps for slow_down_sleep_time seconds first when that config
    value is set.  Returns the response decoded with self.decode.
    """
    if self.getConfig('slow_down_sleep_time'):
        time.sleep(float(self.getConfig('slow_down_sleep_time')))

    # Work on private copies: a mutable default argument would be
    # shared between calls, and adding the default headers below must
    # not alter a dict owned by the caller.
    if parameters is None:
        parameters = {}
    if headers is None:
        headers = {}
    else:
        headers = dict(headers)

    ## u2.Request assumes POST when data!=None.  Also assumes data
    ## is application/x-www-form-urlencoded.
    if 'Content-type' not in headers:
        headers['Content-type']='application/x-www-form-urlencoded'
    if 'Accept' not in headers:
        headers['Accept']="text/html,*/*"
    req = u2.Request(url,
                     data=urllib.urlencode(parameters),
                     headers=headers)
    return self.opener.open(req).read().decode(self.decode)
|
||||
|
||||
# parameters is a dict()
|
||||
def _fetchUrl(self, url, parameters=None):
|
||||
if self.getConfig('slow_down_sleep_time'):
|
||||
time.sleep(float(self.getConfig('slow_down_sleep_time')))
|
||||
if parameters:
|
||||
return self.opener.open(url,parameters).read().decode(self.decode)
|
||||
return self.opener.open(url,urllib.urlencode(parameters))\
|
||||
.read().decode(self.decode)
|
||||
else:
|
||||
return self.opener.open(url).read().decode(self.decode)
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ def stripHTML(soup):
|
|||
|
||||
def conditionalRemoveEntities(value):
|
||||
if isinstance(value,str) or isinstance(value,unicode) :
|
||||
return removeEntities(value.strip())
|
||||
return removeEntities(value).strip()
|
||||
else:
|
||||
return value
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
## This could (should?) use a dynamic loader like adapters, but for
|
||||
## now, it's static, since there's so few of them.
|
||||
|
||||
from fanficdownloader.exceptions import FailedToDownload
|
||||
|
||||
from writer_html import HTMLWriter
|
||||
from writer_txt import TextWriter
|
||||
from writer_epub import EpubWriter
|
||||
|
|
@ -14,3 +16,5 @@ def getWriter(type,config,story):
|
|||
return TextWriter(config,story)
|
||||
if type == "epub":
|
||||
return EpubWriter(config,story)
|
||||
|
||||
raise FailedToDownload("(%s) is not a supported download format."%type)
|
||||
|
|
|
|||
|
|
@ -27,6 +27,31 @@ class BaseStoryWriter(Configurable):
|
|||
## Pass adapter instead, to check date before fetching all?
|
||||
## Or add 'check update' method to writer?
|
||||
self.story = story
|
||||
self.validEntries = [
|
||||
'category',
|
||||
'genre',
|
||||
'status',
|
||||
'datePublished',
|
||||
'dateUpdated',
|
||||
'dateCreated',
|
||||
'rating',
|
||||
'warnings',
|
||||
'numChapters',
|
||||
'numWords',
|
||||
'site',
|
||||
'storyId',
|
||||
'authorId',
|
||||
'extratags',
|
||||
'title',
|
||||
'storyUrl',
|
||||
'description',
|
||||
'author',
|
||||
'authorUrl',
|
||||
'formatname',
|
||||
'formatext',
|
||||
'siteabbrev']
|
||||
|
||||
# fall back labels.
|
||||
self.titleLabels = {
|
||||
'category':'Category',
|
||||
'genre':'Genre',
|
||||
|
|
@ -38,7 +63,7 @@ class BaseStoryWriter(Configurable):
|
|||
'warnings':'Warnings',
|
||||
'numChapters':'Chapters',
|
||||
'numWords':'Words',
|
||||
'site':'Publisher',
|
||||
'site':'Site',
|
||||
'storyId':'Story ID',
|
||||
'authorId':'Author ID',
|
||||
'extratags':'Extra Tags',
|
||||
|
|
@ -49,6 +74,7 @@ class BaseStoryWriter(Configurable):
|
|||
'authorUrl':'Author URL',
|
||||
'formatname':'File Format',
|
||||
'formatext':'File Extension',
|
||||
'siteabbrev':'Site Abbrev'
|
||||
}
|
||||
self.story.setMetadata('formatname',self.getFormatName())
|
||||
self.story.setMetadata('formatext',self.getFormatExt())
|
||||
|
|
@ -61,17 +87,15 @@ class BaseStoryWriter(Configurable):
|
|||
|
||||
def getFileName(self,template,extension="${formatext}"):
|
||||
values = self.story.metadata
|
||||
fallback=False
|
||||
# fall back default:
|
||||
if not template:
|
||||
template="${title}-${siteabbrev}_${storyId}${formatext}"
|
||||
fallback=True
|
||||
|
||||
# Add extension if not already included.
|
||||
if extension not in template:
|
||||
template+=extension
|
||||
|
||||
if fallback or self.getConfig('safe_filename'):
|
||||
if not self.getConfig('allow_unsafe_filename'):
|
||||
values={}
|
||||
pattern = re.compile(r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+")
|
||||
for k in self.story.metadata.keys():
|
||||
|
|
@ -99,13 +123,17 @@ class BaseStoryWriter(Configurable):
|
|||
wideTitleEntriesList = self.getConfigList("wide_titlepage_entries")
|
||||
|
||||
for entry in titleEntriesList:
|
||||
if entry in self.titleLabels:
|
||||
if entry in self.validEntries:
|
||||
if self.story.getMetadata(entry):
|
||||
if entry in wideTitleEntriesList:
|
||||
TEMPLATE=WIDE_ENTRY
|
||||
else:
|
||||
TEMPLATE=ENTRY
|
||||
self._write(out,TEMPLATE.substitute({'label':self.titleLabels[entry],
|
||||
if self.getConfigList(entry):
|
||||
label=self.getConfig(entry+"_label")
|
||||
else:
|
||||
label=self.titleLabels[entry]
|
||||
self._write(out,TEMPLATE.substitute({'label':label,
|
||||
'value':self.story.getMetadata(entry)}))
|
||||
|
||||
self._write(out,END.substitute(self.story.metadata))
|
||||
|
|
@ -129,7 +157,7 @@ class BaseStoryWriter(Configurable):
|
|||
def writeStory(self,outstream=None):
|
||||
self.addConfigSection(self.story.getMetadata('site'))
|
||||
self.addConfigSection(self.story.getMetadata('site')+":"+self.getFormatName())
|
||||
self.addConfigSection("commandline")
|
||||
self.addConfigSection("overrides")
|
||||
|
||||
for tag in self.getConfigList("extratags"):
|
||||
self.story.addToList("extratags",tag)
|
||||
|
|
|
|||
|
|
@ -242,7 +242,7 @@ h6 { text-align: center; }
|
|||
|
||||
# set to avoid duplicate subject tags.
|
||||
subjectset = set()
|
||||
for entry in self.titleLabels.keys():
|
||||
for entry in self.validEntries:
|
||||
if entry in self.getConfigList("include_subject_tags") and \
|
||||
entry not in self.story.getLists() and \
|
||||
self.story.getMetadata(entry):
|
||||
|
|
|
|||
|
|
@ -20,4 +20,4 @@ class DownloadData(db.Model):
|
|||
|
||||
class UserConfig(db.Model):
|
||||
user = db.UserProperty()
|
||||
config = db.TextProperty()
|
||||
config = db.BlobProperty()
|
||||
|
|
|
|||
11
index.html
11
index.html
|
|
@ -56,10 +56,10 @@
|
|||
This version is a new re-org/re-write of the code.
|
||||
</p>
|
||||
<p>
|
||||
So far, only a few sites are supported: fanfiction.net, twilighted.net and whofic.com.
|
||||
So far, the only sites supported are: fanfiction.net, fictionalley.com, ficwad.com, twilighted.net and whofic.com.
|
||||
</p>
|
||||
<p>
|
||||
Login/Password is only asked for when required now.
|
||||
Login/Password is asked for when required now.
|
||||
</p>
|
||||
<p>
|
||||
Mobi support (for Kindle) is only via EPub conversion in this version.
|
||||
|
|
@ -77,7 +77,7 @@
|
|||
<div id='typeoptions'>
|
||||
<input type='radio' name='format' value='epub' checked>EPub</input>
|
||||
<input type='radio' name='format' value='html'>HTML</input>
|
||||
<input type='radio' name='format' value='text'>Plain Text</input>
|
||||
<input type='radio' name='format' value='txt'>Plain Text</input>
|
||||
<p><i>For Mobi (Kindle) select EPub and use the Convert link when it's finished.</i></p>
|
||||
</div>
|
||||
<div>
|
||||
|
|
@ -133,8 +133,9 @@
|
|||
</dd>
|
||||
<dt>ficwad.com</dt>
|
||||
<dd>
|
||||
Use the URL of any story chapter, such as
|
||||
<br /><a href="http://www.ficwad.com/story/75246">http://www.ficwad.com/story/75246</a>.
|
||||
Use the URL of the story's chapter list, such as
|
||||
<br /><a href="http://www.ficwad.com/story/74884">http://www.ficwad.com/story/74884</a>.
|
||||
<br />Note that this is changed from the previous version. The system will still accept chapter URLs, however.
|
||||
</dd>
|
||||
<dt>harrypotterfanfiction.com</dt>
|
||||
<dd>
|
||||
|
|
|
|||
15
main.py
15
main.py
|
|
@ -115,11 +115,16 @@ class EditConfigServer(webapp.RequestHandler):
|
|||
if uconfig is not None and uconfig.config:
|
||||
config = uconfig.config
|
||||
else:
|
||||
template_values['default'] = True
|
||||
configfile = open("defaults.ini","rb")
|
||||
configfile = open("example.ini","rb")
|
||||
config = configfile.read()
|
||||
configfile.close()
|
||||
template_values['config'] = config
|
||||
|
||||
configfile = open("defaults.ini","rb")
|
||||
config = configfile.read()
|
||||
configfile.close()
|
||||
template_values['defaultsini'] = config
|
||||
|
||||
path = os.path.join(os.path.dirname(__file__), 'editconfig.html')
|
||||
self.response.headers['Content-Type'] = 'text/html'
|
||||
self.response.out.write(template.render(path, template_values))
|
||||
|
|
@ -227,6 +232,9 @@ class UserConfigServer(webapp.RequestHandler):
|
|||
def getUserConfig(self,user):
|
||||
config = ConfigParser.SafeConfigParser()
|
||||
|
||||
logging.debug('reading defaults.ini config file')
|
||||
config.read('defaults.ini')
|
||||
|
||||
## Pull user's config record.
|
||||
l = UserConfig.all().filter('user =', user).fetch(1)
|
||||
## TEST THIS
|
||||
|
|
@ -234,9 +242,6 @@ class UserConfigServer(webapp.RequestHandler):
|
|||
uconfig=l[0]
|
||||
logging.debug('reading config from UserConfig(%s)'%uconfig.config)
|
||||
config.readfp(StringIO.StringIO(uconfig.config))
|
||||
else:
|
||||
logging.debug('reading defaults.ini config file')
|
||||
config.read('defaults.ini')
|
||||
|
||||
return config
|
||||
|
||||
|
|
|
|||
|
|
@ -41,12 +41,15 @@ def main():
|
|||
config.read('defaults.ini')
|
||||
logging.debug('reading personal.ini config file, if present')
|
||||
config.read('personal.ini')
|
||||
|
||||
config.add_section("commandline")
|
||||
|
||||
try:
|
||||
config.add_section("overrides")
|
||||
except ConfigParser.DuplicateSectionError:
|
||||
pass
|
||||
if options.options:
|
||||
for opt in options.options:
|
||||
(var,val) = opt.split('=')
|
||||
config.set("commandline",var,val)
|
||||
config.set("overrides",var,val)
|
||||
|
||||
try:
|
||||
adapter = adapters.getAdapter(config,args[0])
|
||||
|
|
@ -62,9 +65,11 @@ def main():
|
|||
adapter.getStoryMetadataOnly()
|
||||
|
||||
if options.metaonly:
|
||||
adapter.getStoryMetadataOnly()
|
||||
print adapter.getStoryMetadataOnly()
|
||||
return
|
||||
## XXX Use format.
|
||||
## XXX Doing all three formats actually causes some interesting
|
||||
## XXX config issues with format-specific sections.
|
||||
print "format: %s" % options.format
|
||||
writeStory(config,adapter,"epub")
|
||||
writeStory(config,adapter,"html")
|
||||
|
|
|
|||
Loading…
Reference in a new issue