Refactorings/tweaks from Update Feature work that are worthwhile even without it.

This commit is contained in:
Jim Miller 2011-06-12 16:26:05 -05:00
parent 8bf22729fe
commit fddd146b39
9 changed files with 297 additions and 220 deletions

View file

@ -9,6 +9,10 @@ handlers:
script: utils/remover.py
login: admin
- url: /tally.*
script: utils/tally.py
login: admin
- url: /fdownloadtask
script: main.py
login: admin

View file

@ -100,25 +100,25 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
('Chapter 3, Over Cinnabar',self.url+"&chapter=4"),
('Chapter 4',self.url+"&chapter=5"),
('Chapter 5',self.url+"&chapter=6"),
('Chapter 6',self.url+"&chapter=6"),
('Chapter 7',self.url+"&chapter=6"),
('Chapter 8',self.url+"&chapter=6"),
('Chapter 9',self.url+"&chapter=6"),
('Chapter 0',self.url+"&chapter=6"),
('Chapter a',self.url+"&chapter=6"),
('Chapter b',self.url+"&chapter=6"),
('Chapter c',self.url+"&chapter=6"),
('Chapter d',self.url+"&chapter=6"),
('Chapter e',self.url+"&chapter=6"),
('Chapter f',self.url+"&chapter=6"),
('Chapter g',self.url+"&chapter=6"),
('Chapter h',self.url+"&chapter=6"),
('Chapter i',self.url+"&chapter=6"),
('Chapter j',self.url+"&chapter=6"),
('Chapter k',self.url+"&chapter=6"),
('Chapter l',self.url+"&chapter=6"),
('Chapter m',self.url+"&chapter=6"),
('Chapter n',self.url+"&chapter=6"),
# ('Chapter 6',self.url+"&chapter=6"),
# ('Chapter 7',self.url+"&chapter=6"),
# ('Chapter 8',self.url+"&chapter=6"),
# ('Chapter 9',self.url+"&chapter=6"),
# ('Chapter 0',self.url+"&chapter=6"),
# ('Chapter a',self.url+"&chapter=6"),
# ('Chapter b',self.url+"&chapter=6"),
# ('Chapter c',self.url+"&chapter=6"),
# ('Chapter d',self.url+"&chapter=6"),
# ('Chapter e',self.url+"&chapter=6"),
# ('Chapter f',self.url+"&chapter=6"),
# ('Chapter g',self.url+"&chapter=6"),
# ('Chapter h',self.url+"&chapter=6"),
# ('Chapter i',self.url+"&chapter=6"),
# ('Chapter j',self.url+"&chapter=6"),
# ('Chapter k',self.url+"&chapter=6"),
# ('Chapter l',self.url+"&chapter=6"),
# ('Chapter m',self.url+"&chapter=6"),
# ('Chapter n',self.url+"&chapter=6"),
]
self.story.setMetadata('numChapters',len(self.chapterUrls))

View file

@ -15,12 +15,17 @@
# limitations under the License.
#
import os
from htmlcleanup import conditionalRemoveEntities
class Story:
def __init__(self):
self.metadata = {'version':'4.0.0'}
try:
self.metadata = {'version':os.environ['CURRENT_VERSION_ID']}
except:
self.metadata = {'version':'4.0'}
self.chapters = [] # chapters will be tuples of (title,html)
self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.

View file

@ -188,6 +188,7 @@ h6 { text-align: center; }
rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf",
"media-type":"application/oebps-package+xml"}))
outputepub.writestr("META-INF/container.xml",containerdom.toxml(encoding='utf-8'))
containerdom.unlink()
del containerdom
## Epub has two metadata files with real data. We're putting
@ -321,6 +322,7 @@ h6 { text-align: center; }
"linear":"yes"}))
# write content.opf to zip.
outputepub.writestr("content.opf",contentdom.toxml(encoding='utf-8'))
contentdom.unlink()
del contentdom
## create toc.ncx file
@ -370,6 +372,7 @@ h6 { text-align: center; }
# write toc.ncx to zip file
outputepub.writestr("toc.ncx",tocncxdom.toxml(encoding='utf-8'))
tocncxdom.unlink()
del tocncxdom
# write stylesheet.css file.

View file

@ -26,7 +26,7 @@
<body>
<div id='main'>
<h1>
<a href="/" style="text-decoration: none; color: black;">FanFiction Downloader</a>
<a href="/" style="text-decoration: none; color: black;">FanFiction Downloader</a> <g:plusone size="medium"></g:plusone>
</h1>
<div style="text-align: center">
@ -53,25 +53,15 @@
<p>Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites
much easier. </p>
</div>
<h3>fanfiction.net/fictionpress.com changes</h3>
<p>
The sites fanfiction.net and fictionpress.com changed their
output enough to break the downloader. Stories appeared
to download, but the chapters would not contain the story
text.
</p>
<p>
It should be fixed now.
</p>
<!-- put announcements here, h3 is a good title size. -->
<p>
If you have any problems with this application, please
report them in
the <a href="http://groups.google.com/group/fanfic-downloader">Fanfiction
Downloader Google Group</a>. The
<a href="http://3-0-2.fanfictionloader.appspot.com">Old
<a href="http://4-0-2.fanfictionloader.appspot.com">Previous
Version</a> is also available for you to use if necessary.
</p>
<p><g:plusone size="medium"></g:plusone></p>
<div id='error'>
{{ error_message }}
</div>

View file

@ -1,33 +1,33 @@
indexes:
# AUTOGENERATED
# This index.yaml is automatically updated whenever the dev_appserver
# detects that a new type of query is run. If you want to manage the
# index.yaml file manually, remove the above marker line (the line
# saying "# AUTOGENERATED"). If you want to manage some indexes
# manually, move them above the marker line. The index.yaml file is
# automatically uploaded to the admin console when you next deploy
# your application using appcfg.py.
- kind: DownloadData
properties:
- name: download
- name: index
- kind: DownloadMeta
properties:
- name: user
- name: date
direction: desc
- kind: DownloadedFanfic
properties:
- name: cleared
- name: date
- kind: DownloadedFanfic
properties:
- name: user
- name: date
direction: desc
indexes:
# AUTOGENERATED
# This index.yaml is automatically updated whenever the dev_appserver
# detects that a new type of query is run. If you want to manage the
# index.yaml file manually, remove the above marker line (the line
# saying "# AUTOGENERATED"). If you want to manage some indexes
# manually, move them above the marker line. The index.yaml file is
# automatically uploaded to the admin console when you next deploy
# your application using appcfg.py.
- kind: DownloadData
properties:
- name: download
- name: index
- kind: DownloadMeta
properties:
- name: user
- name: date
direction: desc
- kind: DownloadedFanfic
properties:
- name: cleared
- name: date
- kind: DownloadedFanfic
properties:
- name: user
- name: date
direction: desc

313
main.py
View file

@ -28,7 +28,8 @@ import urllib
import datetime
import traceback
import StringIO
from StringIO import StringIO
import ConfigParser
## Just to shut up the appengine warning about "You are using the
## default Django version (0.96). The default Django version will
@ -36,6 +37,10 @@ import StringIO
## use_library() to explicitly select a Django version. For more
## information see
## http://code.google.com/appengine/docs/python/tools/libraries.html#Django"
## Note that if you are using the SDK App Engine Launcher and hit an SDK
## Console page first, you will get a django version mismatch error when you
## go to hit one of the application pages. Just change a file again, and
## make sure to hit an app page before the SDK page to clear it.
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
from google.appengine.dist import use_library
use_library('django', '1.2')
@ -51,7 +56,6 @@ from google.appengine.runtime import DeadlineExceededError
from ffstorage import *
from fanficdownloader import adapters, writers, exceptions
import ConfigParser
class MainHandler(webapp.RequestHandler):
def get(self):
@ -61,7 +65,7 @@ class MainHandler(webapp.RequestHandler):
template_values = {'nickname' : user.nickname(), 'authorized': True}
url = self.request.get('url')
template_values['url'] = url
if error:
if error == 'login_required':
template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.'
@ -73,11 +77,11 @@ class MainHandler(webapp.RequestHandler):
template_values['error_message'] = 'Configuration Saved'
elif error == 'recentcleared':
template_values['error_message'] = 'Your Recent Downloads List has been Cleared'
filename = self.request.get('file')
if len(filename) > 1:
template_values['yourfile'] = '''<div id='yourfile'><a href='/file?id=%s'>"%s" by %s</a></div>''' % (filename, self.request.get('name'), self.request.get('author'))
self.response.headers['Content-Type'] = 'text/html'
path = os.path.join(os.path.dirname(__file__), 'index.html')
@ -99,7 +103,7 @@ class EditConfigServer(webapp.RequestHandler):
if not user:
self.redirect(users.create_login_url(self.request.uri))
return
template_values = {'nickname' : user.nickname(), 'authorized': True}
## Pull user's config record.
@ -129,29 +133,26 @@ class EditConfigServer(webapp.RequestHandler):
config = configfile.read()
configfile.close()
template_values['defaultsini'] = config
path = os.path.join(os.path.dirname(__file__), 'editconfig.html')
self.response.headers['Content-Type'] = 'text/html'
self.response.out.write(template.render(path, template_values))
class FileServer(webapp.RequestHandler):
def get(self):
fileId = self.request.get('id')
if fileId == None or len(fileId) < 3:
self.redirect('/')
return
try:
key = db.Key(fileId)
fanfic = db.get(key)
download = getDownloadMeta(id=fileId)
name = download.name.encode('utf-8')
# check for completed & failure.
name = fanfic.name.encode('utf-8')
logging.info("Serving file: %s" % name)
if name.endswith('.epub'):
@ -166,15 +167,15 @@ class FileServer(webapp.RequestHandler):
self.response.headers['Content-Type'] = 'application/zip'
else:
self.response.headers['Content-Type'] = 'application/octet-stream'
self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name
data = DownloadData.all().filter("download =", fanfic).order("index")
self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name
data = DownloadData.all().filter("download =", download).order("index")
# epubs are all already compressed.
# Each chunk is compressed individually to avoid having
# to hold the whole in memory just for the
# compress/uncompress
if fanfic.format != 'epub':
if download.format != 'epub':
def dc(data):
try:
return zlib.decompress(data)
@ -184,65 +185,64 @@ class FileServer(webapp.RequestHandler):
else:
def dc(data):
return data
for datum in data:
self.response.out.write(dc(datum.blob))
except Exception, e:
fic = DownloadMeta()
fic.failure = unicode(e)
template_values = dict(fic = fic,
#nickname = user.nickname(),
#escaped_url = escaped_url
)
path = os.path.join(os.path.dirname(__file__), 'status.html')
self.response.out.write(template.render(path, template_values))
class FileStatusServer(webapp.RequestHandler):
def get(self):
user = users.get_current_user()
if not user:
self.redirect(users.create_login_url(self.request.uri))
return
fileId = self.request.get('id')
if fileId == None or len(fileId) < 3:
self.redirect('/')
escaped_url=False
try:
key = db.Key(fileId)
fic = db.get(key)
download = getDownloadMeta(id=fileId)
if fic:
logging.info("Status url: %s" % fic.url)
if fic.completed and fic.format=='epub':
escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+fileId+"&fake=file."+fic.format)
if download:
logging.info("Status url: %s" % download.url)
if download.completed and download.format=='epub':
escaped_url = urlEscape(self.request.host_url+"/file/"+download.name+"."+download.format+"?id="+fileId+"&fake=file."+download.format)
else:
fic = DownloadMeta()
fic.failure = "Download not found"
download = DownloadMeta()
download.failure = "Download not found"
except Exception, e:
fic = DownloadMeta()
fic.failure = unicode(e)
template_values = dict(fic = fic,
download = DownloadMeta()
download.failure = unicode(e)
template_values = dict(fic = download,
nickname = user.nickname(),
escaped_url = escaped_url
)
path = os.path.join(os.path.dirname(__file__), 'status.html')
self.response.out.write(template.render(path, template_values))
class ClearRecentServer(webapp.RequestHandler):
def get(self):
user = users.get_current_user()
if not user:
self.redirect(users.create_login_url(self.request.uri))
return
logging.info("Clearing Recent List for user: "+user.nickname())
q = DownloadMeta.all()
q.filter('user =', user)
@ -260,7 +260,7 @@ class ClearRecentServer(webapp.RequestHandler):
break
logging.info('Deleted %d instances download.' % num)
self.redirect("/?error=recentcleared")
class RecentFilesServer(webapp.RequestHandler):
def get(self):
user = users.get_current_user()
@ -276,7 +276,7 @@ class RecentFilesServer(webapp.RequestHandler):
for fic in fics:
if fic.completed and fic.format == 'epub':
fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format)
template_values = dict(fics = fics, nickname = user.nickname())
path = os.path.join(os.path.dirname(__file__), 'recent.html')
self.response.out.write(template.render(path, template_values))
@ -287,17 +287,16 @@ class UserConfigServer(webapp.RequestHandler):
logging.debug('reading defaults.ini config file')
config.read('defaults.ini')
## Pull user's config record.
l = UserConfig.all().filter('user =', user).fetch(1)
## TEST THIS
if l and l[0].config:
uconfig=l[0]
#logging.debug('reading config from UserConfig(%s)'%uconfig.config)
config.readfp(StringIO.StringIO(uconfig.config))
config.readfp(StringIO(uconfig.config))
return config
class FanfictionDownloader(UserConfigServer):
def get(self):
self.post()
@ -311,32 +310,20 @@ class FanfictionDownloader(UserConfigServer):
format = self.request.get('format')
url = self.request.get('url')
if not url or url.strip() == "":
self.redirect('/')
return
logging.info("Queuing Download: %s" % url)
login = self.request.get('login')
password = self.request.get('password')
is_adult = self.request.get('is_adult') == "on"
# use existing record if available.
q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
if( q is None or len(q) < 1 ):
download = DownloadMeta()
else:
download = q[0]
download.completed=False
download.failure=None
download.date=datetime.datetime.now()
for c in download.data_chunks:
c.delete()
download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID'])
download.user = user
download.url = url
download.format = format
# use existing record if available. Fetched/Created before
# the adapter can normalize the URL in case we need to record
# an exception.
download = getDownloadMeta(url=url,user=user,format=format,new=True)
adapter = None
try:
@ -348,7 +335,7 @@ class FanfictionDownloader(UserConfigServer):
adapter.username=login
adapter.password=password
adapter.is_adult=is_adult
## This scrapes the metadata, which will be
## duplicated in the queue task, but it
## detects bad URLs, bad login, bad story, etc
@ -356,6 +343,12 @@ class FanfictionDownloader(UserConfigServer):
## it's worth the double up. Could maybe save
## it all in the download object someday.
story = adapter.getStoryMetadataOnly()
## Fetch again using normalized story URL. The one
## fetched/created above, if different, will not be saved.
download = getDownloadMeta(url=story.getMetadata('storyUrl'),
user=user,format=format,new=True)
download.title = story.getMetadata('title')
download.author = story.getMetadata('author')
download.url = story.getMetadata('storyUrl')
@ -370,7 +363,7 @@ class FanfictionDownloader(UserConfigServer):
'password':password,
'user':user.email(),
'is_adult':is_adult})
logging.info("enqueued download key: " + str(download.key()))
except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e:
@ -389,7 +382,7 @@ class FanfictionDownloader(UserConfigServer):
if isinstance(e,exceptions.AdultCheckRequired):
template_values['login']=login
template_values['password']=password
path = os.path.join(os.path.dirname(__file__), 'login.html')
self.response.out.write(template.render(path, template_values))
return
@ -402,136 +395,148 @@ class FanfictionDownloader(UserConfigServer):
logging.exception(e)
download.failure = unicode(e)
download.put()
self.redirect('/status?id='+str(download.key()))
return
class FanfictionDownloaderTask(UserConfigServer):
def _printableVersion(self, text):
text = removeEntities(text)
try:
d = text.decode('utf-8')
except:
d = text
return d
def post(self):
logging.getLogger().setLevel(logging.DEBUG)
fileId = self.request.get('id')
# User object can't pass, just email address
user = users.User(self.request.get('user'))
format = self.request.get('format')
url = self.request.get('url')
login = self.request.get('login')
password = self.request.get('password')
is_adult = self.request.get('is_adult')
# User object can't pass, just email address
user = users.User(self.request.get('user'))
logging.info("Downloading: " + url + " for user: "+user.nickname())
logging.info("ID: " + fileId)
adapter = None
writerClass = None
if fileId:
try:
## try to get download rec from passed id first.
## may need to fall back to user/url/format during transition.
download = db.get(db.Key(fileId))
logging.info("DownloadMeta found by ID:"+fileId)
except:
pass
if not download:
# use existing record if available.
q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
if( q is None or len(q) < 1 ):
logging.info("New DownloadMeta")
download = DownloadMeta()
else:
logging.info("DownloadMeta found by user/url/format")
download = q[0]
## populate DownloadMeta, regardless of how found or created.
download.failure=None
download.date=datetime.datetime.now()
download.completed=False
download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID'])
download.user = user
download.url = url
download.format = format
# use existing record if available.
# fileId should have record from /fdown.
download = getDownloadMeta(id=fileId,url=url,user=user,format=format,new=True)
for c in download.data_chunks:
c.delete()
download.put()
logging.info('Creating adapter...')
try:
config = self.getUserConfig(user)
adapter = adapters.getAdapter(config,url)
except Exception, e:
logging.exception(e)
download.failure = unicode(e)
download.put()
return
logging.info('Created an adapter: %s' % adapter)
if len(login) > 1:
adapter.username=login
adapter.password=password
adapter.is_adult=is_adult
try:
logging.info('Created an adapter: %s' % adapter)
if len(login) > 1:
adapter.username=login
adapter.password=password
adapter.is_adult=is_adult
# adapter.getStory() is what does all the heavy lifting.
# adapter.getStoryMetadataOnly() only fetches enough to
# get metadata. writer.writeStory() will call
# adapter.getStory(), too.
writer = writers.getWriter(format,config,adapter)
download.name = writer.getOutputFileName()
logging.debug('output_filename:'+writer.getConfig('output_filename'))
#logging.debug('output_filename:'+writer.getConfig('output_filename'))
logging.debug('getOutputFileName:'+writer.getOutputFileName())
download.title = adapter.getStory().getMetadata('title')
download.author = adapter.getStory().getMetadata('author')
download.url = adapter.getStory().getMetadata('storyUrl')
download.put()
outbuffer = StringIO()
writer.writeStory(outbuffer)
data = outbuffer.getvalue()
outbuffer.close()
del outbuffer
#del writer.adapter
#del writer.story
del writer
#del adapter.story
del adapter
# epubs are all already compressed. Each chunk is
# compressed individually to avoid having to hold the
# whole in memory just for the compress/uncompress.
if format != 'epub':
def c(data):
return zlib.compress(data)
else:
def c(data):
return data
index=0
while( len(data) > 0 ):
DownloadData(download=download,
index=index,
blob=c(data[:1000000])).put()
index += 1
data = data[1000000:]
download.completed=True
download.put()
logging.info("Download finished OK")
del data
except Exception, e:
logging.exception(e)
download.failure = unicode(e)
download.put()
return
outbuffer = StringIO.StringIO()
writer.writeStory(outbuffer)
data = outbuffer.getvalue()
outbuffer.close()
del writer
del adapter
# epubs are all already compressed.
# Each chunk is compressed individually to avoid having
# to hold the whole in memory just for the
# compress/uncompress.
if format != 'epub':
def c(data):
return zlib.compress(data)
else:
def c(data):
return data
index=0
while( len(data) > 0 ):
DownloadData(download=download,
index=index,
blob=c(data[:1000000])).put()
index += 1
data = data[1000000:]
download.completed=True
download.put()
logging.info("Download finished OK")
return
def toPercentDecimal(match):
def getDownloadMeta(id=None,url=None,user=None,format=None,new=False):
    """Find a DownloadMeta record and optionally create/reset it for a new download.

    Lookup order:
      1. If ``id`` is given, it is passed to ``db.Key`` and fetched with
         ``db.get``.
      2. If that yields nothing and ``url``, ``user`` and ``format`` are all
         given, the first DownloadMeta matching all three is used.

    Both lookups are best effort: a failure is logged and treated as
    "not found" rather than raised.

    Args:
        id: string form of the record's datastore key, or None.
        url: story URL to match (and to store when ``new`` is true).
        user: owner to match (and to store when ``new`` is true).
        format: output format to match (and to store when ``new`` is true).
        new: when true, a missing record is replaced by a fresh
            ``DownloadMeta()`` and the record (found or fresh) is reset:
            ``completed``/``failure`` cleared, ``date`` set to now,
            ``version`` set from the APPLICATION_ID and CURRENT_VERSION_ID
            environment variables, and ``user``/``url``/``format`` overwritten
            with whichever of them were passed.

    Returns:
        The DownloadMeta record, or None when nothing was found and ``new``
        is false.  The record is NOT saved here; the caller decides whether
        to ``put()`` it.

    Raises:
        KeyError: when ``new`` is true and APPLICATION_ID or
            CURRENT_VERSION_ID is missing from ``os.environ``.
    """
    download = None

    ## try to get download rec from passed id first.
    if id:
        try:
            download = db.get(db.Key(id))
        except Exception:
            # Best effort: fall through to the user/url/format lookup.
            # Catching Exception (not a bare except) lets non-Exception
            # BaseExceptions such as SystemExit propagate, and the failure
            # is logged instead of vanishing.
            logging.warning("DownloadMeta lookup by ID failed: %s" % id,
                            exc_info=True)
        else:
            # Only claim success when a record actually came back.
            if download is not None:
                logging.info("DownloadMeta found by ID:%s" % id)

    ## then fall back to user/url/format
    if not download and url and user and format:
        try:
            q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
            if q:
                logging.debug("DownloadMeta found by user:%s url:%s format:%s"%(user,url,format))
                download = q[0]
        except Exception:
            # Best effort, as above: treat a failed query as "not found".
            logging.warning("DownloadMeta lookup by user/url/format failed",
                            exc_info=True)

    if new:
        # NOT clearing existing chunks here, because this record may
        # never be saved.
        if not download:
            logging.debug("New DownloadMeta")
            download = DownloadMeta()

        download.completed=False
        download.failure=None
        download.date=datetime.datetime.now()

        download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID'])
        if user:
            download.user = user
        if url:
            download.url = url
        if format:
            download.format = format

    return download
def toPercentDecimal(match):
    """Return the URL percent-escape for the character captured by *match*.

    Takes group 1 of the regex match (expected to be a single character,
    since it is passed to ``ord``) and returns ``%`` followed by its code
    point as at least two lowercase hexadecimal digits, e.g. ``" "`` ->
    ``"%20"``.  Despite the function's name the digits are hex, not decimal.
    """
    captured = match.group(1)
    code_point = ord(captured)
    return "%" + ("%02x" % code_point)

1
utils/__init__.py Normal file
View file

@ -0,0 +1 @@
# -*- coding: utf-8 -*-

69
utils/tally.py Normal file
View file

@ -0,0 +1,69 @@
#!/usr/bin/env python
# encoding: utf-8
# Copyright 2011 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import datetime
import logging
from google.appengine.ext.webapp import util
from google.appengine.ext import webapp
from google.appengine.api import users
from google.appengine.api import taskqueue
from google.appengine.api import memcache
from ffstorage import *
class Tally(webapp.RequestHandler):
    """Request handler that writes one page of DownloadMeta records as quoted, comma-separated rows.

    Each GET emits a header row plus up to 500 records, with rows separated
    by ``<br/>``.  The datastore query cursor is kept in memcache under
    'tally_search_cursor' so the next request continues where this one
    stopped; once a page comes back short the cursor is dropped and the
    next request starts from the beginning again.
    """

    def get(self):
        logging.debug("Starting Tally")
        logging.debug("Working as user %s" % users.get_current_user())

        cursor_key = 'tally_search_cursor'
        page_size = 500
        row_template = '"%s","%s","%s","%s","%s","%s","%s","%s","%s","%s"<br/>'
        out = self.response.out

        # Resume from the position the previous request saved, if any.
        query = DownloadMeta.all()
        saved_cursor = memcache.get(cursor_key)
        if saved_cursor:
            query.with_cursor(saved_cursor)

        out.write('"user","url","name","title","author","format","failure","completed","date","version"<br/>')

        records = query.fetch(page_size)
        for rec in records:
            out.write(row_template % (rec.user, rec.url, rec.name, rec.title,
                                      rec.author, rec.format, rec.failure,
                                      rec.completed, rec.date, rec.version))
        tallied = len(records)

        if tallied < page_size:
            # Short page: the query is exhausted, so forget the cursor.
            memcache.delete(cursor_key)
            logging.warning('Tally search reached end, starting over next time.')
        else:
            # Full page: remember where to pick up on the next request.
            memcache.set(cursor_key, query.cursor())

        logging.info('Tallied %d fics.' % tallied)
        out.write('<br/>Tallied %d fics.<br/>' % tallied)
def main():
    """Build the WSGI application that routes '/tally' to Tally and run it."""
    routes = [('/tally', Tally)]
    util.run_wsgi_app(webapp.WSGIApplication(routes, debug=False))


if __name__ == '__main__':
    # Script entry point: set the root logger to DEBUG, then serve.
    logging.getLogger().setLevel(logging.DEBUG)
    main()