First version with background queue processing and support for stories >1M.

This commit is contained in:
retiefjimm 2011-03-10 13:00:11 -06:00
parent 0e9d992e13
commit 53e2a0f204
7 changed files with 228 additions and 83 deletions

View file

@ -4,8 +4,8 @@ class OneDownload(db.Model):
user = db.UserProperty()
url = db.StringProperty()
format = db.StringProperty()
#login = db.StringProperty()
#password = db.StringProperty()
login = db.StringProperty()
password = db.StringProperty()
failure = db.StringProperty()
date = db.DateTimeProperty(auto_now_add=True)
@ -19,3 +19,21 @@ class DownloadedFanfic(db.Model):
blob = db.BlobProperty()
mac = db.StringProperty()
cleared = db.BooleanProperty(default=False)
# Metadata record for one requested story download.  The generated file's
# bytes are not stored here; they live in DownloadData entities that point
# back at this record through their `download` reference.
class DownloadMeta(db.Model):
# User the download was requested for.
user = db.UserProperty()
# Story URL that was requested.
url = db.StringProperty()
# Output name reported by the adapter (getOutputName) in the download task.
name = db.StringProperty()
# Story title reported by the adapter (getStoryName) in the download task.
title = db.StringProperty()
# Author name reported by the adapter (getAuthorName) in the download task.
author = db.StringProperty()
# Requested output format string (e.g. 'mobi').
format = db.StringProperty()
# Error text recorded by the download task when processing fails.
failure = db.StringProperty()
# Starts False; the download task sets it True after all chunks are stored.
completed = db.BooleanProperty(default=False)
# Set automatically when the entity is first created.
date = db.DateTimeProperty(auto_now_add=True)
# data_chunks is implicit from DownloadData def.
# (It is the back-reference collection created by the `collection_name`
# argument of DownloadData.download.)
# One chunk of a generated download file.  The download task splits the
# output into slices of at most 1024*1000 bytes and stores each slice as its
# own entity; the file server writes them back out ordered by `index`.
class DownloadData(db.Model):
# Owning DownloadMeta; collection_name exposes the reverse query on the
# parent as `data_chunks`.
download = db.ReferenceProperty(DownloadMeta,
collection_name='data_chunks')
# Raw bytes of this chunk.
blob = db.BlobProperty()
# Zero-based position of this chunk within the complete file.
index = db.IntegerProperty()

View file

@ -10,6 +10,17 @@ indexes:
# automatically uploaded to the admin console when you next deploy
# your application using appcfg.py.
- kind: DownloadData
properties:
- name: download
- name: index
- kind: DownloadMeta
properties:
- name: user
- name: date
direction: desc
- kind: DownloadedFanfic
properties:
- name: cleared

152
main.py
View file

@ -41,8 +41,6 @@ from fanficdownloader.zipdir import *
from ffstorage import *
class LoginRequired(webapp.RequestHandler):
def get(self):
user = users.get_current_user()
@ -100,6 +98,8 @@ class FileServer(webapp.RequestHandler):
key = db.Key(fileId)
fanfic = db.get(key)
# check for completed & failure.
name = fanfic.name.encode('utf-8')
@ -119,20 +119,40 @@ class FileServer(webapp.RequestHandler):
elif fanfic.format == 'mobi':
self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook'
self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi'
self.response.out.write(fanfic.blob)
data = DownloadData.all().filter("download =", fanfic).order("index")
for datum in data:
self.response.out.write(datum.blob)
class FileStatusServer(webapp.RequestHandler):
    """Serve the status page for a single queued or finished download."""

    def get(self):
        """Render status.html for the download identified by the `id` parameter.

        Redirects to /login when nobody is signed in, and to / when the id is
        missing, too short, malformed, or no longer refers to a stored entity.
        """
        user = users.get_current_user()
        if not user:
            # redirect() only sets the response; return so the handler does
            # not go on to call user.nickname() on None.
            self.redirect('/login')
            return

        fileId = self.request.get('id')
        # Log the requested id (previously this logged the builtin `id`).
        logging.info("Status id: %s" % fileId)

        # request.get() yields '' for a missing parameter, so test truthiness
        # rather than comparing against None.
        if not fileId or len(fileId) < 3:
            self.redirect('/')
            return

        try:
            fic = db.get(db.Key(fileId))
        except db.BadKeyError:
            # The id is not a valid encoded datastore key.
            logging.info("Status id is not a valid key: %s" % fileId)
            self.redirect('/')
            return

        if fic is None:
            # The record may have been removed since the link was created.
            self.redirect('/')
            return

        # NOTE(review): no check that fic.user matches the signed-in user;
        # confirm whether status pages are meant to be visible to anyone
        # holding the key.
        logging.info("Status url: %s" % fic.url)

        template_values = dict(fic = fic, nickname = user.nickname())
        path = os.path.join(os.path.dirname(__file__), 'status.html')
        self.response.out.write(template.render(path, template_values))
class RecentFilesServer(webapp.RequestHandler):
def get(self):
user = users.get_current_user()
if not user:
self.redirect('/login')
# fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1 and cleared = :2", user)
q = DownloadedFanfic.all()
q.filter('user =', user)
q.filter('cleared =', False)
q = DownloadMeta.all()
q.filter('user =', user).order('-date')
fics = q.fetch(100)
template_values = dict(fics = fics, nickname = user.nickname())
@ -164,8 +184,24 @@ class FanfictionDownloader(webapp.RequestHandler):
login = self.request.get('login')
password = self.request.get('password')
logging.info("Downloading: " + url)
logging.info("Queuing Download: " + url)
# use existing record if available.
q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
if( q is None or len(q) < 1 ):
download = DownloadMeta()
else:
download = q[0]
download.completed=False
for c in download.data_chunks:
c.delete()
download.user = user
download.url = url
download.format = format
download.put()
taskqueue.add(url='/fdowntask',
queue_name="download",
params={'format':format,
@ -174,7 +210,9 @@ class FanfictionDownloader(webapp.RequestHandler):
'password':password,
'user':user.email()})
self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=Check recent in a bit for the download.' )
logging.info("enqueued download key: " + str(download.key()))
self.redirect('/status?id='+str(download.key()))
return
@ -191,25 +229,32 @@ class FanfictionDownloaderTask(webapp.RequestHandler):
def post(self):
logging.getLogger().setLevel(logging.DEBUG)
format = self.request.get('format')
url = self.request.get('url')
login = self.request.get('login')
password = self.request.get('password')
# User object can't pass, just email address
user = user = users.User(self.request.get('user'))
user = users.User(self.request.get('user'))
logging.info("Downloading: " + url + " for user: "+user.nickname())
adapter = None
writerClass = None
download = OneDownload()
# use existing record if available.
q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
if( q is None or len(q) < 1 ):
download = DownloadMeta()
else:
download = q[0]
download.completed=False
for c in download.data_chunks:
c.delete()
download.user = user
download.url = url
#download.login = login
#download.password = password
download.format = format
download.put()
logging.info('Creating adapter...')
try:
@ -233,14 +278,13 @@ class FanfictionDownloaderTask(webapp.RequestHandler):
adapter = mediaminer.MediaMiner(url)
else:
logging.debug("Bad URL detected")
self.redirect('/?error=bad_url&url=' + urlEscape(url) )
download.failure = url +" is not a valid story URL."
download.put()
return
except Exception, e:
logging.exception(e)
download.failure = "Adapter was not created: " + str(e)
download.put()
self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=' + urlEscape(str(traceback.format_exc())) )
return
logging.info('Created an adaper: %s' % adapter)
@ -277,67 +321,38 @@ class FanfictionDownloaderTask(webapp.RequestHandler):
logging.exception(e)
download.failure = 'Login problem detected'
download.put()
self.redirect('/?error=login_required&url=' + urlEscape(url))
return
except:
e = sys.exc_info()[0]
except Exception, e:
logging.exception(e)
download.failure = 'Some exception happened in downloader: ' + str(e)
download.failure = 'Some exception happened in downloader: ' + str(e)
download.put()
self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=' + urlEscape(str(traceback.format_exc())) )
return
if data == None:
if loader.badLogin:
logging.debug("Bad login detected")
download.failure = 'Login problem detected'
download.failure = 'Login failed'
download.put()
self.redirect('/?error=login_required&url=' + urlEscape(url))
else:
fic = DownloadedFanfic()
fic.user = user
fic.url = url
fic.format = format
fic.name = self._printableVersion(adapter.getOutputName())
fic.author = self._printableVersion(adapter.getAuthorName())
if( len(data)<1024*1000 ):
fic.blob = data
else:
logging.debug("Long file, split required")
fic.blob = data[:1024*1000]
# try:
fic.put()
key = fic.key()
return
download.failure = 'No data returned by adaptor'
download.put()
# self.redirect('/?file='+str(key)+'&name=' + urlEscape(fic.name) + '&author=' + urlEscape(fic.author))
else:
download.name = self._printableVersion(adapter.getOutputName())
download.title = self._printableVersion(adapter.getStoryName())
download.author = self._printableVersion(adapter.getAuthorName())
download.put()
index=0
while( len(data) > 0 ):
DownloadData(download=download,
index=index,
blob=data[:1024*1000]).put()
index += 1
data = data[1024*1000:]
download.completed=True
download.put()
logging.info("Download finished OK")
self.response.clear()
self.response.set_status(200)
return
# except Exception, e:
# logging.exception(e)
# # it was too large, won't save it
# name = str(makeAcceptableFilename(adapter.getStoryName()))
# if format == 'epub':
# self.response.headers['Content-Type'] = 'application/epub+zip'
# self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.epub'
# elif format == 'html':
# self.response.headers['Content-Type'] = 'application/zip'
# self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.html.zip'
# elif format == 'text':
# self.response.headers['Content-Type'] = 'application/zip'
# self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.txt.zip'
# elif format == 'mobi':
# self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook'
# self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi'
# self.response.out.write(data)
def toPercentDecimal(match):
"Return the %decimal number for the character for url escaping"
@ -354,6 +369,7 @@ def main():
('/fdowntask', FanfictionDownloaderTask),
('/fdown', FanfictionDownloader),
('/file', FileServer),
('/status', FileStatusServer),
('/recent', RecentFilesServer),
('/r2d2', RecentAllFilesServer),
('/login', LoginRequired)],

View file

@ -2,4 +2,6 @@ queue:
- name: default
rate: 1/s
- name: download
rate: 10/s
rate: 10/s
retry_parameters:
task_retry_limit: 3

View file

@ -32,13 +32,26 @@
<div id='urlbox'>
<div id='greeting'>
Hi, {{ nickname }}! These fanfics you've downloaded previously.
Hi, {{ nickname }}! These are the fanfics you've recently requested.
</div>
</div>
<div id='helpbox'>
{% for fic in fics %}
<p> <a href="/file?id={{ fic.key }}">{{ fic.name }}</a> by {{ fic.author }} ({{ fic.format }})<br/><small><a href="{{ fic.url }}">{{ fic.url }}</a></small></p>
<p>
{% if fic.completed %}
<a href="/file?id={{ fic.key }}">{{ fic.title }}</a>
by {{ fic.author }} ({{ fic.format }})<br/>
{% endif %}
{% if fic.failure %}
<div id='error'>{{ fic.failure }}</div>
{% endif %}
{% if not fic.completed and not fic.failure %}
Request Processing...<br />
{% endif %}
<small><a href="{{ fic.url }}">{{ fic.url }}</a> ({{ fic.format }})</small>
</p>
{% endfor %}
</div>

86
status.html Normal file
View file

@ -0,0 +1,86 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
<head>
<link href="css/index.css" rel="stylesheet" type="text/css">
<title>Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="google-site-verification" content="kCFc-G4bka_pJN6Rv8CapPBcwmq0hbAUZPkKWqRsAYU" />
{% if not fic.completed and not fic.failure %}
<meta http-equiv="refresh" content="7">
{% endif %}
</head>
<body>
<div id='main'>
<h1>
<a href="/" style="text-decoration: none; color: black;">FanFiction Downloader</a>
</h1>
<div style="text-align: center">
<script type="text/javascript"><!--
google_ad_client = "pub-2027714004231956";
/* FFD */
google_ad_slot = "7330682770";
google_ad_width = 468;
google_ad_height = 60;
//-->
</script>
<script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</div>
<div id='urlbox'>
<div id='greeting'>
<p><a href='{{ fic.url }}'>{{ fic.url }}</a></p>
</div>
<div>
{% if fic.completed %}
<p>Your fic has finished processing and you can download it now:</p>
<p><a href="/file?id={{ fic.key }}">{{ fic.title }}</a>
by {{ fic.author }} ({{ fic.format }})</p>
{% else %}
{% if fic.failure %}
Your fic failed to process. Please check the URL and the error message below.<br />
<div id='error'>
{{ fic.failure }}
</div>
{% else %}
<p>Not done yet. This page will periodically poll to see if your story has finished.</p>
{% endif %}
{% endif %}
</div>
</div>
<div style='text-align: center'>
<img src="http://code.google.com/appengine/images/appengine-silver-120x30.gif"
alt="Powered by Google App Engine" />
<br/><br/>
FanfictionLoader is a web front-end to <A href="http://code.google.com/p/fanficdownloader/">fanficdownloader</a><br/>
Copyright &copy; <a href="http://twitter.com/sigizmund">Roman Kirillov</a>
</div>
<div style="margin-top: 1em; text-align: center">
<script type="text/javascript"><!--
google_ad_client = "pub-2027714004231956";
/* FFD */
google_ad_slot = "7330682770";
google_ad_width = 468;
google_ad_height = 60;
//-->
</script>
<script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</div>
</div>
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-12136939-1");
pageTracker._trackPageview();
} catch(err) {}</script>
</body>
</html>

View file

@ -24,20 +24,19 @@ class Remover(webapp.RequestHandler):
theDate = datetime.date.today() - datetime.timedelta(days=2)
logging.debug("Will delete stuff older than %s" % theDate)
fics = DownloadedFanfic.all()
fics.order("date")
results = fics.fetch(50)
fics = DownloadMeta.all()
fics.filter("date <",theDate).order("date")
results = fics.fetch(100)
logging.debug([x.name for x in results])
num = 0
for d in results:
# d.blob = None
# d.cleared = True
d.delete()
for c in d.data_chunks:
c.delete()
num = num + 1
logging.debug('Delete '+d.url)
logging.info('Deleted instances: %d' % num)
self.response.out.write('Deleted instances: %d' % num)
@ -50,4 +49,4 @@ def main():
if __name__ == '__main__':
logging.getLogger().setLevel(logging.DEBUG)
main()
main()