From 53e2a0f204578fffd9eca27720feae13904bb6d3 Mon Sep 17 00:00:00 2001 From: retiefjimm Date: Thu, 10 Mar 2011 13:00:11 -0600 Subject: [PATCH] First version with background queue processing and support for stories >1M. --- ffstorage.py | 22 ++++++- index.yaml | 11 ++++ main.py | 152 ++++++++++++++++++++++++++--------------------- queue.yaml | 4 +- recent.html | 17 +++++- status.html | 86 +++++++++++++++++++++++++++ utils/remover.py | 19 +++--- 7 files changed, 228 insertions(+), 83 deletions(-) create mode 100644 status.html diff --git a/ffstorage.py b/ffstorage.py index 8b53013c..df1e6ff9 100644 --- a/ffstorage.py +++ b/ffstorage.py @@ -4,8 +4,8 @@ class OneDownload(db.Model): user = db.UserProperty() url = db.StringProperty() format = db.StringProperty() - #login = db.StringProperty() - #password = db.StringProperty() + login = db.StringProperty() + password = db.StringProperty() failure = db.StringProperty() date = db.DateTimeProperty(auto_now_add=True) @@ -19,3 +19,21 @@ class DownloadedFanfic(db.Model): blob = db.BlobProperty() mac = db.StringProperty() cleared = db.BooleanProperty(default=False) + +class DownloadMeta(db.Model): + user = db.UserProperty() + url = db.StringProperty() + name = db.StringProperty() + title = db.StringProperty() + author = db.StringProperty() + format = db.StringProperty() + failure = db.StringProperty() + completed = db.BooleanProperty(default=False) + date = db.DateTimeProperty(auto_now_add=True) + # data_chunks is implicit from DownloadData def. + +class DownloadData(db.Model): + download = db.ReferenceProperty(DownloadMeta, + collection_name='data_chunks') + blob = db.BlobProperty() + index = db.IntegerProperty() diff --git a/index.yaml b/index.yaml index 2b67374d..16bcaefe 100644 --- a/index.yaml +++ b/index.yaml @@ -10,6 +10,17 @@ indexes: # automatically uploaded to the admin console when you next deploy # your application using appcfg.py. +- kind: DownloadData + properties: + - name: download + - name: index + +- kind: DownloadMeta + properties: + - name: user + - name: date + direction: desc + - kind: DownloadedFanfic properties: - name: cleared diff --git a/main.py b/main.py index 8559ba38..85d5f937 100644 --- a/main.py +++ b/main.py @@ -41,8 +41,6 @@ from fanficdownloader.zipdir import * from ffstorage import * - - class LoginRequired(webapp.RequestHandler): def get(self): user = users.get_current_user() @@ -100,6 +98,8 @@ class FileServer(webapp.RequestHandler): key = db.Key(fileId) fanfic = db.get(key) + + # check for completed & failure. name = fanfic.name.encode('utf-8') @@ -119,20 +119,40 @@ class FileServer(webapp.RequestHandler): elif fanfic.format == 'mobi': self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' - - - self.response.out.write(fanfic.blob) + data = DownloadData.all().filter("download =", fanfic).order("index") + for datum in data: + self.response.out.write(datum.blob) + +class FileStatusServer(webapp.RequestHandler): + def get(self): + logging.info("Status id: %s" % id) + user = users.get_current_user() + if not user: + self.redirect('/login') + + fileId = self.request.get('id') + + if fileId == None or len(fileId) < 3: + self.redirect('/') + + key = db.Key(fileId) + fic = db.get(key) + + logging.info("Status url: %s" % fic.url) + + template_values = dict(fic = fic, nickname = user.nickname()) + path = os.path.join(os.path.dirname(__file__), 'status.html') + self.response.out.write(template.render(path, template_values)) + class RecentFilesServer(webapp.RequestHandler): def get(self): user = users.get_current_user() if not user: self.redirect('/login') -# fics = db.GqlQuery("Select * From DownloadedFanfic WHERE user = :1 and cleared = :2", user) - q = DownloadedFanfic.all() - q.filter('user =', user) - q.filter('cleared =', False) + q = DownloadMeta.all() + q.filter('user =', user).order('-date') fics = q.fetch(100) template_values = dict(fics = fics, nickname = user.nickname()) @@ -164,8 +184,24 @@ class FanfictionDownloader(webapp.RequestHandler): login = self.request.get('login') password = self.request.get('password') - logging.info("Downloading: " + url) + logging.info("Queuing Download: " + url) + # use existing record if available. + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is None or len(q) < 1 ): + download = DownloadMeta() + else: + download = q[0] + download.completed=False + for c in download.data_chunks: + c.delete() + + download.user = user + download.url = url + download.format = format + download.put() + + taskqueue.add(url='/fdowntask', queue_name="download", params={'format':format, @@ -174,7 +210,9 @@ class FanfictionDownloader(webapp.RequestHandler): 'password':password, 'user':user.email()}) - self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=Check recent in a bit for the download.' ) + logging.info("enqueued download key: " + str(download.key())) + self.redirect('/status?id='+str(download.key())) + return @@ -191,25 +229,32 @@ class FanfictionDownloaderTask(webapp.RequestHandler): def post(self): logging.getLogger().setLevel(logging.DEBUG) - format = self.request.get('format') url = self.request.get('url') login = self.request.get('login') password = self.request.get('password') # User object can't pass, just email address - user = user = users.User(self.request.get('user')) + user = users.User(self.request.get('user')) logging.info("Downloading: " + url + " for user: "+user.nickname()) adapter = None writerClass = None - download = OneDownload() + # use existing record if available. + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is None or len(q) < 1 ): + download = DownloadMeta() + else: + download = q[0] + download.completed=False + for c in download.data_chunks: + c.delete() + download.user = user download.url = url - #download.login = login - #download.password = password download.format = format + download.put() logging.info('Creating adapter...') try: @@ -233,14 +278,13 @@ class FanfictionDownloaderTask(webapp.RequestHandler): adapter = mediaminer.MediaMiner(url) else: logging.debug("Bad URL detected") - self.redirect('/?error=bad_url&url=' + urlEscape(url) ) + download.failure = url +" is not a valid story URL." + download.put() return except Exception, e: logging.exception(e) download.failure = "Adapter was not created: " + str(e) download.put() - - self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=' + urlEscape(str(traceback.format_exc())) ) return logging.info('Created an adaper: %s' % adapter) @@ -277,67 +321,38 @@ class FanfictionDownloaderTask(webapp.RequestHandler): logging.exception(e) download.failure = 'Login problem detected' download.put() - - self.redirect('/?error=login_required&url=' + urlEscape(url)) return - except: - e = sys.exc_info()[0] - + except Exception, e: logging.exception(e) - download.failure = 'Some exception happened in downloader: ' + str(e) + download.failure = 'Some exception happened in downloader: ' + str(e) download.put() - - self.redirect('/?error=custom&url=' + urlEscape(url) + '&errtext=' + urlEscape(str(traceback.format_exc())) ) return if data == None: if loader.badLogin: logging.debug("Bad login detected") - - download.failure = 'Login problem detected' + download.failure = 'Login failed' download.put() - - self.redirect('/?error=login_required&url=' + urlEscape(url)) - else: - fic = DownloadedFanfic() - fic.user = user - fic.url = url - fic.format = format - fic.name = self._printableVersion(adapter.getOutputName()) - fic.author = self._printableVersion(adapter.getAuthorName()) - if( len(data)<1024*1000 ): - fic.blob = data - else: - logging.debug("Long file, split required") - fic.blob = data[:1024*1000] - -# try: - fic.put() - key = fic.key() + return + download.failure = 'No data returned by adaptor' download.put() -# self.redirect('/?file='+str(key)+'&name=' + urlEscape(fic.name) + '&author=' + urlEscape(fic.author)) - + else: + download.name = self._printableVersion(adapter.getOutputName()) + download.title = self._printableVersion(adapter.getStoryName()) + download.author = self._printableVersion(adapter.getAuthorName()) + download.put() + index=0 + while( len(data) > 0 ): + DownloadData(download=download, + index=index, + blob=data[:1024*1000]).put() + index += 1 + data = data[1024*1000:] + download.completed=True + download.put() + logging.info("Download finished OK") - self.response.clear() - self.response.set_status(200) return - # except Exception, e: - # logging.exception(e) - # # it was too large, won't save it - # name = str(makeAcceptableFilename(adapter.getStoryName())) - # if format == 'epub': - # self.response.headers['Content-Type'] = 'application/epub+zip' - # self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.epub' - # elif format == 'html': - # self.response.headers['Content-Type'] = 'application/zip' - # self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.html.zip' - # elif format == 'text': - # self.response.headers['Content-Type'] = 'application/zip' - # self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.txt.zip' - # elif format == 'mobi': - # self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' - # self.response.headers['Content-disposition'] = 'attachment; filename=' + name + '.mobi' - # self.response.out.write(data) def toPercentDecimal(match): "Return the %decimal number for the character for url escaping" @@ -354,6 +369,7 @@ def main(): ('/fdowntask', FanfictionDownloaderTask), ('/fdown', FanfictionDownloader), ('/file', FileServer), + ('/status', FileStatusServer), ('/recent', RecentFilesServer), ('/r2d2', RecentAllFilesServer), ('/login', LoginRequired)], diff --git a/queue.yaml b/queue.yaml index 0bfb85d0..2acafa27 100644 --- a/queue.yaml +++ b/queue.yaml @@ -2,4 +2,6 @@ queue: - name: default rate: 1/s - name: download - rate: 10/s \ No newline at end of file + rate: 10/s + retry_parameters: + task_retry_limit: 3 diff --git a/recent.html b/recent.html index 1b199e5e..dbe04fab 100644 --- a/recent.html +++ b/recent.html @@ -32,13 +32,26 @@
- Hi, {{ nickname }}! These fanfics you've downloaded previously. + Hi, {{ nickname }}! These are the fanfics you've recently requested.
{% for fic in fics %} -

{{ fic.name }} by {{ fic.author }} ({{ fic.format }})
{{ fic.url }}

+

+ {% if fic.completed %} + {{ fic.title }} + by {{ fic.author }} ({{ fic.format }})
+ {% endif %} + {% if fic.failure %} +

{{ fic.failure }}
+ {% endif %} + {% if not fic.completed and not fic.failure %} + Request Processing...
+ {% endif %} + {{ fic.url }} ({{ fic.format }}) + +

{% endfor %}
diff --git a/status.html b/status.html new file mode 100644 index 00000000..64c03c30 --- /dev/null +++ b/status.html @@ -0,0 +1,86 @@ + + + + + Fanfiction Downloader - read fanfiction from twilighted.net, fanfiction.net, fictionpress.com, fictionalley.org, ficwad.com, potionsandsnitches.net, harrypotterfanfiction.com, mediaminer.org on Kindle, Nook, Sony Reader, iPad, iPhone, Android, Aldiko, Stanza + + + {% if not fic.completed and not fic.failure %} + + {% endif %} + + +
+

+ FanFiction Downloader +

+ +
+ + +
+ +
+ +
+ {% if fic.completed %} +

Your fic has finished processing and you can download it now:

+

{{ fic.title }} + by {{ fic.author }} ({{ fic.format }})

+ {% else %} + {% if fic.failure %} + Your fic failed to process. Please check the URL and the error message below.
+
+ {{ fic.failure }} +
+ {% else %} +

Not done yet. This page will periodically poll to see if your story has finished.

+ {% endif %} + {% endif %} +
+
+
+ Powered by Google App Engine +

+ FanfictionLoader is a web front-end to fanficdownloader
+ Copyright © Roman Kirillov +
+ +
+ + +
+ +
+ + + + diff --git a/utils/remover.py b/utils/remover.py index 327db984..954e151b 100644 --- a/utils/remover.py +++ b/utils/remover.py @@ -24,20 +24,19 @@ class Remover(webapp.RequestHandler): theDate = datetime.date.today() - datetime.timedelta(days=2) logging.debug("Will delete stuff older than %s" % theDate) - fics = DownloadedFanfic.all() - fics.order("date") - - results = fics.fetch(50) - - + fics = DownloadMeta.all() + fics.filter("date <",theDate).order("date") + results = fics.fetch(100) logging.debug([x.name for x in results]) - + num = 0 for d in results: -# d.blob = None -# d.cleared = True d.delete() + for c in d.data_chunks: + c.delete() num = num + 1 + logging.debug('Delete '+d.url) + logging.info('Deleted instances: %d' % num) self.response.out.write('Deleted instances: %d' % num) @@ -50,4 +49,4 @@ def main(): if __name__ == '__main__': logging.getLogger().setLevel(logging.DEBUG) - main() \ No newline at end of file + main()