Refactorings/tweaks from Update Feature work that are worthwhile even without it.

2025-12-26 19:00:03 +01:00 · 2011-06-12 16:26:05 -05:00 · 2011-06-12 16:26:05 -05:00 · fddd146b39
commit fddd146b39
parent 8bf22729fe
9 changed files with 297 additions and 220 deletions
--- a/app.yaml
+++ b/app.yaml
@ -9,6 +9,10 @@ handlers:
  script: utils/remover.py
  login: admin

+- url: /tally.*
+  script: utils/tally.py
+  login: admin
+
 - url: /fdownloadtask
  script: main.py
  login: admin
--- a/fanficdownloader/adapters/adapter_test1.py
+++ b/fanficdownloader/adapters/adapter_test1.py
@ -100,25 +100,25 @@ Some more longer description.  "I suck at summaries!"  "Better than it sounds!"
                            ('Chapter 3, Over Cinnabar',self.url+"&chapter=4"),
                            ('Chapter 4',self.url+"&chapter=5"),
                            ('Chapter 5',self.url+"&chapter=6"),
-                            ('Chapter 6',self.url+"&chapter=6"),
-                            ('Chapter 7',self.url+"&chapter=6"),
-                            ('Chapter 8',self.url+"&chapter=6"),
-                            ('Chapter 9',self.url+"&chapter=6"),
-                            ('Chapter 0',self.url+"&chapter=6"),
-                            ('Chapter a',self.url+"&chapter=6"),
-                            ('Chapter b',self.url+"&chapter=6"),
-                            ('Chapter c',self.url+"&chapter=6"),
-                            ('Chapter d',self.url+"&chapter=6"),
-                            ('Chapter e',self.url+"&chapter=6"),
-                            ('Chapter f',self.url+"&chapter=6"),
-                            ('Chapter g',self.url+"&chapter=6"),
-                            ('Chapter h',self.url+"&chapter=6"),
-                            ('Chapter i',self.url+"&chapter=6"),
-                            ('Chapter j',self.url+"&chapter=6"),
-                            ('Chapter k',self.url+"&chapter=6"),
-                            ('Chapter l',self.url+"&chapter=6"),
-                            ('Chapter m',self.url+"&chapter=6"),
-                            ('Chapter n',self.url+"&chapter=6"),
+                            # ('Chapter 6',self.url+"&chapter=6"),
+                            # ('Chapter 7',self.url+"&chapter=6"),
+                            # ('Chapter 8',self.url+"&chapter=6"),
+                            # ('Chapter 9',self.url+"&chapter=6"),
+                            # ('Chapter 0',self.url+"&chapter=6"),
+                            # ('Chapter a',self.url+"&chapter=6"),
+                            # ('Chapter b',self.url+"&chapter=6"),
+                            # ('Chapter c',self.url+"&chapter=6"),
+                            # ('Chapter d',self.url+"&chapter=6"),
+                            # ('Chapter e',self.url+"&chapter=6"),
+                            # ('Chapter f',self.url+"&chapter=6"),
+                            # ('Chapter g',self.url+"&chapter=6"),
+                            # ('Chapter h',self.url+"&chapter=6"),
+                            # ('Chapter i',self.url+"&chapter=6"),
+                            # ('Chapter j',self.url+"&chapter=6"),
+                            # ('Chapter k',self.url+"&chapter=6"),
+                            # ('Chapter l',self.url+"&chapter=6"),
+                            # ('Chapter m',self.url+"&chapter=6"),
+                            # ('Chapter n',self.url+"&chapter=6"),
                            ]
        self.story.setMetadata('numChapters',len(self.chapterUrls))
                            
--- a/fanficdownloader/story.py
+++ b/fanficdownloader/story.py
@ -15,12 +15,17 @@
 # limitations under the License.
 #

+import os
+
 from htmlcleanup import conditionalRemoveEntities

 class Story:
    
    def __init__(self):
-        self.metadata = {'version':'4.0.0'}
+        try:
+            self.metadata = {'version':os.environ['CURRENT_VERSION_ID']}
+        except:
+            self.metadata = {'version':'4.0'}
        self.chapters = [] # chapters will be tuples of (title,html)
        self.listables = {} # some items (extratags, category, warnings & genres) are also kept as lists.

--- a/fanficdownloader/writers/writer_epub.py
+++ b/fanficdownloader/writers/writer_epub.py
@ -188,6 +188,7 @@ h6 { text-align: center; }
        rootfiles.appendChild(newTag(containerdom,"rootfile",{"full-path":"content.opf",
                                                              "media-type":"application/oebps-package+xml"}))
        outputepub.writestr("META-INF/container.xml",containerdom.toxml(encoding='utf-8'))
+        containerdom.unlink()
        del containerdom

        ## Epub has two metadata files with real data.  We're putting
@ -321,6 +322,7 @@ h6 { text-align: center; }
                                            "linear":"yes"}))
        # write content.opf to zip.
        outputepub.writestr("content.opf",contentdom.toxml(encoding='utf-8'))
+        contentdom.unlink()
        del contentdom

        ## create toc.ncx file
@ -370,6 +372,7 @@ h6 { text-align: center; }
        
        # write toc.ncs to zip file
        outputepub.writestr("toc.ncx",tocncxdom.toxml(encoding='utf-8'))
+        tocncxdom.unlink()
        del tocncxdom

        # write stylesheet.css file.
--- a/index.html
+++ b/index.html
@ -26,7 +26,7 @@
  <body>
    <div id='main'>
      <h1>
-	<a href="/" style="text-decoration: none; color: black;">FanFiction Downloader</a>
+	<a href="/" style="text-decoration: none; color: black;">FanFiction Downloader</a> <g:plusone size="medium"></g:plusone>
      </h1>

      <div style="text-align: center">
@ -53,25 +53,15 @@
 	    <p>Hi, {{ nickname }}! This is a fan fiction downloader, which makes reading stories from various websites
 	      much easier. </p>
 	  </div>
-	  <h3>fanfiction.net/fictionpress.com changes</h3>
-	  <p>
-	    The sites fanfiction.net and fictionpress.com changed their
-	    output enough to break the downloader.  Stories appeared
-	    to download, but the chapters would not contain the story
-	    text.
-	  </p>
-	  <p>
-	    It should be fixed now.
-	  </p>
+	  <!-- put announcements here, h3 is a good title size. -->
 	  <p>
 	    If you have any problems with this application, please
 	    report them in
 	    the <a href="http://groups.google.com/group/fanfic-downloader">Fanfiction
 	      Downloader Google Group</a>.  The
-	    <a href="http://3-0-2.fanfictionloader.appspot.com">Old
+	    <a href="http://4-0-2.fanfictionloader.appspot.com">Previous
 	      Version</a> is also available for you to use if necessary.
 	  </p>
-	  <p><g:plusone size="medium"></g:plusone></p>
 	  <div id='error'>
 	    {{ error_message }}
 	  </div>
--- a/index.yaml
+++ b/index.yaml
@ -1,33 +1,33 @@
-indexes:
-
-# AUTOGENERATED
-
-# This index.yaml is automatically updated whenever the dev_appserver
-# detects that a new type of query is run.  If you want to manage the
-# index.yaml file manually, remove the above marker line (the line
-# saying "# AUTOGENERATED").  If you want to manage some indexes
-# manually, move them above the marker line.  The index.yaml file is
-# automatically uploaded to the admin console when you next deploy
-# your application using appcfg.py.
-
- kind: DownloadData
-  properties:
-  - name: download
-  - name: index
-
- kind: DownloadMeta
-  properties:
-  - name: user
-  - name: date
-    direction: desc
-
- kind: DownloadedFanfic
-  properties:
-  - name: cleared
-  - name: date
-
- kind: DownloadedFanfic
-  properties:
-  - name: user
-  - name: date
-    direction: desc
+indexes:
+
+# AUTOGENERATED
+
+# This index.yaml is automatically updated whenever the dev_appserver
+# detects that a new type of query is run.  If you want to manage the
+# index.yaml file manually, remove the above marker line (the line
+# saying "# AUTOGENERATED").  If you want to manage some indexes
+# manually, move them above the marker line.  The index.yaml file is
+# automatically uploaded to the admin console when you next deploy
+# your application using appcfg.py.
+
+- kind: DownloadData
+  properties:
+  - name: download
+  - name: index
+
+- kind: DownloadMeta
+  properties:
+  - name: user
+  - name: date
+    direction: desc
+
+- kind: DownloadedFanfic
+  properties:
+  - name: cleared
+  - name: date
+
+- kind: DownloadedFanfic
+  properties:
+  - name: user
+  - name: date
+    direction: desc
--- a/main.py
+++ b/main.py
@ -28,7 +28,8 @@ import urllib
 import datetime

 import traceback
-import StringIO
+from StringIO import StringIO
+import ConfigParser

 ## Just to shut up the appengine warning about "You are using the
 ## default Django version (0.96). The default Django version will
@ -36,6 +37,10 @@ import StringIO
 ## use_library() to explicitly select a Django version. For more
 ## information see
 ## http://code.google.com/appengine/docs/python/tools/libraries.html#Django"
+## Note that if you are using the SDK App Engine Launcher and hit an SDK
+## Console page first, you will get a django version mismatch error when you
+## go to hit one of the application pages.  Just change a file again, and
+## make sure to hit an app page before the SDK page to clear it.
 os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
 from google.appengine.dist import use_library
 use_library('django', '1.2')
@ -51,7 +56,6 @@ from google.appengine.runtime import DeadlineExceededError
 from ffstorage import *

 from fanficdownloader import adapters, writers, exceptions
-import ConfigParser

 class MainHandler(webapp.RequestHandler):
    def get(self):
@ -61,7 +65,7 @@ class MainHandler(webapp.RequestHandler):
            template_values = {'nickname' : user.nickname(), 'authorized': True}
            url = self.request.get('url')
            template_values['url'] = url
-            
+
            if error:
                if error == 'login_required':
                    template_values['error_message'] = 'This story (or one of the chapters) requires you to be logged in.'
@ -73,11 +77,11 @@ class MainHandler(webapp.RequestHandler):
                    template_values['error_message'] = 'Configuration Saved'
                elif error == 'recentcleared':
                    template_values['error_message'] = 'Your Recent Downloads List has been Cleared'
-            
+
            filename = self.request.get('file')
            if len(filename) > 1:
                template_values['yourfile'] = '''<div id='yourfile'><a href='/file?id=%s'>"%s" by %s</a></div>''' % (filename, self.request.get('name'), self.request.get('author'))
-            
+
            self.response.headers['Content-Type'] = 'text/html'
            path = os.path.join(os.path.dirname(__file__), 'index.html')

@ -99,7 +103,7 @@ class EditConfigServer(webapp.RequestHandler):
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return
-        
+
        template_values = {'nickname' : user.nickname(), 'authorized': True}

        ## Pull user's config record.
@ -129,29 +133,26 @@ class EditConfigServer(webapp.RequestHandler):
            config = configfile.read()
            configfile.close()
            template_values['defaultsini'] = config
-            
+
            path = os.path.join(os.path.dirname(__file__), 'editconfig.html')
            self.response.headers['Content-Type'] = 'text/html'
            self.response.out.write(template.render(path, template_values))

-        
+
 class FileServer(webapp.RequestHandler):

    def get(self):
        fileId = self.request.get('id')
-        
+
        if fileId == None or len(fileId) < 3:
            self.redirect('/')
            return

        try:
-            key = db.Key(fileId)
-            fanfic = db.get(key)
+            download = getDownloadMeta(id=fileId)
+
+            name = download.name.encode('utf-8')

-            # check for completed & failure.
-        
-            name = fanfic.name.encode('utf-8')
-        
            logging.info("Serving file: %s" % name)

            if name.endswith('.epub'):
@ -166,15 +167,15 @@ class FileServer(webapp.RequestHandler):
                self.response.headers['Content-Type'] = 'application/zip'
            else:
                self.response.headers['Content-Type'] = 'application/octet-stream'
-            
-            self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name 

-            data = DownloadData.all().filter("download =", fanfic).order("index")
+            self.response.headers['Content-disposition'] = 'attachment; filename="%s"' % name
+
+            data = DownloadData.all().filter("download =", download).order("index")
            # epubs are all already compressed.
            # Each chunk is compress individually to avoid having
            # to hold the whole in memory just for the
            # compress/uncompress
-            if fanfic.format != 'epub':
+            if download.format != 'epub':
                def dc(data):
                    try:
                        return zlib.decompress(data)
@ -184,65 +185,64 @@ class FileServer(webapp.RequestHandler):
            else:
                def dc(data):
                    return data
-                
+
            for datum in data:
                self.response.out.write(dc(datum.blob))

        except Exception, e:
            fic = DownloadMeta()
            fic.failure = unicode(e)
-                        
+
            template_values = dict(fic = fic,
                                   #nickname = user.nickname(),
                                   #escaped_url = escaped_url
                                   )
            path = os.path.join(os.path.dirname(__file__), 'status.html')
            self.response.out.write(template.render(path, template_values))
-            
+
 class FileStatusServer(webapp.RequestHandler):
    def get(self):
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return
-        
+
        fileId = self.request.get('id')
-        
+
        if fileId == None or len(fileId) < 3:
            self.redirect('/')

        escaped_url=False

        try:
-            key = db.Key(fileId)
-            fic = db.get(key)
+            download = getDownloadMeta(id=fileId)

-            if fic:
-                logging.info("Status url: %s" % fic.url)
-                if fic.completed and fic.format=='epub':
-                    escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+fileId+"&fake=file."+fic.format)
+            if download:
+                logging.info("Status url: %s" % download.url)
+                if download.completed and download.format=='epub':
+                    escaped_url = urlEscape(self.request.host_url+"/file/"+download.name+"."+download.format+"?id="+fileId+"&fake=file."+download.format)
            else:
-                fic = DownloadMeta()
-                fic.failure = "Download not found"
-                
+                download = DownloadMeta()
+                download.failure = "Download not found"
+
        except Exception, e:
-            fic = DownloadMeta()
-            fic.failure = unicode(e)
-                        
-        template_values = dict(fic = fic,
+            download = DownloadMeta()
+            download.failure = unicode(e)
+
+        template_values = dict(fic = download,
                               nickname = user.nickname(),
                               escaped_url = escaped_url
                               )
        path = os.path.join(os.path.dirname(__file__), 'status.html')
        self.response.out.write(template.render(path, template_values))
-        
+
 class ClearRecentServer(webapp.RequestHandler):
    def get(self):
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return
-        
+
        logging.info("Clearing Recent List for user: "+user.nickname())
        q = DownloadMeta.all()
        q.filter('user =', user)
@ -260,7 +260,7 @@ class ClearRecentServer(webapp.RequestHandler):
                break
        logging.info('Deleted %d instances download.' % num)
        self.redirect("/?error=recentcleared")
-            
+
 class RecentFilesServer(webapp.RequestHandler):
    def get(self):
        user = users.get_current_user()
@ -276,7 +276,7 @@ class RecentFilesServer(webapp.RequestHandler):
        for fic in fics:
            if fic.completed and fic.format == 'epub':
                fic.escaped_url = urlEscape(self.request.host_url+"/file/"+fic.name+"."+fic.format+"?id="+str(fic.key())+"&fake=file."+fic.format)
-        
+
        template_values = dict(fics = fics, nickname = user.nickname())
        path = os.path.join(os.path.dirname(__file__), 'recent.html')
        self.response.out.write(template.render(path, template_values))
@ -287,17 +287,16 @@ class UserConfigServer(webapp.RequestHandler):

        logging.debug('reading defaults.ini config file')
        config.read('defaults.ini')
-        
+
        ## Pull user's config record.
        l = UserConfig.all().filter('user =', user).fetch(1)
-        ## TEST THIS
        if l and l[0].config:
            uconfig=l[0]
            #logging.debug('reading config from UserConfig(%s)'%uconfig.config)
-            config.readfp(StringIO.StringIO(uconfig.config))                
+            config.readfp(StringIO(uconfig.config))

        return config
-        
+
 class FanfictionDownloader(UserConfigServer):
    def get(self):
        self.post()
@ -311,32 +310,20 @@ class FanfictionDownloader(UserConfigServer):

        format = self.request.get('format')
        url = self.request.get('url')
-        
+
        if not url or url.strip() == "":
            self.redirect('/')
            return
-        
+
        logging.info("Queuing Download: %s" % url)
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult') == "on"
-        
-        # use existing record if available.
-        q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
-        if( q is None or len(q) < 1 ):
-            download = DownloadMeta()
-        else:
-            download = q[0]
-            download.completed=False
-            download.failure=None
-            download.date=datetime.datetime.now()
-            for c in download.data_chunks:
-                c.delete()
-                
-        download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID'])
-        download.user = user
-        download.url = url
-        download.format = format
+
+        # use existing record if available.  Fetched/Created before
+        # the adapter can normalize the URL in case we need to record
+        # an exception.
+        download = getDownloadMeta(url=url,user=user,format=format,new=True)

        adapter = None
        try:
@ -348,7 +335,7 @@ class FanfictionDownloader(UserConfigServer):
                adapter.username=login
                adapter.password=password
            adapter.is_adult=is_adult
-            
+
            ## This scrapes the metadata, which will be
            ## duplicated in the queue task, but it
            ## detects bad URLs, bad login, bad story, etc
@ -356,6 +343,12 @@ class FanfictionDownloader(UserConfigServer):
            ## it's worth the double up.  Could maybe save
            ## it all in the download object someday.
            story = adapter.getStoryMetadataOnly()
+
+            ## Fetch again using normalized story URL.  The one
+            ## fetched/created above, if different, will not be saved.
+            download = getDownloadMeta(url=story.getMetadata('storyUrl'),
+                                       user=user,format=format,new=True)
+
            download.title = story.getMetadata('title')
            download.author = story.getMetadata('author')
            download.url = story.getMetadata('storyUrl')
@ -370,7 +363,7 @@ class FanfictionDownloader(UserConfigServer):
                                  'password':password,
                                  'user':user.email(),
                                  'is_adult':is_adult})
-            
+
            logging.info("enqueued download key: " + str(download.key()))

        except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e:
@ -389,7 +382,7 @@ class FanfictionDownloader(UserConfigServer):
            if isinstance(e,exceptions.AdultCheckRequired):
                template_values['login']=login
                template_values['password']=password
-                
+
            path = os.path.join(os.path.dirname(__file__), 'login.html')
            self.response.out.write(template.render(path, template_values))
            return
@ -402,136 +395,148 @@ class FanfictionDownloader(UserConfigServer):
            logging.exception(e)
            download.failure = unicode(e)
            download.put()
-        
+
        self.redirect('/status?id='+str(download.key()))

        return


 class FanfictionDownloaderTask(UserConfigServer):
-    def _printableVersion(self, text):
-        text = removeEntities(text)
-        try:
-            d = text.decode('utf-8')
-        except:
-            d = text
-        return d
-    

    def post(self):
        logging.getLogger().setLevel(logging.DEBUG)
        fileId = self.request.get('id')
+        # A User object can't be passed in the request, only the email address
+        user = users.User(self.request.get('user'))
        format = self.request.get('format')
        url = self.request.get('url')
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult')
-        # User object can't pass, just email address
-        user = users.User(self.request.get('user'))
-        
+
        logging.info("Downloading: " + url + " for user: "+user.nickname())
        logging.info("ID: " + fileId)
-        
+
        adapter = None
        writerClass = None

-        if fileId:
-            try:
-                ## try to get download rec from passed id first.
-                ## may need to fall back to user/url/format during transition.
-                download = db.get(db.Key(fileId))
-                logging.info("DownloadMeta found by ID:"+fileId)
-            except:
-                pass
-
-        if not download:
-            # use existing record if available.
-            q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
-            if( q is None or len(q) < 1 ):
-                logging.info("New DownloadMeta")
-                download = DownloadMeta()
-            else:
-                logging.info("DownloadMeta found by user/url/format")
-                download = q[0]
-
-        ## populate DownloadMeta, regardless of how found or created.
-        download.failure=None
-        download.date=datetime.datetime.now()
-        download.completed=False
-        download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID'])
-        download.user = user
-        download.url = url
-        download.format = format
+        # use existing record if available.
+        # fileId should have record from /fdown.
+        download = getDownloadMeta(id=fileId,url=url,user=user,format=format,new=True)
        for c in download.data_chunks:
            c.delete()
        download.put()
-        
+
        logging.info('Creating adapter...')
-        
+
        try:
            config = self.getUserConfig(user)
            adapter = adapters.getAdapter(config,url)
-        except Exception, e:
-            logging.exception(e)
-            download.failure = unicode(e)
-            download.put()
-            return
-        
-        logging.info('Created an adapter: %s' % adapter)
-        
-        if len(login) > 1:
-            adapter.username=login
-            adapter.password=password
-        adapter.is_adult=is_adult

-        try:
+            logging.info('Created an adapter: %s' % adapter)
+
+            if len(login) > 1:
+                adapter.username=login
+                adapter.password=password
+            adapter.is_adult=is_adult
+
            # adapter.getStory() is what does all the heavy lifting.
+            # adapter.getStoryMetadataOnly() only fetches enough to
+            # get metadata.  writer.writeStory() will call
+            # adapter.getStory(), too.
            writer = writers.getWriter(format,config,adapter)
            download.name = writer.getOutputFileName()
-            logging.debug('output_filename:'+writer.getConfig('output_filename'))
+            #logging.debug('output_filename:'+writer.getConfig('output_filename'))
            logging.debug('getOutputFileName:'+writer.getOutputFileName())
            download.title = adapter.getStory().getMetadata('title')
            download.author = adapter.getStory().getMetadata('author')
            download.url = adapter.getStory().getMetadata('storyUrl')
            download.put()
+
+            outbuffer = StringIO()
+            writer.writeStory(outbuffer)
+            data = outbuffer.getvalue()
+            outbuffer.close()
+            del outbuffer
+            #del writer.adapter
+            #del writer.story
+            del writer
+            #del adapter.story
+            del adapter
+
+            # epubs are all already compressed.  Each chunk is
+            # compressed individually to avoid having to hold the
+            # whole in memory just for the compress/uncompress.
+            if format != 'epub':
+                def c(data):
+                    return zlib.compress(data)
+            else:
+                def c(data):
+                    return data
+
+            index=0
+            while( len(data) > 0 ):
+                DownloadData(download=download,
+                             index=index,
+                             blob=c(data[:1000000])).put()
+                index += 1
+                data = data[1000000:]
+            download.completed=True
+            download.put()
+
+            logging.info("Download finished OK")
+            del data
+
        except Exception, e:
            logging.exception(e)
            download.failure = unicode(e)
            download.put()
            return
-        
-        outbuffer = StringIO.StringIO()
-        writer.writeStory(outbuffer)
-        data = outbuffer.getvalue()
-        outbuffer.close()
-        del writer
-        del adapter

-        # epubs are all already compressed.
-        # Each chunk is compressed individually to avoid having
-        # to hold the whole in memory just for the
-        # compress/uncompress.
-        if format != 'epub':
-            def c(data):
-                return zlib.compress(data)
-        else:
-            def c(data):
-                return data
-            
-        index=0
-        while( len(data) > 0 ):
-            DownloadData(download=download,
-                     index=index,
-                     blob=c(data[:1000000])).put()
-            index += 1
-            data = data[1000000:]
-        download.completed=True
-        download.put()
-            
-        logging.info("Download finished OK")
        return
-                
-def toPercentDecimal(match): 
+
+def getDownloadMeta(id=None,url=None,user=None,format=None,new=False):
+    ## Try to get the download record by the passed id first, then
+    ## fall back to a user/url/format lookup.
+    download = None
+    if id:
+        try:
+            download = db.get(db.Key(id))
+            logging.info("DownloadMeta found by ID:"+id)
+        except:
+            pass
+
+    if not download and url and user and format:
+        try:
+            q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
+            if( q is not None and len(q) > 0 ):
+                logging.debug("DownloadMeta found by user:%s url:%s format:%s"%(user,url,format))
+                download = q[0]
+        except:
+            pass
+
+    if new:
+        # NOT clearing existing chunks here, because this record may
+        # never be saved.
+        if not download:
+            logging.debug("New DownloadMeta")
+            download = DownloadMeta()
+
+        download.completed=False
+        download.failure=None
+        download.date=datetime.datetime.now()
+
+        download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID'])
+        if user:
+            download.user = user
+        if url:
+            download.url = url
+        if format:
+            download.format = format
+
+    return download
+
+def toPercentDecimal(match):
    "Return the %decimal number for the character for url escaping"
    s = match.group(1)
    return "%%%02x" % ord(s)
--- a/utils/init.py
+++ b/utils/init.py
@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
--- a/utils/tally.py
+++ b/utils/tally.py
@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# encoding: utf-8
+# Copyright 2011 Fanficdownloader team
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import datetime
+import logging
+
+from google.appengine.ext.webapp import util
+from google.appengine.ext import webapp
+from google.appengine.api import users
+from google.appengine.api import taskqueue
+from google.appengine.api import memcache
+
+from ffstorage import *
+
+class Tally(webapp.RequestHandler):
+    def get(self):
+        logging.debug("Starting Tally")
+        user = users.get_current_user()
+        logging.debug("Working as user %s" % user)
+        
+        fics = DownloadMeta.all()
+        
+        cursor = memcache.get('tally_search_cursor')
+        if cursor:
+            fics.with_cursor(cursor)
+
+        self.response.out.write('"user","url","name","title","author","format","failure","completed","date","version"<br/>')
+        num = 0
+        step = 500
+        results = fics.fetch(step)
+        for d in results:
+            self.response.out.write('"%s","%s","%s","%s","%s","%s","%s","%s","%s","%s"<br/>' %
+                                    (d.user,d.url,d.name,d.title,d.author,
+                                     d.format,d.failure,d.completed,d.date,
+                                     d.version))
+            num += 1
+        if num < step:
+            memcache.delete('tally_search_cursor')
+            logging.warn('Tally search reached end, starting over next time.')
+        else:
+            memcache.set('tally_search_cursor',fics.cursor())
+        
+        logging.info('Tallied %d fics.' % num)
+        self.response.out.write('<br/>Tallied %d fics.<br/>' % num)
+
+def main():
+    application = webapp.WSGIApplication([('/tally', Tally),
+                                          ],
+                                         debug=False)
+    util.run_wsgi_app(application)
+
+
+if __name__ == '__main__':
+    logging.getLogger().setLevel(logging.DEBUG)
+    main()