4-0-1 Add mobi back in, changes to recent & status appearance, more reliability.

Jim Miller 2011-05-30 23:02:00 -05:00
parent f7857ad6dc
commit 50b80b6d68
12 changed files with 803 additions and 64 deletions

@@ -1,6 +1,6 @@
# fanfictionloader ffd-retief
application: fanfictionloader
version: 4-0-0
version: 4-0-1
runtime: python
api_version: 1

@@ -13,7 +13,7 @@ body
#greeting
{
margin-bottom: 1em;
# margin-bottom: 1em;
border-color: #efefef;
}
@@ -66,6 +66,8 @@ div.field
#error
{
font-size: small;
color: #f00;
}
.recent {
font-size: large;
}

@@ -108,7 +108,7 @@ extratags: FanFiction
[txt]
## Add URLs since there aren't links.
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, author URL, description
titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description
## use \r\n for line endings, the windows convention. text output only.
windows_eol: true
@@ -132,6 +132,10 @@ titlepage_use_table: false
## When using tables, make these span both columns.
wide_titlepage_entries: description, storyUrl, author URL
[mobi]
## mobi TOC cannot be turned off right now.
#include_tocpage: true
## Each site has a section that overrides [defaults] *and* the format
## sections. test1.com specifically is not a real story site. Instead,
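
The two comment lines above describe the lookup order (a site section overrides both [defaults] and the per-format sections such as [txt] and [mobi]) without showing how it is resolved. As a rough illustration only -- this is not the downloader's actual configuration class, and the class name, helper and usage lines are invented -- the resolution could be sketched with a plain RawConfigParser:

# -*- coding: utf-8 -*-
# Minimal sketch of the [defaults] / [format] / [site] override order
# described above.  Not the project's real config code.
from ConfigParser import RawConfigParser

class LayeredConfig(object):
    def __init__(self, inifile, site, format):
        self.parser = RawConfigParser()
        self.parser.read(inifile)
        # most-specific section wins: site, then format, then defaults.
        self.sections = [site, format, 'defaults']

    def get(self, key, default=''):
        for section in self.sections:
            if self.parser.has_section(section) and self.parser.has_option(section, key):
                return self.parser.get(section, key)
        return default

# usage (hypothetical): a mobi download from test1.com would look up keys
# in [test1.com], then [mobi], then [defaults].
# config = LayeredConfig('defaults.ini', 'test1.com', 'mobi')
# print config.get('titlepage_entries')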

@@ -16,6 +16,7 @@
#
import datetime
import time
import logging
import fanficdownloader.BeautifulSoup as bs
@@ -105,6 +106,10 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
if self.story.getMetadata('storyId') == '667':
raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url)
if self.story.getMetadata('storyId') == '670' and self.getConfig('slow_down_sleep_time'):
time.sleep(float(self.getConfig('slow_down_sleep_time')))
if "chapter=1" in url :
text=u'''
<div>
@@ -116,6 +121,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"
<p>http://test1.com?sid=667 - raises FailedToDownload on chapter 1</p>
<p>http://test1.com?sid=668 - raises FailedToLogin unless username='Me'</p>
<p>http://test1.com?sid=669 - Succeeds with Updated Date=now</p>
<p>http://test1.com?sid=670 - Succeeds, but applies slow_down_sleep_time</p>
<p>And other storyId will succeed with the same output.</p>
</div>
'''

fanficdownloader/html.py (new file, 126 lines)

@@ -0,0 +1,126 @@
#!/usr/bin/python
# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan

import re
import sys
import StringIO
import urllib

from BeautifulSoup import BeautifulSoup

class HtmlProcessor:
    WHITESPACE_RE = re.compile(r'\s')
    # Look for </blockquote <p>
    BAD_TAG_RE = re.compile(r'<[^>]+<', re.MULTILINE)

    def __init__(self, html, unfill=0):
        self.unfill = unfill
        html = self._ProcessRawHtml(html)
        self._soup = BeautifulSoup(html)
        if self._soup.title:
            self.title = self._soup.title.contents[0]
        else:
            self.title = None

    def _ProcessRawHtml(self, html):
        new_html, count = HtmlProcessor.BAD_TAG_RE.subn('<', html)
        if count:
            print >>sys.stderr, 'Replaced %d bad tags' % count
        return new_html

    def _StubInternalAnchors(self):
        '''Replace each internal anchor with a fixed-size filepos anchor.
        Looks for every anchor with <a href="#myanchor"> and replaces that
        with <a filepos="00000000050">. Stores anchors in self._anchor_references'''
        self._anchor_references = []
        anchor_num = 0
        # anchor links
        anchorlist = self._soup.findAll('a', href=re.compile('^#'))
        # treat reference tags like a tags for TOCTOP.
        anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#')))
        for anchor in anchorlist:
            self._anchor_references.append((anchor_num, anchor['href']))
            del anchor['href']
            anchor['filepos'] = '%.10d' % anchor_num
            anchor_num += 1

    def _ReplaceAnchorStubs(self):
        # TODO: Browsers allow extra whitespace in the href names.
        # use __str__ instead of prettify--it inserts extra spaces.
        assembled_text = self._soup.__str__('utf8')
        del self._soup # shouldn't touch this anymore
        for anchor_num, original_ref in self._anchor_references:
            ref = urllib.unquote(original_ref[1:]) # remove leading '#'
            # Find the position of ref in the utf-8 document.
            # TODO(chatham): Using regexes and looking for name= would be better.
            newpos = assembled_text.rfind(ref.encode('utf-8'))
            if newpos == -1:
                print >>sys.stderr, 'Could not find anchor "%s"' % original_ref
                continue
            newpos += len(ref) + 2 # don't point into the middle of the <a name> tag
            old_filepos = 'filepos="%.10d"' % anchor_num
            new_filepos = 'filepos="%.10d"' % newpos
            assert assembled_text.find(old_filepos) != -1
            assembled_text = assembled_text.replace(old_filepos, new_filepos, 1)
        return assembled_text

    def _FixPreTags(self):
        '''Replace <pre> tags with HTML-ified text.'''
        pres = self._soup.findAll('pre')
        for pre in pres:
            pre.replaceWith(self._FixPreContents(str(pre.contents[0])))

    def _FixPreContents(self, text):
        if self.unfill:
            line_splitter = '\n\n'
            line_joiner = '<p>'
        else:
            line_splitter = '\n'
            line_joiner = '<br>'
        lines = []
        for line in text.split(line_splitter):
            lines.append(self.WHITESPACE_RE.subn('&nbsp;', line)[0])
        return line_joiner.join(lines)

    def _RemoveUnsupported(self):
        '''Remove any tags which the kindle cannot handle.'''
        # TODO(chatham): <link> tags to script?
        unsupported_tags = ('script', 'style')
        for tag_type in unsupported_tags:
            for element in self._soup.findAll(tag_type):
                element.extract()

    def RenameAnchors(self, prefix):
        '''Rename every internal anchor to have the given prefix, then
        return the contents of the body tag.'''
        for anchor in self._soup.findAll('a', href=re.compile('^#')):
            anchor['href'] = '#' + prefix + anchor['href'][1:]
        for a in self._soup.findAll('a'):
            if a.get('name'):
                a['name'] = prefix + a['name']
        # TODO(chatham): figure out how to fix this. sometimes body comes out
        # as NoneType.
        content = []
        if self._soup.body is not None:
            content = [unicode(c) for c in self._soup.body.contents]
        return '\n'.join(content)

    def CleanHtml(self):
        # TODO(chatham): fix_html_br, fix_html
        self._RemoveUnsupported()
        self._StubInternalAnchors()
        self._FixPreTags()
        return self._ReplaceAnchorStubs()

if __name__ == '__main__':
    FILE = '/tmp/documentation.html'
    #FILE = '/tmp/multipre.html'
    FILE = '/tmp/view.html'
    import codecs
    d = open(FILE).read()
    h = HtmlProcessor(d)
    s = h.CleanHtml()
    #print s
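
As a quick way to see what _StubInternalAnchors and _ReplaceAnchorStubs accomplish, the following sketch runs CleanHtml() over a made-up document containing one internal link. Only HtmlProcessor itself comes from the file above; the sample HTML is invented, and it assumes the script is run from the directory containing html.py:

# Illustration only: the internal <a href="#ch1"> link comes out of
# CleanHtml() carrying a 10-digit filepos attribute that points at the
# byte offset of <a name="ch1">, which is the Mobipocket convention.
from html import HtmlProcessor   # the new fanficdownloader/html.py module

sample = '''<html><head><title>Anchor demo</title></head>
<body>
<p><a href="#ch1">Jump to chapter 1</a></p>
<pre>keep   spacing</pre>
<a name="ch1"><h3>Chapter 1</h3>
</body></html>'''

processor = HtmlProcessor(sample)
cleaned = processor.CleanHtml()
# The href is gone, a filepos="00000000xx" attribute has replaced it, and
# whitespace inside the <pre> block has been preserved as &nbsp; entities.
print cleaned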

fanficdownloader/mobi.py (new file, 384 lines)

@@ -0,0 +1,384 @@
#!/usr/bin/python
# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan

import StringIO
import struct
import time
import random
import logging

from html import HtmlProcessor

# http://wiki.mobileread.com/wiki/MOBI
# http://membres.lycos.fr/microfirst/palm/pdb.html

encoding = {
    'UTF-8' : 65001,
    'latin-1' : 1252,
}

languages = {"en-us" : 0x0409,
             "sv" : 0x041d,
             "fi" : 0x000b,
             "en" : 0x0009,
             "en-gb" : 0x0809}

def ToHex(s):
    v = ['%.2x' % ord(c) for c in s]
    return ' '.join(v)

class _SubEntry:
    def __init__(self, pos, html_data):
        self.pos = pos
        self.html = HtmlProcessor(html_data)
        self.title = self.html.title
        self._name = 'mobi_article_%d' % pos
        if not self.title:
            self.title = 'Article %d' % self.pos

    def TocLink(self):
        return '<a href="#%s_MOBI_START">%.80s</a>' % (self._name, self.title)

    def Anchor(self):
        return '<a name="%s_MOBI_START">' % self._name

    def Body(self):
        return self.html.RenameAnchors(self._name + '_')

class Converter:
    def __init__(self, refresh_url='', title='Unknown', author='Unknown', publisher='Unknown'):
        self._header = Header()
        self._header.SetTitle(title)
        self._header.SetAuthor(author)
        self._header.SetPublisher(publisher)
        self._refresh_url = refresh_url

    def ConvertString(self, s):
        out = StringIO.StringIO()
        self._ConvertStringToFile(s, out)
        return out.getvalue()

    def ConvertStrings(self, html_strs):
        out = StringIO.StringIO()
        self._ConvertStringsToFile(html_strs, out)
        return out.getvalue()

    def ConvertFile(self, html_file, out_file):
        self._ConvertStringToFile(open(html_file,'rb').read(),
                                  open(out_file, 'wb'))

    def ConvertFiles(self, html_files, out_file):
        html_strs = [open(f,'rb').read() for f in html_files]
        self._ConvertStringsToFile(html_strs, open(out_file, 'wb'))

    def MakeOneHTML(self, html_strs):
        """This takes a list of HTML strings and returns a big HTML file with
        all contents consolidated. It constructs a table of contents and adds
        anchors within the text
        """
        title_html = []
        toc_html = []
        body_html = []

        PAGE_BREAK = '<mbp:pagebreak>'

        # pull out the title page, assumed first html_strs.
        htmltitle = html_strs[0]
        entrytitle = _SubEntry(1, htmltitle)
        title_html.append(entrytitle.Body())
        title_html.append(PAGE_BREAK)

        toc_html.append('<a name="TOCTOP"><h3>Table of Contents</h3><br />')

        for pos, html in enumerate(html_strs[1:]):
            entry = _SubEntry(pos+1, html)
            toc_html.append('%s<br />' % entry.TocLink())
            # give some space between bodies of work.
            body_html.append(PAGE_BREAK)
            body_html.append(entry.Anchor())
            body_html.append(entry.Body())

        # TODO: this title can get way too long with RSS feeds. Not sure how to fix
        # cheat slightly and use the <a href> code to set filepos in references.
        header = '''<html>
<head>
<title>Bibliorize %s GMT</title>
<guide>
<reference href="#TOCTOP" type="toc" title="Table of Contents"/>
</guide>
</head>
<body>
''' % time.ctime(time.time())

        footer = '</body></html>'
        all_html = header + '\n'.join(title_html + toc_html + body_html) + footer
        #print "%s" % all_html.encode('utf8')
        return all_html

    def _ConvertStringsToFile(self, html_strs, out_file):
        try:
            tmp = self.MakeOneHTML(html_strs)
            self._ConvertStringToFile(tmp, out_file)
        except Exception, e:
            logging.error('Error %s', e)
            logging.debug('Details: %s' % html_strs)

    def _ConvertStringToFile(self, html_data, out):
        html = HtmlProcessor(html_data)
        data = html.CleanHtml()

        # collect offsets of '<mbp:pagebreak>' tags, use to make index list.
        # indexlist = [] # list of (offset,length) tuples.
        # not in current use.
        # j=0
        # lastj=0
        # while True:
        #     j=data.find('<mbp:pagebreak>',lastj+10) # plus a bit so we find the next.
        #     if j < 0:
        #         break
        #     indexlist.append((lastj,j-lastj))
        #     print "index offset: %d length: %d" % (lastj,j-lastj)
        #     lastj=j

        records = []

        # title = html.title
        # if title:
        #     self._header.SetTitle(title)
        record_id = 1
        for start_pos in range(0, len(data), Record.MAX_SIZE):
            end = min(len(data), start_pos + Record.MAX_SIZE)
            record_data = data[start_pos:end]
            records.append(self._header.AddRecord(record_data, record_id))
            #print "HTML Record %03d: (size:%d) [[%s ... %s]]" % ( record_id, len(record_data), record_data[:20], record_data[-20:] )
            record_id += 1
        self._header.SetImageRecordIndex(record_id)
        records[0:0] = [self._header.MobiHeader()]

        header, rec_offset = self._header.PDBHeader(len(records))
        out.write(header)
        for record in records:
            record.WriteHeader(out, rec_offset)
            #print "rec_offset: %d len(record.data): %d" % (rec_offset,len(record.data))
            rec_offset += (len(record.data)+1) # plus one for trailing null

        # Write two nulls for some reason
        out.write('\0\0')
        for record in records:
            record.WriteData(out)
            out.write('\0')
            # needs a trailing null, I believe it indicates zero length 'overlap'.
            # otherwise, the readers eat the last char of each html record.
            # Calibre writes another 6-7 bytes of stuff after that, but we seem
            # to be getting along without it.

class Record:
    MAX_SIZE = 4096
    INDEX_LEN = 8
    _unique_id_seed = 28 # should be arbitrary, but taken from MobiHeader
    # TODO(chatham): Record compression doesn't look that hard.

    def __init__(self, data, record_id):
        assert len(data) <= self.MAX_SIZE
        self.data = data
        if record_id != 0:
            self._id = record_id
        else:
            Record._unique_id_seed += 1
            self._id = 0

    def __repr__(self):
        return 'Record: id=%d len=%d' % (self._id, len(self.data))

    def _SetUniqueId(self):
        Record._unique_id_seed += 1
        # TODO(chatham): Wraparound crap
        self._id = Record._unique_id_seed

    def WriteData(self, out):
        out.write(self.data)

    def WriteHeader(self, out, rec_offset):
        attributes = 64 # dirty?
        header = struct.pack('>IbbH',
                             rec_offset,
                             attributes,
                             0, self._id)
        assert len(header) == Record.INDEX_LEN
        out.write(header)

EXTH_HEADER_FIELDS = {
    'author' : 100,
    'publisher' : 101,
}

class Header:
    EPOCH_1904 = 2082844800

    def __init__(self):
        self._length = 0
        self._record_count = 0
        self._title = '2008_2_34'
        self._author = 'Unknown author'
        self._publisher = 'Unknown publisher'
        self._first_image_index = 0

    def SetAuthor(self, author):
        self._author = author.encode('ascii','ignore')

    def SetTitle(self, title):
        # TODO(chatham): Reevaluate whether this needs to be ASCII.
        # maybe just do sys.setdefaultencoding('utf-8')? Problems
        # appending self._title with other things.
        self._title = title.encode('ascii','ignore')

    def SetPublisher(self, publisher):
        self._publisher = publisher.encode('ascii','ignore')

    def AddRecord(self, data, record_id):
        self.max_record_size = max(Record.MAX_SIZE, len(data))
        self._record_count += 1
        self._length += len(data)
        return Record(data, record_id)

    def _ReplaceWord(self, data, pos, word):
        return data[:pos] + struct.pack('>I', word) + data[pos+4:]

    def PalmDocHeader(self):
        compression = 1 # no compression
        unused = 0
        encryption_type = 0 # no encryption
        records = self._record_count + 1 # the header record itself
        palmdoc_header = struct.pack('>HHIHHHH',
                                     compression,
                                     unused,
                                     self._length,
                                     records,
                                     Record.MAX_SIZE,
                                     encryption_type,
                                     unused)
        assert len(palmdoc_header) == 16
        return palmdoc_header

    def PDBHeader(self, num_records):
        HEADER_LEN = 32+2+2+9*4
        RECORD_INDEX_HEADER_LEN = 6
        RESOURCE_INDEX_LEN = 10
        index_len = RECORD_INDEX_HEADER_LEN + num_records * Record.INDEX_LEN
        rec_offset = HEADER_LEN + index_len + 2

        short_title = self._title[0:31]
        attributes = 0
        version = 0
        ctime = self.EPOCH_1904 + int(time.time())
        mtime = self.EPOCH_1904 + int(time.time())
        backup_time = self.EPOCH_1904 + int(time.time())
        modnum = 0
        appinfo_offset = 0
        sort_offset = 0
        type = 'BOOK'
        creator = 'MOBI'
        id_seed = 36
        header = struct.pack('>32sHHII',
                             short_title, attributes, version,
                             ctime, mtime)
        header += struct.pack('>IIII', backup_time, modnum,
                              appinfo_offset, sort_offset)
        header += struct.pack('>4s4sI',
                              type, creator, id_seed)
        next_record = 0 # not used?
        header += struct.pack('>IH', next_record, num_records)
        return header, rec_offset

    def _GetExthHeader(self):
        # They set author, publisher, coveroffset, thumboffset
        data = {'author' : self._author,
                'publisher' : self._publisher,
                }
        # Turn string type names into EXTH typeids.
        r = []
        for key, value in data.items():
            typeid = EXTH_HEADER_FIELDS[key]
            length_encoding_len = 8
            r.append(struct.pack('>LL', typeid, len(value) + length_encoding_len,) + value)
        content = ''.join(r)
        # Pad to word boundary
        while len(content) % 4:
            content += '\0'
        TODO_mysterious = 12
        exth = 'EXTH' + struct.pack('>LL', len(content) + TODO_mysterious, len(data)) + content
        return exth

    def SetImageRecordIndex(self, idx):
        self._first_image_index = idx

    def MobiHeader(self):
        exth_header = self._GetExthHeader()
        palmdoc_header = self.PalmDocHeader()

        fs = 0xffffffff
        # Record 0
        header_len = 0xE4 # TODO
        mobi_type = 2 # BOOK
        text_encoding = encoding['UTF-8']
        unique_id = random.randint(1, 1<<32)
        creator_version = 4
        reserved = '%c' % 0xff * 40
        nonbook_index = fs
        full_name_offset = header_len + len(palmdoc_header) + len(exth_header) # put full name after header
        language = languages['en-us']
        unused = 0
        mobi_header = struct.pack('>4sIIIII40sIIIIII',
                                  'MOBI',
                                  header_len,
                                  mobi_type,
                                  text_encoding,
                                  unique_id,
                                  creator_version,
                                  reserved,
                                  nonbook_index,
                                  full_name_offset,
                                  len(self._title),
                                  language,
                                  fs, fs)
        assert len(mobi_header) == 104 - 16
        unknown_fields = chr(0) * 32
        drm_offset = 0
        drm_count = 0
        drm_size = 0
        drm_flags = 0
        exth_flags = 0x50
        header_end = chr(0) * 64
        mobi_header += struct.pack('>IIIIIII',
                                   creator_version,
                                   self._first_image_index,
                                   fs,
                                   unused,
                                   fs,
                                   unused,
                                   exth_flags)
        mobi_header += '\0' * 112 # TODO: Why this much padding?
        # Set some magic offsets to be 0xFFFFFFFF.
        for pos in (0x94, 0x98, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xdc):
            mobi_header = self._ReplaceWord(mobi_header, pos, fs)
        # 16 bytes?
        padding = '\0' * 48 * 4 # why?
        total_header = palmdoc_header + mobi_header + exth_header + self._title + padding
        return self.AddRecord(total_header, 0)

if __name__ == '__main__':
    import sys
    m = Converter(title='Testing Mobi', author='Mobi Author', publisher='mobi converter')
    m.ConvertFiles(sys.argv[1:], 'test.mobi')
    #m.ConvertFile(sys.argv[1], 'test.mobi')
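
MakeOneHTML treats the first string as the title page and builds the TOCTOP table of contents from the rest, so a caller drives Converter.ConvertStrings roughly as below. The HTML strings and output filename are made up for illustration; writer_mobi.py, later in this commit, is the real caller:

# Hedged sketch of driving Converter directly with in-memory HTML strings.
from fanficdownloader.mobi import Converter

title_page = '<html><head><title>A Test Story</title></head><body><h3>A Test Story</h3></body></html>'
chapters = ['<html><head><title>Chapter %d</title></head><body><h3>Chapter %d</h3><p>text</p></body></html>' % (i, i)
            for i in range(1, 4)]

conv = Converter(title='A Test Story', author='Somebody', publisher='test1.com')
# first string becomes the title page; the rest become TOC entries and bodies.
mobidata = conv.ConvertStrings([title_page] + chapters)
open('test_story.mobi', 'wb').write(mobidata)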

@@ -23,6 +23,7 @@ from fanficdownloader.exceptions import FailedToDownload
from writer_html import HTMLWriter
from writer_txt import TextWriter
from writer_epub import EpubWriter
from writer_mobi import MobiWriter
def getWriter(type,config,story):
if type == "html":
@@ -31,5 +32,7 @@ def getWriter(type,config,story):
return TextWriter(config,story)
if type == "epub":
return EpubWriter(config,story)
if type == "mobi":
return MobiWriter(config,story)
raise FailedToDownload("(%s) is not a supported download format."%type)

writer_mobi.py (new file, 196 lines)

@@ -0,0 +1,196 @@
# -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import logging
import string
import StringIO

from base_writer import *
from fanficdownloader.htmlcleanup import stripHTML
from fanficdownloader.mobi import Converter

class MobiWriter(BaseStoryWriter):

    @staticmethod
    def getFormatName():
        return 'mobi'

    @staticmethod
    def getFormatExt():
        return '.mobi'

    def __init__(self, config, story):
        BaseStoryWriter.__init__(self, config, story)

        self.MOBI_TITLE_PAGE_START = string.Template('''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>${title} by ${author}</title>
<link href="stylesheet.css" type="text/css" charset="UTF-8" rel="stylesheet"/>
</head>
<body>
<h3><a href="${storyUrl}">${title}</a> by <a href="${authorUrl}">${author}</a></h3>
<div>
''')

        self.MOBI_TITLE_ENTRY = string.Template('''
<b>${label}:</b> ${value}<br />
''')

        self.MOBI_TITLE_PAGE_END = string.Template('''
</div>
</body>
</html>
''')

        self.MOBI_TABLE_TITLE_PAGE_START = string.Template('''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>${title} by ${author}</title>
<link href="stylesheet.css" type="text/css" charset="UTF-8" rel="stylesheet"/>
</head>
<body>
<h3><a href="${storyUrl}">${title}</a> by <a href="${authorUrl}">${author}</a></h3>
<table class="full">
''')

        self.MOBI_TABLE_TITLE_ENTRY = string.Template('''
<tr><td><b>${label}:</b></td><td>${value}</td></tr>
''')

        self.MOBI_TABLE_TITLE_WIDE_ENTRY = string.Template('''
<tr><td colspan="2"><b>${label}:</b> ${value}</td></tr>
''')

        self.MOBI_TABLE_TITLE_PAGE_END = string.Template('''
</table>
</body>
</html>
''')

        self.MOBI_TOC_PAGE_START = string.Template('''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>${title} by ${author}</title>
<link href="stylesheet.css" type="text/css" charset="UTF-8" rel="stylesheet"/>
</head>
<body>
<div>
<h3>Table of Contents</h3>
''')

        self.MOBI_TOC_ENTRY = string.Template('''
<a href="file${index}.xhtml">${chapter}</a><br />
''')

        self.MOBI_TOC_PAGE_END = string.Template('''
</div>
</body>
</html>
''')

        self.MOBI_CHAPTER_START = string.Template('''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>${chapter}</title>
<link href="stylesheet.css" type="text/css" charset="UTF-8" rel="stylesheet"/>
</head>
<body>
<h3>${chapter}</h3>
''')

        self.MOBI_CHAPTER_END = string.Template('''
</body>
</html>
''')

    def getMetadata(self,key):
        return stripHTML(self.story.getMetadata(key))

    def writeStoryImpl(self, out):

        files = []

        # write title page.
        if self.getConfig("titlepage_use_table"):
            TITLE_PAGE_START = self.MOBI_TABLE_TITLE_PAGE_START
            TITLE_ENTRY = self.MOBI_TABLE_TITLE_ENTRY
            WIDE_TITLE_ENTRY = self.MOBI_TABLE_TITLE_WIDE_ENTRY
            TITLE_PAGE_END = self.MOBI_TABLE_TITLE_PAGE_END
        else:
            TITLE_PAGE_START = self.MOBI_TITLE_PAGE_START
            TITLE_ENTRY = self.MOBI_TITLE_ENTRY
            WIDE_TITLE_ENTRY = self.MOBI_TITLE_ENTRY # same, only wide in tables.
            TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END

        titlepageIO = StringIO.StringIO()
        self.writeTitlePage(out=titlepageIO,
                            START=TITLE_PAGE_START,
                            ENTRY=TITLE_ENTRY,
                            WIDE_ENTRY=WIDE_TITLE_ENTRY,
                            END=TITLE_PAGE_END)
        if titlepageIO.getvalue(): # will be false if no title page.
            files.append(titlepageIO.getvalue())
        titlepageIO.close()

        ## MOBI always has a TOC injected by mobi.py because there's
        ## no meta-data TOC.
        # # write toc page.
        # tocpageIO = StringIO.StringIO()
        # self.writeTOCPage(tocpageIO,
        #                   self.MOBI_TOC_PAGE_START,
        #                   self.MOBI_TOC_ENTRY,
        #                   self.MOBI_TOC_PAGE_END)
        # if tocpageIO.getvalue(): # will be false if no toc page.
        #     files.append(tocpageIO.getvalue())
        # tocpageIO.close()

        for index, (title,html) in enumerate(self.story.getChapters()):
            logging.debug('Writing chapter text for: %s' % title)
            fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1})
            # ffnet(& maybe others) gives the whole chapter text as
            # one line. This causes problems for nook(at least) when
            # the chapter size starts getting big (200k+)
            fullhtml = fullhtml.replace('</p>','</p>\n').replace('<br />','<br />\n')
            files.append(fullhtml.encode('utf-8'))
            del fullhtml

        c = Converter(title=self.getMetadata('title'),
                      author=self.getMetadata('author'),
                      publisher=self.getMetadata('site'))
        mobidata = c.ConvertStrings(files)
        out.write(mobidata)

        del files
        del mobidata

## Utility method for creating new tags.
def newTag(dom,name,attrs=None,text=None):
    tag = dom.createElement(name)
    if( attrs is not None ):
        for attr in attrs.keys():
            tag.setAttribute(attr,attrs[attr])
    if( text is not None ):
        tag.appendChild(dom.createTextNode(text))
    return tag
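
For context, this is roughly how the new writer plugs into the existing flow: writers.getWriter() from the hunk above dispatches 'mobi' to MobiWriter, and main.py (below) calls writer.writeStory(outbuffer). The wrapper function and the import path are assumptions for illustration, not code from this commit:

# Hedged sketch; config and adapter come from the surrounding app code.
import StringIO
from fanficdownloader import writers   # import path assumed

def write_mobi(config, adapter):
    # 'mobi' now dispatches to MobiWriter via writers.getWriter().
    writer = writers.getWriter('mobi', config, adapter)
    outbuffer = StringIO.StringIO()
    writer.writeStory(outbuffer)   # MobiWriter.writeStoryImpl feeds Converter
    return writer.getOutputFileName(), outbuffer.getvalue()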

@@ -57,7 +57,9 @@
Login/Password is now only asked for when required, as is 'Are you an Adult?'.
</p>
<p>
Mobi support (for Kindle) is only via EPub conversion in this version.
The same (rather crude) Mobi support (for Kindle) that we
had before is restored. Mobi via EPub conversion is still
available and provides better output.
</p>
<p>
If you have any problems with this new version, please
@@ -81,12 +83,12 @@
<input type='radio' name='format' value='epub' checked>EPub</input>
<input type='radio' name='format' value='html'>HTML</input>
<input type='radio' name='format' value='txt'>Plain Text</input>
<input type='radio' name='format' value='mobi'>Mobi(Kindle)</input>
</div>
<div>
<br />
<input type="submit" value="Download">
<p><i>For most readers, including Sony Reader, Nook and iPad, use EPub.</i></p>
<p><i>For Kindle and other Mobi readers, select EPub and use the Convert link when it's finished.</i></p>
</div>
</div>
<div id='typebox'>

main.py (74 lines changed)

@@ -158,6 +158,8 @@ class FileServer(webapp.RequestHandler):
self.response.headers['Content-Type'] = 'text/html'
elif name.endswith('.txt'):
self.response.headers['Content-Type'] = 'text/plain'
elif name.endswith('.mobi'):
self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook'
elif name.endswith('.zip'):
self.response.headers['Content-Type'] = 'application/zip'
else:
@@ -334,13 +336,14 @@ class FanfictionDownloader(UserConfigServer):
taskqueue.add(url='/fdowntask',
queue_name="download",
params={'format':format,
'url':download.url,
'login':login,
'password':password,
'user':user.email(),
'is_adult':is_adult})
params={'id':str(download.key()),
'format':format,
'url':download.url,
'login':login,
'password':password,
'user':user.email(),
'is_adult':is_adult})
logging.info("enqueued download key: " + str(download.key()))
except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e:
@@ -390,6 +393,7 @@ class FanfictionDownloaderTask(UserConfigServer):
def post(self):
logging.getLogger().setLevel(logging.DEBUG)
fileId = self.request.get('id')
format = self.request.get('format')
url = self.request.get('url')
login = self.request.get('login')
@@ -399,27 +403,42 @@ class FanfictionDownloaderTask(UserConfigServer):
user = users.User(self.request.get('user'))
logging.info("Downloading: " + url + " for user: "+user.nickname())
logging.info("ID: " + fileId)
adapter = None
writerClass = None
# use existing record if available.
q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
if( q is None or len(q) < 1 ):
download = DownloadMeta()
else:
download = q[0]
download.failure=None
download.date=datetime.datetime.now()
download.completed=False
for c in download.data_chunks:
c.delete()
if fileId:
try:
## try to get download rec from passed id first.
## may need to fall back to user/url/format during transition.
download = db.get(db.Key(fileId))
logging.info("DownloadMeta found by ID:"+fileId)
except:
pass
if not download:
# use existing record if available.
q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1)
if( q is None or len(q) < 1 ):
logging.info("New DownloadMeta")
download = DownloadMeta()
else:
logging.info("DownloadMeta found by user/url/format")
download = q[0]
## populate DownloadMeta, regardless of how found or created.
download.failure=None
download.date=datetime.datetime.now()
download.completed=False
download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID'])
download.user = user
download.url = url
download.format = format
for c in download.data_chunks:
c.delete()
download.put()
logging.info('Creating adapter...')
try:
@@ -441,21 +460,19 @@ class FanfictionDownloaderTask(UserConfigServer):
try:
# adapter.getStory() is what does all the heavy lifting.
writer = writers.getWriter(format,config,adapter)
download.name = writer.getOutputFileName()
logging.debug('output_filename:'+writer.getConfig('output_filename'))
logging.debug('getOutputFileName:'+writer.getOutputFileName())
download.title = adapter.getStory().getMetadata('title')
download.author = adapter.getStory().getMetadata('author')
download.url = adapter.getStory().getMetadata('storyUrl')
download.put()
except Exception, e:
logging.exception(e)
download.failure = unicode(e)
download.put()
return
download.name = writer.getOutputFileName()
logging.debug('output_filename:'+writer.getConfig('output_filename'))
logging.debug('getOutputFileName:'+writer.getOutputFileName())
download.title = adapter.getStory().getMetadata('title')
download.author = adapter.getStory().getMetadata('author')
download.url = adapter.getStory().getMetadata('storyUrl')
download.put()
index=0
outbuffer = StringIO.StringIO()
writer.writeStory(outbuffer)
data = outbuffer.getvalue()
@@ -474,6 +491,7 @@ class FanfictionDownloaderTask(UserConfigServer):
def c(data):
return data
index=0
while( len(data) > 0 ):
DownloadData(download=download,
index=index,

@@ -49,20 +49,20 @@
{% for fic in fics %}
<p>
{% if fic.completed %}
<a href="/file?id={{ fic.key }}">Download {{ fic.title }}</a>
by {{ fic.author }} ({{ fic.format }})<br/>
{% if fic.escaped_url %}
<a href="http://www.convertfiles.com/index.php?url={{ fic.escaped_url }}">Convert {{ fic.title }} to other formats</a><br />
<span class="recent"><a href="/file?id={{ fic.key }}">Download {{ fic.title }}</a></span>
by {{ fic.author }} ({{ fic.format }})
{% endif %}
{% if not fic.completed and not fic.failure %}
<span class="recent">Processing {{ fic.title }}</span>
by {{ fic.author }} ({{ fic.format }})
{% endif %}
{% if fic.failure %}
<div id='error'>{{ fic.failure }}</div>
<span id='error'>{{ fic.failure }}</span>
{% endif %}
{% if not fic.completed and not fic.failure %}
Request Processing...<br />
<a href="{{ fic.url }}" title="Link to original story">Source</a>
{% if fic.completed and fic.escaped_url %}
<a href="http://www.convertfiles.com/index.php?url={{ fic.escaped_url }}" title="Convert to other formats using Convertfiles.com">Convert</a>
{% endif %}
<small><a href="{{ fic.url }}">{{ fic.url }}</a></small>
</p>
{% endfor %}
</div>

@@ -44,29 +44,27 @@
<div id='urlbox'>
{% if fic.url %}
<div id='greeting'>
<p><a href='{{ fic.url }}'>{{ fic.url }}</a></p>
<p>
{% if fic.completed %}
<p>Your fic has finished processing and you can download it now.</p>
<span class="recent"><a href="/file?id={{ fic.key }}">Download {{ fic.title }}</a></span>
by {{ fic.author }} ({{ fic.format }})
{% endif %}
{% if fic.failure %}
<span id='error'>{{ fic.failure }}</span>
{% endif %}
{% if not fic.completed and not fic.failure %}
<span class="recent">Processing {{ fic.title }}</span>
by {{ fic.author }} ({{ fic.format }})
{% endif %}
<a href="{{ fic.url }}" title="Link to original story">Source</a>
{% if fic.completed and escaped_url %}
<a href="http://www.convertfiles.com/index.php?url={{ escaped_url }}" title="Convert to other formats using Convertfiles.com">Convert</a>
{% endif %}
</p>
</div>
{% endif %}
<div>
{% if fic.completed %}
<p>Your fic has finished processing and you can download it now:</p>
<p><a href="/file?id={{ fic.key }}">Download {{ fic.title }}</a>
by {{ fic.author }} ({{ fic.format }})</p>
{% if escaped_url %}
<p><a href="http://www.convertfiles.com/index.php?url={{ escaped_url }}">Convert {{ fic.title }} to other formats</a></p>
{% endif %}
{% else %}
{% if fic.failure %}
Your fic failed to process. Please check the URL and the error message below.<br />
<div id='error'>
{{ fic.failure }}
</div>
{% else %}
<p>Not done yet. This page will periodically poll to see if your story has finished.</p>
{% endif %}
{% endif %}
<p>Or see your personal list of <a href="/recent">previously downloaded fanfics</a>.</p>
</div>
<p>See your personal list of <a href="/recent">previously downloaded fanfics</a>.</p>
</div>
<div style='text-align: center'>
<img src="http://code.google.com/appengine/images/appengine-silver-120x30.gif"