diff --git a/app.yaml b/app.yaml index ae323c15..80d89e3d 100644 --- a/app.yaml +++ b/app.yaml @@ -1,6 +1,6 @@ # fanfictionloader ffd-retief application: fanfictionloader -version: 4-0-0 +version: 4-0-1 runtime: python api_version: 1 diff --git a/css/index.css b/css/index.css index d77f4246..eae546b7 100644 --- a/css/index.css +++ b/css/index.css @@ -13,7 +13,7 @@ body #greeting { - margin-bottom: 1em; +# margin-bottom: 1em; border-color: #efefef; } @@ -66,6 +66,8 @@ div.field #error { - font-size: small; color: #f00; } +.recent { + font-size: large; +} diff --git a/defaults.ini b/defaults.ini index e8bfdecb..10f4e731 100644 --- a/defaults.ini +++ b/defaults.ini @@ -108,7 +108,7 @@ extratags: FanFiction [txt] ## Add URLs since there aren't links. -titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, author URL, description +titlepage_entries: category,genre,status,datePublished,dateUpdated,dateCreated,rating,warnings,numChapters,numWords,site,storyUrl, authorUrl, description ## use \r\n for line endings, the windows convention. text output only. windows_eol: true @@ -132,6 +132,10 @@ titlepage_use_table: false ## When using tables, make these span both columns. wide_titlepage_entries: description, storyUrl, author URL +[mobi] +## mobi TOC cannot be turned off right now. +#include_tocpage: true + ## Each site has a section that overrides [defaults] *and* the format ## sections test1.com specifically is not a real story site. Instead, diff --git a/fanficdownloader/adapters/adapter_test1.py b/fanficdownloader/adapters/adapter_test1.py index 7b7e94c1..840d0930 100644 --- a/fanficdownloader/adapters/adapter_test1.py +++ b/fanficdownloader/adapters/adapter_test1.py @@ -16,6 +16,7 @@ # import datetime +import time import logging import fanficdownloader.BeautifulSoup as bs @@ -105,6 +106,10 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!" if self.story.getMetadata('storyId') == '667': raise exceptions.FailedToDownload("Error downloading Chapter: %s!" % url) + if self.story.getMetadata('storyId') == '670' and self.getConfig('slow_down_sleep_time'): + time.sleep(float(self.getConfig('slow_down_sleep_time'))) + + if "chapter=1" in url : text=u'''
@@ -116,6 +121,7 @@ Some more longer description. "I suck at summaries!" "Better than it sounds!"

 http://test1.com?sid=667 - raises FailedToDownload on chapter 1<br/>
 http://test1.com?sid=668 - raises FailedToLogin unless username='Me'<br/>
 http://test1.com?sid=669 - Succeeds with Updated Date=now<br/>
+http://test1.com?sid=670 - Succeeds, but applies slow_down_sleep_time<br/>
 And other storyId will succeed with the same output.
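sid=670 exercises the new slow_down_sleep_time handling added above. A minimal sketch of the pattern the adapter change implements, assuming the option arrives as a string from the .ini config (the standalone helper name here is illustrative, not from the repo):

```python
import time

def apply_slow_down(sleep_time):
    # slow_down_sleep_time comes back from getConfig() as a string;
    # an unset option is falsy and should mean "no delay at all".
    if sleep_time:
        time.sleep(float(sleep_time))

apply_slow_down('1.5')  # e.g. "slow_down_sleep_time: 1.5" in defaults.ini
apply_slow_down('')     # no-op when the option is absent
```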

''' diff --git a/fanficdownloader/html.py b/fanficdownloader/html.py new file mode 100644 index 00000000..e1ca7db5 --- /dev/null +++ b/fanficdownloader/html.py @@ -0,0 +1,126 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + +import re +import sys +import StringIO +import urllib + +from BeautifulSoup import BeautifulSoup + +class HtmlProcessor: + WHITESPACE_RE = re.compile(r'\s') + # Look for + BAD_TAG_RE = re.compile(r'<[^>]+<', re.MULTILINE) + + def __init__(self, html, unfill=0): + self.unfill = unfill + html = self._ProcessRawHtml(html) + self._soup = BeautifulSoup(html) + if self._soup.title: + self.title = self._soup.title.contents[0] + else: + self.title = None + + def _ProcessRawHtml(self, html): + new_html, count = HtmlProcessor.BAD_TAG_RE.subn('<', html) + if count: + print >>sys.stderr, 'Replaced %d bad tags' % count + return new_html + + def _StubInternalAnchors(self): + '''Replace each internal anchor with a fixed-size filepos anchor. + + Looks for every anchor with and replaces that + with . Stores anchors in self._anchor_references''' + self._anchor_references = [] + anchor_num = 0 + # anchor links + anchorlist = self._soup.findAll('a', href=re.compile('^#')) + # treat reference tags like a tags for TOCTOP. + anchorlist.extend(self._soup.findAll('reference', href=re.compile('^#'))) + for anchor in anchorlist: + self._anchor_references.append((anchor_num, anchor['href'])) + del anchor['href'] + anchor['filepos'] = '%.10d' % anchor_num + anchor_num += 1 + + def _ReplaceAnchorStubs(self): + # TODO: Browsers allow extra whitespace in the href names. + # use __str__ instead of prettify--it inserts extra spaces. + assembled_text = self._soup.__str__('utf8') + del self._soup # shouldn't touch this anymore + for anchor_num, original_ref in self._anchor_references: + ref = urllib.unquote(original_ref[1:]) # remove leading '#' + # Find the position of ref in the utf-8 document. + # TODO(chatham): Using regexes and looking for name= would be better. + newpos = assembled_text.rfind(ref.encode('utf-8')) + if newpos == -1: + print >>sys.stderr, 'Could not find anchor "%s"' % original_ref + continue + newpos += len(ref) + 2 # don't point into the middle of the tag + old_filepos = 'filepos="%.10d"' % anchor_num + new_filepos = 'filepos="%.10d"' % newpos + assert assembled_text.find(old_filepos) != -1 + assembled_text = assembled_text.replace(old_filepos, new_filepos, 1) + return assembled_text + + def _FixPreTags(self): + '''Replace
<pre> tags with HTML-ified text.'''
+    pres = self._soup.findAll('pre')
+    for pre in pres:
+      pre.replaceWith(self._FixPreContents(str(pre.contents[0])))
+
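The _FixPreContents method just below turns the text inside a <pre> into ordinary HTML: with unfill set, blank-line-separated paragraphs are rejoined with <p>, otherwise each newline becomes a <br>. A standalone sketch of the same logic, for reference:

```python
import re

WHITESPACE_RE = re.compile(r'\s')

def fix_pre_contents(text, unfill=0):
    # Replace each whitespace character within a line with a plain
    # space, then rejoin lines with an HTML break or paragraph tag.
    if unfill:
        line_splitter, line_joiner = '\n\n', '<p>'
    else:
        line_splitter, line_joiner = '\n', '<br>'
    lines = [WHITESPACE_RE.subn(' ', line)[0]
             for line in text.split(line_splitter)]
    return line_joiner.join(lines)

# fix_pre_contents('one\ntwo')             -> 'one<br>two'
# fix_pre_contents('a b\n\nc d', unfill=1) -> 'a b<p>c d'
```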
+  def _FixPreContents(self, text):
+    if self.unfill:
+      line_splitter = '\n\n'
+      line_joiner = '<p>'
+    else:
+      line_splitter = '\n'
+      line_joiner = '<br>
' + lines = [] + for line in text.split(line_splitter): + lines.append(self.WHITESPACE_RE.subn(' ', line)[0]) + return line_joiner.join(lines) + + def _RemoveUnsupported(self): + '''Remove any tags which the kindle cannot handle.''' + # TODO(chatham): tags to script? + unsupported_tags = ('script', 'style') + for tag_type in unsupported_tags: + for element in self._soup.findAll(tag_type): + element.extract() + + def RenameAnchors(self, prefix): + '''Rename every internal anchor to have the given prefix, then + return the contents of the body tag.''' + for anchor in self._soup.findAll('a', href=re.compile('^#')): + anchor['href'] = '#' + prefix + anchor['href'][1:] + for a in self._soup.findAll('a'): + if a.get('name'): + a['name'] = prefix + a['name'] + + # TODO(chatham): figure out how to fix this. sometimes body comes out + # as NoneType. + content = [] + if self._soup.body is not None: + content = [unicode(c) for c in self._soup.body.contents] + return '\n'.join(content) + + def CleanHtml(self): + # TODO(chatham): fix_html_br, fix_html + self._RemoveUnsupported() + self._StubInternalAnchors() + self._FixPreTags() + return self._ReplaceAnchorStubs() + + +if __name__ == '__main__': + FILE ='/tmp/documentation.html' + #FILE = '/tmp/multipre.html' + FILE = '/tmp/view.html' + import codecs + d = open(FILE).read() + h = HtmlProcessor(d) + s = h.CleanHtml() + #print s diff --git a/fanficdownloader/mobi.py b/fanficdownloader/mobi.py new file mode 100644 index 00000000..4748e202 --- /dev/null +++ b/fanficdownloader/mobi.py @@ -0,0 +1,384 @@ +#!/usr/bin/python +# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan + + +import StringIO +import struct +import time +import random +import logging + +from html import HtmlProcessor + +# http://wiki.mobileread.com/wiki/MOBI +# http://membres.lycos.fr/microfirst/palm/pdb.html + +encoding = { + 'UTF-8' : 65001, + 'latin-1' : 1252, +} + +languages = {"en-us" : 0x0409, + "sv" : 0x041d, + "fi" : 0x000b, + "en" : 0x0009, + "en-gb" : 0x0809} + +def ToHex(s): + v = ['%.2x' % ord(c) for c in s] + return ' '.join(v) + +class _SubEntry: + def __init__(self, pos, html_data): + self.pos = pos + self.html = HtmlProcessor(html_data) + self.title = self.html.title + self._name = 'mobi_article_%d' % pos + if not self.title: + self.title = 'Article %d' % self.pos + + def TocLink(self): + return '
%.80s' % (self._name, self.title) + + def Anchor(self): + return '' % self._name + + def Body(self): + return self.html.RenameAnchors(self._name + '_') + +class Converter: + def __init__(self, refresh_url='', title='Unknown', author='Unknown', publisher='Unknown'): + self._header = Header() + self._header.SetTitle(title) + self._header.SetAuthor(author) + self._header.SetPublisher(publisher) + self._refresh_url = refresh_url + + def ConvertString(self, s): + out = StringIO.StringIO() + self._ConvertStringToFile(s, out) + return out.getvalue() + + def ConvertStrings(self, html_strs): + out = StringIO.StringIO() + self._ConvertStringsToFile(html_strs, out) + return out.getvalue() + + def ConvertFile(self, html_file, out_file): + self._ConvertStringToFile(open(html_file,'rb').read(), + open(out_file, 'wb')) + + def ConvertFiles(self, html_files, out_file): + html_strs = [open(f,'rb').read() for f in html_files] + self._ConvertStringsToFile(html_strs, open(out_file, 'wb')) + + def MakeOneHTML(self, html_strs): + """This takes a list of HTML strings and returns a big HTML file with + all contents consolidated. It constructs a table of contents and adds + anchors within the text + """ + title_html = [] + toc_html = [] + body_html = [] + + PAGE_BREAK = '' + + # pull out the title page, assumed first html_strs. + htmltitle = html_strs[0] + entrytitle = _SubEntry(1, htmltitle) + title_html.append(entrytitle.Body()) + + title_html.append(PAGE_BREAK) + toc_html.append('

Table of Contents


') + + for pos, html in enumerate(html_strs[1:]): + entry = _SubEntry(pos+1, html) + toc_html.append('%s
' % entry.TocLink()) + + # give some space between bodies of work. + body_html.append(PAGE_BREAK) + + body_html.append(entry.Anchor()) + + body_html.append(entry.Body()) + + # TODO: this title can get way too long with RSS feeds. Not sure how to fix + # cheat slightly and use the
code to set filepos in references. + header = ''' + +Bibliorize %s GMT + + + + + +''' % time.ctime(time.time()) + + footer = '' + all_html = header + '\n'.join(title_html + toc_html + body_html) + footer + #print "%s" % all_html.encode('utf8') + return all_html + + def _ConvertStringsToFile(self, html_strs, out_file): + try: + tmp = self.MakeOneHTML(html_strs) + self._ConvertStringToFile(tmp, out_file) + except Exception, e: + logging.error('Error %s', e) + logging.debug('Details: %s' % html_strs) + + def _ConvertStringToFile(self, html_data, out): + html = HtmlProcessor(html_data) + data = html.CleanHtml() + + # collect offsets of '' tags, use to make index list. + # indexlist = [] # list of (offset,length) tuples. + # not in current use. + + # j=0 + # lastj=0 + # while True: + # j=data.find('',lastj+10) # plus a bit so we find the next. + # if j < 0: + # break + # indexlist.append((lastj,j-lastj)) + # print "index offset: %d length: %d" % (lastj,j-lastj) + # lastj=j + + records = [] +# title = html.title +# if title: +# self._header.SetTitle(title) + record_id = 1 + for start_pos in range(0, len(data), Record.MAX_SIZE): + end = min(len(data), start_pos + Record.MAX_SIZE) + record_data = data[start_pos:end] + records.append(self._header.AddRecord(record_data, record_id)) + #print "HTML Record %03d: (size:%d) [[%s ... %s]]" % ( record_id, len(record_data), record_data[:20], record_data[-20:] ) + record_id += 1 + self._header.SetImageRecordIndex(record_id) + records[0:0] = [self._header.MobiHeader()] + + header, rec_offset = self._header.PDBHeader(len(records)) + out.write(header) + for record in records: + record.WriteHeader(out, rec_offset) + #print "rec_offset: %d len(record.data): %d" % (rec_offset,len(record.data)) + rec_offset += (len(record.data)+1) # plus one for trailing null + + # Write to nuls for some reason + out.write('\0\0') + for record in records: + record.WriteData(out) + out.write('\0') + # needs a trailing null, I believe it indicates zero length 'overlap'. + # otherwise, the readers eat the last char of each html record. + # Calibre writes another 6-7 bytes of stuff after that, but we seem + # to be getting along without it. + +class Record: + MAX_SIZE = 4096 + INDEX_LEN = 8 + _unique_id_seed = 28 # should be arbitrary, but taken from MobiHeader + + # TODO(chatham): Record compression doesn't look that hard. + + def __init__(self, data, record_id): + assert len(data) <= self.MAX_SIZE + self.data = data + if record_id != 0: + self._id = record_id + else: + Record._unique_id_seed += 1 + self._id = 0 + + def __repr__(self): + return 'Record: id=%d len=%d' % (self._id, len(self.data)) + + def _SetUniqueId(self): + Record._unique_id_seed += 1 + # TODO(chatham): Wraparound crap + self._id = Record._unique_id_seed + + def WriteData(self, out): + out.write(self.data) + + def WriteHeader(self, out, rec_offset): + attributes = 64 # dirty? + header = struct.pack('>IbbH', + rec_offset, + attributes, + 0, self._id) + assert len(header) == Record.INDEX_LEN + out.write(header) + +EXTH_HEADER_FIELDS = { + 'author' : 100, + 'publisher' : 101, +} + +class Header: + EPOCH_1904 = 2082844800 + + def __init__(self): + self._length = 0 + self._record_count = 0 + self._title = '2008_2_34' + self._author = 'Unknown author' + self._publisher = 'Unknown publisher' + self._first_image_index = 0 + + def SetAuthor(self, author): + self._author = author.encode('ascii','ignore') + + def SetTitle(self, title): + # TODO(chatham): Reevaluate whether this needs to be ASCII. 
+ # maybe just do sys.setdefaultencoding('utf-8')? Problems + # appending self._title with other things. + self._title = title.encode('ascii','ignore') + + def SetPublisher(self, publisher): + self._publisher = publisher.encode('ascii','ignore') + + def AddRecord(self, data, record_id): + self.max_record_size = max(Record.MAX_SIZE, len(data)) + self._record_count += 1 + self._length += len(data) + return Record(data, record_id) + + def _ReplaceWord(self, data, pos, word): + return data[:pos] + struct.pack('>I', word) + data[pos+4:] + + def PalmDocHeader(self): + compression = 1 # no compression + unused = 0 + encryption_type = 0 # no ecryption + records = self._record_count + 1 # the header record itself + palmdoc_header = struct.pack('>HHIHHHH', + compression, + unused, + self._length, + records, + Record.MAX_SIZE, + encryption_type, + unused) + assert len(palmdoc_header) == 16 + return palmdoc_header + + def PDBHeader(self, num_records): + HEADER_LEN = 32+2+2+9*4 + RECORD_INDEX_HEADER_LEN = 6 + RESOURCE_INDEX_LEN = 10 + + index_len = RECORD_INDEX_HEADER_LEN + num_records * Record.INDEX_LEN + rec_offset = HEADER_LEN + index_len + 2 + + short_title = self._title[0:31] + attributes = 0 + version = 0 + ctime = self.EPOCH_1904 + int(time.time()) + mtime = self.EPOCH_1904 + int(time.time()) + backup_time = self.EPOCH_1904 + int(time.time()) + modnum = 0 + appinfo_offset = 0 + sort_offset = 0 + type = 'BOOK' + creator = 'MOBI' + id_seed = 36 + header = struct.pack('>32sHHII', + short_title, attributes, version, + ctime, mtime) + header += struct.pack('>IIII', backup_time, modnum, + appinfo_offset, sort_offset) + header += struct.pack('>4s4sI', + type, creator, id_seed) + next_record = 0 # not used? + header += struct.pack('>IH', next_record, num_records) + return header, rec_offset + + def _GetExthHeader(self): + # They set author, publisher, coveroffset, thumboffset + data = {'author' : self._author, + 'publisher' : self._publisher, + } + # Turn string type names into EXTH typeids. 
+ r = [] + for key, value in data.items(): + typeid = EXTH_HEADER_FIELDS[key] + length_encoding_len = 8 + r.append(struct.pack('>LL', typeid, len(value) + length_encoding_len,) + value) + content = ''.join(r) + + # Pad to word boundary + while len(content) % 4: + content += '\0' + TODO_mysterious = 12 + exth = 'EXTH' + struct.pack('>LL', len(content) + TODO_mysterious, len(data)) + content + return exth + + def SetImageRecordIndex(self, idx): + self._first_image_index = idx + + def MobiHeader(self): + exth_header = self._GetExthHeader(); + palmdoc_header = self.PalmDocHeader() + + fs = 0xffffffff + + # Record 0 + header_len = 0xE4 # TODO + mobi_type = 2 # BOOK + text_encoding = encoding['UTF-8'] + unique_id = random.randint(1, 1<<32) + creator_version = 4 + reserved = '%c' % 0xff * 40 + nonbook_index = fs + full_name_offset = header_len + len(palmdoc_header) + len(exth_header) # put full name after header + language = languages['en-us'] + unused = 0 + mobi_header = struct.pack('>4sIIIII40sIIIIII', + 'MOBI', + header_len, + mobi_type, + text_encoding, + unique_id, + creator_version, + reserved, + nonbook_index, + full_name_offset, + len(self._title), + language, + fs, fs) + assert len(mobi_header) == 104 - 16 + + unknown_fields = chr(0) * 32 + drm_offset = 0 + drm_count = 0 + drm_size = 0 + drm_flags = 0 + exth_flags = 0x50 + header_end = chr(0) * 64 + mobi_header += struct.pack('>IIIIIII', + creator_version, + self._first_image_index, + fs, + unused, + fs, + unused, + exth_flags) + mobi_header += '\0' * 112 # TODO: Why this much padding? + # Set some magic offsets to be 0xFFFFFFF. + for pos in (0x94, 0x98, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xdc): + mobi_header = self._ReplaceWord(mobi_header, pos, fs) + + # 16 bytes? + padding = '\0' * 48 * 4 # why? + total_header = palmdoc_header + mobi_header + exth_header + self._title + padding + + return self.AddRecord(total_header, 0) + +if __name__ == '__main__': + import sys + m = Converter(title='Testing Mobi', author='Mobi Author', publisher='mobi converter') + m.ConvertFiles(sys.argv[1:], 'test.mobi') + #m.ConvertFile(sys.argv[1], 'test.mobi') diff --git a/fanficdownloader/writers/__init__.py b/fanficdownloader/writers/__init__.py index 19b4f42e..7a3f7032 100644 --- a/fanficdownloader/writers/__init__.py +++ b/fanficdownloader/writers/__init__.py @@ -23,6 +23,7 @@ from fanficdownloader.exceptions import FailedToDownload from writer_html import HTMLWriter from writer_txt import TextWriter from writer_epub import EpubWriter +from writer_mobi import MobiWriter def getWriter(type,config,story): if type == "html": @@ -31,5 +32,7 @@ def getWriter(type,config,story): return TextWriter(config,story) if type == "epub": return EpubWriter(config,story) + if type == "mobi": + return MobiWriter(config,story) raise FailedToDownload("(%s) is not a supported download format."%type) diff --git a/fanficdownloader/writers/writer_mobi.py b/fanficdownloader/writers/writer_mobi.py new file mode 100644 index 00000000..4dd98c6b --- /dev/null +++ b/fanficdownloader/writers/writer_mobi.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- + +# Copyright 2011 Fanficdownloader team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import string +import StringIO + +from base_writer import * +from fanficdownloader.htmlcleanup import stripHTML +from fanficdownloader.mobi import Converter + +class MobiWriter(BaseStoryWriter): + + @staticmethod + def getFormatName(): + return 'mobi' + + @staticmethod + def getFormatExt(): + return '.mobi' + + def __init__(self, config, story): + BaseStoryWriter.__init__(self, config, story) + + self.MOBI_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

${title} by ${author}

+
+''') + + self.MOBI_TITLE_ENTRY = string.Template(''' +${label}: ${value}
+''') + + self.MOBI_TITLE_PAGE_END = string.Template(''' +
+ + + +''') + + self.MOBI_TABLE_TITLE_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +

${title} by ${author}

+ +''') + + self.MOBI_TABLE_TITLE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_TITLE_WIDE_ENTRY = string.Template(''' + +''') + + self.MOBI_TABLE_TITLE_PAGE_END = string.Template(''' +
${label}:${value}
${label}: ${value}
+ + + +''') + + self.MOBI_TOC_PAGE_START = string.Template(''' + + + +${title} by ${author} + + + +
+

Table of Contents

+''') + + self.MOBI_TOC_ENTRY = string.Template(''' +${chapter}
+''') + + self.MOBI_TOC_PAGE_END = string.Template(''' +
+ + +''') + + self.MOBI_CHAPTER_START = string.Template(''' + + + +${chapter} + + + +

${chapter}

+''') + + self.MOBI_CHAPTER_END = string.Template(''' + + +''') + + def getMetadata(self,key): + return stripHTML(self.story.getMetadata(key)) + + def writeStoryImpl(self, out): + + files = [] + + # write title page. + if self.getConfig("titlepage_use_table"): + TITLE_PAGE_START = self.MOBI_TABLE_TITLE_PAGE_START + TITLE_ENTRY = self.MOBI_TABLE_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.MOBI_TABLE_TITLE_WIDE_ENTRY + TITLE_PAGE_END = self.MOBI_TABLE_TITLE_PAGE_END + else: + TITLE_PAGE_START = self.MOBI_TITLE_PAGE_START + TITLE_ENTRY = self.MOBI_TITLE_ENTRY + WIDE_TITLE_ENTRY = self.MOBI_TITLE_ENTRY # same, only wide in tables. + TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END + + titlepageIO = StringIO.StringIO() + self.writeTitlePage(out=titlepageIO, + START=TITLE_PAGE_START, + ENTRY=TITLE_ENTRY, + WIDE_ENTRY=WIDE_TITLE_ENTRY, + END=TITLE_PAGE_END) + if titlepageIO.getvalue(): # will be false if no title page. + files.append(titlepageIO.getvalue()) + titlepageIO.close() + + ## MOBI always has a TOC injected by mobi.py because there's + ## no meta-data TOC. + # # write toc page. + # tocpageIO = StringIO.StringIO() + # self.writeTOCPage(tocpageIO, + # self.MOBI_TOC_PAGE_START, + # self.MOBI_TOC_ENTRY, + # self.MOBI_TOC_PAGE_END) + # if tocpageIO.getvalue(): # will be false if no toc page. + # files.append(tocpageIO.getvalue()) + # tocpageIO.close() + + for index, (title,html) in enumerate(self.story.getChapters()): + logging.debug('Writing chapter text for: %s' % title) + fullhtml = self.MOBI_CHAPTER_START.substitute({'chapter':title, 'index':index+1}) + html + self.MOBI_CHAPTER_END.substitute({'chapter':title, 'index':index+1}) + # ffnet(& maybe others) gives the whole chapter text as + # one line. This causes problems for nook(at least) when + # the chapter size starts getting big (200k+) + fullhtml = fullhtml.replace('

</p>','</p>\n').replace('<br/>','<br/>
\n') + files.append(fullhtml.encode('utf-8')) + del fullhtml + + c = Converter(title=self.getMetadata('title'), + author=self.getMetadata('author'), + publisher=self.getMetadata('site')) + mobidata = c.ConvertStrings(files) + out.write(mobidata) + + del files + del mobidata + +## Utility method for creating new tags. +def newTag(dom,name,attrs=None,text=None): + tag = dom.createElement(name) + if( attrs is not None ): + for attr in attrs.keys(): + tag.setAttribute(attr,attrs[attr]) + if( text is not None ): + tag.appendChild(dom.createTextNode(text)) + return tag + diff --git a/index.html b/index.html index e96b9f2b..603060a6 100644 --- a/index.html +++ b/index.html @@ -57,7 +57,9 @@ Login/Password is only asked for when required now, as is 'Are you an Adult?' where required.

-Mobi support (for Kindle) is only via EPub conversion in this version.
+The same (rather crude) Mobi support (for Kindle) that we
+had before is restored. Mobi via EPub conversion is still
+available and provides better output.

 If you have any problems with this new version, please
@@ -81,12 +83,12 @@
 EPub
 HTML
 Plain Text
+Mobi(Kindle)


For most readers, including Sony Reader, Nook and iPad, use EPub.

-For Kindle and other Mobi readers, select EPub and use the Convert link when it's finished.
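Between the UI change above and the main.py changes below, the glue is the new branch in writers/__init__.py (earlier in this diff). A sketch of the call site, assuming a loaded config and a populated adapter as main.py has them:

```python
from fanficdownloader import writers

def save_story(format, config, adapter, path):
    # "mobi" now joins "epub", "html" and "txt"; anything else still
    # raises FailedToDownload from writers.getWriter().
    writer = writers.getWriter(format, config, adapter)
    out = open(path, 'wb')
    try:
        writer.writeStory(out)  # same entry point the GAE task handler uses
    finally:
        out.close()

# e.g. save_story('mobi', config, adapter, 'story.mobi')
```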

diff --git a/main.py b/main.py index fe945714..7496909b 100644 --- a/main.py +++ b/main.py @@ -158,6 +158,8 @@ class FileServer(webapp.RequestHandler): self.response.headers['Content-Type'] = 'text/html' elif name.endswith('.txt'): self.response.headers['Content-Type'] = 'text/plain' + elif name.endswith('.mobi'): + self.response.headers['Content-Type'] = 'application/x-mobipocket-ebook' elif name.endswith('.zip'): self.response.headers['Content-Type'] = 'application/zip' else: @@ -334,13 +336,14 @@ class FanfictionDownloader(UserConfigServer): taskqueue.add(url='/fdowntask', queue_name="download", - params={'format':format, - 'url':download.url, - 'login':login, - 'password':password, - 'user':user.email(), - 'is_adult':is_adult}) - + params={'id':str(download.key()), + 'format':format, + 'url':download.url, + 'login':login, + 'password':password, + 'user':user.email(), + 'is_adult':is_adult}) + logging.info("enqueued download key: " + str(download.key())) except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e: @@ -390,6 +393,7 @@ class FanfictionDownloaderTask(UserConfigServer): def post(self): logging.getLogger().setLevel(logging.DEBUG) + fileId = self.request.get('id') format = self.request.get('format') url = self.request.get('url') login = self.request.get('login') @@ -399,27 +403,42 @@ class FanfictionDownloaderTask(UserConfigServer): user = users.User(self.request.get('user')) logging.info("Downloading: " + url + " for user: "+user.nickname()) + logging.info("ID: " + fileId) adapter = None writerClass = None - # use existing record if available. - q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) - if( q is None or len(q) < 1 ): - download = DownloadMeta() - else: - download = q[0] - download.failure=None - download.date=datetime.datetime.now() - download.completed=False - for c in download.data_chunks: - c.delete() - + if fileId: + try: + ## try to get download rec from passed id first. + ## may need to fall back to user/url/format during transition. + download = db.get(db.Key(fileId)) + logging.info("DownloadMeta found by ID:"+fileId) + except: + pass + + if not download: + # use existing record if available. + q = DownloadMeta.all().filter('user =', user).filter('url =',url).filter('format =',format).fetch(1) + if( q is None or len(q) < 1 ): + logging.info("New DownloadMeta") + download = DownloadMeta() + else: + logging.info("DownloadMeta found by user/url/format") + download = q[0] + + ## populate DownloadMeta, regardless of how found or created. + download.failure=None + download.date=datetime.datetime.now() + download.completed=False download.version = "%s:%s" % (os.environ['APPLICATION_ID'],os.environ['CURRENT_VERSION_ID']) download.user = user download.url = url download.format = format + for c in download.data_chunks: + c.delete() download.put() + logging.info('Creating adapter...') try: @@ -441,21 +460,19 @@ class FanfictionDownloaderTask(UserConfigServer): try: # adapter.getStory() is what does all the heavy lifting. 
writer = writers.getWriter(format,config,adapter) + download.name = writer.getOutputFileName() + logging.debug('output_filename:'+writer.getConfig('output_filename')) + logging.debug('getOutputFileName:'+writer.getOutputFileName()) + download.title = adapter.getStory().getMetadata('title') + download.author = adapter.getStory().getMetadata('author') + download.url = adapter.getStory().getMetadata('storyUrl') + download.put() except Exception, e: logging.exception(e) download.failure = unicode(e) download.put() return - download.name = writer.getOutputFileName() - logging.debug('output_filename:'+writer.getConfig('output_filename')) - logging.debug('getOutputFileName:'+writer.getOutputFileName()) - download.title = adapter.getStory().getMetadata('title') - download.author = adapter.getStory().getMetadata('author') - download.url = adapter.getStory().getMetadata('storyUrl') - download.put() - index=0 - outbuffer = StringIO.StringIO() writer.writeStory(outbuffer) data = outbuffer.getvalue() @@ -474,6 +491,7 @@ class FanfictionDownloaderTask(UserConfigServer): def c(data): return data + index=0 while( len(data) > 0 ): DownloadData(download=download, index=index, diff --git a/recent.html b/recent.html index 2db3ac92..2bd3fdb1 100644 --- a/recent.html +++ b/recent.html @@ -49,20 +49,20 @@ {% for fic in fics %}

 {% if fic.completed %}
-Download {{ fic.title }}
-by {{ fic.author }} ({{ fic.format }})
-{% if fic.escaped_url %}
-Convert {{ fic.title }} to other formats
+Download {{ fic.title }}
+by {{ fic.author }} ({{ fic.format }})
 {% endif %}
+{% if not fic.completed and not fic.failure %}
+Processing {{ fic.title }}
+by {{ fic.author }} ({{ fic.format }})
 {% endif %}
 {% if fic.failure %}
-{{ fic.failure }}
+{{ fic.failure }}
 {% endif %}
-{% if not fic.completed and not fic.failure %}
-Request Processing...
+Source
+{% if fic.completed and fic.escaped_url %}
+Convert
 {% endif %}
-{{ fic.url }}
-
 {% endfor %}
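The main.py change above threads the datastore key of the DownloadMeta row through the task queue, so the worker updates the exact record the enqueuing request created and only falls back to the old user/url/format probe during the transition. The pattern, reduced to a sketch against the DownloadMeta model defined in main.py (note download must be initialized before the try, otherwise a failed db.get() leaves it undefined):

```python
from google.appengine.ext import db

def find_download(fileId, user, url, format):
    download = None  # must exist even when db.get() below fails
    if fileId:
        try:
            # Prefer the exact record the enqueuing request created.
            download = db.get(db.Key(fileId))
        except:
            pass  # fall through to the legacy probe
    if not download:
        q = DownloadMeta.all().filter('user =', user) \
                              .filter('url =', url) \
                              .filter('format =', format).fetch(1)
        if q:
            download = q[0]
        else:
            download = DownloadMeta()  # brand-new request
    return download
```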
diff --git a/status.html b/status.html index 58c5b5e0..58325301 100644 --- a/status.html +++ b/status.html @@ -44,29 +44,27 @@
 {% if fic.url %}
-{{ fic.url }}
+
+{% if fic.completed %}
+Your fic has finished processing and you can download it now.
+Download {{ fic.title }}
+by {{ fic.author }} ({{ fic.format }})
+{% endif %}
+{% if fic.failure %}
+{{ fic.failure }}
+{% endif %}
+{% if not fic.completed and not fic.failure %}
+Processing {{ fic.title }}
+by {{ fic.author }} ({{ fic.format }})
+{% endif %}
+Source
+{% if fic.completed and escaped_url %}
+Convert
+{% endif %}
+
 {% endif %}
-
-{% if fic.completed %}
-Your fic has finished processing and you can download it now:
-Download {{ fic.title }}
-by {{ fic.author }} ({{ fic.format }})
-{% if escaped_url %}
-Convert {{ fic.title }} to other formats
-{% endif %}
-{% else %}
-{% if fic.failure %}
-Your fic failed to process. Please check the URL and the error message below.
-{{ fic.failure }}
-{% else %}
-Not done yet. This page will periodically poll to see if your story has finished.
-{% endif %}
-{% endif %}
-Or see your personal list of previously downloaded fanfics.
-
+See your personal list of previously downloaded fanfics.
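Taken together: the writer renders a title page plus one HTML string per chapter, and mobi.Converter stitches them (the first string becomes the title page, each later one gets a generated TOC entry) before packing everything into 4096-byte PDB records. Driving it directly, in the spirit of mobi.py's own __main__ block — a sketch with made-up content:

```python
from fanficdownloader.mobi import Converter

# html_strs[0] is treated as the title page by MakeOneHTML();
# every later entry becomes a TOC entry plus a body section.
html_strs = [
    '<html><head><title>My Story</title></head>'
    '<body><h3>My Story by Someone</h3></body></html>',
    '<html><head><title>Chapter 1</title></head>'
    '<body><p>Once upon a time...</p></body></html>',
]

c = Converter(title='My Story', author='Someone', publisher='test1.com')
mobidata = c.ConvertStrings(html_strs)
open('story.mobi', 'wb').write(mobidata)
```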