FanFicFare/output.py
2009-12-18 18:56:44 +00:00

252 lines
6.2 KiB
Python

# -*- coding: utf-8 -*-
import os
import re
import sys
import cgi
import uuid
import codecs
import shutil
import string
import base64
import os.path
import zipfile
import StringIO
import logging
import urllib as u
import pprint as pp
import urllib2 as u2
import urlparse as up
import BeautifulSoup as bs
import htmlentitydefs as hdefs
import zipdir
import html_constants
from constants import *
class FanficWriter:
def __init__(self):
pass
def writeChapter(self, title, text):
pass
def finalise(self):
pass
class HTMLWriter(FanficWriter):
body = ''
def __init__(self, base, name, author, inmemory=False, compress=False):
self.basePath = base
self.name = name.replace(" ", "_")
self.storyTitle = name
self.fileName = self.basePath + '/' + self.name + '.html'
self.authorName = author
self.inmemory = inmemory
if not self.inmemory and os.path.exists(self.fileName):
os.remove(self.fileName)
if self.inmemory:
self.output = StringIO.StringIO()
else:
self.output = open(self.fileName, 'w')
self.xhtmlTemplate = string.Template(html_constants.XHTML_START)
self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START)
def writeChapter(self, title, text):
title = title.decode('utf-8')
text = text.decode('utf-8')
self.body = self.body + '\n' + self.chapterStartTemplate.substitute({'chapter' : title})
self.body = self.body + '\n' + text
def finalise(self):
html = self.xhtmlTemplate.substitute({'title' : self.storyTitle, 'author' : self.authorName, 'body' : self.body})
soup = bs.BeautifulSoup(html)
result = soup.prettify()
# f = open(self.fileName, 'w')
# f.write(result)
# f.close()
self.output.write(result)
if not self.inmemory:
self.output.close()
class EPubFanficWriter(FanficWriter):
chapters = []
files = {}
def _writeFile(self, fileName, data):
if fileName in self.files:
self.files[fileName].write(data.decode('utf-8'))
else:
if self.inmemory:
self.files[fileName] = StringIO.StringIO()
else:
self.files[fileName] = open(self.directory + '/' + fileName, 'w')
self._writeFile(fileName, data)
def _closeFiles(self):
if not self.inmemory:
for f in self.files:
self.files[f].close()
def __init__(self, base, name, author, inmemory=False, compress=True):
self.basePath = base
self.name = name.replace(" ", "_")
self.storyTitle = name
self.directory = self.basePath + '/' + self.name
self.inmemory = inmemory
self.authorName = author
self.files = {}
self.chapters = []
if not self.inmemory:
self.inmemory = True
self.writeToFile = True
else:
self.writeToFile = False
if not self.inmemory:
if os.path.exists(self.directory):
shutil.rmtree(self.directory)
os.mkdir(self.directory)
os.mkdir(self.directory + '/META-INF')
os.mkdir(self.directory + '/OEBPS')
# print >> codecs.open(self.directory + '/mimetype', 'w', 'utf-8'), MIMETYPE
# print >> codecs.open(self.directory + '/META-INF/container.xml', 'w', 'utf-8'), CONTAINER
# print >> codecs.open(self.directory + '/OEBPS/stylesheet.css', 'w', 'utf-8'), CSS
self._writeFile('mimetype', MIMETYPE)
self._writeFile('META-INF/container.xml', CONTAINER)
self._writeFile('OEBPS/stylesheet.css', CSS)
def _removeEntities(self, text):
for e in entities:
v = entities[e]
text = text.replace(e, v)
text = text.replace('&', '&')
return text
def writeChapter(self, title, text):
fileName = base64.b64encode(title).replace('/', '_') + ".xhtml"
filePath = self.directory + "/OEBPS/" + fileName
fn = 'OEBPS/' + fileName
# f = open(filePath, 'w')
text = self._removeEntities(text)
self.soup = bs.BeautifulStoneSoup(text)
allTags = self.soup.findAll(recursive=True)
for t in allTags:
for attr in t._getAttrMap().keys():
if attr not in acceptable_attributes:
del t[attr]
allPs = self.soup.findAll(recursive=True)
for p in allPs:
if p.string != None and (len(p.string.strip()) == 0 or p.string.strip() == ' ' ) :
p.extract()
allBrs = self.soup.findAll(recursive=True, name = ["br", "hr", 'div'])
for br in allBrs:
if (br.string != None and len(br.string.strip()) != 0) or (br.contents != None):
br.name = 'p'
# cleanup(self.soup )
text = self.soup.prettify()
tt = self._removeEntities(title)
self._writeFile(fn, XHTML_START % (tt, tt))
self._writeFile(fn, text)
self._writeFile(fn, XHTML_END)
# print >> f, XHTML_START % (tt, tt)
# f.write(text)
# print >> f, XHTML_END
self.chapters.append((title, fileName))
def finalise(self):
logging.debug("Finalising...")
### writing table of contents -- ncx file
tocFilePath = "OEBPS/toc.ncx"
# toc = open(tocFilePath, 'w')
# print >> toc, TOC_START % self.storyTitle
self._writeFile(tocFilePath, TOC_START % self.storyTitle)
### writing content -- opf file
opfFilePath = "OEBPS/content.opf"
# opf = open(opfFilePath, 'w')
self._writeFile(opfFilePath, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName))
# print >> opf, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName)
ids = []
i = 0
for t,f in self.chapters:
chapterId = base64.b64encode(t)
# print >> toc, TOC_ITEM % (chapterId, i, cgi.escape(t), f)
self._writeFile(tocFilePath, TOC_ITEM % (chapterId, i, cgi.escape(t), f))
# print >> opf, CONTENT_ITEM % (chapterId, f)
self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f))
ids.append(chapterId)
i = i + 1
# logging.d('Toc and refs printed, proceesing to ref-ids....')
# print >> toc, TOC_END
# print >> opf, CONTENT_END_MANIFEST
self._writeFile(tocFilePath, TOC_END)
self._writeFile(opfFilePath, CONTENT_END_MANIFEST)
for chapterId in ids:
# print >> opf, CONTENT_ITEMREF % chapterId
self._writeFile(opfFilePath, CONTENT_ITEMREF % chapterId)
# print >> opf, CONTENT_END
self._writeFile(opfFilePath, CONTENT_END)
# opf.close()
# toc.close()
# print('Finished')
self._closeFiles()
filename = self.directory + '.epub'
zipdata = zipdir.inMemoryZip(self.files)
if self.writeToFile:
f = open(filename, 'w')
f.write(zipdata.getvalue())
f.close()
else:
self.output = zipdata
# zipdir.toZip(filename, self.directory)