mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 08:52:55 +01:00
281 lines
7 KiB
Python
281 lines
7 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import cgi
|
|
import uuid
|
|
import codecs
|
|
import shutil
|
|
import string
|
|
import base64
|
|
import os.path
|
|
import zipfile
|
|
import StringIO
|
|
import logging
|
|
import hashlib
|
|
import urllib as u
|
|
import pprint as pp
|
|
import urllib2 as u2
|
|
import urlparse as up
|
|
import BeautifulSoup as bs
|
|
import htmlentitydefs as hdefs
|
|
|
|
import zipdir
|
|
import html_constants
|
|
from constants import *
|
|
|
|
import html2text
|
|
|
|
|
|
class FanficWriter:
    """Common interface for the story writers in this module.

    Each method here does nothing and returns None; concrete writers
    supply their own versions of all three.
    """

    def __init__(self):
        """Create a writer. The base class stores no state."""

    def writeChapter(self, title, text):
        """Accept one chapter (`title` and `text`). No-op in the base class."""

    def finalise(self):
        """Finish the output. No-op in the base class."""
|
|
|
|
class TextWriter(FanficWriter):
    """Plain-text writer built on top of an in-memory HTMLWriter.

    Chapters are forwarded to the HTML delegate; finalise() converts the
    delegate's finished markup with html2text and stores the result in
    `self.output`.
    """

    # HTML delegate; replaced with a real HTMLWriter by __init__.
    htmlWriter = None

    def __init__(self, base, name, author, inmemory=False, compress=False):
        """Create the HTML delegate for this story.

        `inmemory` and `compress` are accepted but not read here: the
        delegate is always constructed with inmemory=True, compress=False.
        """
        delegate = HTMLWriter(base, name, author, True, False)
        self.htmlWriter = delegate

    def writeChapter(self, title, text):
        """Forward the chapter unchanged to the HTML delegate."""
        delegate = self.htmlWriter
        delegate.writeChapter(title, text)

    def finalise(self):
        """Finalise the delegate and store its text rendering.

        Sets `self.output` (a StringIO holding UTF-8 encoded text) and
        `self.name` (copied from the delegate).
        """
        delegate = self.htmlWriter
        delegate.finalise()

        self.output = StringIO.StringIO()

        # Decode the delegate's markup, convert it, and re-encode the text.
        markup = delegate.output.getvalue().decode('utf-8')
        plain = html2text.html2text(markup)
        self.output.write(plain.encode('utf-8'))

        self.name = delegate.name
|
|
|
|
|
|
class HTMLWriter(FanficWriter):
    """Collects chapters and emits the whole story as one HTML document.

    Chapter markup is accumulated in `self.body`; finalise() substitutes it
    into the page template, prettifies the result with BeautifulSoup and
    writes it to `self.output` (a StringIO when `inmemory` is true,
    otherwise the file `<base>/<name>.html`).
    """

    # Accumulated chapter markup. writeChapter rebinds this on the instance,
    # so the class-level empty string only serves as the starting value.
    body = ''

    def __init__(self, base, name, author, inmemory=False, compress=False):
        """Set up the output target and the templates.

        base     -- directory part of the output path
        name     -- story title; spaces become underscores in the file name
        author   -- author name substituted into the page template
        inmemory -- when true, write to a StringIO instead of a file
        compress -- accepted for signature parity; not read by this class
        """
        self.basePath = base
        self.name = name.replace(" ", "_")
        self.storyTitle = name
        self.fileName = self.basePath + '/' + self.name + '.html'
        self.authorName = author

        self.inmemory = inmemory

        # Build the templates before touching the file system so that a
        # failure here cannot leave an open file handle behind.
        self.xhtmlTemplate = string.Template(html_constants.XHTML_START)
        self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START)

        if not self.inmemory and os.path.exists(self.fileName):
            os.remove(self.fileName)

        if self.inmemory:
            self.output = StringIO.StringIO()
        else:
            self.output = open(self.fileName, 'w')

    def _printableVersion(self, text):
        """Return `text` decoded from UTF-8, or `text` itself if that fails.

        Decoding is best-effort: values that cannot be decoded (already
        decoded text, invalid bytes, objects without a usable `decode`)
        are passed through unchanged.
        """
        try:
            return text.decode('utf-8')
        except (UnicodeError, AttributeError, TypeError):
            # Only decode-related failures are tolerated; anything else
            # (e.g. KeyboardInterrupt) is allowed to propagate.
            return text

    def writeChapter(self, title, text):
        """Append a chapter heading and the chapter text to the body."""
        title = self._printableVersion(title)
        text = self._printableVersion(text)
        self.body = self.body + '\n' + self.chapterStartTemplate.substitute({'chapter' : title})
        self.body = self.body + '\n' + text

    def finalise(self):
        """Render the complete document and write it to `self.output`.

        The file-backed output is always closed, even when rendering or
        writing fails; the in-memory buffer is left open so callers can
        read it with getvalue().
        """
        try:
            html = self.xhtmlTemplate.substitute({'title' : self.storyTitle, 'author' : self.authorName, 'body' : self.body})
            soup = bs.BeautifulSoup(html)
            result = soup.prettify()

            self.output.write(result)
        finally:
            if not self.inmemory:
                self.output.close()
|
|
|
|
class EPubFanficWriter(FanficWriter):
    """Builds an EPUB from chapters.

    Every archive member is buffered in `self.files` (member name ->
    file-like object). finalise() writes the table of contents and the
    package file, hands the buffers to `zipdir.inMemoryZip`, and either
    saves the result as `<base>/<name>.epub` or exposes it as
    `self.output`.
    """

    # Class-level defaults; __init__ rebinds both on every instance, so
    # writers do not share these containers.
    chapters = []

    files = {}

    def _writeFile(self, fileName, data):
        """Append `data` to the archive member `fileName`.

        The member is created on first use: a StringIO when
        `self.inmemory` is true, otherwise a file under `self.directory`.
        """
        if fileName not in self.files:
            if self.inmemory:
                self.files[fileName] = StringIO.StringIO()
            else:
                # __init__ always sets self.inmemory to True, so this path
                # is only taken if a caller resets the attribute afterwards.
                self.files[fileName] = open(self.directory + '/' + fileName, 'w')

        try:
            d = data.decode('utf-8')
        except UnicodeEncodeError:
            # Raised when `data` could not be implicitly encoded before
            # decoding; it is then written through as it is.
            d = data

        self.files[fileName].write(d)

    def _closeFiles(self):
        """Close every member, but only when members are real files."""
        if not self.inmemory:
            for f in self.files:
                self.files[f].close()

    def __init__(self, base, name, author, inmemory=False, compress=True):
        """Prepare an empty EPUB and write its fixed members.

        base     -- directory part of the output path
        name     -- story title; spaces and colons become underscores in
                    the file name
        author   -- author name written into the package file
        inmemory -- when true, finalise() stores the archive in
                    `self.output`; when false it writes `<directory>.epub`
        compress -- accepted for signature parity; not read by this class
        """
        self.basePath = base
        self.name = name.replace(" ", "_").replace(":", "_")
        self.storyTitle = name
        self.directory = self.basePath + '/' + self.name
        self.authorName = author

        self.files = {}
        self.chapters = []

        # Members are always buffered in memory. The caller's flag only
        # decides what finalise() does with the finished archive. The old
        # on-disk directory setup that followed could never run because
        # self.inmemory was forced to True first, so it has been removed.
        self.writeToFile = not inmemory
        self.inmemory = True

        # MIMETYPE, CONTAINER and CSS are not defined in this block;
        # presumably they come from `from constants import *` -- confirm.
        self._writeFile('mimetype', MIMETYPE)
        self._writeFile('META-INF/container.xml', CONTAINER)
        self._writeFile('OEBPS/stylesheet.css', CSS)

    def _removeEntities(self, text):
        """Apply the `entities` replacement table, then escape ampersands."""
        for e in entities:
            text = text.replace(e, entities[e])

        # NOTE(review): this line previously replaced '&' with '&', which
        # does nothing. The literal looks like an HTML-unescaped '&amp;',
        # so it is written as '&amp;' here -- confirm against the
        # `entities` table.
        text = text.replace('&', '&amp;')

        return text

    def _titleToken(self, title):
        """Return `title` Base64-encoded, encoding to UTF-8 first if needed."""
        try:
            return base64.b64encode(title)
        except UnicodeEncodeError:
            return base64.b64encode(title.encode('utf-8'))

    def writeChapter(self, title, text):
        """Clean up one chapter and buffer it as an XHTML member.

        The member name is the Base64 form of `title` with '/' replaced by
        '_'. The (title, member name) pair is recorded in `self.chapters`
        for finalise().
        """
        logging.debug("Writing chapter: %s", title)

        fileName = self._titleToken(title).replace('/', '_') + ".xhtml"
        fn = 'OEBPS/' + fileName

        text = self._removeEntities(text)

        # BeautifulStoneSoup doesn't have any selfClosingTags by default.
        # hr needs to be if it's going to work. A real one-element tuple is
        # passed; the previous ('hr') was just the string 'hr'.
        self.soup = bs.BeautifulStoneSoup(text.decode('utf-8'), selfClosingTags=('hr',))

        # Drop every attribute that is not explicitly allowed. The keys are
        # copied first because attributes are deleted while looping.
        for t in self.soup.findAll(recursive=True):
            for attr in list(t._getAttrMap().keys()):
                if attr not in acceptable_attributes:
                    del t[attr]

        # Remove tags whose only content is blank text.
        for p in self.soup.findAll(recursive=True):
            if p.string is None:
                continue
            stripped = p.string.strip()
            # NOTE(review): a stripped string cannot equal ' ', so the
            # second test looks dead; it may originally have been an entity
            # such as '&nbsp;'. Kept as found -- confirm.
            if len(stripped) == 0 or stripped == ' ':
                p.extract()

        # Turn <br> and <div> tags into paragraphs.
        for br in self.soup.findAll(recursive=True, name=["br", 'div']):
            # NOTE(review): `contents` appears never to be None, which
            # would make this condition always true -- confirm the intent.
            if (br.string is not None and len(br.string.strip()) != 0) or (br.contents is not None):
                br.name = 'p'

        text = self.soup.prettify()

        tt = self._removeEntities(title)

        self._writeFile(fn, XHTML_START % (tt, tt))
        self._writeFile(fn, text)
        self._writeFile(fn, XHTML_END)

        self.chapters.append((title, fileName))

    def finalise(self):
        """Write the TOC and package file, then build the EPUB archive.

        With `self.writeToFile` set, the archive is written to
        `<directory>.epub`; otherwise the object returned by
        `zipdir.inMemoryZip` is stored in `self.output`.
        """
        logging.debug("Finalising...")

        ### writing table of contents -- ncx file
        tocFilePath = "OEBPS/toc.ncx"
        self._writeFile(tocFilePath, TOC_START % self.storyTitle)

        ### writing content -- opf file
        opfFilePath = "OEBPS/content.opf"
        self._writeFile(opfFilePath, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName))

        ids = []
        for position, (chapterTitle, chapterFile) in enumerate(self.chapters):
            chapterId = self._titleToken(chapterTitle)

            self._writeFile(tocFilePath, TOC_ITEM % (chapterId, position, cgi.escape(chapterTitle), chapterFile))
            self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, chapterFile))

            ids.append(chapterId)

        self._writeFile(tocFilePath, TOC_END)
        self._writeFile(opfFilePath, CONTENT_END_MANIFEST)

        # The item references follow the manifest, in chapter order.
        for chapterId in ids:
            self._writeFile(opfFilePath, CONTENT_ITEMREF % chapterId)

        self._writeFile(opfFilePath, CONTENT_END)

        self._closeFiles()

        filename = self.directory + '.epub'

        zipdata = zipdir.inMemoryZip(self.files)

        if self.writeToFile:
            epubFile = open(filename, 'wb')
            try:
                epubFile.write(zipdata.getvalue())
            finally:
                # Close the handle even if the write fails.
                epubFile.close()
        else:
            self.output = zipdata
|