Whole lot of fixes related to appengine

2025-12-06 08:52:55 +01:00 · 2009-12-18 14:51:53 +00:00 · 2009-12-18 14:51:53 +00:00 · 1f897843e0
commit 1f897843e0
12 changed files with 2851 additions and 0 deletions
--- a/BeautifulSoup.py
+++ b/BeautifulSoup.py
--- a/init.py
+++ b/init.py
--- a/adapter.py
+++ b/adapter.py
@ -0,0 +1,32 @@
 class FanfictionSiteAdapter:
 	'''Interface implemented by the site-specific adapters; every method here is a do-nothing stub.'''
 	# Class-level credential defaults, visible on instances until something assigns instance values.
 	login = ''
 	password = ''
 	def __init__(self, url):
 		'''Accept the story URL; the base class stores nothing.'''
 		return None
 	def requiresLogin(self, url = None):
 		'''Stub: tells whether the story needs a login. Returns None here.'''
 		return None
 	def performLogin(self, url = None):
 		'''Stub: logs in with the stored credentials. Returns None here.'''
 		return None
 	def extractIndividualUrls(self):
 		'''Stub: lists the chapters of the story. Returns None here.'''
 		return None
 	def getText(self, url):
 		'''Stub: fetches the text of the chapter at `url`. Returns None here.'''
 		return None
 	def setLogin(self, login):
 		'''Stub: the base class ignores `login`.'''
 		return None
 	def setPassword(self, password):
 		'''Stub: the base class ignores `password`.'''
 		return None
 	def getStoryName(self):
 		'''Stub: story title. Returns None here.'''
 		return None
 	def getAuthorName(self):
 		'''Stub: author name. Returns None here.'''
 		return None
 	def getPrintableUrl(self, url):
 		'''Stub: printable variant of `url`. Returns None here.'''
 		return None
--- a/constants.py
+++ b/constants.py
@ -0,0 +1,135 @@
 # Stylesheet text; the EPUB writer stores it as OEBPS/stylesheet.css.
 CSS = '''body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; }
 pre { font-size: x-small; }
 h1 { text-align: center; }
 h2 { text-align: center; }
 h3 { text-align: center; }
 h4 { text-align: center; }
 h5 { text-align: center; }
 h6 { text-align: center; }
 .CI {
    text-align:center;
    margin-top:0px;
    margin-bottom:0px;
    padding:0px;
    }
 .center   {text-align: center;}
 .smcap    {font-variant: small-caps;}
 .u        {text-decoration: underline;}
 .bold     {font-weight: bold;}
 '''
 # Content of the EPUB "mimetype" entry.
 MIMETYPE = '''application/epub+zip'''
 # Content of META-INF/container.xml; it points at OEBPS/content.opf as the package document.
 CONTAINER = '''<?xml version="1.0"?>
 <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
 </container>
 '''
 # Opening part of OEBPS/content.opf (package metadata plus the fixed manifest entries).
 # Placeholders, in order: unique-identifier suffix, story title, author name.
 # The language tag is en-GB: "UK" is not a registered region subtag, "GB" is.
 # NOTE(review): unique-identifier ("BookId-Epub-...") does not match the id of the
 # dc:identifier element ("BookId"); fixing that would change the number of placeholders,
 # so it is left for a coordinated change with the writer.
 CONTENT_START = '''<?xml version="1.0"?>
 <package version="2.0" xmlns="http://www.idpf.org/2007/opf"
          unique-identifier="BookId-Epub-%s">
 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/"
            xmlns:opf="http://www.idpf.org/2007/opf">
    <dc:title>%s</dc:title> 
    <dc:creator opf:role="aut">%s</dc:creator>
    <dc:language>en-GB</dc:language> 
    <dc:rights></dc:rights> 
    <dc:publisher>sgzmd</dc:publisher> 
    <dc:identifier id="BookId">urn:uuid:sigizmund.com062820072147132</dc:identifier>
 </metadata>
 <manifest>
  <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
  <item id="style" href="stylesheet.css" media-type="text/css" />
 '''
 # One manifest entry per chapter file; placeholders: item id, href.
 CONTENT_ITEM = '<item id="%s" href="%s" media-type="application/xhtml+xml" />'
 # Closes the manifest and opens the spine (reading order) of content.opf.
 CONTENT_END_MANIFEST = '''</manifest>
 <spine toc="ncx">
 '''
 # One spine entry per chapter; placeholder: the id used in the matching manifest item.
 CONTENT_ITEMREF = '''<itemref idref="%s" />'''
 # Closes the spine and the package document.
 CONTENT_END = '''</spine>
 </package>
 '''
 # Opening part of the NCX table of contents (OEBPS/toc.ncx); placeholder: story title.
 TOC_START = '''<?xml version="1.0" encoding="UTF-8"?>
 <ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
  <head>
    <meta name="dtb:uid" content="sigizmund.com062820072147132"/>
    <meta name="dtb:depth" content="1"/>
    <meta name="dtb:totalPageCount" content="0"/>
    <meta name="dtb:maxPageNumber" content="0"/>
  </head>
  <docTitle>
    <text>%s</text>
  </docTitle>
  <navMap>
 '''
 # One navigation point per chapter; placeholders: id (%s), play order (%d), label text (%s), target file (%s).
 TOC_ITEM = '''<navPoint id="%s" playOrder="%d">
  <navLabel>
    <text>%s</text>
  </navLabel>
  <content src="%s"/>
 </navPoint>
 '''
 # Closes the navigation map and the NCX document.
 TOC_END = '''</navMap>
 </ncx>
 '''
 # Head of a chapter XHTML file; placeholders: page title, visible heading.
 XHTML_START = '''<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <title>%s</title>
 <link href="stylesheet.css" type="text/css" rel="stylesheet" />
 </head>
 <body>
 <div>
 <h3>%s</h3>
 '''
 # Tail of a chapter XHTML file; closes what XHTML_START opened.
 XHTML_END = '''</div>
 </body>
 </html>
 '''
 # Tag names considered acceptable (not referenced by the code visible in this change).
 acceptable_elements = (
 	'a abbr acronym address area b big '
 	'blockquote br center cite code col '
 	'colgroup dd del dfn dir div dl dt em '
 	'font h1 h2 h3 h4 h5 h6 i '
 	'ins kbd label li ol '
 	'p pre q s samp small span strike '
 	'strong sub sup u ul'
 ).split()
 # Attributes that survive the chapter clean-up; everything else is deleted from the tags.
 acceptable_attributes = ['href']
 # Named entities replaced by plain-text equivalents before chapter text is parsed.
 entities = {
 	'&ndash;' : ' - ',
 	'&mdash;' : ' - ',
 	'&rdquo;' : '"',
 	'&ldquo;' : '"',
 	'&rsquo;' : "'",
 	'&lsquo;' : "'",
 	'&quot;' : '"',
 }
 # FictionBook (FB2) fragments; nothing in the code visible in this change references them.
 FB2_PROLOGUE = '<FictionBook>'
 # Placeholders, in order: author last name, book title, date value, date text, id suffix.
 FB2_DESCRIPTION = '''<description>
 <title-info>
  <genre>fanfiction</genre>
  <author>
  <first-name></first-name>
  <middle-name></middle-name>
  <last-name>%s</last-name>
  </author>
  <book-title>%s</book-title>
  <lang>eng</lang>
 </title-info>
 <document-info>
  <author>
  <nickname>sgzmd</nickname>
  </author>
 <date value="%s">%s</date>
 <id>sgzmd_%s</id>
 <version>2.0</version>
 </document-info>
 </description>'''
--- a/downaloder.py
+++ b/downaloder.py
@ -0,0 +1,103 @@
 import os
 import re
 import sys
 import shutil
 import os.path
 import getpass
 import logging
 import urllib as u
 import pprint as pp
 import urllib2 as u2
 import urlparse as up
 import BeautifulSoup as bs
 import htmlentitydefs as hdefs
 import ffa
 import ffnet
 import ficwad
 import output
 import fictionalley
 class FanficLoader:
 	'''A controller class which handles the interaction between various specific downloaders and writers'''
 	# Directory handed to the writer as the base path for generated books.
 	booksDirectory = "books"
 	def __init__(self, adapter, writerClass, quiet = False, inmemory = False, compress=True):
 		'''Store the collaborators and options.
 		adapter     -- site adapter object used to log in and fetch chapters
 		writerClass -- class instantiated as writerClass(base, storyName, authorName, inmemory=..., compress=...)
 		quiet       -- suppress the per-chapter progress line when true
 		inmemory    -- passed to the writer; when true download() returns the writer output
 		compress    -- passed through to the writer'''
 		self.adapter = adapter
 		self.writerClass = writerClass
 		self.quiet = quiet
 		self.inmemory = inmemory
 		self.compress = compress
 		# Set by download() when the adapter reports that logging in failed.
 		self.badLogin = False
 	def download(self):
 		'''Fetch every chapter through the adapter and feed it to a fresh writer.
 		Returns None when login fails (badLogin is set) or when not working in memory;
 		otherwise returns writer.output.getvalue() and stores the writer name in self.name.'''
 		logging.debug("Trying to download the story")
 		if self.adapter.requiresLogin():
 			logging.debug("Story requires login")
 			if not self.adapter.performLogin():
 				logging.debug("Login/password problem")
 				self.badLogin = True
 				return None
 		urls = self.adapter.extractIndividualUrls()
 		self.writer = self.writerClass(self.booksDirectory, self.adapter.getStoryName(), self.adapter.getAuthorName(), inmemory=self.inmemory, compress=self.compress)
 		total = len(urls)
 		for index, (chapterUrl, chapterTitle) in enumerate(urls):
 			if not self.quiet:
 				# Chapters are reported 1-based so the last line reads N/N.
 				print('Downloading chapter %d/%d' % (index + 1, total))
 			text = self.adapter.getText(chapterUrl)
 			self.writer.writeChapter(chapterTitle, text)
 		self.writer.finalise()
 		if self.inmemory:
 			self.name = self.writer.name
 			return self.writer.output.getvalue()
 		return None
 if __name__ == '__main__':
 	# Command-line entry point: <script> <story-url> <epub|html>
 	logging.basicConfig(level=logging.DEBUG)
 	if len(sys.argv) != 3:
 		sys.stderr.write("Usage: %s <story-url> <epub|html>\n" % sys.argv[0])
 		sys.exit(1)
 	(url, outputFormat) = sys.argv[1:]
 	if isinstance(url, unicode):
 		print('URL is unicode')
 		url = url.encode('latin1')
 	adapter = None
 	writerClass = None
 	# Pick the adapter from a substring of the URL.
 	if url.find('fanficauthors') != -1:
 		adapter = ffa.FFA(url)
 	elif url.find('fictionalley') != -1:
 		# Bail out before building the adapter so the explanation is always shown,
 		# and use a non-zero status because nothing was downloaded.
 		sys.stderr.write("FictionAlley adapter is broken, try to find this fic on fanfiction.net or fanficauthors\n")
 		sys.exit(1)
 	elif url.find('ficwad') != -1:
 		adapter = ficwad.FicWad(url)
 	elif url.find('fanfiction.net') != -1:
 		adapter = ffnet.FFNet(url)
 	else:
 		sys.stderr.write("Oi! I can haz not appropriate adapter for URL %s!\n" % url)
 		sys.exit(1)
 	if outputFormat == 'epub':
 		writerClass = output.EPubFanficWriter
 	elif outputFormat == 'html':
 		writerClass = output.HTMLWriter
 	else:
 		# Without this the loader would later try to call None as the writer class.
 		sys.stderr.write("Unknown output format `%s`, expected epub or html\n" % outputFormat)
 		sys.exit(1)
 	if adapter.requiresLogin(url):
 		print("Meow, URL %s requires you to haz been logged in! Please can I haz this datas?" % url)
 		sys.stdout.write("Can I haz ur login? ")
 		login = sys.stdin.readline().strip()
 		# The password is read without echo and is deliberately never printed back.
 		password = getpass.getpass(prompt='Can I haz ur password? ')
 		adapter.setLogin(login)
 		adapter.setPassword(password)
 	loader = FanficLoader(adapter, writerClass)
 	loader.download()
 	if loader.badLogin:
 		sys.stderr.write("Login failed, nothing was downloaded\n")
 		sys.exit(1)
--- a/ffa.py
+++ b/ffa.py
@ -0,0 +1,197 @@
 # -*- coding: utf-8 -*-
 import os
 import re
 import sys
 import cgi
 import uuid
 import shutil
 import base64
 import os.path
 import logging
 import unittest
 import urllib as u
 import pprint as pp
 import urllib2 as u2
 import urlparse as up
 import BeautifulSoup as bs
 import htmlentitydefs as hdefs
 from constants import *
 from adapter import *
 try:
 	import login_password
 except ImportError:
 	# The credentials module is optional. Bind the name anyway so that the
 	# `login_password != None` checks further down do not raise NameError.
 	login_password = None
 class FFA(FanfictionSiteAdapter):
 	'''Adapter for stories hosted on fanficauthors.net sub-domains.'''
 	def __init__(self, url):
 		'''Remember the story URL, split it into host and path, and build a cookie-keeping opener.'''
 		self.url = url
 		parsedUrl = up.urlparse(url)
 		self.host = parsedUrl.netloc
 		self.path = parsedUrl.path
 		# One opener is reused for every request so the login cookie is sent along.
 		self.opener = u2.build_opener(u2.HTTPCookieProcessor())
 		logging.debug("Created FFA: url=%s" % (self.url))
 	def _getLoginScript(self):
 		'''Return the path the login form is posted to (the story path itself).'''
 		return self.path
 	def requiresLogin(self, url = None):
 		'''Fetch the story page and return True when it shows the login prompt. `url` is ignored.'''
 		data = self.opener.open(self.url).read()
 		return data.find('<legend>Please login to continue</legend>') != -1
 	def performLogin(self, url = None):
 		'''Post the stored login/password and return True when the story page no longer asks for a login.
 		`url` is accepted for interface compatibility and ignored.'''
 		data = {}
 		data['username'] = self.login
 		data['password'] = self.password
 		data['submit'] = 'Submit'
 		urlvals = u.urlencode(data)
 		loginUrl = 'http://' + self.host + self._getLoginScript()
 		logging.debug("Will now login to URL %s" % loginUrl)
 		self.opener.open(loginUrl, urlvals)
 		return not self.requiresLogin()
 	def extractIndividualUrls(self):
 		'''Read author, story name and the chapter selector from the story page.
 		Returns a list of (url, title) tuples taken from the <option> elements.'''
 		data = self.opener.open(self.url).read()
 		soup = bs.BeautifulStoneSoup(data)
 		self.author = soup.find('a', {'href' : '/contact/'}).string
 		self.storyName = str(soup.find('h1', {'class' : 'textCenter'}).contents[0]).strip()
 		logging.debug("Story `%s` by `%s`" % (self.storyName, self.author))
 		selector = soup.find('select', {'class' : 'tinput'})
 		return [(o['value'], o.string) for o in selector.findAll('option')]
 	def getText(self, url):
 		'''Download a chapter page and return the lines found between the chapter-navigation forms.
 		`url` may be absolute or relative to the story host.'''
 		if not url.startswith(('http://', 'https://')):
 			# urljoin copes with both "/Story/Chapter/" and "Story/Chapter/" without doubling the slash.
 			url = up.urljoin('http://' + self.host + '/', url)
 		logging.info('Downloading: %s' % url)
 		data = self.opener.open(url).read()
 		emit = False
 		collected = []
 		for l in data.split('\n'):
 			if l.find('</div></form>') != -1:
 				# End of the first navigation form: the story text starts on the next line.
 				logging.debug('emit = True')
 				emit = True
 				continue
 			elif l.find('<form action="#">') != -1:
 				logging.debug('emit = False')
 				if emit:
 					# Second navigation form reached: the story text is over.
 					break
 			if emit:
 				collected.append(l + '\n')
 		return ''.join(collected)
 	def setLogin(self, login):
 		'''Store the login used by performLogin.'''
 		self.login = login
 	def setPassword(self, password):
 		'''Store the password used by performLogin.'''
 		self.password = password
 	def getStoryName(self):
 		'''Return the story name found by extractIndividualUrls (unset before that call).'''
 		return self.storyName
 	def getAuthorName(self):
 		'''Return the author found by extractIndividualUrls (unset before that call).'''
 		return self.author
 	def getPrintableUrl(self, url):
 		'''Return `url` unchanged.'''
 		return url
 class FFA_UnitTests(unittest.TestCase):
 	'''Tests for the FFA adapter. They talk to the live site, and the login cases need real credentials.'''
 	def setUp(self):
 		logging.basicConfig(level=logging.DEBUG)
 	def _applyCredentials(self, adapter):
 		'''Copy login and password from the optional login_password module onto `adapter`.
 		The module is looked up in sys.modules, so a missing module is skipped instead of raising NameError.'''
 		credentials = sys.modules.get('login_password')
 		if credentials is not None:
 			adapter.setLogin(credentials.login)
 			adapter.setPassword(credentials.password)
 	def testRequiresLoginNeg(self):
 		f = FFA('http://jeconais.fanficauthors.net/Happily_Ever_After/Introduction/')
 		self.assertFalse(f.requiresLogin())
 	def testRequiresLogin(self):
 		f = FFA('http://jeconais.fanficauthors.net/Rons_Harem/Rons_Harem/')
 		self.assertTrue(f.requiresLogin())
 	def testPerformLogin(self):
 		f = FFA('http://jeconais.fanficauthors.net/Rons_Harem/Rons_Harem/')
 		self._applyCredentials(f)
 		self.assertTrue(f.performLogin(None))
 	def testExtractURLsAuthorStoryName(self):
 		f = FFA('http://draco664.fanficauthors.net/Apprentice_Potter/Prologue/')
 		f.extractIndividualUrls()
 		self.assertEqual('Draco664', f.getAuthorName())
 		self.assertEqual('Apprentice Potter', f.getStoryName())
 	def testExtractUrls(self):
 		f = FFA('http://draco664.fanficauthors.net/Apprentice_Potter/Prologue/')
 		urls = f.extractIndividualUrls()
 		self.assertEqual(25, len(urls))
 		self.assertEqual('Grievances', urls[2][1])
 		self.assertEqual('/Apprentice_Potter/Prologue/', urls[0][0])
 	def testGetText(self):
 		f = FFA('http://jeconais.fanficauthors.net/Happily_Ever_After/Introduction/')
 		data = f.getText('http://jeconais.fanficauthors.net/Happily_Ever_After/Introduction/')
 		self.assertTrue(data.find('smiled slightly, and settled back in her rocking chair') != -1)
 	def testGetTextLogin(self):
 		url = 'http://viridian.fanficauthors.net/Out_of_the_Darkness_A_Jinchuurikis_Tale/A_Harrowing_Escape/'
 		f = FFA(url)
 		self._applyCredentials(f)
 		if f.requiresLogin():
 			f.performLogin()
 		data = f.getText(url)
 		seek = 'So Hokage-sama” I said, “this is how we came'
 		self.assertTrue(data.find(seek) != -1)
 # Run the test cases defined in this module when it is executed as a script.
 if __name__ == '__main__':
 	unittest.main()
--- a/ffnet.py
+++ b/ffnet.py
@ -0,0 +1,162 @@
 # -*- coding: utf-8 -*-
 import os
 import re
 import sys
 import cgi
 import uuid
 import shutil
 import base64
 import os.path
 import logging
 import unittest
 import urllib as u
 import pprint as pp
 import urllib2 as u2
 import urlparse as up
 import BeautifulSoup as bs
 import htmlentitydefs as hdefs
 from constants import *
 from adapter import *
 try:
 	import login_password
 except ImportError:
 	# Optional credentials module; keep the name bound so later checks cannot raise NameError.
 	login_password = None
 try:
 	# On Google App Engine pages are fetched through urlfetch instead of urllib2.
 	from google.appengine.api.urlfetch import fetch as googlefetch
 	appEngine = True
 except ImportError:
 	appEngine = False
 class FFNet(FanfictionSiteAdapter):
 	'''Adapter for fanfiction.net stories; pages come from urllib2, or from urlfetch when appEngine is set.'''
 	def __init__(self, url):
 		'''Parse a story URL of the form /s/<storyId>[/<chapter>[/<title>]] and prepare the opener.
 		Raises ValueError when the path carries no story id.'''
 		self.url = url
 		parsedUrl = up.urlparse(url)
 		self.host = parsedUrl.netloc
 		# Defaults used until extractIndividualUrls finds the real values on the page.
 		self.storyName = 'FF.Net story'
 		self.authorName = 'FF.Net author'
 		# Empty segments (leading/trailing slash) are dropped and a trailing title segment is ignored.
 		parts = [p for p in parsedUrl.path.split('/') if p]
 		if len(parts) < 2:
 			raise ValueError('Cannot find a story id in URL %s' % url)
 		self.storyId = parts[1]
 		if len(parts) > 2:
 			chapter = parts[2]
 		else:
 			chapter = '1'
 		self.path = '/'.join([parts[0], self.storyId, chapter])
 		logging.debug('self.storyId=%s, chapter=%s' % (self.storyId, chapter))
 		if not appEngine:
 			self.opener = u2.build_opener(u2.HTTPCookieProcessor())
 		else:
 			self.opener = None
 		logging.debug("Created FF.Net: url=%s" % (self.url))
 	def _getLoginScript(self):
 		'''Return the normalised story path.'''
 		return self.path
 	def requiresLogin(self, url = None):
 		'''This adapter never asks for a login.'''
 		return False
 	def performLogin(self, url = None):
 		'''Nothing to do; always reports success.'''
 		return True
 	def _fetchUrl(self, url):
 		'''Return the page at `url`: UTF-8 decoded text via urllib2, or the raw urlfetch content on App Engine.'''
 		if not appEngine:
 			return self.opener.open(url).read().decode('utf-8')
 		else:
 			return googlefetch(url).content
 	def extractIndividualUrls(self):
 		'''Scan the story page line by line for story name, author and the chapter selector.
 		Returns a list of (url, title) tuples; the list is empty when no selector line is found.'''
 		data = self._fetchUrl(self.url)
 		urls = []
 		for l in data.split('\n'):
 			if l.find("<img src='http://c.fanfiction.net/static/ficons/script.png' width=16 height=16  border=0  align=absmiddle>") != -1:
 				s2 = bs.BeautifulStoneSoup(l)
 				self.storyName = s2.find('b').string
 			elif l.find("<a href='/u/") != -1:
 				s2 = bs.BeautifulStoneSoup(l)
 				self.authorName = s2.a.string
 			elif l.find("<SELECT title='chapter navigation'") != -1:
 				if len(urls) > 0:
 					# Only the first selector found on the page is used.
 					continue
 				# _fetchUrl may already have decoded the page; decoding unicode again
 				# would force an implicit ASCII encode and fail on non-ASCII titles.
 				if isinstance(l, unicode):
 					markup = l
 				else:
 					markup = l.decode('utf-8')
 				markup = re.sub(r'&\#[0-9]+;', ' ', markup)
 				s2 = bs.BeautifulSoup(markup)
 				for o in s2.findAll('option'):
 					url = 'http://fanfiction.net/s/' + self.storyId + '/' + o['value']
 					title = o.string
 					logging.debug('URL = `%s`, Title = `%s`' % (url, title))
 					urls.append((url,title))
 		return urls
 	def getText(self, url):
 		'''Return the prettified <div> of the line holding the "start story" marker.
 		Falls back to '<html/>' when the marker is missing, so callers always get a string.'''
 		data = self._fetchUrl(url)
 		for l in data.split('\n'):
 			if l.find('<!-- start story -->') != -1:
 				s2 = bs.BeautifulStoneSoup(l)
 				return s2.div.prettify()
 		return '<html/>'
 	def setLogin(self, login):
 		'''Store the login (unused by this adapter).'''
 		self.login = login
 	def setPassword(self, password):
 		'''Store the password (unused by this adapter).'''
 		self.password = password
 	def getStoryName(self):
 		'''Return the story name (the default until extractIndividualUrls has run).'''
 		return self.storyName
 	def getAuthorName(self):
 		'''Return the author name (the default until extractIndividualUrls has run).'''
 		return self.authorName
 	def getPrintableUrl(self, url):
 		'''Return `url` unchanged.'''
 		return url
 class FFA_UnitTests(unittest.TestCase):
 	'''Tests for the FFNet adapter; they talk to the live site. The class name is kept as it was.'''
 	def setUp(self):
 		logging.basicConfig(level=logging.DEBUG)
 	def testChaptersAuthStory(self):
 		f = FFNet('http://www.fanfiction.net/s/5257563/1')
 		f.extractIndividualUrls()
 		self.assertEqual('Beka0502', f.getAuthorName())
 		self.assertEqual("Draco's Redemption", f.getStoryName())
 	def testChaptersCountNames(self):
 		f = FFNet('http://www.fanfiction.net/s/5257563/1')
 		urls = f.extractIndividualUrls()
 		self.assertEqual(8, len(urls))
 	def testGetText(self):
 		url = 'http://www.fanfiction.net/s/5257563/1'
 		f = FFNet(url)
 		text = f.getText(url)
 		self.assertTrue(text.find('He was just about to look at some photos when he heard a crack') != -1)
 	def testBrokenWands(self):
 		# URL with a trailing title segment: it must parse, and both calls must yield data.
 		url = 'http://www.fanfiction.net/s/1527263/30/Harry_Potter_and_Broken_Wands'
 		f = FFNet(url)
 		text = f.getText(url)
 		urls = f.extractIndividualUrls()
 		self.assertTrue(text is not None)
 		self.assertTrue(len(urls) > 0)
 # Run the test cases defined in this module when it is executed as a script.
 if __name__ == '__main__':
 	unittest.main()
--- a/fictionalley.py
+++ b/fictionalley.py
@ -0,0 +1,75 @@
 import os
 import re
 import sys
 import shutil
 import os.path
 import urllib as u
 import pprint as pp
 import urllib2 as u2
 import urlparse as up
 import BeautifulSoup as bs
 import htmlentitydefs as hdefs
 class FictionAlley:
 	'''Parser for FictionAlley pages; callers hand in page data that has already been downloaded.'''
 	def __init__(self, url = None):
 		'''Optionally remember the story URL and start with empty story/author names.
 		`url` is optional so both FictionAlley() and FictionAlley(url) work.'''
 		self.url = url
 		self.storyName = ''
 		# Nothing in this class discovers the author; callers may assign authorName themselves.
 		self.authorName = ''
 	def extractIndividualUrls(self, data, host, contents):
 		'''Parse `data`, store the story name and return a list of (url, title) tuples
 		for every <a class="chapterlink">. `host` and `contents` are not used.'''
 		soup = bs.BeautifulStoneSoup(data)
 		title = soup.find('title').string
 		# The story name is everything after the first '-' of the page title.
 		self.storyName = "-".join(title.split('-')[1:]).strip()
 		print('Story "%s" by %s' % (self.storyName, self.authorName))
 		links = soup.findAll('a', { 'class' : 'chapterlink' } )
 		return [(a['href'], a.string) for a in links]
 	def getStoryName(self):
 		'''Return the story name ('' until extractIndividualUrls has run).'''
 		return self.storyName
 	def getAuthorName(self):
 		'''Return the author name ('' unless a caller assigned one).'''
 		return self.authorName
 	def getText(self, data, fetch = False):
 		'''Return the prettified <div id="storytext"> found in `data`, or '<html/>' when absent. `fetch` is unused.'''
 		soup = bs.BeautifulStoneSoup(data)
 		div = soup.find('div', {'id' : 'storytext'})
 		if div is None:
 			return '<html/>'
 		return div.prettify()
 	def getPrintableUrl(self, url):
 		'''Return `url` unchanged.'''
 		return url
 	def getPasswordLine(self):
 		'''Return a fixed placeholder string.'''
 		return 'opaopapassword'
 	def getLoginScript(self):
 		'''Return a fixed placeholder string.'''
 		return 'opaopaloginscript'
 	def getLoginPasswordOthers(self):
 		'''Return a pair of dicts: (login field names, extra form fields).'''
 		login = dict(login = 'name', password = 'pass')
 		other = dict(submit = 'Log In', remember='yes')
 		return (login, other)
 if __name__ == '__main__':
 	# Manual smoke test against a live author page: list the chapter links, then print the story text.
 	url = 'http://www.fictionalley.org/authors/drt/DA.html'
 	response = u2.urlopen(url)
 	data = response.read()
 	parsedUrl = up.urlparse(url)
 	host = parsedUrl.netloc
 	fw = FictionAlley()
 	# The author name is supplied by hand before the page is parsed.
 	fw.authorName = 'DrT'
 	urls = fw.extractIndividualUrls(data, host, url)
 	pp.pprint(urls)
 	storyText = fw.getText(data)
 	print(storyText)
--- a/ficwad.py
+++ b/ficwad.py
@ -0,0 +1,98 @@
 import os
 import re
 import sys
 import shutil
 import os.path
 import urllib as u
 import pprint as pp
 import urllib2 as u2
 import urlparse as up
 import BeautifulSoup as bs
 import htmlentitydefs as hdefs
 from adapter import *
 class FicWad(FanfictionSiteAdapter):
 	'''Adapter for ficwad.com stories; pages are read without logging in.'''
 	def __init__(self, url):
 		'''Remember the story URL and its host.'''
 		self.url = url
 		self.host = up.urlparse(url).netloc
 	def requiresLogin(self, url = None):
 		'''This adapter never asks for a login. `url` is optional, as in the base class, and ignored.'''
 		return False
 	def performLogin(self, url = None):
 		'''Nothing to do; reports success so a caller testing the result does not see a failure.'''
 		return True
 	def setLogin(self, login):
 		'''Store the login (unused by this adapter).'''
 		self.login = login
 	def setPassword(self, password):
 		'''Store the password (unused by this adapter).'''
 		self.password = password
 	def extractIndividualUrls(self):
 		'''Download the story page, store story and author names and return a list of
 		(url, title) tuples taken from the <select name="goto"> options.'''
 		data = u2.urlopen(self.url).read()
 		soup = bs.BeautifulStoneSoup(data)
 		title = soup.find('title').string
 		# The story name is the part of the page title before the first '::'.
 		self.storyName = title.split('::')[0].strip()
 		author = soup.find('span', {'class' : 'author'})
 		self.authorName = author.a.string
 		print('Story "%s" by %s' % (self.storyName, self.authorName))
 		select = soup.find('select', { 'name' : 'goto' } )
 		return [(o['value'], o.string) for o in select.findAll('option')]
 	def getStoryName(self):
 		'''Return the story name found by extractIndividualUrls (unset before that call).'''
 		return self.storyName
 	def getAuthorName(self):
 		'''Return the author name found by extractIndividualUrls (unset before that call).'''
 		return self.authorName
 	def getText(self, url):
 		'''Download a chapter and return its prettified <div id="storytext">, or '<html/>' when absent.
 		`url` may be absolute or relative to the story host.'''
 		if not url.startswith(('http://', 'https://')):
 			# urljoin copes with both "/story/1" and "story/1" without doubling the slash.
 			url = up.urljoin('http://' + self.host + '/', url)
 		data = u2.urlopen(url).read()
 		soup = bs.BeautifulStoneSoup(data)
 		div = soup.find('div', {'id' : 'storytext'})
 		if div is None:
 			return '<html/>'
 		return div.prettify()
 	def getPrintableUrl(self, url):
 		'''Return `url` unchanged.'''
 		return url
 	def getPasswordLine(self):
 		'''Return a fixed placeholder string.'''
 		return 'opaopapassword'
 	def getLoginScript(self):
 		'''Return a fixed placeholder string.'''
 		return 'opaopaloginscript'
 	def getLoginPasswordOthers(self):
 		'''Return a pair of dicts: (login field names, extra form fields).'''
 		login = dict(login = 'name', password = 'pass')
 		other = dict(submit = 'Log In', remember='yes')
 		return (login, other)
 if __name__ == '__main__':
 	# Manual smoke test against a live story: list the chapters, then print the first page's text.
 	url = 'http://www.ficwad.com/story/14536'
 	fw = FicWad(url)
 	urls = fw.extractIndividualUrls()
 	pp.pprint(urls)
 	# getText expects a URL (it downloads the page itself), not page data.
 	print(fw.getText(url))
--- a/html_constants.py
+++ b/html_constants.py
@ -0,0 +1,17 @@
 # Whole-page template for the single-file HTML output (string.Template syntax).
 # Placeholders: ${title}, ${author}, ${body}. The wrapping <div> is closed before </body>.
 XHTML_START = '''<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <title>${title} by ${author}</title>
 <link href="stylesheet.css" type="text/css" rel="stylesheet" />
 </head>
 <body>
 <div>
 <h1>${title} by ${author}</h1>
 ${body}
 </div>
 </body></html>
 '''
 # Heading emitted in front of each chapter; placeholder: ${chapter}.
 XHTML_CHAPTER_START = '''<h2>${chapter}</h2>'''
 # Intentionally empty: XHTML_START already contains the closing tags.
 XHTML_END = ''''''
--- a/output.py
+++ b/output.py
@ -0,0 +1,252 @@
 # -*- coding: utf-8 -*-
 import os
 import re
 import sys
 import cgi
 import uuid
 import codecs
 import shutil
 import string
 import base64
 import os.path
 import zipfile
 import StringIO
 import logging
 import urllib as u
 import pprint as pp
 import urllib2 as u2
 import urlparse as up
 import BeautifulSoup as bs
 import htmlentitydefs as hdefs
 import zipdir
 import html_constants
 from constants import *
 class FanficWriter:
 	'''Interface of the story writers; every method here is a do-nothing stub.'''
 	def __init__(self):
 		'''Nothing to set up in the base class.'''
 		return None
 	def writeChapter(self, title, text):
 		'''Stub: add one chapter. Returns None here.'''
 		return None
 	def finalise(self):
 		'''Stub: finish the output. Returns None here.'''
 		return None
 class HTMLWriter(FanficWriter):
 	'''Collects chapters in memory and writes the whole story as one HTML page on finalise().'''
 	# Accumulated chapter markup; writeChapter rebinds it on the instance.
 	body = ''
 	def __init__(self, base, name, author, inmemory=False, compress=False):
 		'''Prepare the output target.
 		base     -- directory for the output file
 		name     -- story title; spaces become underscores in the file name
 		author   -- author name shown in the page
 		inmemory -- write to a StringIO (self.output) instead of <base>/<name>.html
 		compress -- accepted for signature compatibility, not used by this writer'''
 		self.basePath = base
 		self.name = name.replace(" ", "_")
 		self.storyTitle = name
 		self.fileName = self.basePath + '/' + self.name + '.html'
 		self.authorName = author
 		self.inmemory = inmemory
 		if not self.inmemory and os.path.exists(self.fileName):
 			os.remove(self.fileName)
 		if self.inmemory:
 			self.output = StringIO.StringIO()
 		else:
 			self.output = open(self.fileName, 'w')
 		self.xhtmlTemplate = string.Template(html_constants.XHTML_START)
 		self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START)
 	def _toUnicode(self, value):
 		'''Return `value` as unicode: UTF-8 byte strings are decoded, unicode passes through.
 		Calling .decode() on unicode would force an implicit ASCII encode and fail on non-ASCII text.'''
 		if isinstance(value, unicode):
 			return value
 		return value.decode('utf-8')
 	def writeChapter(self, title, text):
 		'''Append a chapter heading and the chapter text to the page body.'''
 		title = self._toUnicode(title)
 		text = self._toUnicode(text)
 		heading = self.chapterStartTemplate.substitute({'chapter' : title})
 		self.body = self.body + '\n' + heading
 		self.body = self.body + '\n' + text
 	def finalise(self):
 		'''Fill the page template, prettify it and write it out; an on-disk file is closed afterwards.'''
 		html = self.xhtmlTemplate.substitute({'title' : self.storyTitle, 'author' : self.authorName, 'body' : self.body})
 		soup = bs.BeautifulSoup(html)
 		result = soup.prettify()
 		try:
 			self.output.write(result)
 		finally:
 			# The in-memory buffer stays open so the caller can still read it.
 			if not self.inmemory:
 				self.output.close()
 class EPubFanficWriter(FanficWriter):
 	'''Builds an EPUB: every part is buffered under its archive path and zipped by finalise().'''
 	# Class-level defaults; __init__ rebinds both per instance so writers do not share state.
 	chapters = []
 	files = {}
 	def _writeFile(self, fileName, data):
 		'''Append `data` to the buffer registered under `fileName`, creating the buffer on first use.'''
 		if fileName not in self.files:
 			if self.inmemory:
 				self.files[fileName] = StringIO.StringIO()
 			else:
 				self.files[fileName] = open(self.directory + '/' + fileName, 'w')
 		# Unicode is written as-is; calling .decode() on it would force an implicit
 		# ASCII encode and fail on non-ASCII text.
 		if not isinstance(data, unicode):
 			data = data.decode('utf-8')
 		self.files[fileName].write(data)
 	def _closeFiles(self):
 		'''Close on-disk files; in-memory buffers stay open because finalise() still reads them.'''
 		if not self.inmemory:
 			for f in self.files:
 				self.files[f].close()
 	def _titleToken(self, title):
 		'''Return the base64 form of `title`; unicode is encoded to UTF-8 first so non-ASCII titles work.'''
 		if isinstance(title, unicode):
 			title = title.encode('utf-8')
 		return base64.b64encode(title)
 	def __init__(self, base, name, author, inmemory=False, compress=True):
 		'''Prepare an empty book and queue the fixed EPUB parts.
 		base     -- directory that receives <name>.epub when the book is written to disk
 		name     -- story title; spaces become underscores in file names
 		author   -- author name for the package metadata
 		inmemory -- when true finalise() exposes the archive as self.output instead of writing a file
 		compress -- accepted for signature compatibility, not used by this writer'''
 		self.basePath = base
 		self.name = name.replace(" ", "_")
 		self.storyTitle = name
 		self.directory = self.basePath + '/' + self.name
 		self.authorName = author
 		self.files = {}
 		self.chapters = []
 		# The book is always assembled in memory; the caller's flag only decides
 		# whether finalise() writes the archive to disk.
 		self.writeToFile = not inmemory
 		self.inmemory = True
 		self._writeFile('mimetype', MIMETYPE)
 		self._writeFile('META-INF/container.xml', CONTAINER)
 		self._writeFile('OEBPS/stylesheet.css', CSS)
 	def _removeEntities(self, text):
 		'''Replace the entities listed in `entities` with plain text, then escape every remaining '&'.'''
 		for e in entities:
 			v = entities[e]
 			text = text.replace(e, v)
 		# NOTE(review): this also rewrites the '&' of entities not listed above (e.g. &nbsp; becomes &amp;nbsp;).
 		text = text.replace('&', '&amp;')
 		return text
 	def writeChapter(self, title, text):
 		'''Clean up the chapter markup and buffer it as OEBPS/<base64 of title>.xhtml.'''
 		fileName = self._titleToken(title).replace('/', '_') + ".xhtml"
 		fn = 'OEBPS/' + fileName
 		text = self._removeEntities(text)
 		self.soup = bs.BeautifulStoneSoup(text)
 		# Drop every attribute that is not explicitly allowed.
 		for t in self.soup.findAll(recursive=True):
 			for attr in list(t._getAttrMap().keys()):
 				if attr not in acceptable_attributes:
 					del t[attr]
 		# Remove elements whose only content is whitespace or a literal '&nbsp;'.
 		for p in self.soup.findAll(recursive=True):
 			if p.string is not None and (len(p.string.strip()) == 0 or p.string.strip() == '&nbsp;' ) :
 				p.extract()
 		# NOTE(review): `contents` is compared with None, which makes the condition true
 		# whenever it is not None, so such br/hr elements are renamed to <p>; kept as found.
 		for br in self.soup.findAll(recursive=True, name = ["br", "hr"]):
 			if (br.string is not None and len(br.string.strip()) != 0) or (br.contents is not None):
 				br.name = 'p'
 		text = self.soup.prettify()
 		tt = self._removeEntities(title)
 		self._writeFile(fn, XHTML_START % (tt, tt))
 		self._writeFile(fn, text)
 		self._writeFile(fn, XHTML_END)
 		self.chapters.append((title, fileName))
 	def finalise(self):
 		'''Write the NCX table of contents and the OPF package file, then zip everything.
 		The archive goes to <base>/<name>.epub, or to self.output when working in memory.'''
 		logging.debug("Finalising...")
 		### writing table of contents -- ncx file
 		tocFilePath = "OEBPS/toc.ncx"
 		self._writeFile(tocFilePath, TOC_START % self.storyTitle)
 		### writing content -- opf file
 		opfFilePath = "OEBPS/content.opf"
 		self._writeFile(opfFilePath, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName))
 		ids = []
 		# playOrder must be a positive integer, so numbering starts at 1.
 		playOrder = 1
 		for t,f in self.chapters:
 			chapterId = self._titleToken(t)
 			self._writeFile(tocFilePath, TOC_ITEM % (chapterId, playOrder, cgi.escape(t), f))
 			self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f))
 			ids.append(chapterId)
 			playOrder = playOrder + 1
 		self._writeFile(tocFilePath, TOC_END)
 		self._writeFile(opfFilePath, CONTENT_END_MANIFEST)
 		for chapterId in ids:
 			self._writeFile(opfFilePath, CONTENT_ITEMREF % chapterId)
 		self._writeFile(opfFilePath, CONTENT_END)
 		self._closeFiles()
 		filename = self.directory + '.epub'
 		zipdata = zipdir.inMemoryZip(self.files)
 		if self.writeToFile:
 			# Binary mode: a zip archive must not go through newline translation.
 			f = open(filename, 'wb')
 			try:
 				f.write(zipdata.getvalue())
 			finally:
 				f.close()
 		else:
 			self.output = zipdata
--- a/zipdir.py
+++ b/zipdir.py
@ -0,0 +1,69 @@
 import os
 import zipfile
 import logging
 import StringIO
 def toZip(filename, directory):
 	'''Zip the contents of `directory` into a new deflated archive at `filename`.
 	Top-level entries whose name starts with '.' are skipped; files are stored under their
 	own name and sub-directories are added through addFolderToZip. The archive is closed
 	even when adding an entry fails.'''
 	zippedHelp = zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED)
 	try:
 		for entity in os.listdir(directory):
 			if entity.startswith('.'):
 				continue
 			each = os.path.join(directory,entity)
 			# Logged instead of printed so nothing leaks onto stdout.
 			logging.debug(each)
 			if os.path.isfile(each):
 				zippedHelp.write(each, arcname=entity)
 			else:
 				addFolderToZip(zippedHelp,entity, each)
 	finally:
 		zippedHelp.close()
 def addFolderToZip(zippedHelp,folder,fpath):
 	'''Add the directory at `fpath` to the open archive `zippedHelp` under the archive path `folder`.
 	Files are deflated; sub-directories are added recursively with their nested archive path.
 	Does nothing when `folder` is '.' or '..'.'''
 	if folder == '.' or folder == '..':
 		return
 	for f in os.listdir(fpath):
 		childPath = fpath + '/' + f
 		childName = folder + '/' + f
 		if os.path.isfile(childPath):
 			zippedHelp.write(childPath, childName, zipfile.ZIP_DEFLATED)
 		elif os.path.isdir(childPath):
 			# Test and recurse with the full path; the bare name would be resolved
 			# against the current working directory.
 			addFolderToZip(zippedHelp, childName, childPath)
 def inMemoryZip(files):
 	'''Build a zip archive in memory and return the StringIO buffer that holds it.
 	`files` maps archive paths to content: either a string or an object with getvalue().
 	Unicode content is stored as UTF-8. An entry named 'mimetype' is written first and
 	uncompressed, as EPUB containers require; every other entry is deflated.'''
 	import time
 	buf = StringIO.StringIO()
 	memzip = zipfile.ZipFile(buf, 'a', compression=zipfile.ZIP_DEFLATED)
 	try:
 		memzip.debug = 3
 		# Stable sort: 'mimetype' moves to the front, the other paths keep their order.
 		for path in sorted(files, key=lambda p: p != 'mimetype'):
 			entry = files[path]
 			if isinstance(entry, basestring):
 				data = entry
 			else:
 				data = entry.getvalue()
 			# Only unicode needs encoding; byte strings are stored as they are.
 			if isinstance(data, unicode):
 				data = data.encode('utf-8')
 			if path == 'mimetype':
 				info = zipfile.ZipInfo(path, time.localtime()[:6])
 				info.compress_type = zipfile.ZIP_STORED
 				# Same permission bits (rw-------) that writestr gives to plain names.
 				info.external_attr = 0x180 << 16
 				memzip.writestr(info, data)
 			else:
 				memzip.writestr(path, data)
 		for zf in memzip.filelist:
 			zf.create_system = 0
 	finally:
 		memzip.close()
 	return buf
 if __name__ == '__main__':
 	# Manual check: zip two small entries in memory and save the result as res.zip.
 	files = {'test.txt' : 'test', 'data/abc.txt' : 'abc'}
 	data = inMemoryZip(files)
 	# inMemoryZip returns a buffer object: write its bytes, and do so in binary mode.
 	f = open('res.zip', 'wb')
 	try:
 		f.write(data.getvalue())
 	finally:
 		f.close()