mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-06 00:43:00 +01:00
Whole lot of fixes related to appengine
This commit is contained in:
commit
1f897843e0
12 changed files with 2851 additions and 0 deletions
1711
BeautifulSoup.py
Normal file
1711
BeautifulSoup.py
Normal file
File diff suppressed because it is too large
Load diff
0
__init__.py
Normal file
0
__init__.py
Normal file
32
adapter.py
Normal file
32
adapter.py
Normal file
|
|
@@ -0,0 +1,32 @@
|
|||
class FanfictionSiteAdapter:
    """Base interface for site-specific story adapters.

    Concrete adapters (FFA, FFNet, FicWad, ...) override every hook below.
    The default implementations do nothing and return None.
    """

    # Credentials, populated via setLogin()/setPassword() in subclasses.
    login = ''
    password = ''

    def __init__(self, url):
        """Remember nothing by default; subclasses parse *url* themselves."""

    def requiresLogin(self, url = None):
        """Report whether the story needs an authenticated session."""

    def performLogin(self, url = None):
        """Authenticate against the site."""

    def extractIndividualUrls(self):
        """Return a list of (chapter_url, chapter_title) pairs."""

    def getText(self, url):
        """Return the chapter body for *url*."""

    def setLogin(self, login):
        """Record the login name to use."""

    def setPassword(self, password):
        """Record the password to use."""

    def getStoryName(self):
        """Return the story title."""

    def getAuthorName(self):
        """Return the author name."""

    def getPrintableUrl(self, url):
        """Return a user-displayable form of *url*."""
|
||||
135
constants.py
Normal file
135
constants.py
Normal file
|
|
@@ -0,0 +1,135 @@
|
|||
# Shared templates and tables for the EPUB / FB2 writers (see output.py).

# Stylesheet embedded into every generated EPUB as OEBPS/stylesheet.css.
CSS = '''body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; }
pre { font-size: x-small; }
h1 { text-align: center; }
h2 { text-align: center; }
h3 { text-align: center; }
h4 { text-align: center; }
h5 { text-align: center; }
h6 { text-align: center; }
.CI {
text-align:center;
margin-top:0px;
margin-bottom:0px;
padding:0px;
}
.center {text-align: center;}
.smcap {font-variant: small-caps;}
.u {text-decoration: underline;}
.bold {font-weight: bold;}
'''

# Content of the mandatory 'mimetype' member of an EPUB archive.
MIMETYPE = '''application/epub+zip'''

# META-INF/container.xml: points readers at the OPF package file.
CONTAINER = '''<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
'''

# Opening of OEBPS/content.opf; %-substituted with (uuid, title, author).
CONTENT_START = '''<?xml version="1.0"?>
<package version="2.0" xmlns="http://www.idpf.org/2007/opf"
unique-identifier="BookId-Epub-%s">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:opf="http://www.idpf.org/2007/opf">
<dc:title>%s</dc:title>
<dc:creator opf:role="aut">%s</dc:creator>
<dc:language>en-UK</dc:language>
<dc:rights></dc:rights>
<dc:publisher>sgzmd</dc:publisher>
<dc:identifier id="BookId">urn:uuid:sigizmund.com062820072147132</dc:identifier>
</metadata>
<manifest>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
<item id="style" href="stylesheet.css" media-type="text/css" />
'''

# One manifest entry per chapter; %-substituted with (id, href).
CONTENT_ITEM = '<item id="%s" href="%s" media-type="application/xhtml+xml" />'

# Closes the manifest and opens the spine.
CONTENT_END_MANIFEST = '''</manifest>
<spine toc="ncx">
'''

# One spine entry per chapter; %-substituted with the chapter id.
CONTENT_ITEMREF = '''<itemref idref="%s" />'''

# Closes the spine and the package document.
CONTENT_END = '''</spine>
</package>
'''

# Opening of OEBPS/toc.ncx; %-substituted with the story title.
TOC_START = '''<?xml version="1.0" encoding="UTF-8"?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
<head>
<meta name="dtb:uid" content="sigizmund.com062820072147132"/>
<meta name="dtb:depth" content="1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>%s</text>
</docTitle>
<navMap>
'''

# One navMap entry per chapter; %-substituted with (id, order, title, href).
TOC_ITEM = '''<navPoint id="%s" playOrder="%d">
<navLabel>
<text>%s</text>
</navLabel>
<content src="%s"/>
</navPoint>
'''

# Closes the navMap and the NCX document.
TOC_END = '''</navMap>
</ncx>
'''

# Per-chapter XHTML skeleton; %-substituted with (title, title).
XHTML_START = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%s</title>
<link href="stylesheet.css" type="text/css" rel="stylesheet" />
</head>
<body>
<div>
<h3>%s</h3>
'''

XHTML_END = '''</div>
</body>
</html>
'''

# HTML tag whitelist used when sanitising scraped chapter markup.
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
'blockquote', 'br', 'center', 'cite', 'code', 'col',
'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em',
'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'i',
'ins', 'kbd', 'label', 'li', 'ol',
'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike',
'strong', 'sub', 'sup', 'u', 'ul']

# Attribute whitelist applied by output.EPubFanficWriter.writeChapter().
acceptable_attributes = ['href']

# Character-replacement table consumed by output._removeEntities().
# NOTE(review): the keys were most likely literal HTML entity references
# (e.g. '&#8211;', '&quot;') that this scraped copy has rendered into the
# characters below -- the final '"' -> '"' self-mapping only makes sense
# for an original '&quot;' key.  Verify against the upstream repository.
entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'', '‘' : '\'', '"' : '"' }

# Minimal FictionBook 2 skeleton fragments (FB2 output path).
FB2_PROLOGUE = '<FictionBook>'
# %-substituted with (author, title, date, date, id).
FB2_DESCRIPTION = '''<description>
<title-info>
<genre>fanfiction</genre>
<author>
<first-name></first-name>
<middle-name></middle-name>
<last-name>%s</last-name>
</author>
<book-title>%s</book-title>
<lang>eng</lang>
</title-info>
<document-info>
<author>
<nickname>sgzmd</nickname>
</author>
<date value="%s">%s</date>
<id>sgzmd_%s</id>
<version>2.0</version>
</document-info>
</description>'''
|
||||
103
downaloder.py
Normal file
103
downaloder.py
Normal file
|
|
@@ -0,0 +1,103 @@
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import shutil
|
||||
import os.path
|
||||
import getpass
|
||||
import logging
|
||||
import urllib as u
|
||||
import pprint as pp
|
||||
import urllib2 as u2
|
||||
import urlparse as up
|
||||
import BeautifulSoup as bs
|
||||
import htmlentitydefs as hdefs
|
||||
|
||||
import ffa
|
||||
import ffnet
|
||||
import ficwad
|
||||
import output
|
||||
import fictionalley
|
||||
|
||||
class FanficLoader:
    """Controller that wires a site adapter to an output writer: logs in
    when needed, pulls every chapter and streams it into the writer."""

    # Default directory finished books are written into.
    booksDirectory = "books"

    def __init__(self, adapter, writerClass, quiet = False, inmemory = False, compress=True):
        self.adapter = adapter
        self.writerClass = writerClass
        self.quiet = quiet
        self.inmemory = inmemory
        self.compress = compress
        self.badLogin = False

    def download(self):
        """Run the whole download.

        Returns the writer's in-memory output when inmemory is set,
        otherwise None.  On a failed login sets self.badLogin and returns
        None without downloading anything.
        """
        logging.debug("Trying to download the story")

        if self.adapter.requiresLogin():
            logging.debug("Story requires login")
            if not self.adapter.performLogin():
                logging.debug("Login/password problem")
                self.badLogin = True
                return None

        chapterUrls = self.adapter.extractIndividualUrls()
        self.writer = self.writerClass(self.booksDirectory,
                                       self.adapter.getStoryName(),
                                       self.adapter.getAuthorName(),
                                       inmemory=self.inmemory,
                                       compress=self.compress)

        total = len(chapterUrls)
        for index, (chapterUrl, chapterTitle) in enumerate(chapterUrls):
            if not self.quiet:
                print('Downloading chapter %d/%d' % (index, total))
            self.writer.writeChapter(chapterTitle, self.adapter.getText(chapterUrl))

        self.writer.finalise()

        if self.inmemory:
            self.name = self.writer.name
            return self.writer.output.getvalue()
|
||||
|
||||
|
||||
# Command-line entry point.
# Usage: downaloder.py <story-url> <format>   where format is 'epub' or 'html'.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    # Exactly two arguments are expected; anything else raises ValueError.
    (url, format) = sys.argv[1:]

    # Normalise a unicode argv entry to a byte string (Python 2).
    if type(url) is unicode:
        print('URL is unicode')
        url = url.encode('latin1')

    adapter = None
    writerClass = None

    # Pick the site adapter by substring-matching the URL.
    if url.find('fanficauthors') != -1:
        adapter = ffa.FFA(url)
    elif url.find('fictionalley') != -1:
        # FictionAlley support is deliberately disabled.
        # NOTE(review): FictionAlley.__init__ takes no url argument, so this
        # call would raise TypeError before the message below is printed --
        # confirm against fictionalley.py.
        adapter = fictionalley.FictionAlley(url)
        print >> sys.stderr, "FictionAlley adapter is broken, try to find this fic on fanfiction.net or fanficauthors"
        sys.exit(0)
    elif url.find('ficwad') != -1:
        adapter = ficwad.FicWad(url)
    elif url.find('fanfiction.net') != -1:
        adapter = ffnet.FFNet(url)
    else:
        print >> sys.stderr, "Oi! I can haz not appropriate adapter for URL %s!" % url
        sys.exit(1)

    # Pick the output writer; an unrecognised format leaves writerClass as
    # None and fails later when FanficLoader instantiates it.
    if format == 'epub':
        writerClass = output.EPubFanficWriter
    elif format == 'html':
        writerClass = output.HTMLWriter

    # Interactively collect credentials when the site needs a login.
    if adapter.requiresLogin(url):
        print("Meow, URL %s requires you to haz been logged in! Please can I haz this datas?" % url)
        sys.stdout.write("Can I haz ur login? ")
        login = sys.stdin.readline().strip()
        password = getpass.getpass(prompt='Can I haz ur password? ')
        # NOTE(review): echoes the password back to the terminal.
        print("Login: `%s`, Password: `%s`" % (login, password))

        adapter.setLogin(login)
        adapter.setPassword(password)

    loader = FanficLoader(adapter, writerClass)
    loader.download()
|
||||
|
||||
197
ffa.py
Normal file
197
ffa.py
Normal file
|
|
@@ -0,0 +1,197 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import cgi
|
||||
import uuid
|
||||
import shutil
|
||||
import base64
|
||||
import os.path
|
||||
import logging
|
||||
import unittest
|
||||
import urllib as u
|
||||
import pprint as pp
|
||||
import urllib2 as u2
|
||||
import urlparse as up
|
||||
import BeautifulSoup as bs
|
||||
import htmlentitydefs as hdefs
|
||||
|
||||
from constants import *
|
||||
from adapter import *
|
||||
|
||||
try:
|
||||
import login_password
|
||||
except:
|
||||
# tough luck
|
||||
pass
|
||||
|
||||
class FFA(FanfictionSiteAdapter):
    """Adapter for stories hosted on fanficauthors.net subdomains."""

    def __init__(self, url):
        self.url = url
        parsedUrl = up.urlparse(url)
        self.host = parsedUrl.netloc
        self.path = parsedUrl.path
        # Cookie-aware opener so the login session persists across requests.
        self.opener = u2.build_opener(u2.HTTPCookieProcessor())

        logging.debug("Created FFA: url=%s" % (self.url))

    def _getLoginScript(self):
        # The login form posts back to the story's own path.
        return self.path

    def requiresLogin(self, url = None):
        """True when the story page shows the login prompt.

        The url parameter is accepted for interface compatibility but
        ignored; self.url is always checked.
        """
        page = self.opener.open(self.url).read()
        return page.find('<legend>Please login to continue</legend>') != -1

    def performLogin(self, url = None):
        """POST the stored credentials; True when the prompt disappears."""
        if url == None:
            url = self.url

        formFields = {}
        formFields['username'] = self.login
        formFields['password'] = self.password
        formFields['submit'] = 'Submit'

        encoded = u.urlencode(formFields)
        loginUrl = 'http://' + self.host + self._getLoginScript()
        logging.debug("Will now login to URL %s" % loginUrl)

        self.opener.open(loginUrl, encoded)

        return not self.requiresLogin()

    def extractIndividualUrls(self):
        """Parse the chapter list; also fills in author and storyName."""
        soup = bs.BeautifulStoneSoup(self.opener.open(self.url).read())

        self.author = soup.find('a', {'href' : '/contact/'}).string
        self.storyName = str(soup.find('h1', {'class' : 'textCenter'}).contents[0]).strip()

        logging.debug("Story `%s` by `%s`" % (self.storyName, self.author))

        chapterSelect = soup.find('select', {'class' : 'tinput'})
        return [(option['value'], option.string)
                for option in chapterSelect.findAll('option')]

    def getText(self, url):
        """Return the chapter markup between the site's body markers."""
        if url.find('http://') == -1:
            url = 'http://' + self.host + '/' + url

        logging.info('Downloading: %s' % url)
        data = self.opener.open(url).read()

        # Copy the lines between '</div></form>' and the following
        # '<form action="#">' marker.
        collecting = False
        collected = []
        for line in data.split('\n'):
            if line.find('</div></form>') != -1:
                logging.debug('emit = True')
                collecting = True
                continue
            elif line.find('<form action="#">') != -1:
                logging.debug('emit = False')
                if collecting:
                    break
                else:
                    collecting = False

            if collecting:
                collected.append(line)

        return ''.join(line + '\n' for line in collected)

    def setLogin(self, login):
        self.login = login

    def setPassword(self, password):
        self.password = password

    def getStoryName(self):
        return self.storyName

    def getAuthorName(self):
        return self.author

    def getPrintableUrl(self, url):
        return url
|
||||
|
||||
class FFA_UnitTests(unittest.TestCase):
    """Live-site tests for FFA.  These hit fanficauthors.net over the
    network; the authenticated cases need a login_password module."""

    def setUp(self):
        logging.basicConfig(level=logging.DEBUG)

    def testRequiresLoginNeg(self):
        adapter = FFA('http://jeconais.fanficauthors.net/Happily_Ever_After/Introduction/')
        self.assertFalse(adapter.requiresLogin())

    def testRequiresLogin(self):
        adapter = FFA('http://jeconais.fanficauthors.net/Rons_Harem/Rons_Harem/')
        self.assertTrue(adapter.requiresLogin())

    def testPerformLogin(self):
        adapter = FFA('http://jeconais.fanficauthors.net/Rons_Harem/Rons_Harem/')

        if login_password != None:
            adapter.setLogin(login_password.login)
            adapter.setPassword(login_password.password)

        self.assertTrue(adapter.performLogin(None))

    def testExtractURLsAuthorStoryName(self):
        adapter = FFA('http://draco664.fanficauthors.net/Apprentice_Potter/Prologue/')
        adapter.extractIndividualUrls()

        self.assertEquals('Draco664', adapter.getAuthorName())
        self.assertEquals('Apprentice Potter', adapter.getStoryName())

    def testExtractUrls(self):
        adapter = FFA('http://draco664.fanficauthors.net/Apprentice_Potter/Prologue/')
        chapterUrls = adapter.extractIndividualUrls()
        self.assertEquals(25, len(chapterUrls))

        self.assertEquals('Grievances', chapterUrls[2][1])
        self.assertEquals('/Apprentice_Potter/Prologue/', chapterUrls[0][0])

    def testGetText(self):
        adapter = FFA('http://jeconais.fanficauthors.net/Happily_Ever_After/Introduction/')
        data = adapter.getText('http://jeconais.fanficauthors.net/Happily_Ever_After/Introduction/')

        self.assertTrue(data.find('smiled slightly, and settled back in her rocking chair') != -1)

    def testGetTextLogin(self):
        url = 'http://viridian.fanficauthors.net/Out_of_the_Darkness_A_Jinchuurikis_Tale/A_Harrowing_Escape/'
        adapter = FFA(url)

        if login_password != None:
            adapter.setLogin(login_password.login)
            adapter.setPassword(login_password.password)

        if adapter.requiresLogin():
            adapter.performLogin()

        data = adapter.getText(url)
        seek = 'So Hokage-sama” I said, “this is how we came'
        self.assertTrue(data.find(seek) != -1)
|
||||
|
||||
# Allow running this module directly to execute the unit tests above.
if __name__ == '__main__':
    unittest.main()
|
||||
162
ffnet.py
Normal file
162
ffnet.py
Normal file
|
|
@@ -0,0 +1,162 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import cgi
|
||||
import uuid
|
||||
import shutil
|
||||
import base64
|
||||
import os.path
|
||||
import logging
|
||||
import unittest
|
||||
import urllib as u
|
||||
import pprint as pp
|
||||
import urllib2 as u2
|
||||
import urlparse as up
|
||||
import BeautifulSoup as bs
|
||||
import htmlentitydefs as hdefs
|
||||
|
||||
from constants import *
|
||||
from adapter import *
|
||||
|
||||
try:
|
||||
import login_password
|
||||
except:
|
||||
# tough luck
|
||||
pass
|
||||
|
||||
try:
|
||||
from google.appengine.api.urlfetch import fetch as googlefetch
|
||||
appEngine = True
|
||||
except:
|
||||
appEngine = False
|
||||
|
||||
class FFNet(FanfictionSiteAdapter):
    """Adapter for fanfiction.net stories.

    Fetches either through a urllib2 cookie-aware opener or, when the
    module-level appEngine flag is set, through Google App Engine's
    urlfetch (googlefetch).
    """

    def __init__(self, url):
        self.url = url
        parsedUrl = up.urlparse(url)
        self.host = parsedUrl.netloc
        self.path = parsedUrl.path

        # Placeholders until extractIndividualUrls() parses the real values.
        self.storyName = 'FF.Net story'
        # BUG FIX: this second assignment previously overwrote
        # self.storyName with the author placeholder; getAuthorName()
        # reads self.authorName, which was otherwise unset until
        # extractIndividualUrls() ran.
        self.authorName = 'FF.Net author'

        # Story paths look like /s/<storyId>/<chapter>[/<title-slug>];
        # drop the trailing slug when present.
        spl = self.path.split('/')
        if len(spl) == 5:
            self.path = "/".join(spl[1:-1])

        if self.path.startswith('/'):
            self.path = self.path[1:]

        if self.path.endswith('/'):
            self.path = self.path[:-1]

        (s, self.storyId, chapter) = self.path.split('/')

        logging.debug('self.storyId=%s, chapter=%s' % (self.storyId, chapter))

        if not appEngine:
            self.opener = u2.build_opener(u2.HTTPCookieProcessor())
        else:
            # urlfetch needs no opener object.
            self.opener = None

        logging.debug("Created FF.Net: url=%s" % (self.url))

    def _getLoginScript(self):
        return self.path

    def requiresLogin(self, url = None):
        # fanfiction.net stories are publicly readable.
        return False

    def performLogin(self, url = None):
        return True

    def _fetchUrl(self, url):
        """Fetch *url* through whichever mechanism is available."""
        if not appEngine:
            return self.opener.open(url).read().decode('utf-8')
        else:
            return googlefetch(url).content

    def extractIndividualUrls(self):
        """Return (url, title) pairs for every chapter.

        Also fills in self.storyName and self.authorName as a side effect
        of scraping the page.
        """
        data = self._fetchUrl(self.url)

        urls = []
        lines = data.split('\n')
        for l in lines:
            if l.find("<img src='http://c.fanfiction.net/static/ficons/script.png' width=16 height=16 border=0 align=absmiddle>") != -1:
                s2 = bs.BeautifulStoneSoup(l)
                self.storyName = s2.find('b').string
            elif l.find("<a href='/u/") != -1:
                s2 = bs.BeautifulStoneSoup(l)
                self.authorName = s2.a.string
            elif l.find("<SELECT title='chapter navigation'") != -1:
                # The chapter <SELECT> appears more than once per page;
                # only parse the first occurrence.
                if len(urls) > 0:
                    continue
                u = l.decode('utf-8')
                # Strip numeric character references before parsing.
                u = re.sub('&\#[0-9]+;', ' ', u)
                s2 = bs.BeautifulSoup(u)
                options = s2.findAll('option')
                for o in options:
                    url = 'http://fanfiction.net/s/' + self.storyId + '/' + o['value']
                    title = o.string
                    logging.debug('URL = `%s`, Title = `%s`' % (url, title))
                    urls.append((url,title))

        return urls

    def getText(self, url):
        """Return the prettified chapter body div.

        Returns None when the '<!-- start story -->' marker is missing
        from the page; callers should be prepared for that.
        """
        data = self._fetchUrl(url)
        lines = data.split('\n')
        for l in lines:
            if l.find('<!-- start story -->') != -1:
                s2 = bs.BeautifulStoneSoup(l)
                return s2.div.prettify()

    def setLogin(self, login):
        self.login = login

    def setPassword(self, password):
        self.password = password

    def getStoryName(self):
        return self.storyName

    def getAuthorName(self):
        return self.authorName
|
||||
|
||||
class FFA_UnitTests(unittest.TestCase):
    """Live-site tests for the FFNet adapter (require network access)."""

    def setUp(self):
        logging.basicConfig(level=logging.DEBUG)

    def testChaptersAuthStory(self):
        adapter = FFNet('http://www.fanfiction.net/s/5257563/1')
        adapter.extractIndividualUrls()

        self.assertEquals('Beka0502', adapter.getAuthorName())
        self.assertEquals("Draco's Redemption", adapter.getStoryName())

    def testChaptersCountNames(self):
        adapter = FFNet('http://www.fanfiction.net/s/5257563/1')
        chapterUrls = adapter.extractIndividualUrls()

        self.assertEquals(8, len(chapterUrls))

    def testGetText(self):
        url = 'http://www.fanfiction.net/s/5257563/1'
        adapter = FFNet(url)
        text = adapter.getText(url)
        self.assertTrue(text.find('He was just about to look at some photos when he heard a crack') != -1)

    def testBrokenWands(self):
        # Only checks that a long multi-chapter story can be fetched and
        # parsed without raising; no assertions on the content.
        url = 'http://www.fanfiction.net/s/1527263/30/Harry_Potter_and_Broken_Wands'
        adapter = FFNet(url)
        text = adapter.getText(url)

        chapterUrls = adapter.extractIndividualUrls()
|
||||
|
||||
|
||||
# Allow running this module directly to execute the unit tests above.
if __name__ == '__main__':
    unittest.main()
|
||||
75
fictionalley.py
Normal file
75
fictionalley.py
Normal file
|
|
@@ -0,0 +1,75 @@
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import shutil
|
||||
import os.path
|
||||
import urllib as u
|
||||
import pprint as pp
|
||||
import urllib2 as u2
|
||||
import urlparse as up
|
||||
import BeautifulSoup as bs
|
||||
import htmlentitydefs as hdefs
|
||||
|
||||
class FictionAlley:
    """Scraper for fictionalley.org story pages.

    NOTE(review): unlike the other adapters this class does not subclass
    FanfictionSiteAdapter (adapter.py is not imported in this file), and
    self.authorName is never set internally -- callers must assign it
    before extractIndividualUrls() is called (see the __main__ harness).
    """

    def __init__(self):
        pass

    def extractIndividualUrls(self, data, host, contents):
        """Parse a pre-fetched story page and return (url, title) pairs.

        *data* is the raw page HTML; *host* and *contents* are accepted
        for interface compatibility but unused.  Sets self.storyName.
        """
        soup = bs.BeautifulStoneSoup(data)

        # Page titles look like "<prefix> - <story name>"; keep everything
        # after the first dash.
        title = soup.find('title').string
        self.storyName = "-".join(title.split('-')[1:]).strip()

        # BUG FIX: removed a dead "authors = soup.findAll('a')" lookup
        # whose result was never read.

        print('Story "%s" by %s' % (self.storyName, self.authorName))

        links = soup.findAll('a', { 'class' : 'chapterlink' } )

        result = []
        for a in links:
            url = a['href']
            title = a.string
            result.append((url,title))

        return result

    def getStoryName(self):
        return self.storyName

    def getAuthorName(self):
        return self.authorName

    def getText(self, data, fetch = False):
        """Return the prettified story div from pre-fetched page HTML,
        or a placeholder document when the div is missing."""
        soup = bs.BeautifulStoneSoup(data)
        div = soup.find('div', {'id' : 'storytext'})
        if None == div:
            return '<html/>'

        return div.prettify()

    def getPrintableUrl(self, url):
        return url

    def getPasswordLine(self):
        # Placeholder value; FictionAlley login is not implemented.
        return 'opaopapassword'

    def getLoginScript(self):
        # Placeholder value; FictionAlley login is not implemented.
        return 'opaopaloginscript'

    def getLoginPasswordOthers(self):
        """Return (credential-field map, extra-form-field map) templates."""
        login = dict(login = 'name', password = 'pass')
        other = dict(submit = 'Log In', remember='yes')
        return (login, other)
|
||||
|
||||
|
||||
# Ad-hoc smoke test against a live FictionAlley author page.  Note that
# authorName is assigned externally here -- the class never sets it.
if __name__ == '__main__':
    url = 'http://www.fictionalley.org/authors/drt/DA.html'
    data = u2.urlopen(url).read()
    host = up.urlparse(url).netloc
    fw = FictionAlley()
    fw.authorName = 'DrT'
    urls = fw.extractIndividualUrls(data, host, url)
    pp.pprint(urls)
    print(fw.getText(data))
|
||||
98
ficwad.py
Normal file
98
ficwad.py
Normal file
|
|
@@ -0,0 +1,98 @@
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import shutil
|
||||
import os.path
|
||||
import urllib as u
|
||||
import pprint as pp
|
||||
import urllib2 as u2
|
||||
import urlparse as up
|
||||
import BeautifulSoup as bs
|
||||
import htmlentitydefs as hdefs
|
||||
|
||||
from adapter import *
|
||||
|
||||
class FicWad(FanfictionSiteAdapter):
    """Adapter for stories hosted on ficwad.com."""

    def __init__(self, url):
        self.url = url
        self.host = up.urlparse(url).netloc

    def requiresLogin(self, url):
        # FicWad stories are publicly readable.
        return False

    def performLogin(self, url):
        pass

    def setLogin(self, login):
        self.login = login

    def setPassword(self, password):
        self.password = password

    def extractIndividualUrls(self):
        """Fetch the story page and return (url, title) chapter pairs.

        Also fills in self.storyName and self.authorName.
        """
        page = u2.urlopen(self.url).read()
        soup = bs.BeautifulStoneSoup(page)

        # Titles look like "<story name> :: <rest>".
        self.storyName = soup.find('title').string.split('::')[0].strip()
        self.authorName = soup.find('span', {'class' : 'author'}).a.string

        print('Story "%s" by %s' % (self.storyName, self.authorName))

        chapterSelect = soup.find('select', { 'name' : 'goto' } )
        return [(option['value'], option.string)
                for option in chapterSelect.findAll('option')]

    def getStoryName(self):
        return self.storyName

    def getAuthorName(self):
        return self.authorName

    def getText(self, url):
        """Fetch a chapter page and return its prettified story div, or a
        placeholder document when the div is missing."""
        if url.find('http://') == -1:
            url = 'http://' + self.host + '/' + url

        page = u2.urlopen(url).read()

        storyDiv = bs.BeautifulStoneSoup(page).find('div', {'id' : 'storytext'})
        if None == storyDiv:
            return '<html/>'

        return storyDiv.prettify()

    def getPrintableUrl(self, url):
        return url

    def getPasswordLine(self):
        # Placeholder value; FicWad login is not implemented.
        return 'opaopapassword'

    def getLoginScript(self):
        # Placeholder value; FicWad login is not implemented.
        return 'opaopaloginscript'

    def getLoginPasswordOthers(self):
        """Return (credential-field map, extra-form-field map) templates."""
        login = dict(login = 'name', password = 'pass')
        other = dict(submit = 'Log In', remember='yes')
        return (login, other)
|
||||
|
||||
|
||||
# Ad-hoc smoke test against a live FicWad story.
if __name__ == '__main__':
    url = 'http://www.ficwad.com/story/14536'
    # NOTE(review): fetched but unused since the fix below; kept so the
    # harness still exercises the plain fetch path.
    data = u2.urlopen(url).read()
    host = up.urlparse(url).netloc
    fw = FicWad(url)
    urls = fw.extractIndividualUrls()
    pp.pprint(urls)
    # BUG FIX: FicWad.getText() expects a chapter URL (it fetches the page
    # itself); it was previously handed the raw page bytes in `data`,
    # which cannot be opened as a URL.
    print(fw.getText(url))
|
||||
17
html_constants.py
Normal file
17
html_constants.py
Normal file
|
|
@@ -0,0 +1,17 @@
|
|||
# string.Template fragments (${...} placeholders) used by
# output.HTMLWriter to build the single-file XHTML rendition of a story.

# Document head and opening body; expects ${title}, ${author} and ${body}.
XHTML_START = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>${title} by ${author}</title>
<link href="stylesheet.css" type="text/css" rel="stylesheet" />
</head>
<body>
<div>
<h1>${title} by ${author}</h1>
${body}
</body></html>
'''

# Heading emitted before each chapter; expects ${chapter}.
XHTML_CHAPTER_START = '''<h2>${chapter}</h2>'''

# Nothing to append -- XHTML_START already closes the document.
XHTML_END = ''''''
|
||||
252
output.py
Normal file
252
output.py
Normal file
|
|
@@ -0,0 +1,252 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import cgi
|
||||
import uuid
|
||||
import codecs
|
||||
import shutil
|
||||
import string
|
||||
import base64
|
||||
import os.path
|
||||
import zipfile
|
||||
import StringIO
|
||||
import logging
|
||||
import urllib as u
|
||||
import pprint as pp
|
||||
import urllib2 as u2
|
||||
import urlparse as up
|
||||
import BeautifulSoup as bs
|
||||
import htmlentitydefs as hdefs
|
||||
|
||||
import zipdir
|
||||
import html_constants
|
||||
from constants import *
|
||||
|
||||
|
||||
|
||||
class FanficWriter:
    """Base interface for story writers; concrete writers override all
    three hooks.  Default implementations do nothing."""

    def __init__(self):
        """No shared state to initialise."""

    def writeChapter(self, title, text):
        """Append one chapter to the output."""

    def finalise(self):
        """Flush and close the output."""
|
||||
|
||||
class HTMLWriter(FanficWriter):
    """Renders the whole story as one flat XHTML document, either to a
    file under basePath or into an in-memory buffer."""

    # Accumulated chapter markup, joined into the template in finalise().
    body = ''

    def __init__(self, base, name, author, inmemory=False, compress=False):
        self.basePath = base
        self.name = name.replace(" ", "_")
        self.storyTitle = name
        self.fileName = self.basePath + '/' + self.name + '.html'
        self.authorName = author
        self.inmemory = inmemory

        # Start from a clean slate when writing to disk.
        if not self.inmemory and os.path.exists(self.fileName):
            os.remove(self.fileName)

        self.output = StringIO.StringIO() if self.inmemory else open(self.fileName, 'w')

        self.xhtmlTemplate = string.Template(html_constants.XHTML_START)
        self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START)

    def writeChapter(self, title, text):
        """Append one chapter (utf-8 byte strings) to the pending body."""
        title = title.decode('utf-8')
        text = text.decode('utf-8')
        self.body += '\n' + self.chapterStartTemplate.substitute({'chapter' : title})
        self.body += '\n' + text

    def finalise(self):
        """Substitute the accumulated body into the page template,
        pretty-print it and write it to the output target."""
        page = self.xhtmlTemplate.substitute({'title' : self.storyTitle,
                                              'author' : self.authorName,
                                              'body' : self.body})
        pretty = bs.BeautifulSoup(page).prettify()

        self.output.write(pretty)
        if not self.inmemory:
            self.output.close()
|
||||
|
||||
class EPubFanficWriter(FanficWriter):
    """Writes a story as an EPUB: one XHTML member per chapter plus the
    OPF/NCX metadata, assembled in memory and zipped by zipdir.inMemoryZip().
    """

    # Class-level defaults; __init__ rebinds both to fresh per-instance
    # objects, so these class attributes are never shared in practice.
    chapters = []
    files = {}

    def _writeFile(self, fileName, data):
        """Append *data* (utf-8 byte string) to the named archive member,
        creating its backing StringIO buffer / disk file on first use."""
        if fileName in self.files:
            self.files[fileName].write(data.decode('utf-8'))
        else:
            if self.inmemory:
                self.files[fileName] = StringIO.StringIO()
            else:
                self.files[fileName] = open(self.directory + '/' + fileName, 'w')

            # Retry now that the handle exists.
            self._writeFile(fileName, data)

    def _closeFiles(self):
        # In-memory buffers are left open so finalise() can still read
        # their contents via getvalue().
        if not self.inmemory:
            for f in self.files:
                self.files[f].close()

    def __init__(self, base, name, author, inmemory=False, compress=True):
        self.basePath = base
        self.name = name.replace(" ", "_")
        self.storyTitle = name
        self.directory = self.basePath + '/' + self.name
        self.inmemory = inmemory
        self.authorName = author

        self.files = {}
        self.chapters = []

        # Force in-memory assembly even for on-disk output; writeToFile
        # remembers that the finished zip must still be dumped to disk.
        if not self.inmemory:
            self.inmemory = True
            self.writeToFile = True
        else:
            self.writeToFile = False

        # NOTE(review): self.inmemory is always True past this point, so
        # this on-disk directory skeleton is dead code -- presumably an
        # App Engine accommodation (no filesystem writes).  TODO confirm.
        if not self.inmemory:
            if os.path.exists(self.directory):
                shutil.rmtree(self.directory)

            os.mkdir(self.directory)

            os.mkdir(self.directory + '/META-INF')
            os.mkdir(self.directory + '/OEBPS')

        # Fixed EPUB boilerplate members (see constants.py).
        self._writeFile('mimetype', MIMETYPE)
        self._writeFile('META-INF/container.xml', CONTAINER)
        self._writeFile('OEBPS/stylesheet.css', CSS)

    def _removeEntities(self, text):
        """Apply the replacement table from constants.entities to *text*."""
        for e in entities:
            v = entities[e]
            text = text.replace(e, v)

        # NOTE(review): replacing '&' with itself is a no-op; this was
        # most likely "'&amp;' -> '&'" before the source was HTML-mangled.
        text = text.replace('&', '&')

        return text

    def writeChapter(self, title, text):
        """Sanitise one chapter's markup and store it as an OEBPS member.

        The member name is the base64 of the title ('/' mapped to '_' so
        it stays a single path component).
        """
        fileName = base64.b64encode(title).replace('/', '_') + ".xhtml"
        # NOTE(review): filePath is computed but never used below.
        filePath = self.directory + "/OEBPS/" + fileName

        fn = 'OEBPS/' + fileName

        text = self._removeEntities(text)

        self.soup = bs.BeautifulStoneSoup(text)

        # Strip every attribute that is not whitelisted (constants.py).
        allTags = self.soup.findAll(recursive=True)
        for t in allTags:
            for attr in t._getAttrMap().keys():
                if attr not in acceptable_attributes:
                    del t[attr]

        # Drop tags whose entire text content is blank.
        # NOTE(review): the ' ' comparison was probably '&nbsp;' before the
        # source was HTML-mangled.
        allPs = self.soup.findAll(recursive=True)
        for p in allPs:
            if p.string != None and (len(p.string.strip()) == 0 or p.string.strip() == ' ' ) :
                p.extract()

        # Rewrite non-empty <br>/<hr> tags as paragraphs.
        allBrs = self.soup.findAll(recursive=True, name = ["br", "hr"])
        for br in allBrs:
            if (br.string != None and len(br.string.strip()) != 0) or (br.contents != None):
                br.name = 'p'

        text = self.soup.prettify()

        tt = self._removeEntities(title)

        # Wrap the sanitised body in the chapter XHTML skeleton.
        self._writeFile(fn, XHTML_START % (tt, tt))
        self._writeFile(fn, text)
        self._writeFile(fn, XHTML_END)

        self.chapters.append((title, fileName))

    def finalise(self):
        """Emit the NCX/OPF metadata and produce the final .epub zip."""
        logging.debug("Finalising...")

        # Table of contents (NCX).
        tocFilePath = "OEBPS/toc.ncx"
        self._writeFile(tocFilePath, TOC_START % self.storyTitle)

        # Package document (OPF).
        opfFilePath = "OEBPS/content.opf"
        self._writeFile(opfFilePath, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName))

        ids = []

        # One TOC navPoint and one manifest item per chapter.
        i = 0
        for t,f in self.chapters:
            chapterId = base64.b64encode(t)
            self._writeFile(tocFilePath, TOC_ITEM % (chapterId, i, cgi.escape(t), f))
            self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f))

            ids.append(chapterId)

            i = i + 1

        self._writeFile(tocFilePath, TOC_END)
        self._writeFile(opfFilePath, CONTENT_END_MANIFEST)

        # Spine entries, in chapter order.
        for chapterId in ids:
            self._writeFile(opfFilePath, CONTENT_ITEMREF % chapterId)

        self._writeFile(opfFilePath, CONTENT_END)

        self._closeFiles()

        filename = self.directory + '.epub'

        zipdata = zipdir.inMemoryZip(self.files)

        if self.writeToFile:
            f = open(filename, 'w')
            f.write(zipdata.getvalue())
            f.close()
        else:
            # Expose the in-memory zip for callers (see FanficLoader).
            self.output = zipdata
|
||||
69
zipdir.py
Normal file
69
zipdir.py
Normal file
|
|
@@ -0,0 +1,69 @@
|
|||
import os
|
||||
import zipfile
|
||||
import logging
|
||||
|
||||
import StringIO
|
||||
|
||||
def toZip(filename, directory):
    """Zip the top-level contents of *directory* into *filename*.

    Dot-prefixed entries are skipped; subdirectories are added recursively
    via addFolderToZip().
    """
    archive = zipfile.ZipFile(filename, "w", compression=zipfile.ZIP_DEFLATED)

    for entry in os.listdir(directory):
        if entry.startswith('.'):
            continue

        fullPath = os.path.join(directory, entry)
        print(fullPath)

        if os.path.isfile(fullPath):
            print(fullPath)
            archive.write(fullPath, arcname=entry)
        else:
            addFolderToZip(archive, entry, fullPath)

    archive.close()
|
||||
|
||||
def addFolderToZip(zippedHelp, folder, fpath):
    """Recursively add the directory at *fpath* to the open ZipFile
    *zippedHelp*, storing entries under the archive prefix *folder*."""
    if folder == '.' or folder == '..':
        return

    folderFiles = os.listdir(fpath)
    for f in folderFiles:
        fullPath = fpath + '/' + f
        if os.path.isfile(fullPath):
            zippedHelp.write(fullPath, folder + '/' + f, zipfile.ZIP_DEFLATED)
        # BUG FIX: the directory test used the bare name ('os.path.isdir(f)'),
        # which is resolved against the CWD instead of fpath, so nested
        # directories were silently skipped (or mis-detected).
        elif os.path.isdir(fullPath):
            # BUG FIX: the recursive call passed only two arguments to this
            # three-argument function and always raised TypeError; pass the
            # extended archive prefix and the full path instead.
            addFolderToZip(zippedHelp, folder + '/' + f, fullPath)
|
||||
|
||||
def inMemoryZip(files):
    """Build a zip archive entirely in memory.

    *files* maps archive paths to either plain strings or file-like
    objects exposing getvalue() (e.g. StringIO buffers).  Returns the
    StringIO buffer containing the finished archive.
    """
    zipBuffer = StringIO.StringIO()
    archive = zipfile.ZipFile(zipBuffer, 'a', compression=zipfile.ZIP_DEFLATED)
    archive.debug = 3

    for path in files:
        content = files[path]
        # Buffers are drained via getvalue(); strings are used as-is.
        if type(content) != type('str'):
            content = content.getvalue()

        archive.writestr(path, content.encode('utf-8'))

    # Mark each entry as created on a DOS/FAT system (create_system=0).
    for entry in archive.filelist:
        entry.create_system = 0

    archive.close()

    return zipBuffer
|
||||
|
||||
# Ad-hoc smoke test: build a two-entry zip in memory and dump it to disk.
if __name__ == '__main__':
    files = {'test.txt' : 'test', 'data/abc.txt' : 'abc'}
    data = inMemoryZip(files)
    f = open('res.zip', 'w')
    # BUG FIX: inMemoryZip() returns a StringIO buffer; writing the buffer
    # object itself raised TypeError.  Write its contents instead.
    f.write(data.getvalue())
    f.close()
|
||||
Loading…
Reference in a new issue