# -*- coding: utf-8 -*-

import os
import re
import sys
import cgi
import uuid
import codecs
import shutil
import string
import base64
import os.path
import zipfile
import StringIO
import logging
import hashlib
import urllib as u
import pprint as pp
import urllib2 as u2
import urlparse as up
import BeautifulSoup as bs
import htmlentitydefs as hdefs

import zipdir
import html_constants
from constants import *


import html2text


class FanficWriter:
	"""Common interface of the story writers defined in this module.

	All three methods are no-ops here; TextWriter, HTMLWriter and
	EPubFanficWriter each provide their own versions.
	"""

	def __init__(self):
		"""Do nothing; every subclass defines its own constructor."""

	def writeChapter(self, title, text):
		"""Accept one chapter (its title and its text). No-op in the base class."""
		return None

	def finalise(self):
		"""Finish the output. No-op in the base class."""
		return None

class TextWriter(FanficWriter):
	"""Plain-text writer layered on top of HTMLWriter.

	Chapters are forwarded to an in-memory HTMLWriter; finalise() converts
	the finished HTML with html2text and stores the UTF-8 encoded result in
	self.output (a StringIO buffer).
	"""
	htmlWriter = None

	def __init__(self, base, name, author, inmemory=False, compress=False):
		"""Create the underlying HTML writer.

		base, name, author -- passed straight through to HTMLWriter.
		inmemory, compress -- accepted so the signature matches the other
		    writers, but not used: the HTML is always built in memory and
		    never compressed.
		"""
		self.htmlWriter = HTMLWriter(base, name, author, True, False)
		# The other writers expose `name` as soon as they are constructed;
		# do the same here so callers need not wait for finalise().
		self.name = self.htmlWriter.name

	def writeChapter(self, title, text):
		"""Queue one chapter by delegating to the underlying HTML writer."""
		self.htmlWriter.writeChapter(title, text)

	def finalise(self):
		"""Render the HTML, convert it to text and store it in self.output."""
		self.htmlWriter.finalise()
		html = self.htmlWriter.output.getvalue().decode('utf-8')
		self.output = StringIO.StringIO()
		self.output.write(html2text.html2text(html).encode('utf-8'))
		self.name = self.htmlWriter.name
		

class HTMLWriter(FanficWriter):
	"""Writes a whole story as one HTML document.

	Chapters are accumulated in self.body; finalise() renders them through
	html_constants.XHTML_START, prettifies the result with BeautifulSoup and
	writes it either to an in-memory StringIO or to <base>/<name>.html.
	"""
	# Accumulated chapter markup. Strings are immutable, so writeChapter()
	# rebinds this on the instance rather than mutating the class default.
	body = ''

	def __init__(self, base, name, author, inmemory=False, compress=False):
		"""Prepare the output target and the templates.

		base     -- directory the .html file is written to (file mode only).
		name     -- story title; also used, sanitised, as the file name.
		author   -- author name substituted into the page template.
		inmemory -- when true, write to a StringIO (self.output) instead of
		            a file.
		compress -- not used by this writer.
		"""
		self.basePath = base
		# Spaces, colons and numeric character references (&#NNN;) are
		# replaced with underscores to get the file-name form of the title.
		self.name = re.sub(r'&\#[0-9]+;', '_', name.replace(" ", "_").replace(":","_"))
		self.storyTitle = name
		self.fileName = self.basePath + '/' + self.name + '.html'
		self.authorName = author

		self.inmemory = inmemory

		if not self.inmemory and os.path.exists(self.fileName):
			os.remove(self.fileName)

		if self.inmemory:
			self.output = StringIO.StringIO()
		else:
			self.output = open(self.fileName, 'w')

		self.xhtmlTemplate = string.Template(html_constants.XHTML_START)
		self.chapterStartTemplate = string.Template(html_constants.XHTML_CHAPTER_START)

	def _printableVersion(self, text):
		"""Return text decoded from UTF-8, or text unchanged if that fails.

		Decoding is best-effort: values that are already unicode, are not
		valid UTF-8, or have no usable decode() are handed back as-is.
		"""
		try:
			return text.decode('utf-8')
		except (UnicodeError, AttributeError, TypeError):
			return text

	def writeChapter(self, title, text):
		"""Append a chapter heading and the chapter text to the body."""
		title = self._printableVersion(title)
		text = self._printableVersion(text)
		self.body = self.body + '\n' + self.chapterStartTemplate.substitute({'chapter' : title})
		self.body = self.body + '\n' + text

	def finalise(self):
		"""Render the full page, write it to self.output and close the file.

		The output file (file mode only) is closed even if rendering fails.
		"""
		title = self.storyTitle
		author = self.authorName
		if not isinstance(self.body, str):
			# The body became unicode in writeChapter(). Substituting
			# undecoded UTF-8 bytes next to it would trigger an implicit
			# ASCII decode and fail on any non-ASCII title or author, so
			# give the metadata the same treatment as the chapters.
			title = self._printableVersion(title)
			author = self._printableVersion(author)

		try:
			html = self.xhtmlTemplate.substitute({'title' : title, 'author' : author, 'body' : self.body})
			soup = bs.BeautifulSoup(html)
			self.output.write(soup.prettify())
		finally:
			if not self.inmemory:
				self.output.close()

class EPubFanficWriter(FanficWriter):
	"""Builds an EPUB from a story, one XHTML file per chapter.

	Every file of the archive is collected in self.files (archive path ->
	file-like object). finalise() adds the table of contents (toc.ncx) and
	the package file (content.opf), passes self.files to
	zipdir.inMemoryZip and either writes the result to <base>/<name>.epub
	or keeps it in self.output.

	NOTE(review): file names and ids are derived from the chapter title
	only, so two chapters with the same title end up appended to the same
	archive entry -- confirm callers guarantee unique titles.
	"""
	# Class-level defaults only; __init__ rebinds both on every instance.
	chapters = []

	files = {}

	def _decodeUtf8(self, data):
		"""Return data decoded from UTF-8, or data itself if already unicode.

		Under Python 2, calling decode() on a unicode object that contains
		non-ASCII characters raises UnicodeEncodeError; such data is already
		text and is returned unchanged.
		"""
		try:
			return data.decode('utf-8')
		except UnicodeEncodeError:
			return data

	def _titleToId(self, title):
		"""Return a Base64-derived token for title, usable as id and file stem.

		Base64 can include +, / and =, which XML technically doesn't like in
		its id attributes. _ and - are okay and not otherwise used in Base64,
		and the = padding is superfluous, so it is dropped.
		"""
		try:
			encoded = base64.b64encode(title)
		except UnicodeEncodeError:
			encoded = base64.b64encode(title.encode('utf-8'))
		return encoded.replace('/', '_').replace('+', '-').replace('=','')

	def _writeFile(self, fileName, data):
		"""Append data to the archive entry fileName, creating it on first use."""
		if fileName not in self.files:
			if self.inmemory:
				self.files[fileName] = StringIO.StringIO()
			else:
				self.files[fileName] = open(self.directory + '/' + fileName, 'w')

		self.files[fileName].write(self._decodeUtf8(data))

	def _closeFiles(self):
		"""Close all archive entries when they are real files on disk."""
		if not self.inmemory:
			for f in self.files:
				self.files[f].close()

	def __init__(self, base, name, author, inmemory=False, compress=True):
		"""Set up an empty archive containing the fixed EPUB boilerplate.

		base     -- directory the finished .epub is written to.
		name     -- story title; also used, sanitised, as the file name.
		author   -- author name written into content.opf.
		inmemory -- when true, finalise() leaves the zip in self.output
		            instead of writing <base>/<name>.epub.
		compress -- not used by this writer.
		"""
		self.basePath = base
		# Spaces, colons and numeric character references (&#NNN;) are
		# replaced with underscores to get the file-name form of the title.
		self.name = re.sub(r'&\#[0-9]+;', '_', name.replace(" ", "_").replace(":","_"))
		self.storyTitle = name
		self.directory = self.basePath + '/' + self.name
		self.authorName = author

		self.files = {}
		self.chapters = []

		# The archive entries are always collected in memory; the caller's
		# choice only decides what finalise() does with the finished zip.
		# (The former on-disk directory setup could never run because
		# self.inmemory is always true from here on, so it was removed.)
		if not inmemory:
			self.inmemory = True
			self.writeToFile = True
		else:
			self.inmemory = inmemory
			self.writeToFile = False

		self._writeFile('mimetype', MIMETYPE)
		self._writeFile('META-INF/container.xml', CONTAINER)
		self._writeFile('OEBPS/stylesheet.css', CSS)

	def _removeEntities(self, text):
		"""Replace the entities listed in `entities` and escape leftover '&'."""
		for e in entities:
			v = entities[e]
			try:
				text = text.replace(e, v)
			except UnicodeDecodeError:
				# for the pound symbol in constants.py
				text = text.replace(e, v.decode('utf-8'))

		# Escape every remaining '&', then restore &lt; and &gt;, which the
		# first replace has just turned into &amp;lt; and &amp;gt;.
		text = text.replace('&', '&amp;').replace('&amp;lt;', '&lt;').replace('&amp;gt;', '&gt;')

		return text

	def writeChapter(self, title, text):
		"""Clean up one chapter and add it to the archive as OEBPS/<id>.xhtml."""
		logging.debug("Writing chapter: %s" % title)
		fileName = self._titleToId(title) + ".xhtml"
		fn = 'OEBPS/' + fileName

		text = self._removeEntities(text)

		# BeautifulStoneSoup doesn't have any selfClosingTags by default.
		# hr & br needs to be if they're going to work.
		# Some stories do use multiple br tags as their section breaks...
		# The text may already be unicode at this point (see
		# _removeEntities), hence the guarded decode.
		self.soup = bs.BeautifulStoneSoup(self._decodeUtf8(text), selfClosingTags=('br','hr'))

		# Drop every attribute that is not explicitly allowed.
		for t in self.soup.findAll(recursive=True):
			for attr in list(t._getAttrMap().keys()):
				if attr not in acceptable_attributes:
					del t[attr]

		# Remove tags whose only content is whitespace.
		# xhtml doesn't like <p> nesting in <p>, so divs are left alone.
		for p in self.soup.findAll(recursive=True):
			if p.string is not None and len(p.string.strip()) == 0:
				p.extract()

		text = self.soup.prettify()

		tt = self._removeEntities(title)

		self._writeFile(fn, XHTML_START % (tt, tt))
		self._writeFile(fn, text)
		self._writeFile(fn, XHTML_END)

		self.chapters.append((title, fileName))

	def finalise(self):
		"""Write toc.ncx and content.opf, then zip the collected files.

		With writeToFile set, the zip is saved as <base>/<name>.epub;
		otherwise the object returned by zipdir.inMemoryZip is stored in
		self.output.
		"""
		logging.debug("Finalising...")

		# Table of contents (ncx) and package description (opf).
		tocFilePath = "OEBPS/toc.ncx"
		opfFilePath = "OEBPS/content.opf"

		self._writeFile(tocFilePath, TOC_START % self.storyTitle)
		self._writeFile(opfFilePath, CONTENT_START % (self.storyTitle, self.authorName, uuid.uuid4().urn))

		ids = []

		for index, (t, f) in enumerate(self.chapters):
			chapterId = self._titleToId(t)

			# Entries are numbered from 1 in the table of contents.
			self._writeFile(tocFilePath, TOC_ITEM % (chapterId, index + 1, cgi.escape(t), f))
			self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f))

			ids.append(chapterId)

		self._writeFile(tocFilePath, TOC_END)
		self._writeFile(opfFilePath, CONTENT_END_MANIFEST)

		# The item references follow the manifest in content.opf, in
		# chapter order.
		for chapterId in ids:
			self._writeFile(opfFilePath, CONTENT_ITEMREF % chapterId)

		self._writeFile(opfFilePath, CONTENT_END)

		self._closeFiles()

		filename = self.directory + '.epub'

		zipdata = zipdir.inMemoryZip(self.files)

		if self.writeToFile:
			f = open(filename, 'wb')
			try:
				f.write(zipdata.getvalue())
			finally:
				# Release the handle even when the write fails.
				f.close()
		else:
			self.output = zipdata