From ed9e2992c00afafb4f88461bbf93a9ec1c5cfec1 Mon Sep 17 00:00:00 2001
From: sigizmund <sigizmund@gmail.com>
Date: Fri, 18 Dec 2009 18:57:53 +0000
Subject: [PATCH] hpfiction added - not yet working

---
 hpfiction.py | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 hpfiction.py

diff --git a/hpfiction.py b/hpfiction.py
new file mode 100644
index 00000000..f33849a7
--- /dev/null
+++ b/hpfiction.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+import os
+import re
+import sys
+import cgi
+import uuid
+import shutil
+import base64
+import os.path
+import logging
+import unittest
+import urllib as u
+import pprint as pp
+import urllib2 as u2
+import urlparse as up
+import BeautifulSoup as bs
+import htmlentitydefs as hdefs
+
+from constants import *
+from adapter import *
+
+try:
+	import login_password
+except:
+	# tough luck
+	pass
+
+class HPFiction(FanfictionSiteAdapter):
+	def __init__(self, url):
+		self.url = url
+		parsedUrl = up.urlparse(url)
+		self.host = parsedUrl.netloc
+		self.path = parsedUrl.path
+		
+		self.opener = u2.build_opener(u2.HTTPCookieProcessor())
+	
+		logging.debug("Created HPFiction: url=%s" % (self.url))
+	
+	def _getLoginScript(self):
+		return self.path
+
+	def requiresLogin(self, url = None):
+		return False
+
+	def performLogin(self, url = None):
+		return True
+	
+	def extractIndividualUrls(self):
+		data = self.opener.open(self.url).read()
+		soup = bs.BeautifulSoup(data)
+		
+		links = soup.findAll('a')
+		
+		for a in links:
+			if a['href'].find('psid') != -1:
+				self.storyName = a.string
+			elif a['href'].find('viewuser.php') != -1:
+				self.authorName = a.string
+		
+		select = soup.find('select', {'name' : 'chapterid'})
+		urls = []
+		for o in select.findAll('option'):
+			if 'value' in o._getAttrMap():
+				url = 'http://' + self.host + '/' + self.path + o['value']
+				title = o.string
+				urls.append((url,title))
+		return urls
+
+	def getStoryName(self):
+		return self.storyName
+
+	def getAuthorName(self):
+		return self.authorName
+	
+	def getText(self, url):
+		logging.debug('Downloading from URL: %s' % url)
+		data = self.opener.open(self.url).read()
+		soup = bs.BeautifulSoup(data)
+		divtext = soup.find('div', {'id' : 'fluidtext'})
+		return divtext.prettify()
+
+class FF_UnitTests(unittest.TestCase):
+	def setUp(self):
+		logging.basicConfig(level=logging.DEBUG)
+		pass
+
+	def testChaptersAuthStory(self):
+		f = HPFiction('http://www.harrypotterfanfiction.com/viewstory.php?chapterid=80123')
+		urls = f.extractIndividualUrls()
+		
+		self.assertEquals(49, len(urls))
+		self.assertEquals('Elisha', f.getAuthorName())
+		self.assertEquals('A Secret Thought', f.getStoryName())
+	
+	def testGetText(self):
+		url = 'http://www.harrypotterfanfiction.com/viewstory.php?chapterid=80123'
+		f = HPFiction(url)
+		#urls = f.extractIndividualUrls()
+		text = f.getText(url)
+		self.assertTrue(text.find('She pulled out of his arms and felt the subtle regret') != -1)
+
+if __name__ == '__main__':
+	unittest.main()
+
+	
\ No newline at end of file