FictionAlley.com working; also add twilighted.com to downloader.py.

This commit is contained in:
retiefjimm 2010-09-28 19:26:06 -05:00
parent fe67201f95
commit 84fea5b896
2 changed files with 74 additions and 22 deletions

View file

@ -80,14 +80,16 @@ if __name__ == '__main__':
adapter = ffa.FFA(url)
elif url.find('fictionalley') != -1:
adapter = fictionalley.FictionAlley(url)
print >> sys.stderr, "FictionAlley adapter is broken, try to find this fic on fanfiction.net or fanficauthors"
sys.exit(0)
#print >> sys.stderr, "FictionAlley adapter is broken, try to find this fic on fanfiction.net or fanficauthors"
#sys.exit(0)
elif url.find('ficwad') != -1:
adapter = ficwad.FicWad(url)
elif url.find('fanfiction.net') != -1 or url.find('fictionpress.com') != -1:
adapter = ffnet.FFNet(url)
elif url.find('harrypotterfanfiction.com') != -1:
adapter = hpfiction.HPFiction(url)
elif url.find('twilighted.com') != -1:
adapter = twilighted.Twilighted(url)
else:
print >> sys.stderr, "Oi! I can haz not appropriate adapter for URL %s!" % url
sys.exit(1)

View file

@ -2,36 +2,78 @@ import os
import re
import sys
import shutil
import logging
import os.path
import urllib as u
import pprint as pp
import urllib2 as u2
import cookielib as cl
import urlparse as up
import BeautifulSoup as bs
import htmlentitydefs as hdefs
import time as time
from adapter import *
class FictionAlley(FanfictionSiteAdapter):
    """Adapter for downloading stories hosted on www.fictionalley.org."""

    def __init__(self, url):
        """Remember the story URL and build a urllib2 opener that carries
        the site's age-verification cookie.

        url -- full URL of the story's index (or single-chapter) page.
        """
        self.url = url
        self.host = up.urlparse(url).netloc
        # FictionAlley wants a cookie to prove you're old enough to read
        # R+ rated stuff.
        # NOTE(review): expires is only now()+100 seconds — presumably long
        # enough for one download run, but confirm.
        age_cookie = cl.Cookie(version=0, name='fauser', value='wizard',
                               port=None, port_specified=False,
                               domain='www.fictionalley.org',
                               domain_specified=False,
                               domain_initial_dot=False,
                               path='/authors', path_specified=True,
                               secure=False,
                               expires=time.time()+100,
                               discard=False,
                               comment=None,
                               comment_url=None,
                               rest={'HttpOnly': None},
                               rfc2109=False)
        cookieproc = u2.HTTPCookieProcessor()
        cookieproc.cookiejar.set_cookie(age_cookie)
        self.opener = u2.build_opener(cookieproc)
def requiresLogin(self, url=None):
    """This adapter never requires a site login; the age cookie set in
    __init__ is all FictionAlley needs."""
    return False
def extractIndividualUrls(self, data, host, contents):
def performLogin(self, url=None):
    """No-op: there is no login step for FictionAlley."""
    pass
def setLogin(self, login):
    """Remember the supplied account name."""
    self.login = login
def setPassword(self, password):
    """Remember the supplied password."""
    self.password = password
def extractIndividualUrls(self):
    """Fetch the story index page and return a list of (url, title)
    chapter tuples.

    Side effects: populates self.storyName and self.authorName.
    """
    data = self.opener.open(self.url).read()
    soup = bs.BeautifulStoneSoup(data)
    # Get title from <title>, remove before '-' and the site's
    # " (Story Text)" suffix.
    title = soup.find('title').string
    self.storyName = "-".join(title.split('-')[1:]).strip().replace(" (Story Text)","")
    links = soup.findAll('a', { 'class' : 'chapterlink' } )
    result = []
    if len(links) == 0:
        # No chapter links: single-chapter story, the page itself is the
        # story text.  Author name comes from the breadcrumb trail,
        # which reads "<author>'s Fics".
        breadcrumbs = soup.find('div', {'class': 'breadcrumbs'})
        self.authorName = breadcrumbs.a.string.replace("'s Fics","")
        result.append((self.url,self.storyName))
    else:
        # Multi-chapter story: author from the page's title header, one
        # (url, title) entry per chapter link.
        author = soup.find('h1', {'class' : 'title'})
        self.authorName = author.a.string
        for a in links:
            url = a['href']
            title = a.string
            result.append((url,title))
    print('Story "%s" by %s' % (self.storyName, self.authorName))
    return result
def getStoryName(self):
@ -40,11 +82,20 @@ class FictionAlley:
def getAuthorName(self):
    """Return the author name captured by extractIndividualUrls()."""
    return self.authorName
def getText(self, url):
    """Download one chapter and return its story text as HTML.

    url -- full chapter URL (fictionalley uses full URLs in its
    chapter list, so this is fetched directly).
    """
    data = self.opener.open(url).read()
    # The story body sits between <!-- headerend --> and
    # <!-- footerstart -->; replace those markers with a matching div
    # pair for easier parsing.
    # Yes, it's an evil kludge, but what can ya do?
    data = data.replace('<!-- headerend -->','<div id="storytext">').replace('<!-- footerstart -->','</div>')
    soup = bs.BeautifulStoneSoup(data)
    div = soup.find('div', {'id' : 'storytext'})
    if None == div:
        # Markers missing: log and return an empty document instead of
        # exiting — killing the whole process from a library method
        # would abort a multi-chapter download.
        logging.error("Error downloading Chapter: %s" % url)
        return '<html/>'
    return div.prettify()
@ -62,14 +113,13 @@ class FictionAlley:
login = dict(login = 'name', password = 'pass')
other = dict(submit = 'Log In', remember='yes')
return (login, other)
if __name__ == '__main__':
    # Smoke test: fetch a known story, dump its chapter list, then the
    # text of the first chapter.  Uses the new adapter API: the URL is
    # passed to the constructor, extractIndividualUrls() takes no
    # arguments, and getText() takes a chapter URL (not page data).
    url = 'http://www.fictionalley.org/authors/drt/DA.html'
    fw = FictionAlley(url)
    urls = fw.extractIndividualUrls()
    pp.pprint(urls)
    print(fw.getText(urls[0][0]))