Allow '&' and '£'. Allow single-chapter/one-shot stories from FicWad. Correct story names from FicWad. Stop creating an empty 'Story Index' chapter in stories from FicWad. Exit on chapter download error from FicWad.

2026-02-13 11:02:37 +01:00 · 2010-09-26 19:08:49 -05:00 · 2010-09-26 19:08:49 -05:00 · ccb3fb36c5
commit ccb3fb36c5
parent 31c68ff796
3 changed files with 34 additions and 14 deletions
--- a/constants.py
+++ b/constants.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 CSS = '''body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; }
 pre { font-size: x-small; }
 h1 { text-align: center; }
@ -111,7 +113,8 @@ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',

 acceptable_attributes = ['href']

-entities = { '&ndash;' : ' - ', '&mdash;' : ' - ', '&rdquo;' : '"', '&ldquo;' : '"', '&rsquo;' : '\'', '&lsquo;' : '\'', '&quot;' : '"', '&hellip;' : '...' }
+entities = { '&ndash;' : ' - ', '&mdash;' : ' - ', '&rdquo;' : '"', '&ldquo;' : '"', '&rsquo;' : '\'', 
+		'&lsquo;' : '\'', '&quot;' : '"', '&hellip;' : '...', '&amp;' : '&', '&pound;' : '£' }

 FB2_PROLOGUE = '<FictionBook>'
 FB2_DESCRIPTION = '''<description>
--- a/ficwad.py
+++ b/ficwad.py
@ -33,25 +33,37 @@ class FicWad(FanfictionSiteAdapter):
 		data = u2.urlopen(self.url).read()
 		soup = bs.BeautifulStoneSoup(data)
 		
-		title = soup.find('title').string
-		self.storyName = title.split('::')[0].strip()
+		story = soup.find('div', {'id' : 'story'})
+		crumbtrail = story.find('h3') # the only h3 ficwad uses.
+		allAhrefs = crumbtrail.findAll('a')
+		# the last link in the crumbtrail is used as the story name
+		self.storyName = allAhrefs[-1].string.strip()
+		# save chapter name from header in case of one-shot.
+		chaptername = story.find('h4').find('a').string.strip()
 		
 		author = soup.find('span', {'class' : 'author'})
 		self.authorName = str(author.a.string)
 		
-		print('Story "%s" by %s' % (self.storyName, self.authorName))
-		
 		select = soup.find('select', { 'name' : 'goto' } )
 		
-		allOptions = select.findAll('option')
 		result = []
-		for o in allOptions:
-			url = o['value']
-#			if type(url) is unicode:
-#				url = url.encode('utf-8')
-			title = o.string
-			result.append((url,title))
+		if select is None:
+			# Single-chapter stories don't have a title in the crumbtrail, just the 'chapter' title in h4.
+			self.storyName = chaptername
+			# no chapters found, try url by itself.
+			result.append((self.url,self.storyName))
+		else:
+			allOptions = select.findAll('option')
+			for o in allOptions:
+				url = o['value']
+				title = o.string
+				# ficwad includes 'Story Index' in the dropdown of chapters, 
+				# but it's not a real chapter.
+				if title != "Story Index":
+					result.append((url,title))
 			
+		print('Story "%s" by %s' % (self.storyName, self.authorName))
+		
 		return result
 	
 	def getStoryName(self):
@ -69,8 +81,9 @@ class FicWad(FanfictionSiteAdapter):
 		soup = bs.BeautifulStoneSoup(data)
 		div = soup.find('div', {'id' : 'storytext'})
 		if None == div:
+			logging.error("Error downloading Chapter: %s" % url)
+			exit(1)
 			return '<html/>'
-		
 		return div.prettify()
 	
 	def getPrintableUrl(self, url):
--- a/output.py
+++ b/output.py
@ -165,7 +165,11 @@ class EPubFanficWriter(FanficWriter):
 	def _removeEntities(self, text):
 		for e in entities:
 			v = entities[e]
-			text = text.replace(e, v)
+			try:
+				text = text.replace(e, v)
+			except UnicodeDecodeError, ex:
+				# for the pound symbol in constants.py: retry with the value decoded from UTF-8
+				text = text.replace(e, v.decode('utf-8'))
 		
 		text = text.replace('&', '&amp;')