Allow '&' and '£'. Allow single chapter/one-shot stories from FicWad. Correct story names from FicWad. Stop creating empty 'Story Index' chapter in stories from FicWad. Exit on chapter download error from FicWad.

This commit is contained in:
retiefjimm 2010-09-26 19:08:49 -05:00
parent 31c68ff796
commit ccb3fb36c5
3 changed files with 34 additions and 14 deletions

View file

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
CSS = '''body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; }
pre { font-size: x-small; }
h1 { text-align: center; }
@ -111,7 +113,8 @@ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
acceptable_attributes = ['href']
entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'', '‘' : '\'', '"' : '"', '…' : '...' }
entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'',
'‘' : '\'', '"' : '"', '…' : '...', '&' : '&', '£' : '£' }
FB2_PROLOGUE = '<FictionBook>'
FB2_DESCRIPTION = '''<description>

View file

@ -33,25 +33,37 @@ class FicWad(FanfictionSiteAdapter):
data = u2.urlopen(self.url).read()
soup = bs.BeautifulStoneSoup(data)
title = soup.find('title').string
self.storyName = title.split('::')[0].strip()
story = soup.find('div', {'id' : 'story'})
crumbtrail = story.find('h3') # the only h3 ficwad uses.
allAhrefs = crumbtrail.findAll('a')
# last of crumbtrail
self.storyName = allAhrefs[-1].string.strip()
# save chapter name from header in case of one-shot.
chaptername = story.find('h4').find('a').string.strip()
author = soup.find('span', {'class' : 'author'})
self.authorName = str(author.a.string)
print('Story "%s" by %s' % (self.storyName, self.authorName))
select = soup.find('select', { 'name' : 'goto' } )
allOptions = select.findAll('option')
result = []
for o in allOptions:
url = o['value']
# if type(url) is unicode:
# url = url.encode('utf-8')
title = o.string
result.append((url,title))
if select is None:
# Single-chapter stories don't have a title in the crumbtrail, just the 'chapter' title in the h4.
self.storyName = chaptername
# no chapters found, try url by itself.
result.append((self.url,self.storyName))
else:
allOptions = select.findAll('option')
for o in allOptions:
url = o['value']
title = o.string
# ficwad includes 'Story Index' in the dropdown of chapters,
# but it's not a real chapter.
if title != "Story Index":
result.append((url,title))
print('Story "%s" by %s' % (self.storyName, self.authorName))
return result
def getStoryName(self):
@ -69,8 +81,9 @@ class FicWad(FanfictionSiteAdapter):
soup = bs.BeautifulStoneSoup(data)
div = soup.find('div', {'id' : 'storytext'})
if None == div:
logging.error("Error downloading Chapter: %s" % url)
exit(1)
return '<html/>'
return div.prettify()
def getPrintableUrl(self, url):

View file

@ -165,7 +165,11 @@ class EPubFanficWriter(FanficWriter):
def _removeEntities(self, text):
for e in entities:
v = entities[e]
text = text.replace(e, v)
try:
text = text.replace(e, v)
except UnicodeDecodeError, ex:
# for the pound symbol in constants.py
text = text.replace(e, v.decode('utf-8'))
text = text.replace('&', '&amp;')