diff --git a/constants.py b/constants.py index a8691435..8d389e32 100644 --- a/constants.py +++ b/constants.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + CSS = '''body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; } pre { font-size: x-small; } h1 { text-align: center; } @@ -111,7 +113,8 @@ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', acceptable_attributes = ['href'] -entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'', '‘' : '\'', '"' : '"', '…' : '...' } +entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'', + '‘' : '\'', '"' : '"', '…' : '...', '&' : '&', '£' : '£' } FB2_PROLOGUE = '' FB2_DESCRIPTION = ''' diff --git a/ficwad.py b/ficwad.py index 26be350f..1e769a00 100644 --- a/ficwad.py +++ b/ficwad.py @@ -33,25 +33,37 @@ class FicWad(FanfictionSiteAdapter): data = u2.urlopen(self.url).read() soup = bs.BeautifulStoneSoup(data) - title = soup.find('title').string - self.storyName = title.split('::')[0].strip() + story = soup.find('div', {'id' : 'story'}) + crumbtrail = story.find('h3') # the only h3 ficwad uses. + allAhrefs = crumbtrail.findAll('a') + # last of crumbtrail + self.storyName = allAhrefs[-1].string.strip() + # save chapter name from header in case of one-shot. + chaptername = story.find('h4').find('a').string.strip() author = soup.find('span', {'class' : 'author'}) self.authorName = str(author.a.string) - print('Story "%s" by %s' % (self.storyName, self.authorName)) - select = soup.find('select', { 'name' : 'goto' } ) - allOptions = select.findAll('option') result = [] - for o in allOptions: - url = o['value'] -# if type(url) is unicode: -# url = url.encode('utf-8') - title = o.string - result.append((url,title)) + if select is None: + # Single chapter storys don't have title in crumbtrail, just 'chapter' title in h4. + self.storyName = chaptername + # no chapters found, try url by itself. + result.append((self.url,self.storyName)) + else: + allOptions = select.findAll('option') + for o in allOptions: + url = o['value'] + title = o.string + # ficwad includes 'Story Index' in the dropdown of chapters, + # but it's not a real chapter. + if title != "Story Index": + result.append((url,title)) + print('Story "%s" by %s' % (self.storyName, self.authorName)) + return result def getStoryName(self): @@ -69,8 +81,9 @@ class FicWad(FanfictionSiteAdapter): soup = bs.BeautifulStoneSoup(data) div = soup.find('div', {'id' : 'storytext'}) if None == div: + logging.error("Error downloading Chapter: %s" % url) + exit(1) return '' - return div.prettify() def getPrintableUrl(self, url): diff --git a/output.py b/output.py index a0784c25..bd17b02a 100644 --- a/output.py +++ b/output.py @@ -165,7 +165,11 @@ class EPubFanficWriter(FanficWriter): def _removeEntities(self, text): for e in entities: v = entities[e] - text = text.replace(e, v) + try: + text = text.replace(e, v) + except UnicodeDecodeError, ex: + # for the pound symbol in constants.py + text = text.replace(e, v.decode('utf-8')) text = text.replace('&', '&')