Include notes with chapter text for thewriterscoffeeshop.com.

This commit is contained in:
Jim Miller 2013-04-07 12:13:16 -05:00
parent aaccb45df5
commit 6dd5522b7a

View file

@ -244,16 +244,23 @@ class TheWritersCoffeeShopComSiteAdapter(BaseSiteAdapter):
# issues with different SGML parsers in Python. This is a
# nasty hack, but it works.
data = data[data.index("<body"):]
soup = bs.BeautifulStoneSoup(data,
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
span = soup.find('div', {'id' : 'story'})
if None == span:
chapter=bs.BeautifulSoup('<div class="story"></div>')
soup = bs.BeautifulSoup(data)
found=False
for div in soup.findAll('div'):
if div.has_key('class') and div['class'] == 'notes':
chapter.append(div)
if div.has_key('id') and div['id'] == 'story':
chapter.append(div)
found=True
if not found:
raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
return self.utf8FromSoup(url,span)
return self.utf8FromSoup(url,chapter)
def getClass():
    """Return the site adapter class defined in this module."""
    return TheWritersCoffeeShopComSiteAdapter