mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-22 08:44:13 +01:00
twiwrite adult stories broke BeautifulSoup parsing in the calibre plugin only. I suspect the plugin uses a different SGMLParser.
Stripping everything before the <body> tag first works around it.
This commit is contained in:
parent
fbcc582541
commit
f7c321a36e
1 changed file with 7 additions and 2 deletions
|
|
@ -84,7 +84,7 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
|
|||
params['submit'] = 'Submit'
|
||||
|
||||
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
|
||||
logging.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
logging.info("Will now login to URL (%s) as (%s)" % (loginUrl,
|
||||
params['penname']))
|
||||
|
||||
d = self._fetchUrl(loginUrl, params)
|
||||
|
|
@ -119,6 +119,8 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
|
|||
raise exceptions.FailedToDownload(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
|
||||
|
||||
# use BeautifulSoup HTML parser to make everything easier to find.
|
||||
data = data[data.index("<body"):] # desperate--strip before <body
|
||||
# in calibre plugin only, soup wasn't parsing the html properly.
|
||||
soup = bs.BeautifulSoup(data)
|
||||
|
||||
## Title
|
||||
|
|
@ -211,7 +213,10 @@ class TwiwriteNetSiteAdapter(BaseSiteAdapter):
|
|||
|
||||
logging.debug('Getting chapter text from: %s' % url)
|
||||
|
||||
soup = bs.BeautifulStoneSoup(self._fetchUrl(url),
|
||||
data = self._fetchUrl(url)
|
||||
data = data[data.index("<body"):] # desperate--strip before <body
|
||||
# in calibre plugin only, soup wasn't parsing the html properly.
|
||||
soup = bs.BeautifulStoneSoup(data,
|
||||
selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
|
||||
|
||||
span = soup.find('div', {'id' : 'story'})
|
||||
|
|
|
|||
Loading…
Reference in a new issue