From f9982faaef50d8ef779a72d4d8380b14f2d513aa Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 8 Nov 2016 21:19:16 -0600 Subject: [PATCH] Remove noscript tags from base_xenforoforum content--otherwise double images appear in Kindle and Nook. --- fanficfare/adapters/base_xenforoforum_adapter.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fanficfare/adapters/base_xenforoforum_adapter.py b/fanficfare/adapters/base_xenforoforum_adapter.py index f51e15c6..0860564d 100644 --- a/fanficfare/adapters/base_xenforoforum_adapter.py +++ b/fanficfare/adapters/base_xenforoforum_adapter.py @@ -170,10 +170,6 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl, params['login'])) - # soup = self.make_soup(self._fetchUrl(loginUrl)) - # params['ctkn']=soup.find('input', {'name':'ctkn'})['value'] - # params[soup.find('input', {'id':'password'})['name']] = params['password'] - d = self._fetchUrl(loginUrl, params) if "Log Out" not in d : @@ -184,6 +180,16 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): else: return True + def make_soup(self,data): + soup = super(BaseXenForoForumAdapter, self).make_soup(data) + ## after lazy load images, there are noscript blocks also + ## containing <img> tags. The problem comes in when they hit + ## book readers such as Kindle and Nook and then you see the + ## same images twice. + for noscript in soup.find_all('noscript'): + noscript.extract() + return soup + ## Getting the chapter list and the meta data, plus 'is adult' checking. def extractChapterUrlsAndMetadata(self): @@ -385,4 +391,4 @@ class BaseXenForoForumAdapter(BaseSiteAdapter): legend.string = stripHTML(div.button.span) div.insert(0,legend) div.button.extract() - +