From be0d48ec7b47f1e746a1b703d74cf11534345320 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 28 Mar 2016 12:05:37 -0500 Subject: [PATCH] Add exception for bad HTML breaking img finding. --- fanficfare/adapters/base_adapter.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fanficfare/adapters/base_adapter.py b/fanficfare/adapters/base_adapter.py index 5c78e2de..ca015f5b 100644 --- a/fanficfare/adapters/base_adapter.py +++ b/fanficfare/adapters/base_adapter.py @@ -571,11 +571,14 @@ class BaseSiteAdapter(Configurable): #print("include_images:"+self.getConfig('include_images')) if self.getConfig('include_images'): acceptable_attributes.extend(('src','alt','longdesc')) - for img in soup.findAll('img'): - # some pre-existing epubs have img tags that had src stripped off. - if img.has_attr('src'): - (img['src'],img['longdesc'])=self.story.addImgUrl(url,img['src'],fetch, - coverexclusion=self.getConfig('cover_exclusion_regexp')) + try: + for img in soup.find_all('img'): + # some pre-existing epubs have img tags that had src stripped off. + if img.has_attr('src'): + (img['src'],img['longdesc'])=self.story.addImgUrl(url,img['src'],fetch, + coverexclusion=self.getConfig('cover_exclusion_regexp')) + except AttributeError as ae: + logger.info("Parsing for img tags failed--probably poor input HTML. Skipping images.") for attr in self.get_attr_keys(soup): if attr not in acceptable_attributes: