Add exception for bad HTML breaking img finding.

This commit is contained in:
Jim Miller 2016-03-28 12:05:37 -05:00
parent cf54f274d4
commit be0d48ec7b

View file

@@ -571,11 +571,14 @@ class BaseSiteAdapter(Configurable):
#print("include_images:"+self.getConfig('include_images'))
if self.getConfig('include_images'):
acceptable_attributes.extend(('src','alt','longdesc'))
for img in soup.findAll('img'):
# some pre-existing epubs have img tags that had src stripped off.
if img.has_attr('src'):
(img['src'],img['longdesc'])=self.story.addImgUrl(url,img['src'],fetch,
coverexclusion=self.getConfig('cover_exclusion_regexp'))
try:
for img in soup.find_all('img'):
# some pre-existing epubs have img tags that had src stripped off.
if img.has_attr('src'):
(img['src'],img['longdesc'])=self.story.addImgUrl(url,img['src'],fetch,
coverexclusion=self.getConfig('cover_exclusion_regexp'))
except AttributeError as ae:
logger.info("Parsing for img tags failed--probably poor input HTML. Skipping images.")
for attr in self.get_attr_keys(soup):
if attr not in acceptable_attributes: