Explicitly specify the html5lib parser for BS4 when reading in an existing epub. For cal 1.48.

This commit is contained in:
Jim Miller 2015-03-31 09:16:41 -05:00
parent 06da8235cc
commit 3d90849c0b

View file

@@ -107,7 +107,7 @@ def get_update_data(inputio,
pass # corner case I bumped into while testing. pass # corner case I bumped into while testing.
if re.match(r'.*/(file|chapter)\d+\.x?html',href): if re.match(r'.*/(file|chapter)\d+\.x?html',href):
if getsoups: if getsoups:
soup = bs.BeautifulSoup(epub.read(href).decode("utf-8")) soup = bs.BeautifulSoup(epub.read(href).decode("utf-8"),"html5lib")
for img in soup.findAll('img'): for img in soup.findAll('img'):
newsrc='' newsrc=''
longdesc='' longdesc=''