Explicitly specify the HTML parser (html5lib) for BS4 when reading in an existing epub. For cal 1.48.

This commit is contained in:
Jim Miller 2015-03-31 09:16:41 -05:00
parent 06da8235cc
commit 3d90849c0b

View file

@ -107,7 +107,7 @@ def get_update_data(inputio,
pass # corner case I bumped into while testing.
if re.match(r'.*/(file|chapter)\d+\.x?html',href):
if getsoups:
soup = bs.BeautifulSoup(epub.read(href).decode("utf-8"))
soup = bs.BeautifulSoup(epub.read(href).decode("utf-8"),"html5lib")
for img in soup.findAll('img'):
newsrc=''
longdesc=''