From 3d90849c0be98f090322ee71dbdb86b03edf18cc Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 31 Mar 2015 09:16:41 -0500 Subject: [PATCH] Explicit HTML parser for BS4 when reading in existing epub. For cal 1.48. --- fanficdownloader/epubutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fanficdownloader/epubutils.py b/fanficdownloader/epubutils.py index 2e1b041f..f55745df 100644 --- a/fanficdownloader/epubutils.py +++ b/fanficdownloader/epubutils.py @@ -107,7 +107,7 @@ def get_update_data(inputio, pass # corner case I bumped into while testing. if re.match(r'.*/(file|chapter)\d+\.x?html',href): if getsoups: - soup = bs.BeautifulSoup(epub.read(href).decode("utf-8")) + soup = bs.BeautifulSoup(epub.read(href).decode("utf-8"),"html5lib") for img in soup.findAll('img'): newsrc='' longdesc=''