Explicitly specify the HTML parser (html5lib) for BS4 when reading in an existing epub. For cal 1.48.

2025-12-06 08:52:55 +01:00 · 2015-03-31 09:16:41 -05:00 · 2015-03-31 09:16:41 -05:00 · 3d90849c0b
commit 3d90849c0b
parent 06da8235cc
1 changed file with 1 addition and 1 deletion
--- a/fanficdownloader/epubutils.py
+++ b/fanficdownloader/epubutils.py
@@ -107,7 +107,7 @@ def get_update_data(inputio,
                        pass # corner case I bumped into while testing.
                if re.match(r'.*/(file|chapter)\d+\.x?html',href):
                    if getsoups:
-                        soup = bs.BeautifulSoup(epub.read(href).decode("utf-8"))
+                        soup = bs.BeautifulSoup(epub.read(href).decode("utf-8"),"html5lib")
                        for img in soup.findAll('img'):
                            newsrc=''
                            longdesc=''