mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-04-26 04:13:07 +02:00
Handle invalid bytes in index_to_soup() for JavascriptRecipe
This commit is contained in:
parent
e58cd115e1
commit
7b284b949f
1 changed file with 2 additions and 1 deletion
|
|
@@ -16,6 +16,7 @@
|
|||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.web.fetch.javascript import fetch_page, AbortFetch, links_from_selectors
|
||||
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
||||
+from calibre.utils.cleantext import clean_xml_chars
|
||||
|
||||
def image_data_to_url(data, base='cover'):
|
||||
from calibre.utils.imghdr import what
|
||||
|
|
@@ -221,7 +222,7 @@ def index_to_soup(self, url_or_raw, raw=False):
|
|||
if raw:
|
||||
return html
|
||||
import html5lib
|
||||
-root = html5lib.parse(html, treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
||||
+root = html5lib.parse(clean_xml_chars(html), treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
||||
return root
|
||||
|
||||
# ***************************** Internal API *****************************
|
||||
|
|
|
|||
Loading…
Reference in a new issue