mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-08 22:06:02 +02:00
Allow saving index html easily
This commit is contained in:
parent
661c47501a
commit
6ce808c499
1 changed file with 4 additions and 1 deletion
|
|
@@ -657,7 +657,7 @@ def canonicalize_internal_url(self, url, is_link=True):
|
|||
return frozenset()
|
||||
return frozenset([(parts.netloc, (parts.path or '').rstrip('/'))])
|
||||
|
||||
def index_to_soup(self, url_or_raw, raw=False, as_tree=False):
|
||||
def index_to_soup(self, url_or_raw, raw=False, as_tree=False, save_raw=None):
|
||||
'''
|
||||
Convenience method that takes an URL to the index page and returns
|
||||
a `BeautifulSoup <https://www.crummy.com/software/BeautifulSoup/bs3/documentation.html>`_
|
||||
|
|
@@ -692,6 +692,9 @@ def index_to_soup(self, url_or_raw, raw=False, as_tree=False):
|
|||
else:
|
||||
_raw = xml_to_unicode(_raw, strip_encoding_pats=True, resolve_entities=True)[0]
|
||||
_raw = clean_xml_chars(_raw)
|
||||
if save_raw:
|
||||
with lopen(save_raw, 'wb') as f:
|
||||
f.write(_raw.encode('utf-8'))
|
||||
if as_tree:
|
||||
from html5_parser import parse
|
||||
return parse(_raw)
|
||||
|
|
|
|||
Loading…
Reference in a new issue