mirror of
https://github.com/kemayo/leech
synced 2026-02-03 13:23:28 +01:00
Don't die immediately if cleaning fails
This commit is contained in:
parent
7ba7d64959
commit
9092386cca
1 changed file with 6 additions and 3 deletions
|
|
@@ -9,7 +9,7 @@ def match(url):
|
|||
|
||||
def extract(url, fetch):
|
||||
page = fetch(url)
|
||||
soup = BeautifulSoup(page)
|
||||
soup = BeautifulSoup(page, 'html5lib')
|
||||
content = soup.find(id="content_wrapper_inner")
|
||||
if not content:
|
||||
return
|
||||
|
|
@@ -51,7 +51,10 @@ def _extract_chapter(url, title, fetch):
|
|||
|
||||
# clean up some invalid xhtml attributes
|
||||
# TODO: be more selective about this somehow
|
||||
for tag in text.find_all(True):
|
||||
tag.attrs = None
|
||||
try:
|
||||
for tag in text.find_all(True):
|
||||
tag.attrs = None
|
||||
except Exception as e:
|
||||
print("Trouble cleaning attributes", e)
|
||||
|
||||
return (title, text.prettify())
|
||||
|
|
|
|||
Loading…
Reference in a new issue