1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2026-02-03 13:23:28 +01:00

Don't die immediately if cleaning fails

This commit is contained in:
David Lynch 2013-12-24 03:03:59 -06:00
parent 7ba7d64959
commit 9092386cca

View file

@@ -9,7 +9,7 @@ def match(url):
def extract(url, fetch):
page = fetch(url)
- soup = BeautifulSoup(page)
+ soup = BeautifulSoup(page, 'html5lib')
content = soup.find(id="content_wrapper_inner")
if not content:
return
@@ -51,7 +51,10 @@ def _extract_chapter(url, title, fetch):
# clean up some invalid xhtml attributes
# TODO: be more selective about this somehow
- for tag in text.find_all(True):
- tag.attrs = None
+ try:
+ for tag in text.find_all(True):
+ tag.attrs = None
+ except Exception as e:
+ print("Trouble cleaning attributes", e)
return (title, text.prettify())