From 9092386cca01169bfe009a02eb483c326e6b4ff7 Mon Sep 17 00:00:00 2001
From: David Lynch
Date: Tue, 24 Dec 2013 03:03:59 -0600
Subject: [PATCH] Don't die immediately if cleaning fails

---
 sites/fanfictionnet.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/sites/fanfictionnet.py b/sites/fanfictionnet.py
index 76be5ad..2acda92 100644
--- a/sites/fanfictionnet.py
+++ b/sites/fanfictionnet.py
@@ -9,7 +9,7 @@ def match(url):
 
 def extract(url, fetch):
     page = fetch(url)
-    soup = BeautifulSoup(page)
+    soup = BeautifulSoup(page, 'html5lib')
     content = soup.find(id="content_wrapper_inner")
     if not content:
         return
@@ -51,7 +51,10 @@ def _extract_chapter(url, title, fetch):
 
     # clean up some invalid xhtml attributes
     # TODO: be more selective about this somehow
-    for tag in text.find_all(True):
-        tag.attrs = None
+    try:
+        for tag in text.find_all(True):
+            tag.attrs = None
+    except Exception as e:
+        print("Trouble cleaning attributes", e)
 
     return (title, text.prettify())