From df13ee7b2a967f5c19eddb07b433a5e83f8939db Mon Sep 17 00:00:00 2001 From: David Lynch Date: Tue, 24 Dec 2013 00:54:38 -0600 Subject: [PATCH] Just full-on strip attributes --- leech.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/leech.py b/leech.py index c7678af..c093696 100644 --- a/leech.py +++ b/leech.py @@ -79,12 +79,9 @@ def _extract_chapter(url, title): text = content.find(id="storytext") # clean up some invalid xhtml attributes - # TODO: be more thorough about this somehow - for tag in text.find_all('hr'): - if 'size' in tag.attrs: - del(tag.attrs['size']) - if 'noshade' in tag.attrs: - del(tag.attrs['noshade']) + # TODO: be more selective about this somehow + for tag in text.find_all(True): + tag.attrs = None return (title, text.prettify())