From e434d9839c0a04caaf8b26ff208dedc2e02a9697 Mon Sep 17 00:00:00 2001
From: David Lynch
Date: Wed, 20 Nov 2013 14:53:35 -0600
Subject: [PATCH] More error-resistant tag sanitizing

---
 leech.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/leech.py b/leech.py
index d97d96a..c7678af 100644
--- a/leech.py
+++ b/leech.py
@@ -81,10 +81,12 @@ def _extract_chapter(url, title):
     # clean up some invalid xhtml attributes
     # TODO: be more thorough about this somehow
     for tag in text.find_all('hr'):
-        del(tag.attrs['size'])
-        del(tag.attrs['noshade'])
+        if 'size' in tag.attrs:
+            del(tag.attrs['size'])
+        if 'noshade' in tag.attrs:
+            del(tag.attrs['noshade'])
 
     return (title, text.prettify())
 
 if __name__ == '__main__':
-    leech('https://www.fanfiction.net/s/9380249/1/Rationalising-Death')
+    leech('https://www.fanfiction.net/s/4510497/1/Neon-Genesis-Evangelion-Redux')