1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2025-12-15 04:44:06 +01:00

More error-resistant tag sanitizing

This commit is contained in:
David Lynch 2013-11-20 14:53:35 -06:00
parent db7c1a3c39
commit e434d9839c

View file

@@ -81,10 +81,12 @@ def _extract_chapter(url, title):
# clean up some invalid xhtml attributes
# TODO: be more thorough about this somehow
for tag in text.find_all('hr'):
del(tag.attrs['size'])
del(tag.attrs['noshade'])
if 'size' in tag.attrs:
del(tag.attrs['size'])
if 'noshade' in tag.attrs:
del(tag.attrs['noshade'])
return (title, text.prettify())
if __name__ == '__main__':
leech('https://www.fanfiction.net/s/9380249/1/Rationalising-Death')
leech('https://www.fanfiction.net/s/4510497/1/Neon-Genesis-Evangelion-Redux')