mirror of
https://github.com/kemayo/leech
synced 2025-12-15 04:44:06 +01:00
More error-resistant tag sanitizing
This commit is contained in:
parent
db7c1a3c39
commit
e434d9839c
1 changed file with 5 additions and 3 deletions
8
leech.py
8
leech.py
|
|
@@ -81,10 +81,12 @@ def _extract_chapter(url, title):
|
|||
# clean up some invalid xhtml attributes
|
||||
# TODO: be more thorough about this somehow
|
||||
for tag in text.find_all('hr'):
|
||||
del(tag.attrs['size'])
|
||||
del(tag.attrs['noshade'])
|
||||
if 'size' in tag.attrs:
|
||||
del(tag.attrs['size'])
|
||||
if 'noshade' in tag.attrs:
|
||||
del(tag.attrs['noshade'])
|
||||
|
||||
return (title, text.prettify())
|
||||
|
||||
if __name__ == '__main__':
|
||||
leech('https://www.fanfiction.net/s/9380249/1/Rationalising-Death')
|
||||
leech('https://www.fanfiction.net/s/4510497/1/Neon-Genesis-Evangelion-Redux')
|
||||
|
|
|
|||
Loading…
Reference in a new issue