1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2025-12-30 04:02:21 +01:00

Just full-on strip attributes

This commit is contained in:
David Lynch 2013-12-24 00:54:38 -06:00
parent e434d9839c
commit df13ee7b2a

View file

@@ -79,12 +79,9 @@ def _extract_chapter(url, title):
text = content.find(id="storytext")
# clean up some invalid xhtml attributes
# TODO: be more thorough about this somehow
for tag in text.find_all('hr'):
if 'size' in tag.attrs:
del(tag.attrs['size'])
if 'noshade' in tag.attrs:
del(tag.attrs['noshade'])
# TODO: be more selective about this somehow
for tag in text.find_all(True):
tag.attrs = None
return (title, text.prettify())