mirror of
https://github.com/kemayo/leech
synced 2025-12-30 04:02:21 +01:00
Just full-on strip attributes
This commit is contained in:
parent
e434d9839c
commit
df13ee7b2a
1 changed file with 3 additions and 6 deletions
9
leech.py
9
leech.py
|
|
@ -79,12 +79,9 @@ def _extract_chapter(url, title):
|
|||
text = content.find(id="storytext")
|
||||
|
||||
# clean up some invalid xhtml attributes
|
||||
# TODO: be more thorough about this somehow
|
||||
for tag in text.find_all('hr'):
|
||||
if 'size' in tag.attrs:
|
||||
del(tag.attrs['size'])
|
||||
if 'noshade' in tag.attrs:
|
||||
del(tag.attrs['noshade'])
|
||||
# TODO: be more selective about this somehow
|
||||
for tag in text.find_all(True):
|
||||
tag.attrs = None
|
||||
|
||||
return (title, text.prettify())
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue