mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-05 02:51:48 +02:00
Remove extra html body from ancient ffnet chapters.
This commit is contained in:
parent
850567afde
commit
42473d4f1d
1 changed files with 9 additions and 0 deletions
|
|
@ -239,6 +239,15 @@ class FanFictionNetSiteAdapter(BaseSiteAdapter):
|
|||
## additional to whatever the
|
||||
## slow_down_sleep_time setting is.
|
||||
data = self._fetchUrl(url)
|
||||
|
||||
# some ancient stories have body tags inside them that cause
|
||||
# soup parsing to discard the content. For story text we
|
||||
# don't care about anything before "<div class='storytextp"
|
||||
# (there's a space after storytextp, so no close quote(')) and
|
||||
# this kills any body tags.
|
||||
data = data[data.index("<div class='storytextp"):]
|
||||
# str.replace returns a new string (str is immutable), so rebind the
# result; otherwise the body tags are left in place for the parser.
data = data.replace("<body","<notbody").replace("<BODY","<NOTBODY")
|
||||
|
||||
soup = bs.BeautifulSoup(data)
|
||||
|
||||
## Remove the 'share' button.
|
||||
|
|
|
|||
Loading…
Reference in a new issue