mirror of
https://github.com/kemayo/leech
synced 2025-12-08 01:14:10 +01:00
Strip out the new stolen-content warnings on royalroad
They might make these harder to work out in the future, but for now...
This commit is contained in:
parent
9171672663
commit
d30e56a518
1 changed files with 14 additions and 1 deletions
|
|
@ -86,7 +86,7 @@ class RoyalRoad(Site):
|
|||
soup = self._soup(url)
|
||||
content = soup.find('div', class_='chapter-content')
|
||||
|
||||
self._clean(content)
|
||||
self._clean(content, soup)
|
||||
self._clean_spoilers(content, chapterid)
|
||||
|
||||
content = str(content)
|
||||
|
|
@ -108,6 +108,19 @@ class RoyalRoad(Site):
|
|||
|
||||
return content, updated
|
||||
|
||||
def _clean(self, contents, full_page):
    """Clean chapter markup and remove CSS-hidden "stolen content" notices.

    Runs the parent class's ``_clean`` on ``contents`` first, then scans
    every ``<style>`` element in ``full_page`` for a rule of the form
    ``.<name> { display: none; }`` at the start of the stylesheet text.
    Every element inside ``contents`` carrying that class is removed.

    Args:
        contents: Parsed chapter-content element; modified in place.
        full_page: Parsed full page, searched for ``<style>`` elements.

    Returns:
        The cleaned ``contents`` as returned by the parent ``_clean``.
    """
    contents = super()._clean(contents)

    # Royalroad has started inserting "this was stolen" notices into its
    # HTML, and hiding them with CSS. Currently the CSS is very easy to
    # find, so do so and filter them out.
    for style in full_page.find_all('style'):
        css = style.string
        if css is None:
            # An empty <style>, or one with several children, has no single
            # string; re.match() would raise TypeError on None, so skip it.
            continue
        if m := re.match(r'\s*\.(\w+)\s*{\s*display:\s*none;\s*}', css):
            for warning in contents.find_all(class_=m.group(1)):
                warning.decompose()

    return contents
|
||||
|
||||
def _clean_spoilers(self, content, chapterid):
|
||||
# Spoilers to footnotes
|
||||
for spoiler in content.find_all(class_=('spoiler-new')):
|
||||
|
|
|
|||
Loading…
Reference in a new issue