1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2025-12-08 01:14:10 +01:00

Strip out the new stolen-content warnings on royalroad

They might make these harder to work out in the future, but for now...
This commit is contained in:
David Lynch 2024-01-19 21:34:39 -06:00
parent 9171672663
commit d30e56a518

View file

@@ -86,7 +86,7 @@ class RoyalRoad(Site):
soup = self._soup(url)
content = soup.find('div', class_='chapter-content')
self._clean(content)
self._clean(content, soup)
self._clean_spoilers(content, chapterid)
content = str(content)
@@ -108,6 +108,19 @@ class RoyalRoad(Site):
return content, updated
def _clean(self, contents, full_page):
    """Clean chapter contents and drop elements hidden by inline CSS.

    Runs the parent class's ``_clean`` first. Then every ``<style>`` tag in
    ``full_page`` is checked for a leading rule of the form
    ``.<name> { display: none; }``; each element inside ``contents`` that
    carries that class is removed with ``decompose()``.

    Args:
        contents: Parsed chapter-content element to clean.
        full_page: Parsed page searched for ``<style>`` tags.

    Returns:
        The value returned by the parent ``_clean``, with the hidden
        warning elements removed.
    """
    contents = super()._clean(contents)

    # Royalroad has started inserting "this was stolen" notices into its
    # HTML, and hiding them with CSS. Currently the CSS is very easy to
    # find, so do so and filter them out.
    for style in full_page.find_all('style'):
        css = style.string
        if not css:
            # BeautifulSoup's `.string` is None for an empty <style> tag or
            # one with more than one child node; passing None to re.match
            # would raise TypeError and abort the whole chapter.
            continue
        if m := re.match(r'\s*\.(\w+)\s*{\s*display:\s*none;\s*}', css):
            for warning in contents.find_all(class_=m.group(1)):
                warning.decompose()

    return contents
def _clean_spoilers(self, content, chapterid):
# Spoilers to footnotes
for spoiler in content.find_all(class_=('spoiler-new')):