From d30e56a518f866749fc204e05db79f38bbb494eb Mon Sep 17 00:00:00 2001
From: David Lynch
Date: Fri, 19 Jan 2024 21:34:39 -0600
Subject: [PATCH] Strip out the new stolen-content warnings on royalroad

They might make these harder to work out in the future, but for now...
---
 sites/royalroad.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/sites/royalroad.py b/sites/royalroad.py
index 936d1dd..a40df35 100644
--- a/sites/royalroad.py
+++ b/sites/royalroad.py
@@ -86,7 +86,7 @@ class RoyalRoad(Site):
         soup = self._soup(url)
         content = soup.find('div', class_='chapter-content')
 
-        self._clean(content)
+        self._clean(content, soup)
         self._clean_spoilers(content, chapterid)
 
         content = str(content)
@@ -108,6 +108,27 @@ class RoyalRoad(Site):
 
         return content, updated
 
+    def _clean(self, contents, full_page):
+        """Run the base cleanup, then drop CSS-hidden notices from contents.
+
+        contents is the chapter element to clean; full_page is the soup of
+        the whole page, whose <style> tags are searched for the hiding rule.
+        Returns whatever the base class's _clean returned.
+        """
+        contents = super()._clean(contents)
+
+        # Royalroad has started inserting "this was stolen" notices into its
+        # HTML, and hiding them with CSS. Currently the CSS is very easy to
+        # find, so do so and filter them out.
+        for style in full_page.find_all('style'):
+            # .string is None when the <style> tag is empty or has more than
+            # one child; fall back to '' so re.match doesn't raise TypeError.
+            if m := re.match(r'\s*\.(\w+)\s*{\s*display:\s*none;\s*}', style.string or ''):
+                for warning in contents.find_all(class_=m.group(1)):
+                    warning.decompose()
+
+        return contents
+
     def _clean_spoilers(self, content, chapterid):
         # Spoilers to footnotes
         for spoiler in content.find_all(class_=('spoiler-new')):