mirror of
https://github.com/kemayo/leech
synced 2025-12-06 16:33:16 +01:00
Extract spoilers to footnotes on royalroad
This commit is contained in:
parent
0d0bdf470e
commit
ce998c84c3
1 changed file with 36 additions and 4 deletions
|
|
@ -4,7 +4,7 @@ import http.client
|
||||||
import logging
|
import logging
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
from . import register, Site, Section, Chapter
|
from . import register, Site, Section, Chapter, SiteSpecificOption
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -13,6 +13,17 @@ logger = logging.getLogger(__name__)
|
||||||
class RoyalRoad(Site):
|
class RoyalRoad(Site):
|
||||||
domain = r'royalroad'
|
domain = r'royalroad'
|
||||||
|
|
||||||
|
@staticmethod
def get_site_specific_option_defs():
    """Return the option definitions that are specific to this site.

    The list holds a single ``skip_spoilers`` option, enabled by default
    and exposed as the ``--skip-spoilers/--include-spoilers`` flag pair.
    """
    spoiler_option = SiteSpecificOption(
        'skip_spoilers',
        '--skip-spoilers/--include-spoilers',
        default=True,
        help="If true, do not transcribe any tags that are marked as a spoiler."
    )
    return [spoiler_option]
|
||||||
|
|
||||||
"""Royal Road: a place where people write novels, mostly seeming to be light-novel in tone."""
|
"""Royal Road: a place where people write novels, mostly seeming to be light-novel in tone."""
|
||||||
@classmethod
|
@classmethod
|
||||||
def matches(cls, url):
|
def matches(cls, url):
|
||||||
|
|
@ -39,18 +50,25 @@ class RoyalRoad(Site):
|
||||||
for chapter in soup.select('#chapters tbody tr[data-url]'):
|
for chapter in soup.select('#chapters tbody tr[data-url]'):
|
||||||
chapter_url = str(self._join_url(story.url, str(chapter.get('data-url'))))
|
chapter_url = str(self._join_url(story.url, str(chapter.get('data-url'))))
|
||||||
|
|
||||||
contents, updated = self._chapter(chapter_url)
|
contents, updated = self._chapter(chapter_url, len(story) + 1)
|
||||||
|
|
||||||
story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=contents, date=updated))
|
story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=contents, date=updated))
|
||||||
|
|
||||||
http.client._MAXHEADERS = original_maxheaders
|
http.client._MAXHEADERS = original_maxheaders
|
||||||
|
|
||||||
|
story.footnotes = self.footnotes
|
||||||
|
self.footnotes = []
|
||||||
|
|
||||||
return story
|
return story
|
||||||
|
|
||||||
def _chapter(self, url):
|
def _chapter(self, url, chapterid):
|
||||||
logger.info("Extracting chapter @ %s", url)
|
logger.info("Extracting chapter @ %s", url)
|
||||||
soup = self._soup(url)
|
soup = self._soup(url)
|
||||||
content = soup.find('div', class_='chapter-content').prettify()
|
content = soup.find('div', class_='chapter-content')
|
||||||
|
|
||||||
|
self._clean_spoilers(content, chapterid)
|
||||||
|
|
||||||
|
content = content.prettify()
|
||||||
|
|
||||||
author_note = soup.find_all('div', class_='author-note-portlet')
|
author_note = soup.find_all('div', class_='author-note-portlet')
|
||||||
|
|
||||||
|
|
@ -69,6 +87,20 @@ class RoyalRoad(Site):
|
||||||
|
|
||||||
return content, updated
|
return content, updated
|
||||||
|
|
||||||
|
def _clean_spoilers(self, content, chapterid):
    """Replace every spoiler element inside *content*, modifying it in place.

    Each element carrying the ``spoiler-new`` class is swapped for a fresh
    ``div``:

    * when the ``skip_spoilers`` option is set, the spoiler is handed to
      ``self._footnote`` and the ``div`` holds the link it returns, labelled
      with the spoiler caption if there is one;
    * otherwise the ``div`` holds a plain ``[SPOILER]`` / ``[SPOILER: caption]``
      text marker.

    Args:
        content: parsed chapter markup exposing ``find_all``.
        chapterid: chapter identifier forwarded unchanged to ``self._footnote``.

    Returns:
        None.
    """
    # Spoilers to footnotes
    for spoiler in content.find_all(class_='spoiler-new'):
        # Use .get(): a spoiler without a data-caption attribute must fall
        # through to the caption-less handling below, not raise KeyError.
        spoiler_title = spoiler.get('data-caption')
        if self.options['skip_spoilers']:
            # NOTE(review): _footnote is defined elsewhere; it presumably
            # records the spoiler as a footnote and returns a link tag.
            link = self._footnote(spoiler, chapterid)
            if spoiler_title:
                link.string = spoiler_title
        else:
            link = f'[SPOILER: {spoiler_title}]' if spoiler_title else '[SPOILER]'
        new_spoiler = self._new_tag('div')
        new_spoiler.append(link)
        spoiler.replace_with(new_spoiler)
|
||||||
|
|
||||||
|
|
||||||
@register
|
@register
|
||||||
class RoyalRoadL(RoyalRoad):
|
class RoyalRoadL(RoyalRoad):
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue