From e099f47e66a2a529d354cd06304995cb69f97a24 Mon Sep 17 00:00:00 2001
From: David Lynch
Date: Fri, 17 Nov 2017 21:37:13 -0600
Subject: [PATCH] Support: RoyalRoad

---
 README.markdown    |  2 ++
 sites/royalroad.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 sites/royalroad.py

diff --git a/README.markdown b/README.markdown
index 84e69ae..0bfcc13 100644
--- a/README.markdown
+++ b/README.markdown
@@ -33,6 +33,8 @@ Supports
  * ArchiveOfOurOwn
    * Yes, it has its own built-in EPUB export, but the formatting is horrible
  * Various XenForo-based sites: SpaceBattles and SufficientVelocity, most notably
+ * RoyalRoad
+ * Fiction.live (Anonkun)
  * DeviantArt galleries/collections
  * Sta.sh
  * Completely arbitrary sites, with a bit more work (see below)
diff --git a/sites/royalroad.py b/sites/royalroad.py
new file mode 100644
index 0000000..6a64a41
--- /dev/null
+++ b/sites/royalroad.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+
+import http.client
+import logging
+import datetime
+import re
+import urllib.parse
+from . import register, Site, Section, Chapter
+
+logger = logging.getLogger(__name__)
+
+
+@register
+class RoyalRoad(Site):
+    """Royal Road: a place where people write novels, mostly seeming to be light-novel in tone."""
+    @staticmethod
+    def matches(url):
+        # e.g. https://royalroadl.com/fiction/6752/lament-of-the-fallen
+        match = re.match(r'^(https?://royalroadl\.com/fiction/\d+)/?.*', url)
+        if match:
+            return match.group(1) + '/'
+
+    def extract(self, url):
+        workid = re.match(r'^https?://royalroadl\.com/fiction/(\d+)/?.*', url).group(1)
+        soup = self._soup('https://royalroadl.com/fiction/{}'.format(workid))
+        # should have gotten redirected, for a valid title
+
+        # RoyalRoad responses carry enough headers to exceed http.client's default cap of 100, so raise it temporarily
+        original_maxheaders = http.client._MAXHEADERS
+        http.client._MAXHEADERS = 1000
+
+        story = Section(
+            title=soup.find('h1', property='name').string.strip(),
+            author=soup.find('meta', property='books:author').get('content').strip(),
+            url=soup.find('meta', property='og:url').get('content').strip()
+        )
+
+        for chapter in soup.select('#chapters tbody tr[data-url]'):
+            chapter_url = str(urllib.parse.urljoin(story.url, str(chapter.get('data-url'))))
+
+            updated = datetime.datetime.fromtimestamp(
+                int(chapter.find('time').get('unixtime'))
+            )
+
+            story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=self._chapter(chapter_url), date=updated))
+
+        http.client._MAXHEADERS = original_maxheaders
+
+        return story
+
+    def _chapter(self, url):
+        logger.info("Extracting chapter @ %s", url)
+        soup = self._soup(url)
+        content = soup.find('div', class_='chapter-content')
+
+        # TODO: this could be more robust, and I don't know if there are post-chapter notes anywhere as well.
+        author_note = soup.find('div', class_='author-note-portlet')
+
+        return (author_note and (author_note.prettify() + '<hr/>') or '') + content.prettify()
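
A quick smoke-test sketch for the new handler, for anyone reviewing locally. This is not part of the patch: the `RoyalRoad(requests.Session())` constructor call and the `story.title`/`story.author` attributes are assumptions based on how the Section kwargs above are passed, and the fiction URL is just the example from the `matches()` comment.

    # hypothetical smoke test -- constructor wiring and Section attributes are assumptions
    import requests
    from sites.royalroad import RoyalRoad

    url = 'https://royalroadl.com/fiction/6752/lament-of-the-fallen'
    normalized = RoyalRoad.matches(url)
    print(normalized)  # expected: 'https://royalroadl.com/fiction/6752/'

    if normalized:
        site = RoyalRoad(requests.Session())  # assumed: Site takes a session
        story = site.extract(normalized)
        print(story.title, story.author)  # assumed: Section exposes its kwargs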