From 17cd0ea4e282c7afff3f25825f4202dbc84c4c08 Mon Sep 17 00:00:00 2001 From: David Lynch Date: Wed, 29 Aug 2018 23:07:06 -0500 Subject: [PATCH] Royalroad domain name fiddliness --- sites/royalroad.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sites/royalroad.py b/sites/royalroad.py index 0f73691..111e2d5 100644 --- a/sites/royalroad.py +++ b/sites/royalroad.py @@ -12,17 +12,19 @@ logger = logging.getLogger(__name__) @register class RoyalRoad(Site): + domain = r'royalroad' + """Royal Road: a place where people write novels, mostly seeming to be light-novel in tone.""" - @staticmethod - def matches(url): - # e.g. https://royalroadl.com/fiction/6752/lament-of-the-fallen - match = re.match(r'^(https?://(?:www\.)?royalroadl\.com/fiction/\d+)/?.*', url) + @classmethod + def matches(cls, url): + # e.g. https://royalroad.com/fiction/6752/lament-of-the-fallen + match = re.match(r'^(https?://(?:www\.)?%s\.com/fiction/\d+)/?.*' % cls.domain, url) if match: return match.group(1) + '/' def extract(self, url): - workid = re.match(r'^https?://(?:www\.)?royalroadl\.com/fiction/(\d+)/?.*', url).group(1) - soup = self._soup('https://www.royalroadl.com/fiction/{}'.format(workid)) + workid = re.match(r'^https?://(?:www\.)?%s\.com/fiction/(\d+)/?.*' % self.domain, url).group(1) + soup = self._soup('https://www.{}.com/fiction/{}'.format(self.domain, workid)) # should have gotten redirected, for a valid title original_maxheaders = http.client._MAXHEADERS @@ -58,3 +60,7 @@ class RoyalRoad(Site): ) return (author_note and (author_note.prettify() + '
') or '') + content.prettify(), updated + +@register +class RoyalRoadL(RoyalRoad): + domain = 'royalroadl'