From 23b76d2aace3b64487c4715cbc2371c5eb7b80f4 Mon Sep 17 00:00:00 2001 From: random human Date: Thu, 30 Aug 2018 03:50:28 +0530 Subject: [PATCH 1/4] Fix royalroadl.com chapter dates Since the timestamp provided with the chapter list is approximate, fetch the actual chapter in order to get unixtime. --- sites/royalroad.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sites/royalroad.py b/sites/royalroad.py index cb63dc2..5cd3d99 100644 --- a/sites/royalroad.py +++ b/sites/royalroad.py @@ -37,8 +37,10 @@ class RoyalRoad(Site): for chapter in soup.select('#chapters tbody tr[data-url]'): chapter_url = str(urllib.parse.urljoin(story.url, str(chapter.get('data-url')))) + # Have to get exact publishing time from the chapter page + chapter_soup = self._soup(chapter_url) updated = datetime.datetime.fromtimestamp( - int(chapter.find('time').get('unixtime')), + int(chapter_soup.find(class_="profile-info").find('time').get('unixtime')), ) story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=self._chapter(chapter_url), date=updated)) From 69c9c21f47ab8074fd9bdcda5e5e9f1907106a66 Mon Sep 17 00:00:00 2001 From: David Lynch Date: Wed, 29 Aug 2018 23:00:45 -0500 Subject: [PATCH 2/4] Avoid double-fetching the chapter contents Doesn't matter hugely if caching is enabled, but it's still suboptimal. --- sites/royalroad.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sites/royalroad.py b/sites/royalroad.py index 5cd3d99..d133262 100644 --- a/sites/royalroad.py +++ b/sites/royalroad.py @@ -39,11 +39,9 @@ class RoyalRoad(Site): # Have to get exact publishing time from the chapter page chapter_soup = self._soup(chapter_url) - updated = datetime.datetime.fromtimestamp( - int(chapter_soup.find(class_="profile-info").find('time').get('unixtime')), - ) + contents, updated = self._chapter(chapter_url) - story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=self._chapter(chapter_url), date=updated)) + story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=contents, date=updated)) http.client._MAXHEADERS = original_maxheaders @@ -56,5 +54,7 @@ class RoyalRoad(Site): # TODO: this could be more robust, and I don't know if there's post-chapter notes anywhere as well. author_note = soup.find('div', class_='author-note-portlet') + + updated = int(soup.find(class_="profile-info").find('time').get('unixtime')) - return (author_note and (author_note.prettify() + '
') or '') + content.prettify() + return (author_note and (author_note.prettify() + '
') or '') + content.prettify(), updated From a151f02c8438f3351f2c768e8f3112606ea2e614 Mon Sep 17 00:00:00 2001 From: David Lynch Date: Wed, 29 Aug 2018 23:01:43 -0500 Subject: [PATCH 3/4] Fix spacing ...I'm bad at the web interface. --- sites/royalroad.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sites/royalroad.py b/sites/royalroad.py index d133262..ad1c099 100644 --- a/sites/royalroad.py +++ b/sites/royalroad.py @@ -37,8 +37,6 @@ class RoyalRoad(Site): for chapter in soup.select('#chapters tbody tr[data-url]'): chapter_url = str(urllib.parse.urljoin(story.url, str(chapter.get('data-url')))) - # Have to get exact publishing time from the chapter page - chapter_soup = self._soup(chapter_url) contents, updated = self._chapter(chapter_url) story.add(Chapter(title=chapter.find('a', href=True).string.strip(), contents=contents, date=updated)) @@ -54,7 +52,7 @@ class RoyalRoad(Site): # TODO: this could be more robust, and I don't know if there's post-chapter notes anywhere as well. author_note = soup.find('div', class_='author-note-portlet') - + updated = int(soup.find(class_="profile-info").find('time').get('unixtime')) return (author_note and (author_note.prettify() + '
') or '') + content.prettify(), updated From 6c8ac39d64c605523d07b750391e845e212a511c Mon Sep 17 00:00:00 2001 From: David Lynch Date: Wed, 29 Aug 2018 23:04:17 -0500 Subject: [PATCH 4/4] fromtimestamp still needed My bad. --- sites/royalroad.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sites/royalroad.py b/sites/royalroad.py index ad1c099..0f73691 100644 --- a/sites/royalroad.py +++ b/sites/royalroad.py @@ -53,6 +53,8 @@ class RoyalRoad(Site): # TODO: this could be more robust, and I don't know if there's post-chapter notes anywhere as well. author_note = soup.find('div', class_='author-note-portlet') - updated = int(soup.find(class_="profile-info").find('time').get('unixtime')) + updated = datetime.datetime.fromtimestamp( + int(soup.find(class_="profile-info").find('time').get('unixtime')) + ) return (author_note and (author_note.prettify() + '
') or '') + content.prettify(), updated