diff --git a/sites/fictionlive.py b/sites/fictionlive.py new file mode 100644 index 0000000..def19aa --- /dev/null +++ b/sites/fictionlive.py @@ -0,0 +1,84 @@ +#!/usr/bin/python + +import itertools +import datetime +import re +from . import register, Site, Section, Chapter + + +@register +class FictionLive(Site): + """fiction.live: story text is fetched through the site's JSON API""" + @staticmethod + def matches(url): + # e.g. https://fiction.live/stories/Descendant-of-a-Demon-Lord/SBBA49fQavNQMWxFT + match = re.match(r'^(https?://fiction\.live/stories/[^\/]+/[0-9a-zA-Z]+)/?.*', url) + if match: + return match.group(1) + + def extract(self, url): + workid = re.match(r'^https?://fiction\.live/stories/[^\/]+/([0-9a-zA-Z]+)/?.*', url).group(1) + return self._extract_work(workid) + + def _extract_work(self, workid): + response = self.session.get('https://fiction.live/api/node/{}'.format(workid)).json() + + story = Section( + title=response['t'], + author=response['u'][0]['n'], + url='https://fiction.live/stories/{}/{}'.format(response['t'].replace(' ', '-'), workid) + ) + + # There's a summary in `d` and `b`. + + chapters = ({'ct': 0},) + tuple(c for c in response['bm'] if not c['title'].startswith('#special')) + ({'ct': 9999999999999999},) + + for prevc, currc, nextc in contextiterate(chapters): + # `id`, `title`, `ct`, `isFirst` + # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/0/1448245168594 + # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1449266444062/1449615394752 + # https://fiction.live/api/anonkun/chapters/SBBA49fQavNQMWxFT/1502823848216/9999999999999998 + # i.e. 
format is [current timestamp] / [next timestamp - 1] + chapter_url = 'https://fiction.live/api/anonkun/chapters/{}/{}/{}'.format(workid, currc['ct'], nextc['ct'] - 1) + print("Extracting chapter from", chapter_url) + data = self.session.get(chapter_url).json() + html = [] + + updated = currc['ct'] + for segment in (d for d in data if not d.get('t', '').startswith('#special')): + updated = max(updated, segment['ct']) + if segment['nt'] == 'chapter': + html.extend(('