#!/usr/bin/env python

import argparse
import sys
import json

import sites
import epub
import cover
from fetch import Fetch

fetch = Fetch("leech")

# Minimal XHTML wrapper for each chapter page in the generated epub.
# (The markup here is a reconstruction: only the {title}/{text} placeholders
# survived in the source, so this is the simplest valid EPUB-style page.)
html_template = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>{title}</title></head>
<body>
<h1>{title}</h1>
{text}
</body>
</html>
'''

# Cover page; images/cover.png is generated and bundled further down.
cover_template = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Cover</title></head>
<body>
<div><img src="images/cover.png" alt="Cover" /></div>
</body>
</html>
'''
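# Site handlers (from the sites module) are expected to expose login(credentials)
# and extract(url). extract() should return a dict with 'title', 'author',
# 'chapters' (a list of (title, html, date) tuples), and optionally 'footnotes';
# this shape is inferred from how the result is consumed below, not from any
# documented interface.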
def leech(url, filename=None, cache=True, args=None):
    # We have a page which could be any part of a story, or not a story at
    # all; delegate to a site handler that knows how to get text out of it.
    site = sites.get(url)
    if not site:
        raise Exception("No site handler found")

    handler = site(fetch, cache=cache, args=args)

    # leech.json is expected to look something like
    #   {"logins": {"SiteHandlerClassName": <credentials>}}
    # with the exact credential shape left to each handler's login().
    with open('leech.json') as store_file:
        store = json.load(store_file)
        login = store.get('logins', {}).get(site.__name__, False)
        if login:
            handler.login(login)

    story = handler.extract(url)
    if not story:
        raise Exception("Couldn't extract story")

    # Chapters are (title, html, date) tuples; the dates bound the story's
    # publication window in the epub metadata.
    dates = [c[2] for c in story['chapters'] if c[2]]
    metadata = {
        'title': story['title'],
        'author': story['author'],
        'unique_id': url,
        'started': min(dates),
        'updated': max(dates),
    }

    html = [('Cover', 'cover.html', cover_template)]
    for i, chapter in enumerate(story['chapters']):
        html.append((chapter[0], 'chapter%d.html' % (i + 1),
                     html_template.format(title=chapter[0], text=chapter[1])))

    if story.get('footnotes'):
        html.append(("Footnotes", 'footnotes.html',
                     html_template.format(title="Footnotes", text=story['footnotes'])))

    css = ('Styles/base.css',
           fetch('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css'),
           'text/css')
    cover_image = ('images/cover.png',
                   cover.make_cover(story['title'], story['author']).read(),
                   'image/png')

    filename = filename or story['title'] + '.epub'
    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))

    return filename


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('url', help="url of a story to fetch", nargs='?')
    parser.add_argument('--filename', help="output filename (the title is used if this isn't provided)")
    parser.add_argument('--no-cache', dest='cache', action='store_false')
    parser.add_argument('--flush', dest='flush', action='store_true')
    parser.set_defaults(cache=True, flush=False)
    args, extra_args = parser.parse_known_args()

    if args.flush:
        rows = fetch.flush()
        print("Flushed cache of {} rows".format(rows))
        sys.exit()

    if not args.url:
        sys.exit("URL is required")

    filename = leech(args.url, filename=args.filename, cache=args.cache, args=extra_args)
    print("File created:", filename)
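# Example invocations, assuming this file is saved as leech.py (the story URL
# below is illustrative, not a real story):
#   python leech.py "https://www.fanfiction.net/s/1234567/1/Example"
#   python leech.py "https://www.fanfiction.net/s/1234567/1/Example" --filename story.epub --no-cache
#   python leech.py --flush    # empty the request cache and exit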