From aa4ba528b7679d1cb57af351b5741716cf8640ff Mon Sep 17 00:00:00 2001
From: David Lynch
Date: Sat, 5 Dec 2015 01:34:20 -0600
Subject: [PATCH] Let sites define their own custom arguments

Use this to let xenforo force the inclusion of the index-post
---
Review note: Site._parse_args hands argparse an empty list when args is None.
parse_args(None) reads sys.argv[1:], so building a Site without explicit args
(for example calling leech() as a library function) would try to parse, and
exit on, the host program's command line. The blob hash on the
sites/__init__.py index line is carried over from the original patch.

 leech.py          | 10 +++++-----
 sites/__init__.py | 15 ++++++++++++++-
 sites/xenforo.py  |  8 ++++++++
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/leech.py b/leech.py
index 35dc617..ada168b 100755
--- a/leech.py
+++ b/leech.py
@@ -43,14 +43,14 @@ cover_template = '''
 '''
 
 
-def leech(url, filename=None, cache=True):
+def leech(url, filename=None, cache=True, args=None):
     # we have: a page, which could be absolutely any part of a story, or not a story at all
     # check a bunch of things which are completely ff.n specific, to get text from it
     site = sites.get(url)
     if not site:
         raise Exception("No site handler found")
 
-    handler = site(fetch, cache=cache)
+    handler = site(fetch, cache=cache, args=args)
 
     with open('leech.json') as store_file:
         store = json.load(store_file)
@@ -82,7 +82,7 @@ def leech(url, filename=None, cache=True):
 
     filename = filename or story['title'] + '.epub'
 
-    filename = epub.make_epub(filename, html, metadata, extra_files = (css, cover_image))
+    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))
     return filename
 
 
@@ -92,7 +92,7 @@ if __name__ == '__main__':
     parser.add_argument('--filename', help="output filename (the title is used if this isn't provided)")
     parser.add_argument('--no-cache', dest='cache', action='store_false')
     parser.set_defaults(cache=True)
-    args = parser.parse_args()
+    args, extra_args = parser.parse_known_args()
 
-    filename = leech(args.url, filename=args.filename, cache=args.cache)
+    filename = leech(args.url, filename=args.filename, cache=args.cache, args=extra_args)
     print("File created:", filename)
diff --git a/sites/__init__.py b/sites/__init__.py
index 0efb3ef..5cf4ddb 100644
--- a/sites/__init__.py
+++ b/sites/__init__.py
@@ -1,4 +1,5 @@
 
+import argparse
 from bs4 import BeautifulSoup
 
 _sites = []
@@ -8,11 +9,12 @@ class Site:
     """A Site handles checking whether a URL might represent a site, and then
     extracting the content of a story from said site.
     """
-    def __init__(self, fetch, cache=True):
+    def __init__(self, fetch, cache=True, args=None):
         super().__init__()
         self.fetch = fetch
         self.cache = cache
         self.footnotes = []
+        self.options = self._parse_args(args)
 
     @staticmethod
     def matches(url):
@@ -24,6 +26,17 @@ class Site:
     def login(self, login_details):
         raise NotImplementedError()
 
+    def _parse_args(self, args):
+        """Parse this site's own options from a list of argument strings."""
+        parser = argparse.ArgumentParser()
+        self._add_arguments(parser)
+        # argparse falls back to sys.argv[1:] when handed None; use no options instead.
+        return parser.parse_args([] if args is None else args)
+
+    def _add_arguments(self, parser):
+        """Hook for subclasses to register site-specific options on parser."""
+        pass
+
     def _soup(self, url, method='html5lib', **kw):
         page = self.fetch(url, cached=self.cache, **kw)
         if not page:
diff --git a/sites/xenforo.py b/sites/xenforo.py
index 5846581..fa47352 100644
--- a/sites/xenforo.py
+++ b/sites/xenforo.py
@@ -82,6 +82,11 @@ class XenForo(Site):
         if not links:
             raise SiteException("No links in index?")
 
+        if self.options.include_index:
+            fake_link = self._new_tag('a', href=url)
+            fake_link.string = "Index"
+            links.insert(0, fake_link)
+
         return links
 
     def _chapter(self, url, chapter_number):
@@ -129,6 +134,9 @@ class XenForo(Site):
             return datetime.datetime.strptime(maybe_date['title'], "%b %d, %Y at %I:%M %p")
         raise SiteException("No date", maybe_date)
 
+    def _add_arguments(self, parser):
+        parser.add_argument('--include-index', dest='include_index', action='store_true', default=False)
+
 
 class XenForoIndex(XenForo):
     @classmethod