Let sites define their own custom arguments
Use this to let xenforo force the inclusion of the index-post
commit aa4ba528b7
parent c69eb1e33e

3 changed files with 24 additions and 6 deletions
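The mechanism, in outline: the top-level parser in leech.py now keeps only the flags it knows and hands everything else to the site handler, which parses the remainder with its own argparse parser. A condensed, runnable sketch of that two-stage parse (argument values are made up; the flag name is taken from the XenForo diff below):

    import argparse

    # Stage 1: the top-level parser (leech.py) consumes the flags it knows
    # and returns the rest untouched.
    top = argparse.ArgumentParser()
    top.add_argument('url')
    args, extra_args = top.parse_known_args(
        ['http://example.com/story', '--include-index'])

    # Stage 2: the site's own parser (built in Site._parse_args) consumes
    # the leftovers.
    site_parser = argparse.ArgumentParser()
    site_parser.add_argument('--include-index', action='store_true', default=False)
    options = site_parser.parse_args(extra_args)
    print(options.include_index)  # True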
leech.py (10 changes)

@@ -43,14 +43,14 @@ cover_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
 '''


-def leech(url, filename=None, cache=True):
+def leech(url, filename=None, cache=True, args=None):
     # we have: a page, which could be absolutely any part of a story, or not a story at all
     # check a bunch of things which are completely ff.n specific, to get text from it
     site = sites.get(url)
     if not site:
         raise Exception("No site handler found")

-    handler = site(fetch, cache=cache)
+    handler = site(fetch, cache=cache, args=args)

     with open('leech.json') as store_file:
         store = json.load(store_file)
@@ -82,7 +82,7 @@ def leech(url, filename=None, cache=True):

     filename = filename or story['title'] + '.epub'

-    filename = epub.make_epub(filename, html, metadata, extra_files = (css, cover_image))
+    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))

     return filename

@@ -92,7 +92,7 @@ if __name__ == '__main__':
     parser.add_argument('--filename', help="output filename (the title is used if this isn't provided)")
     parser.add_argument('--no-cache', dest='cache', action='store_false')
     parser.set_defaults(cache=True)
-    args = parser.parse_args()
+    args, extra_args = parser.parse_known_args()

-    filename = leech(args.url, filename=args.filename, cache=args.cache)
+    filename = leech(args.url, filename=args.filename, cache=args.cache, args=extra_args)
     print("File created:", filename)
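The pivotal change here is parse_args() becoming parse_known_args(), which returns the parsed namespace plus a list of tokens it didn't recognize instead of erroring on them. Value-carrying flags pass through too, so site options aren't limited to booleans. A small demonstration of this standard argparse behavior (--offset=2 is a hypothetical site flag):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('url')
    parser.add_argument('--filename')

    # Known flags are consumed; unknown tokens come back in a list.
    args, extra_args = parser.parse_known_args(
        ['http://example.com/story', '--filename', 'out.epub', '--offset=2'])
    print(args.filename)  # out.epub
    print(extra_args)     # ['--offset=2']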
(Site base module)

@@ -1,4 +1,5 @@
+import argparse
 from bs4 import BeautifulSoup

 _sites = []

@@ -8,11 +9,12 @@ class Site:
     """A Site handles checking whether a URL might represent a site, and then
     extracting the content of a story from said site.
     """
-    def __init__(self, fetch, cache=True):
+    def __init__(self, fetch, cache=True, args=None):
         super().__init__()
         self.fetch = fetch
         self.cache = cache
         self.footnotes = []
+        self.options = self._parse_args(args)

     @staticmethod
     def matches(url):
@@ -24,6 +26,14 @@ class Site:
     def login(self, login_details):
         raise NotImplementedError()

+    def _parse_args(self, args):
+        parser = argparse.ArgumentParser()
+        self._add_arguments(parser)
+        return parser.parse_args(args)
+
+    def _add_arguments(self, parser):
+        pass
+
     def _soup(self, url, method='html5lib', **kw):
         page = self.fetch(url, cached=self.cache, **kw)
         if not page:
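Note that Site._parse_args funnels the leftovers through a strict parse_args() call, so a site that doesn't override the no-op _add_arguments rejects any extra flags outright: argparse prints an error and exits. A minimal sketch of that edge case (BareSite is a hypothetical site with no custom flags):

    import argparse

    class BareSite:
        # Mirrors the base-class hooks from the diff above, nothing more.
        def _parse_args(self, args):
            parser = argparse.ArgumentParser()
            self._add_arguments(parser)
            return parser.parse_args(args)

        def _add_arguments(self, parser):
            pass  # no flags registered by default

    print(BareSite()._parse_args([]))  # Namespace() -- empty input parses fine
    try:
        BareSite()._parse_args(['--include-index'])
    except SystemExit:
        print('rejected')  # unrecognized flag: argparse errors out and exits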
(XenForo site module)

@@ -82,6 +82,11 @@ class XenForo(Site):
         if not links:
             raise SiteException("No links in index?")

+        if self.options.include_index:
+            fake_link = self._new_tag('a', href=url)
+            fake_link.string = "Index"
+            links.insert(0, fake_link)
+
         return links

     def _chapter(self, url, chapter_number):
@@ -129,6 +134,9 @@ class XenForo(Site):
             return datetime.datetime.strptime(maybe_date['title'], "%b %d, %Y at %I:%M %p")
         raise SiteException("No date", maybe_date)

+    def _add_arguments(self, parser):
+        parser.add_argument('--include-index', dest='include_index', action='store_true', default=False)
+

 class XenForoIndex(XenForo):
     @classmethod
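With --include-index set, the index page itself is prepended to the chapter list as a synthetic first link, so its text gets scraped like any other chapter. What that fake link amounts to, sketched with BeautifulSoup directly rather than leech's self._new_tag helper (the URL is made up):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('', 'html.parser')
    fake_link = soup.new_tag('a', href='http://example.com/threads/story.123/')
    fake_link.string = 'Index'
    print(fake_link)  # <a href="http://example.com/threads/story.123/">Index</a>

    links = []                  # stand-in for the real chapter links
    links.insert(0, fake_link)  # the index post becomes chapter one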