Mirror of https://github.com/kemayo/leech, synced 2025-12-06 16:33:16 +01:00
Let sites define their own custom arguments

Use this to let XenForo force the inclusion of the index post.
parent c69eb1e33e
commit aa4ba528b7

3 changed files with 24 additions and 6 deletions
leech.py (10 changed lines)

@@ -43,14 +43,14 @@ cover_template = '''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
 '''
 
 
-def leech(url, filename=None, cache=True):
+def leech(url, filename=None, cache=True, args=None):
     # we have: a page, which could be absolutely any part of a story, or not a story at all
     # check a bunch of things which are completely ff.n specific, to get text from it
     site = sites.get(url)
     if not site:
         raise Exception("No site handler found")
 
-    handler = site(fetch, cache=cache)
+    handler = site(fetch, cache=cache, args=args)
 
     with open('leech.json') as store_file:
         store = json.load(store_file)

@@ -82,7 +82,7 @@ def leech(url, filename=None, cache=True):
 
     filename = filename or story['title'] + '.epub'
 
-    filename = epub.make_epub(filename, html, metadata, extra_files = (css, cover_image))
+    filename = epub.make_epub(filename, html, metadata, extra_files=(css, cover_image))
 
     return filename
 

@@ -92,7 +92,7 @@ if __name__ == '__main__':
     parser.add_argument('--filename', help="output filename (the title is used if this isn't provided)")
     parser.add_argument('--no-cache', dest='cache', action='store_false')
     parser.set_defaults(cache=True)
-    args = parser.parse_args()
+    args, extra_args = parser.parse_known_args()
 
-    filename = leech(args.url, filename=args.filename, cache=args.cache)
+    filename = leech(args.url, filename=args.filename, cache=args.cache, args=extra_args)
     print("File created:", filename)
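The pivotal change in leech.py is the switch to parse_known_args: flags the main parser defines are consumed as before, while anything it does not recognize is collected into extra_args and forwarded to the site handler instead of being rejected. A minimal sketch of that split (the URL is invented, and --include-index is the XenForo flag this commit adds further down):

import argparse

# Mirrors the leech.py argument setup from the hunk above.
parser = argparse.ArgumentParser()
parser.add_argument('url')
parser.add_argument('--filename')
parser.add_argument('--no-cache', dest='cache', action='store_false')
parser.set_defaults(cache=True)

argv = ['http://example.com/story', '--no-cache', '--include-index']
args, extra_args = parser.parse_known_args(argv)

print(args.url)     # http://example.com/story
print(args.cache)   # False
print(extra_args)   # ['--include-index'] -- what leech() forwards as args=extra_args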
@@ -1,4 +1,5 @@
 
+import argparse
 from bs4 import BeautifulSoup
 
 _sites = []

@@ -8,11 +9,12 @@ class Site:
     """A Site handles checking whether a URL might represent a site, and then
     extracting the content of a story from said site.
     """
-    def __init__(self, fetch, cache=True):
+    def __init__(self, fetch, cache=True, args=None):
         super().__init__()
         self.fetch = fetch
         self.cache = cache
         self.footnotes = []
+        self.options = self._parse_args(args)
 
     @staticmethod
     def matches(url):

@@ -24,6 +26,14 @@ class Site:
     def login(self, login_details):
         raise NotImplementedError()
 
+    def _parse_args(self, args):
+        parser = argparse.ArgumentParser()
+        self._add_arguments(parser)
+        return parser.parse_args(args)
+
+    def _add_arguments(self, parser):
+        pass
+
     def _soup(self, url, method='html5lib', **kw):
         page = self.fetch(url, cached=self.cache, **kw)
         if not page:
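With the _parse_args/_add_arguments pair on the base class, a handler declares its own flags by overriding the hook: the base class builds a private argparse.ArgumentParser, lets the subclass populate it, and stores the parsed result on self.options. A condensed sketch of the mechanism (the subclass and its --depth flag are invented for illustration):

import argparse

class Site:
    def __init__(self, fetch=None, cache=True, args=None):
        self.fetch = fetch
        self.cache = cache
        self.options = self._parse_args(args)

    def _parse_args(self, args):
        # Each handler gets its own parser, populated by the subclass hook.
        parser = argparse.ArgumentParser()
        self._add_arguments(parser)
        return parser.parse_args(args)

    def _add_arguments(self, parser):
        pass

class ExampleSite(Site):
    # Hypothetical handler with one custom flag.
    def _add_arguments(self, parser):
        parser.add_argument('--depth', type=int, default=1)

handler = ExampleSite(args=['--depth', '3'])
print(handler.options.depth)  # 3

Note that the hook uses parse_args, not parse_known_args: every flag left over from the main command line reaches the selected handler, so a flag that no handler defines will make argparse exit with an "unrecognized arguments" error rather than be silently ignored.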
@@ -82,6 +82,11 @@ class XenForo(Site):
         if not links:
             raise SiteException("No links in index?")
 
+        if self.options.include_index:
+            fake_link = self._new_tag('a', href=url)
+            fake_link.string = "Index"
+            links.insert(0, fake_link)
+
         return links
 
     def _chapter(self, url, chapter_number):

@@ -129,6 +134,9 @@ class XenForo(Site):
             return datetime.datetime.strptime(maybe_date['title'], "%b %d, %Y at %I:%M %p")
         raise SiteException("No date", maybe_date)
 
+    def _add_arguments(self, parser):
+        parser.add_argument('--include-index', dest='include_index', action='store_true', default=False)
+
 
 class XenForoIndex(XenForo):
     @classmethod
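End to end: --include-index survives leech.py's parse_known_args, reaches the XenForo handler via extra_args, and sets self.options.include_index. The handler then prepends a synthetic link to the thread URL itself, so the index post is downloaded as the first chapter rather than serving only as a table of contents. A sketch of that insertion using plain BeautifulSoup in place of the handler's _new_tag helper (URL and markup invented):

from bs4 import BeautifulSoup

url = 'http://forums.example.com/threads/my-story.123/'
soup = BeautifulSoup('<a href="/posts/1">Chapter 1</a>', 'html.parser')
links = soup.find_all('a')

# What the include-index branch does: put an "Index" link pointing at the
# thread itself at the front of the chapter list.
fake_link = soup.new_tag('a', href=url)
fake_link.string = 'Index'
links.insert(0, fake_link)

print([(a.string, a['href']) for a in links])
# [('Index', 'http://forums.example.com/threads/my-story.123/'), ('Chapter 1', '/posts/1')]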