Mirror of https://github.com/kemayo/leech (synced 2025-12-06 00:15:22 +01:00)
Make the parser used for BeautifulSoup configurable, still default lxml
Refs #98
This commit is contained in:
parent 9ed2d54db7
commit 4d9c31b6ac
3 changed files with 17 additions and 5 deletions
@@ -91,7 +91,8 @@ def chapter_html(
         image_options,
         titleprefix=None,
         normalize=False,
-        session=None
+        session=None,
+        parser='lxml'
 ):
     already_fetched_images = {}
     chapters = []
@@ -170,7 +171,7 @@ def chapter_html(
     return chapters


-def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False, session=None):
+def generate_epub(story, cover_options={}, image_options={}, output_filename=None, output_dir=None, normalize=False, allow_spaces=False, session=None, parser='lxml'):
     dates = list(story.dates())
     metadata = {
         'title': story.title,
@@ -230,7 +231,8 @@ def generate_epub(story, cover_options={}, image_options={}, output_filename=None
             story,
             image_options=image_options,
             normalize=normalize,
-            session=session
+            session=session,
+            parser=parser
         ),
         EpubFile(
             path='Styles/base.css',
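The new parser argument on chapter_html and generate_epub ultimately decides which parser BeautifulSoup is constructed with when chapter markup is processed. A minimal sketch of that choice, outside the project's code (the function name and sample HTML below are illustrative only):

from bs4 import BeautifulSoup

def clean_chapter(html, parser='lxml'):
    # 'lxml' is fast but needs the lxml package; 'html5lib' parses like a browser;
    # 'html.parser' ships with Python; 'lxml-xml' treats the input as XML.
    soup = BeautifulSoup(html, parser)
    return soup.prettify()

print(clean_chapter("<p>Hello<br>world", parser='html.parser'))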
leech.py
@@ -185,7 +185,8 @@ def download(urls, site_options, cache, verbose, normalize, output_dir, **other_
             normalize=normalize,
             output_dir=output_dir or options.get('output_dir', os.getcwd()),
             allow_spaces=options.get('allow_spaces', False),
-            session=session
+            session=session,
+            parser=options.get('parser', 'lxml')
         )
         logger.info("File created: " + filename)
     else:
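In leech.py the value simply travels from the merged site options into generate_epub, with lxml as the fallback whenever --parser was never given. The lookup behaves like this (the options dict here is a stand-in for the real merged options):

options = {}                              # no --parser on the command line
print(options.get('parser', 'lxml'))      # -> lxml, the previous behaviour

options = {'parser': 'html5lib'}          # --parser html5lib
print(options.get('parser', 'lxml'))      # -> html5lib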
@@ -132,6 +132,13 @@ class Site:
                 "callback": lambda ctx, param, value: ctx.params.update({"spoilers": value and "skip" or "include"}),
             },
         ),
+        SiteSpecificOption(
+            'parser',
+            '--parser',
+            help="Which HTML parser to use",
+            choices=('lxml', 'html5lib', 'html.parser', 'lxml-xml'),
+            default='lxml',
+        ),
     ]

     @classmethod
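SiteSpecificOption entries appear to be surfaced as command-line options, and the callback signature in the context lines matches click's. A rough standalone equivalent of a choice-restricted --parser flag in plain click would look like this (a sketch under that assumption, not the project's actual wiring):

import click

@click.command()
@click.option('--parser', default='lxml', help="Which HTML parser to use",
              type=click.Choice(['lxml', 'html5lib', 'html.parser', 'lxml-xml']))
def fetch(parser):
    # click rejects any value outside the Choice list before this body runs
    click.echo(f"Using parser: {parser}")

if __name__ == '__main__':
    fetch()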
@@ -176,7 +183,9 @@ class Site:
     def login(self, login_details):
         raise NotImplementedError()

-    def _soup(self, url, method='lxml', delay=0, retry=3, retry_delay=10, **kw):
+    def _soup(self, url, method=False, delay=0, retry=3, retry_delay=10, **kw):
+        if not method:
+            method = self.options.get('parser', 'lxml')
         if url.startswith('http://') or url.startswith('https://'):
             page = self.session.get(url, **kw)
             if not page:
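The _soup change swaps the old hard-coded 'lxml' default for a sentinel: an explicit method argument still wins, and everything else falls back to the configured parser. Trimmed down to just that logic (the retry, delay, and error handling visible in the real method are omitted; the class and fetch details below are illustrative):

from bs4 import BeautifulSoup

class SiteSketch:
    def __init__(self, session, options=None):
        self.session = session
        self.options = options or {}

    def _soup(self, url, method=False, **kw):
        if not method:
            # no parser requested by the caller: use the site option, default lxml
            method = self.options.get('parser', 'lxml')
        page = self.session.get(url, **kw)
        return BeautifulSoup(page.text, method)

Called as self._soup(url), this picks up the --parser value; called as self._soup(url, method='html5lib'), a site can still force a specific parser.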