mirror of
https://github.com/kemayo/leech
synced 2026-01-06 07:39:07 +01:00
Normalize fancy unicode characters by default
Kindle can't display the "𝖙𝖍𝖚𝖌 𝖑𝖎𝖋𝖊" mathematical bold fraktur codepoints, so NFKC-normalize them (and anything else) into their plain equivalents. Can be disabled by running with `--no-normalize` if needed.
This commit is contained in:
parent
ba77390f27
commit
533c14f0d7
2 changed files with 13 additions and 7 deletions
|
|
@ -2,6 +2,7 @@ from .epub import make_epub
|
|||
from .cover import make_cover
|
||||
from .cover import make_cover_from_url
|
||||
|
||||
import unicodedata
|
||||
import datetime
|
||||
import requests
|
||||
import attr
|
||||
|
|
@ -73,26 +74,30 @@ class CoverOptions:
|
|||
cover_url = attr.ib(default=None, converter=attr.converters.optional(str))
|
||||
|
||||
|
||||
def chapter_html(story, titleprefix=None):
|
||||
def chapter_html(story, titleprefix=None, normalize=False):
|
||||
chapters = []
|
||||
for i, chapter in enumerate(story):
|
||||
title = chapter.title or f'#{i}'
|
||||
if hasattr(chapter, '__iter__'):
|
||||
# This is a Section
|
||||
chapters.extend(chapter_html(chapter, titleprefix=title))
|
||||
chapters.extend(chapter_html(chapter, titleprefix=title, normalize=normalize))
|
||||
else:
|
||||
title = titleprefix and f'{titleprefix}: {title}' or title
|
||||
contents = chapter.contents
|
||||
if normalize:
|
||||
title = unicodedata.normalize('NFKC', title)
|
||||
contents = unicodedata.normalize('NFKC', contents)
|
||||
chapters.append((
|
||||
title,
|
||||
f'{story.id}/chapter{i + 1}.html',
|
||||
html_template.format(title=title, text=chapter.contents)
|
||||
html_template.format(title=title, text=contents)
|
||||
))
|
||||
if story.footnotes:
|
||||
chapters.append(("Footnotes", f'{story.id}/footnotes.html', html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
|
||||
return chapters
|
||||
|
||||
|
||||
def generate_epub(story, cover_options={}, output_filename=None):
|
||||
def generate_epub(story, cover_options={}, output_filename=None, normalize=False):
|
||||
dates = list(story.dates())
|
||||
metadata = {
|
||||
'title': story.title,
|
||||
|
|
@ -120,7 +125,7 @@ def generate_epub(story, cover_options={}, output_filename=None):
|
|||
|
||||
html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
|
||||
|
||||
html.extend(chapter_html(story))
|
||||
html.extend(chapter_html(story, normalize=normalize))
|
||||
|
||||
css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')
|
||||
|
||||
|
|
|
|||
5
leech.py
5
leech.py
|
|
@ -140,9 +140,10 @@ def flush(verbose):
|
|||
help='JSON object encoding any site specific option.'
|
||||
)
|
||||
@click.option('--cache/--no-cache', default=True)
|
||||
@click.option('--normalize/--no-normalize', default=True, help="Whether to normalize strange unicode text")
|
||||
@click.option('--verbose', '-v', is_flag=True, help="Verbose debugging output")
|
||||
@site_specific_options # Includes other click.options specific to sites
|
||||
def download(url, site_options, cache, verbose, **other_flags):
|
||||
def download(url, site_options, cache, verbose, normalize, **other_flags):
|
||||
"""Downloads a story and saves it on disk as a ebpub ebook."""
|
||||
configure_logging(verbose)
|
||||
session = create_session(cache)
|
||||
|
|
@ -151,7 +152,7 @@ def download(url, site_options, cache, verbose, **other_flags):
|
|||
options, login = create_options(site, site_options, other_flags)
|
||||
story = open_story(site, url, session, login, options)
|
||||
|
||||
filename = ebook.generate_epub(story, options)
|
||||
filename = ebook.generate_epub(story, options, normalize=normalize)
|
||||
logger.info("File created: " + filename)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue