1
0
Fork 0
mirror of https://github.com/kemayo/leech synced 2026-01-06 07:39:07 +01:00

Normalize fancy unicode characters by default

Kindle can't display the "𝖙𝖍𝖚𝖌 𝖑𝖎𝖋𝖊" mathematical bold fraktur codepoints,
so NFKC-normalize them (and anything else) into their plain equivalents.

Can be disabled by running with `--no-normalize` if needed.
This commit is contained in:
David Lynch 2021-02-05 01:59:20 -06:00
parent ba77390f27
commit 533c14f0d7
2 changed files with 13 additions and 7 deletions

View file

@ -2,6 +2,7 @@ from .epub import make_epub
from .cover import make_cover
from .cover import make_cover_from_url
import unicodedata
import datetime
import requests
import attr
@ -73,26 +74,30 @@ class CoverOptions:
cover_url = attr.ib(default=None, converter=attr.converters.optional(str))
def chapter_html(story, titleprefix=None):
def chapter_html(story, titleprefix=None, normalize=False):
chapters = []
for i, chapter in enumerate(story):
title = chapter.title or f'#{i}'
if hasattr(chapter, '__iter__'):
# This is a Section
chapters.extend(chapter_html(chapter, titleprefix=title))
chapters.extend(chapter_html(chapter, titleprefix=title, normalize=normalize))
else:
title = titleprefix and f'{titleprefix}: {title}' or title
contents = chapter.contents
if normalize:
title = unicodedata.normalize('NFKC', title)
contents = unicodedata.normalize('NFKC', contents)
chapters.append((
title,
f'{story.id}/chapter{i + 1}.html',
html_template.format(title=title, text=chapter.contents)
html_template.format(title=title, text=contents)
))
if story.footnotes:
chapters.append(("Footnotes", f'{story.id}/footnotes.html', html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
return chapters
def generate_epub(story, cover_options={}, output_filename=None):
def generate_epub(story, cover_options={}, output_filename=None, normalize=False):
dates = list(story.dates())
metadata = {
'title': story.title,
@ -120,7 +125,7 @@ def generate_epub(story, cover_options={}, output_filename=None):
html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))
html.extend(chapter_html(story))
html.extend(chapter_html(story, normalize=normalize))
css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')

View file

@ -140,9 +140,10 @@ def flush(verbose):
help='JSON object encoding any site specific option.'
)
@click.option('--cache/--no-cache', default=True)
@click.option('--normalize/--no-normalize', default=True, help="Whether to normalize strange unicode text")
@click.option('--verbose', '-v', is_flag=True, help="Verbose debugging output")
@site_specific_options # Includes other click.options specific to sites
def download(url, site_options, cache, verbose, **other_flags):
def download(url, site_options, cache, verbose, normalize, **other_flags):
"""Downloads a story and saves it on disk as a ebpub ebook."""
configure_logging(verbose)
session = create_session(cache)
@ -151,7 +152,7 @@ def download(url, site_options, cache, verbose, **other_flags):
options, login = create_options(site, site_options, other_flags)
story = open_story(site, url, session, login, options)
filename = ebook.generate_epub(story, options)
filename = ebook.generate_epub(story, options, normalize=normalize)
logger.info("File created: " + filename)