Normalize fancy unicode characters by default

Kindle can't display the "𝖙𝖍𝖚𝖌 𝖑𝖎𝖋𝖊" mathematical bold fraktur codepoints, so NFKC-normalize them (and anything else) into their plain equivalents. This can be disabled by running with `--no-normalize` if needed.
2026-01-06 07:39:07 +01:00 · 2021-02-05 01:59:20 -06:00 · 2021-02-05 01:59:20 -06:00 · 533c14f0d7
commit 533c14f0d7
parent ba77390f27
2 changed files with 13 additions and 7 deletions
--- a/ebook/__init__.py
+++ b/ebook/__init__.py
@@ -2,6 +2,7 @@ from .epub import make_epub
 from .cover import make_cover
 from .cover import make_cover_from_url

+import unicodedata
 import datetime
 import requests
 import attr
@@ -73,26 +74,30 @@ class CoverOptions:
    cover_url = attr.ib(default=None, converter=attr.converters.optional(str))


-def chapter_html(story, titleprefix=None):
+def chapter_html(story, titleprefix=None, normalize=False):
    chapters = []
    for i, chapter in enumerate(story):
        title = chapter.title or f'#{i}'
        if hasattr(chapter, '__iter__'):
            # This is a Section
-            chapters.extend(chapter_html(chapter, titleprefix=title))
+            chapters.extend(chapter_html(chapter, titleprefix=title, normalize=normalize))
        else:
            title = titleprefix and f'{titleprefix}: {title}' or title
+            contents = chapter.contents
+            if normalize:
+                title = unicodedata.normalize('NFKC', title)
+                contents = unicodedata.normalize('NFKC', contents)
            chapters.append((
                title,
                f'{story.id}/chapter{i + 1}.html',
-                html_template.format(title=title, text=chapter.contents)
+                html_template.format(title=title, text=contents)
            ))
    if story.footnotes:
        chapters.append(("Footnotes", f'{story.id}/footnotes.html', html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes))))
    return chapters


-def generate_epub(story, cover_options={}, output_filename=None):
+def generate_epub(story, cover_options={}, output_filename=None, normalize=False):
    dates = list(story.dates())
    metadata = {
        'title': story.title,
@@ -120,7 +125,7 @@ def generate_epub(story, cover_options={}, output_filename=None):

    html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata)))

-    html.extend(chapter_html(story))
+    html.extend(chapter_html(story, normalize=normalize))

    css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css')

--- a/leech.py
+++ b/leech.py
@@ -140,9 +140,10 @@ def flush(verbose):
    help='JSON object encoding any site specific option.'
 )
@click.option('--cache/--no-cache', default=True)
+@click.option('--normalize/--no-normalize', default=True, help="Whether to normalize strange unicode text")
@click.option('--verbose', '-v', is_flag=True, help="Verbose debugging output")
@site_specific_options  # Includes other click.options specific to sites
-def download(url, site_options, cache, verbose, **other_flags):
+def download(url, site_options, cache, verbose, normalize, **other_flags):
    """Downloads a story and saves it on disk as a ebpub ebook."""
    configure_logging(verbose)
    session = create_session(cache)
@@ -151,7 +152,7 @@ def download(url, site_options, cache, verbose, **other_flags):
    options, login = create_options(site, site_options, other_flags)
    story = open_story(site, url, session, login, options)

-    filename = ebook.generate_epub(story, options)
+    filename = ebook.generate_epub(story, options, normalize=normalize)
    logger.info("File created: " + filename)