diff --git a/ebook/__init__.py b/ebook/__init__.py index 07ed528..b7e8a93 100644 --- a/ebook/__init__.py +++ b/ebook/__init__.py @@ -2,6 +2,7 @@ from .epub import make_epub from .cover import make_cover from .cover import make_cover_from_url +import unicodedata import datetime import requests import attr @@ -73,26 +74,30 @@ class CoverOptions: cover_url = attr.ib(default=None, converter=attr.converters.optional(str)) -def chapter_html(story, titleprefix=None): +def chapter_html(story, titleprefix=None, normalize=False): chapters = [] for i, chapter in enumerate(story): title = chapter.title or f'#{i}' if hasattr(chapter, '__iter__'): # This is a Section - chapters.extend(chapter_html(chapter, titleprefix=title)) + chapters.extend(chapter_html(chapter, titleprefix=title, normalize=normalize)) else: title = titleprefix and f'{titleprefix}: {title}' or title + contents = chapter.contents + if normalize: + title = unicodedata.normalize('NFKC', title) + contents = unicodedata.normalize('NFKC', contents) chapters.append(( title, f'{story.id}/chapter{i + 1}.html', - html_template.format(title=title, text=chapter.contents) + html_template.format(title=title, text=contents) )) if story.footnotes: chapters.append(("Footnotes", f'{story.id}/footnotes.html', html_template.format(title="Footnotes", text='\n\n'.join(story.footnotes)))) return chapters -def generate_epub(story, cover_options={}, output_filename=None): +def generate_epub(story, cover_options={}, output_filename=None, normalize=False): dates = list(story.dates()) metadata = { 'title': story.title, @@ -120,7 +125,7 @@ def generate_epub(story, cover_options={}, output_filename=None): html.append(('Front Matter', 'frontmatter.html', frontmatter_template.format(now=datetime.datetime.now(), **metadata))) - html.extend(chapter_html(story)) + html.extend(chapter_html(story, normalize=normalize)) css = ('Styles/base.css', requests.Session().get('https://raw.githubusercontent.com/mattharrison/epub-css-starter-kit/master/css/base.css').text, 'text/css') diff --git a/leech.py b/leech.py index d3688f0..1b92867 100755 --- a/leech.py +++ b/leech.py @@ -140,9 +140,10 @@ def flush(verbose): help='JSON object encoding any site specific option.' ) @click.option('--cache/--no-cache', default=True) +@click.option('--normalize/--no-normalize', default=True, help="Whether to normalize strange unicode text") @click.option('--verbose', '-v', is_flag=True, help="Verbose debugging output") @site_specific_options # Includes other click.options specific to sites -def download(url, site_options, cache, verbose, **other_flags): +def download(url, site_options, cache, verbose, normalize, **other_flags): """Downloads a story and saves it on disk as a ebpub ebook.""" configure_logging(verbose) session = create_session(cache) @@ -151,7 +152,7 @@ def download(url, site_options, cache, verbose, **other_flags): options, login = create_options(site, site_options, other_flags) story = open_story(site, url, session, login, options) - filename = ebook.generate_epub(story, options) + filename = ebook.generate_epub(story, options, normalize=normalize) logger.info("File created: " + filename)