From a93eeec5ebb68e3bed81e6ac26193f3440c4ccb5 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Thu, 9 Aug 2018 19:54:01 -0500 Subject: [PATCH] Fix for mobi output--link to TOC works again--was broken by html5lib enforcing html5 rules. --- fanficfare/mobi.py | 4 +++- fanficfare/mobihtml.py | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fanficfare/mobi.py b/fanficfare/mobi.py index 1a220f2e..406d6180 100644 --- a/fanficfare/mobi.py +++ b/fanficfare/mobi.py @@ -91,7 +91,9 @@ class Converter: toc_html = [] body_html = [] - PAGE_BREAK = '' + ## This gets broken by html5lib/bs4fixed being helpful, but we'll + ## fix it inside mobihtml.py + PAGE_BREAK = '' # pull out the title page, assumed first html_strs. htmltitle = html_strs[0] diff --git a/fanficfare/mobihtml.py b/fanficfare/mobihtml.py index 639d1c67..f9125795 100644 --- a/fanficfare/mobihtml.py +++ b/fanficfare/mobihtml.py @@ -33,6 +33,12 @@ class HtmlProcessor: self.unfill = unfill # html = self._ProcessRawHtml(html) self._soup = BeautifulSoup(html,'html5lib') + ## mobi format wants to find this <guide> tag inside <head>. + ## html5lib, on the other hand, moved it to <body>. So we'll move + ## it back. + guide = self._soup.find('guide') + if guide: + self._soup.head.append(guide) if self._soup.title.contents: self.title = self._soup.title.contents[0] else: @@ -66,10 +72,8 @@ class HtmlProcessor: def _ReplaceAnchorStubs(self): # TODO: Browsers allow extra whitespace in the href names. - # str() instead of unicode() rather than figure out how to fix - # ancient mobi.py code. assembled_text = ensure_binary(unicode(self._soup)) - # bs4 creating close tags for + # html5lib/bs4 creates close tags for assembled_text = assembled_text.replace(b'',b'') assembled_text = assembled_text.replace(b'',b'')