From a93eeec5ebb68e3bed81e6ac26193f3440c4ccb5 Mon Sep 17 00:00:00 2001
From: Jim Miller <retiefjimm@gmail.com>
Date: Thu, 9 Aug 2018 19:54:01 -0500
Subject: [PATCH] Fix for mobi output--link to TOC works again--was broken by
 html5lib enforcing html5 rules.

---
 fanficfare/mobi.py     |  4 +++-
 fanficfare/mobihtml.py | 10 +++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/fanficfare/mobi.py b/fanficfare/mobi.py
index 1a220f2e..406d6180 100644
--- a/fanficfare/mobi.py
+++ b/fanficfare/mobi.py
@@ -91,7 +91,9 @@ class Converter:
     toc_html = []
     body_html = []
 
-    PAGE_BREAK = '<mbp:pagebreak>'
+    ## html5lib/bs4fixed "helpfully" turns this self-closing tag into an
+    ## open/close pair; mobihtml.py converts it back afterwards.
+    PAGE_BREAK = '<mbp:pagebreak/>'
 
     # pull out the title page, assumed first html_strs.
     htmltitle = html_strs[0]
diff --git a/fanficfare/mobihtml.py b/fanficfare/mobihtml.py
index 639d1c67..f9125795 100644
--- a/fanficfare/mobihtml.py
+++ b/fanficfare/mobihtml.py
@@ -33,6 +33,12 @@ class HtmlProcessor:
     self.unfill = unfill
 #    html = self._ProcessRawHtml(html)
     self._soup = BeautifulSoup(html,'html5lib')
+    ## The mobi format expects to find the <guide> tag inside <head>.
+    ## html5lib, on the other hand, moves it into <body>, so we move
+    ## it back here.
+    guide = self._soup.find('guide')
+    if guide:
+      self._soup.head.append(guide)
     if self._soup.title.contents:
       self.title = self._soup.title.contents[0]
     else:
@@ -66,10 +72,8 @@ class HtmlProcessor:
   def _ReplaceAnchorStubs(self):
     # TODO: Browsers allow extra whitespace in the href names.
 
-    # str() instead of unicode() rather than figure out how to fix
-    # ancient mobi.py code.
     assembled_text = ensure_binary(unicode(self._soup))
-    # bs4 creating close tags for <mbp:pagebreak>
+    # html5lib/bs4 creates close tags for <mbp:pagebreak>
     assembled_text = assembled_text.replace(b'<mbp:pagebreak>',b'<mbp:pagebreak/>')
     assembled_text = assembled_text.replace(b'</mbp:pagebreak>',b'')