diff --git a/fanficfare/mobi.py b/fanficfare/mobi.py index 1a220f2e..406d6180 100644 --- a/fanficfare/mobi.py +++ b/fanficfare/mobi.py @@ -91,7 +91,9 @@ class Converter: toc_html = [] body_html = [] - PAGE_BREAK = '' + ## This gets broken by html5lib/bs4fixed being helpful, but we'll + ## fix it inside mobihtml.py + PAGE_BREAK = '' # pull out the title page, assumed first html_strs. htmltitle = html_strs[0] diff --git a/fanficfare/mobihtml.py b/fanficfare/mobihtml.py index 639d1c67..f9125795 100644 --- a/fanficfare/mobihtml.py +++ b/fanficfare/mobihtml.py @@ -33,6 +33,12 @@ class HtmlProcessor: self.unfill = unfill # html = self._ProcessRawHtml(html) self._soup = BeautifulSoup(html,'html5lib') + ## mobi format wants to find this <guide> tag inside <head>. + ## html5lib, on the other hand, moved it to <body>. So we'll move + ## it back. + guide = self._soup.find('guide') + if guide: + self._soup.head.append(guide) if self._soup.title.contents: self.title = self._soup.title.contents[0] else: @@ -66,10 +72,8 @@ class HtmlProcessor: def _ReplaceAnchorStubs(self): # TODO: Browsers allow extra whitespace in the href names. - # str() instead of unicode() rather than figure out how to fix - # ancient mobi.py code. assembled_text = ensure_binary(unicode(self._soup)) - # bs4 creating close tags for + # html5lib/bs4 creates close tags for assembled_text = assembled_text.replace(b'',b'') assembled_text = assembled_text.replace(b'',b'')