test1.com working with epub/txt/html output; mobi output broken.

2026-05-09 05:21:13 +02:00 · 2018-07-26 17:26:24 -05:00 · 2018-07-26 17:26:24 -05:00 · 8627bee253
commit 8627bee253
parent 33d2a77c07
5 changed files with 51 additions and 26 deletions
--- a/fanficfare/mobi.py
+++ b/fanficfare/mobi.py
@ -1,13 +1,18 @@
 #!/usr/bin/python
 # Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan
+# Changes Copyright 2018 FanFicFare team

    
-from six import StringIO
 import struct
 import time
 import random
 import logging

+# py2 vs py3 transition
+from six import text_type as unicode
+from six import string_types as basestring
+from six import BytesIO # StringIO under py2
+
 logger = logging.getLogger(__name__)

 from mobihtml import HtmlProcessor
@ -57,12 +62,12 @@ class Converter:
    self._refresh_url = refresh_url

  def ConvertString(self, s):
-    out = StringIO.StringIO()
+    out = BytesIO()
    self._ConvertStringToFile(s, out)
    return out.getvalue()

  def ConvertStrings(self, html_strs):
-    out = StringIO.StringIO()
+    out = BytesIO()
    self._ConvertStringsToFile(html_strs, out)
    return out.getvalue()

@ -126,6 +131,7 @@ class Converter:
      tmp = self.MakeOneHTML(html_strs)
      self._ConvertStringToFile(tmp, out_file)
    except Exception as e:
+      raise
      logger.error('Error %s', e)
      #logger.debug('Details: %s' % html_strs)

--- a/fanficfare/mobihtml.py
+++ b/fanficfare/mobihtml.py
@ -5,8 +5,11 @@

 import re
 import sys
-from six import StringIO
-from six.moves import urllib
+from six.moves.urllib.parse import unquote
+
+# py2 vs py3 transition
+from six import text_type as unicode
+from six import binary_type as bytes

 # import bs4
 # BeautifulSoup = bs4.BeautifulSoup
@ -55,14 +58,14 @@ class HtmlProcessor:

    # str() instead of unicode() rather than figure out how to fix
    # ancient mobi.py code.
-    assembled_text = str(self._soup)
+    assembled_text = unicode(self._soup)

    del self._soup # shouldn't touch this anymore
    for anchor_num, original_ref in self._anchor_references:
-      ref = urllib.unquote(original_ref[1:]) # remove leading '#'
+      ref = unquote(original_ref[1:]) # remove leading '#'
      # Find the position of ref in the utf-8 document.
      # TODO(chatham): Using regexes and looking for name= would be better.
-      newpos = assembled_text.rfind(ref.encode('utf-8'))
+      newpos = assembled_text.rfind(ref) # .encode('utf-8')
      if newpos == -1:
        print >>sys.stderr, 'Could not find anchor "%s"' % original_ref
        continue
--- a/fanficfare/writers/base_writer.py
+++ b/fanficfare/writers/base_writer.py
@ -21,11 +21,14 @@ import os.path
 import datetime
 import string
 import six
-from six import StringIO
 import zipfile
 from zipfile import ZipFile, ZIP_DEFLATED
 import logging

+# py2 vs py3 transition
+from six import text_type as unicode
+from six import BytesIO # StringIO under py2
+
 from ..configurable import Configurable
 from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML

@ -179,7 +182,7 @@ class BaseStoryWriter(Configurable):
            logger.info("Save directly to file: %s" % outfilename)
            if self.getConfig('make_directories'):
                path=""
-                outputdirs = os.path.dirname(outfilename).split('/')
+                outputdirs = os.path.dirname(unicode(outfilename)).split('/')
                for dir in outputdirs:
                    path+=dir+"/"
                    if not os.path.exists(path):
@ -213,7 +216,7 @@ class BaseStoryWriter(Configurable):
                                                 # above, it will only
                                                 # fetch once.
        if self.getConfig('zip_output'):
-            out = StringIO()
+            out = BytesIO()
            self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
            self.writeStoryImpl(out)
            self.zipout.writestr(self.getBaseFileName(),out.getvalue())
--- a/fanficfare/writers/writer_epub.py
+++ b/fanficfare/writers/writer_epub.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
+# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -15,14 +15,19 @@
 # limitations under the License.
 #

+from __future__ import absolute_import
 import logging
 import string
-from six import StringIO
 import zipfile
 from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
 import urllib
 import re

+# py2 vs py3 transition
+from six import text_type as unicode
+from six import string_types as basestring
+from six import BytesIO # StringIO under py2
+
 ## XML isn't as forgiving as HTML, so rather than generate as strings,
 ## use DOM to generate the XML files.
 from xml.dom.minidom import parse, parseString, getDOMImplementation
@ -299,11 +304,11 @@ div { margin: 0pt; padding: 0pt; }
    def writeStoryImpl(self, out):

        ## Python 2.5 ZipFile is rather more primative than later
-        ## versions.  It can operate on a file, or on a StringIO, but
+        ## versions.  It can operate on a file, or on a BytesIO, but
        ## not on an open stream.  OTOH, I suspect we would have had
        ## problems with closing and opening again to change the
        ## compression type anyway.
-        zipio = StringIO()
+        zipio = BytesIO()

        ## mimetype must be first file and uncompressed.  Python 2.5
        ## ZipFile can't change compression type file-by-file, so we
@ -518,8 +523,8 @@ div { margin: 0pt; padding: 0pt; }
                COVER = string.Template(self.getConfig("cover_content"))
            else:
                COVER = self.EPUB_COVER
-            coverIO = StringIO()
-            coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
+            coverIO = BytesIO()
+            coverIO.write(COVER.substitute(dict(list(self.story.getAllMetadata().items())+list({'coverimg':self.story.cover}.items()))))

        if self.getConfig("include_titlepage"):
            items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
@ -577,8 +582,10 @@ div { margin: 0pt; padding: 0pt; }
        contentxml = contentdom.toxml(encoding='utf-8')

        # tweak for brain damaged Nook STR.  Nook insists on name before content.
-        contentxml = contentxml.replace('<meta content="%s" name="cover"/>'%coverimgid,
-                                        '<meta name="cover" content="%s"/>'%coverimgid)
+        contentxml = unicode(contentxml).replace('<meta content="%s" name="cover"/>'%coverimgid,
+                                                 '<meta name="cover" content="%s"/>'%coverimgid)
+
+        
        outputepub.writestr("content.opf",contentxml)

        contentdom.unlink()
@ -655,7 +662,7 @@ div { margin: 0pt; padding: 0pt; }
            outputepub.writestr("OEBPS/cover.xhtml",coverIO.getvalue())
            coverIO.close()

-        titlepageIO = StringIO()
+        titlepageIO = BytesIO()
        self.writeTitlePage(out=titlepageIO,
                            START=TITLE_PAGE_START,
                            ENTRY=TITLE_ENTRY,
@ -667,7 +674,7 @@ div { margin: 0pt; padding: 0pt; }
        titlepageIO.close()

        # write toc page.
-        tocpageIO = StringIO()
+        tocpageIO = BytesIO()
        self.writeTOCPage(tocpageIO,
                          self.EPUB_TOC_PAGE_START,
                          self.EPUB_TOC_ENTRY,
@ -678,7 +685,7 @@ div { margin: 0pt; padding: 0pt; }

        if dologpage:
            # write log page.
-            logpageIO = StringIO()
+            logpageIO = BytesIO()
            self.writeLogPage(logpageIO)
            outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
            logpageIO.close()
--- a/fanficfare/writers/writer_mobi.py
+++ b/fanficfare/writers/writer_mobi.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
+# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -15,15 +15,21 @@
 # limitations under the License.
 #

+from __future__ import absolute_import
 import logging
 import string
-from six import StringIO

 from .base_writer import *
 from ..htmlcleanup import stripHTML
 from ..mobi import Converter
 from ..exceptions import FailedToWriteOutput

+# py2 vs py3 transition
+from six import text_type as unicode
+from six import string_types as basestring
+from six import binary_type as bytes
+from six import BytesIO # StringIO under py2
+
 logger = logging.getLogger(__name__)

 class MobiWriter(BaseStoryWriter):
@ -128,7 +134,7 @@ ${value}<br />
            NO_TITLE_ENTRY    = self.MOBI_NO_TITLE_ENTRY
            TITLE_PAGE_END    = self.MOBI_TITLE_PAGE_END
        
-        titlepageIO = StringIO.StringIO()
+        titlepageIO = BytesIO()
        self.writeTitlePage(out=titlepageIO,
                            START=TITLE_PAGE_START,
                            ENTRY=TITLE_ENTRY,
@ -142,7 +148,7 @@ ${value}<br />
        ## MOBI always has a TOC injected by mobi.py because there's
        ## no meta-data TOC.
        # # write toc page.  
-        # tocpageIO = StringIO.StringIO()
+        # tocpageIO = BytesIO()
        # self.writeTOCPage(tocpageIO,
        #                   self.MOBI_TOC_PAGE_START,
        #                   self.MOBI_TOC_ENTRY,