diff --git a/fanficfare/mobi.py b/fanficfare/mobi.py index 76bae5e8..adc95096 100644 --- a/fanficfare/mobi.py +++ b/fanficfare/mobi.py @@ -1,13 +1,18 @@ #!/usr/bin/python # Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan +# Changes Copyright 2018 FanFicFare team -from six import StringIO import struct import time import random import logging +# py2 vs py3 transition +from six import text_type as unicode +from six import string_types as basestring +from six import BytesIO # StringIO under py2 + logger = logging.getLogger(__name__) from mobihtml import HtmlProcessor @@ -57,12 +62,12 @@ class Converter: self._refresh_url = refresh_url def ConvertString(self, s): - out = StringIO.StringIO() + out = BytesIO() self._ConvertStringToFile(s, out) return out.getvalue() def ConvertStrings(self, html_strs): - out = StringIO.StringIO() + out = BytesIO() self._ConvertStringsToFile(html_strs, out) return out.getvalue() @@ -126,6 +131,7 @@ class Converter: tmp = self.MakeOneHTML(html_strs) self._ConvertStringToFile(tmp, out_file) except Exception as e: + raise logger.error('Error %s', e) #logger.debug('Details: %s' % html_strs) diff --git a/fanficfare/mobihtml.py b/fanficfare/mobihtml.py index 7782a43c..c0dcf029 100644 --- a/fanficfare/mobihtml.py +++ b/fanficfare/mobihtml.py @@ -5,8 +5,11 @@ import re import sys -from six import StringIO -from six.moves import urllib +from six.moves.urllib.parse import unquote + +# py2 vs py3 transition +from six import text_type as unicode +from six import binary_type as bytes # import bs4 # BeautifulSoup = bs4.BeautifulSoup @@ -55,14 +58,14 @@ class HtmlProcessor: # str() instead of unicode() rather than figure out how to fix # ancient mobi.py code. - assembled_text = str(self._soup) + assembled_text = unicode(self._soup) del self._soup # shouldn't touch this anymore for anchor_num, original_ref in self._anchor_references: - ref = urllib.unquote(original_ref[1:]) # remove leading '#' + ref = unquote(original_ref[1:]) # remove leading '#' # Find the position of ref in the utf-8 document. # TODO(chatham): Using regexes and looking for name= would be better. - newpos = assembled_text.rfind(ref.encode('utf-8')) + newpos = assembled_text.rfind(ref) # .encode('utf-8') if newpos == -1: print >>sys.stderr, 'Could not find anchor "%s"' % original_ref continue diff --git a/fanficfare/writers/base_writer.py b/fanficfare/writers/base_writer.py index 3e97a356..12eada78 100644 --- a/fanficfare/writers/base_writer.py +++ b/fanficfare/writers/base_writer.py @@ -21,11 +21,14 @@ import os.path import datetime import string import six -from six import StringIO import zipfile from zipfile import ZipFile, ZIP_DEFLATED import logging +# py2 vs py3 transition +from six import text_type as unicode +from six import BytesIO # StringIO under py2 + from ..configurable import Configurable from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML @@ -179,7 +182,7 @@ class BaseStoryWriter(Configurable): logger.info("Save directly to file: %s" % outfilename) if self.getConfig('make_directories'): path="" - outputdirs = os.path.dirname(outfilename).split('/') + outputdirs = os.path.dirname(unicode(outfilename)).split('/') for dir in outputdirs: path+=dir+"/" if not os.path.exists(path): @@ -213,7 +216,7 @@ class BaseStoryWriter(Configurable): # above, it will only # fetch once. if self.getConfig('zip_output'): - out = StringIO() + out = BytesIO() self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED) self.writeStoryImpl(out) self.zipout.writestr(self.getBaseFileName(),out.getvalue()) diff --git a/fanficfare/writers/writer_epub.py b/fanficfare/writers/writer_epub.py index 8ecfeda0..93bad44f 100644 --- a/fanficfare/writers/writer_epub.py +++ b/fanficfare/writers/writer_epub.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team +# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,14 +15,19 @@ # limitations under the License. # +from __future__ import absolute_import import logging import string -from six import StringIO import zipfile from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED import urllib import re +# py2 vs py3 transition +from six import text_type as unicode +from six import string_types as basestring +from six import BytesIO # StringIO under py2 + ## XML isn't as forgiving as HTML, so rather than generate as strings, ## use DOM to generate the XML files. from xml.dom.minidom import parse, parseString, getDOMImplementation @@ -299,11 +304,11 @@ div { margin: 0pt; padding: 0pt; } def writeStoryImpl(self, out): ## Python 2.5 ZipFile is rather more primative than later - ## versions. It can operate on a file, or on a StringIO, but + ## versions. It can operate on a file, or on a BytesIO, but ## not on an open stream. OTOH, I suspect we would have had ## problems with closing and opening again to change the ## compression type anyway. - zipio = StringIO() + zipio = BytesIO() ## mimetype must be first file and uncompressed. Python 2.5 ## ZipFile can't change compression type file-by-file, so we @@ -518,8 +523,8 @@ div { margin: 0pt; padding: 0pt; } COVER = string.Template(self.getConfig("cover_content")) else: COVER = self.EPUB_COVER - coverIO = StringIO() - coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items()))) + coverIO = BytesIO() + coverIO.write(COVER.substitute(dict(list(self.story.getAllMetadata().items())+list({'coverimg':self.story.cover}.items())))) if self.getConfig("include_titlepage"): items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page")) @@ -577,8 +582,10 @@ div { margin: 0pt; padding: 0pt; } contentxml = contentdom.toxml(encoding='utf-8') # tweak for brain damaged Nook STR. Nook insists on name before content. - contentxml = contentxml.replace(''%coverimgid, - ''%coverimgid) + contentxml = unicode(contentxml).replace(''%coverimgid, + ''%coverimgid) + + outputepub.writestr("content.opf",contentxml) contentdom.unlink() @@ -655,7 +662,7 @@ div { margin: 0pt; padding: 0pt; } outputepub.writestr("OEBPS/cover.xhtml",coverIO.getvalue()) coverIO.close() - titlepageIO = StringIO() + titlepageIO = BytesIO() self.writeTitlePage(out=titlepageIO, START=TITLE_PAGE_START, ENTRY=TITLE_ENTRY, @@ -667,7 +674,7 @@ div { margin: 0pt; padding: 0pt; } titlepageIO.close() # write toc page. - tocpageIO = StringIO() + tocpageIO = BytesIO() self.writeTOCPage(tocpageIO, self.EPUB_TOC_PAGE_START, self.EPUB_TOC_ENTRY, @@ -678,7 +685,7 @@ div { margin: 0pt; padding: 0pt; } if dologpage: # write log page. - logpageIO = StringIO() + logpageIO = BytesIO() self.writeLogPage(logpageIO) outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue()) logpageIO.close() diff --git a/fanficfare/writers/writer_mobi.py b/fanficfare/writers/writer_mobi.py index 97fdfcbc..ec7a2599 100644 --- a/fanficfare/writers/writer_mobi.py +++ b/fanficfare/writers/writer_mobi.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team +# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,15 +15,21 @@ # limitations under the License. # +from __future__ import absolute_import import logging import string -from six import StringIO from .base_writer import * from ..htmlcleanup import stripHTML from ..mobi import Converter from ..exceptions import FailedToWriteOutput +# py2 vs py3 transition +from six import text_type as unicode +from six import string_types as basestring +from six import binary_type as bytes +from six import BytesIO # StringIO under py2 + logger = logging.getLogger(__name__) class MobiWriter(BaseStoryWriter): @@ -128,7 +134,7 @@ ${value}
NO_TITLE_ENTRY = self.MOBI_NO_TITLE_ENTRY TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END - titlepageIO = StringIO.StringIO() + titlepageIO = BytesIO() self.writeTitlePage(out=titlepageIO, START=TITLE_PAGE_START, ENTRY=TITLE_ENTRY, @@ -142,7 +148,7 @@ ${value}
## MOBI always has a TOC injected by mobi.py because there's ## no meta-data TOC. # # write toc page. - # tocpageIO = StringIO.StringIO() + # tocpageIO = BytesIO() # self.writeTOCPage(tocpageIO, # self.MOBI_TOC_PAGE_START, # self.MOBI_TOC_ENTRY,