test1.com with epub/txt/html output working, mobi broken.

This commit is contained in:
Jim Miller 2018-07-26 17:26:24 -05:00
parent 33d2a77c07
commit 8627bee253
5 changed files with 51 additions and 26 deletions

View file

@ -1,13 +1,18 @@
#!/usr/bin/python
# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan
# Changes Copyright 2018 FanFicFare team
from six import StringIO
import struct
import time
import random
import logging
# py2 vs py3 transition
from six import text_type as unicode
from six import string_types as basestring
from six import BytesIO # StringIO under py2
logger = logging.getLogger(__name__)
from mobihtml import HtmlProcessor
@ -57,12 +62,12 @@ class Converter:
self._refresh_url = refresh_url
def ConvertString(self, s):
out = StringIO.StringIO()
out = BytesIO()
self._ConvertStringToFile(s, out)
return out.getvalue()
def ConvertStrings(self, html_strs):
out = StringIO.StringIO()
out = BytesIO()
self._ConvertStringsToFile(html_strs, out)
return out.getvalue()
@ -126,6 +131,7 @@ class Converter:
tmp = self.MakeOneHTML(html_strs)
self._ConvertStringToFile(tmp, out_file)
except Exception as e:
raise
logger.error('Error %s', e)
#logger.debug('Details: %s' % html_strs)

View file

@ -5,8 +5,11 @@
import re
import sys
from six import StringIO
from six.moves import urllib
from six.moves.urllib.parse import unquote
# py2 vs py3 transition
from six import text_type as unicode
from six import binary_type as bytes
# import bs4
# BeautifulSoup = bs4.BeautifulSoup
@ -55,14 +58,14 @@ class HtmlProcessor:
# str() instead of unicode() rather than figure out how to fix
# ancient mobi.py code.
assembled_text = str(self._soup)
assembled_text = unicode(self._soup)
del self._soup # shouldn't touch this anymore
for anchor_num, original_ref in self._anchor_references:
ref = urllib.unquote(original_ref[1:]) # remove leading '#'
ref = unquote(original_ref[1:]) # remove leading '#'
# Find the position of ref in the utf-8 document.
# TODO(chatham): Using regexes and looking for name= would be better.
newpos = assembled_text.rfind(ref.encode('utf-8'))
newpos = assembled_text.rfind(ref) # .encode('utf-8')
if newpos == -1:
print >>sys.stderr, 'Could not find anchor "%s"' % original_ref
continue

View file

@ -21,11 +21,14 @@ import os.path
import datetime
import string
import six
from six import StringIO
import zipfile
from zipfile import ZipFile, ZIP_DEFLATED
import logging
# py2 vs py3 transition
from six import text_type as unicode
from six import BytesIO # StringIO under py2
from ..configurable import Configurable
from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML
@ -179,7 +182,7 @@ class BaseStoryWriter(Configurable):
logger.info("Save directly to file: %s" % outfilename)
if self.getConfig('make_directories'):
path=""
outputdirs = os.path.dirname(outfilename).split('/')
outputdirs = os.path.dirname(unicode(outfilename)).split('/')
for dir in outputdirs:
path+=dir+"/"
if not os.path.exists(path):
@ -213,7 +216,7 @@ class BaseStoryWriter(Configurable):
# above, it will only
# fetch once.
if self.getConfig('zip_output'):
out = StringIO()
out = BytesIO()
self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
self.writeStoryImpl(out)
self.zipout.writestr(self.getBaseFileName(),out.getvalue())

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -15,14 +15,19 @@
# limitations under the License.
#
from __future__ import absolute_import
import logging
import string
from six import StringIO
import zipfile
from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
import urllib
import re
# py2 vs py3 transition
from six import text_type as unicode
from six import string_types as basestring
from six import BytesIO # StringIO under py2
## XML isn't as forgiving as HTML, so rather than generate as strings,
## use DOM to generate the XML files.
from xml.dom.minidom import parse, parseString, getDOMImplementation
@ -299,11 +304,11 @@ div { margin: 0pt; padding: 0pt; }
def writeStoryImpl(self, out):
## Python 2.5 ZipFile is rather more primitive than later
## versions. It can operate on a file, or on a StringIO, but
## versions. It can operate on a file, or on a BytesIO, but
## not on an open stream. OTOH, I suspect we would have had
## problems with closing and opening again to change the
## compression type anyway.
zipio = StringIO()
zipio = BytesIO()
## mimetype must be first file and uncompressed. Python 2.5
## ZipFile can't change compression type file-by-file, so we
@ -518,8 +523,8 @@ div { margin: 0pt; padding: 0pt; }
COVER = string.Template(self.getConfig("cover_content"))
else:
COVER = self.EPUB_COVER
coverIO = StringIO()
coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
coverIO = BytesIO()
coverIO.write(COVER.substitute(dict(list(self.story.getAllMetadata().items())+list({'coverimg':self.story.cover}.items()))))
if self.getConfig("include_titlepage"):
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
@ -577,8 +582,10 @@ div { margin: 0pt; padding: 0pt; }
contentxml = contentdom.toxml(encoding='utf-8')
# tweak for brain damaged Nook STR. Nook insists on name before content.
contentxml = contentxml.replace('<meta content="%s" name="cover"/>'%coverimgid,
'<meta name="cover" content="%s"/>'%coverimgid)
contentxml = unicode(contentxml).replace('<meta content="%s" name="cover"/>'%coverimgid,
'<meta name="cover" content="%s"/>'%coverimgid)
outputepub.writestr("content.opf",contentxml)
contentdom.unlink()
@ -655,7 +662,7 @@ div { margin: 0pt; padding: 0pt; }
outputepub.writestr("OEBPS/cover.xhtml",coverIO.getvalue())
coverIO.close()
titlepageIO = StringIO()
titlepageIO = BytesIO()
self.writeTitlePage(out=titlepageIO,
START=TITLE_PAGE_START,
ENTRY=TITLE_ENTRY,
@ -667,7 +674,7 @@ div { margin: 0pt; padding: 0pt; }
titlepageIO.close()
# write toc page.
tocpageIO = StringIO()
tocpageIO = BytesIO()
self.writeTOCPage(tocpageIO,
self.EPUB_TOC_PAGE_START,
self.EPUB_TOC_ENTRY,
@ -678,7 +685,7 @@ div { margin: 0pt; padding: 0pt; }
if dologpage:
# write log page.
logpageIO = StringIO()
logpageIO = BytesIO()
self.writeLogPage(logpageIO)
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
logpageIO.close()

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -15,15 +15,21 @@
# limitations under the License.
#
from __future__ import absolute_import
import logging
import string
from six import StringIO
from .base_writer import *
from ..htmlcleanup import stripHTML
from ..mobi import Converter
from ..exceptions import FailedToWriteOutput
# py2 vs py3 transition
from six import text_type as unicode
from six import string_types as basestring
from six import binary_type as bytes
from six import BytesIO # StringIO under py2
logger = logging.getLogger(__name__)
class MobiWriter(BaseStoryWriter):
@ -128,7 +134,7 @@ ${value}<br />
NO_TITLE_ENTRY = self.MOBI_NO_TITLE_ENTRY
TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END
titlepageIO = StringIO.StringIO()
titlepageIO = BytesIO()
self.writeTitlePage(out=titlepageIO,
START=TITLE_PAGE_START,
ENTRY=TITLE_ENTRY,
@ -142,7 +148,7 @@ ${value}<br />
## MOBI always has a TOC injected by mobi.py because there's
## no meta-data TOC.
# # write toc page.
# tocpageIO = StringIO.StringIO()
# tocpageIO = BytesIO()
# self.writeTOCPage(tocpageIO,
# self.MOBI_TOC_PAGE_START,
# self.MOBI_TOC_ENTRY,