mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2026-05-09 05:21:13 +02:00
test1.com with epub/txt/html output working, mobi broken.
This commit is contained in:
parent
33d2a77c07
commit
8627bee253
5 changed files with 51 additions and 26 deletions
|
|
@ -1,13 +1,18 @@
|
|||
#!/usr/bin/python
|
||||
# Copyright(c) 2009 Andrew Chatham and Vijay Pandurangan
|
||||
# Changes Copyright 2018 FanFicFare team
|
||||
|
||||
|
||||
from six import StringIO
|
||||
import struct
|
||||
import time
|
||||
import random
|
||||
import logging
|
||||
|
||||
# py2 vs py3 transition
|
||||
from six import text_type as unicode
|
||||
from six import string_types as basestring
|
||||
from six import BytesIO # StringIO under py2
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from mobihtml import HtmlProcessor
|
||||
|
|
@ -57,12 +62,12 @@ class Converter:
|
|||
self._refresh_url = refresh_url
|
||||
|
||||
def ConvertString(self, s):
|
||||
out = StringIO.StringIO()
|
||||
out = BytesIO()
|
||||
self._ConvertStringToFile(s, out)
|
||||
return out.getvalue()
|
||||
|
||||
def ConvertStrings(self, html_strs):
|
||||
out = StringIO.StringIO()
|
||||
out = BytesIO()
|
||||
self._ConvertStringsToFile(html_strs, out)
|
||||
return out.getvalue()
|
||||
|
||||
|
|
@ -126,6 +131,7 @@ class Converter:
|
|||
tmp = self.MakeOneHTML(html_strs)
|
||||
self._ConvertStringToFile(tmp, out_file)
|
||||
except Exception as e:
|
||||
raise
|
||||
logger.error('Error %s', e)
|
||||
#logger.debug('Details: %s' % html_strs)
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,11 @@
|
|||
|
||||
import re
|
||||
import sys
|
||||
from six import StringIO
|
||||
from six.moves import urllib
|
||||
from six.moves.urllib.parse import unquote
|
||||
|
||||
# py2 vs py3 transition
|
||||
from six import text_type as unicode
|
||||
from six import binary_type as bytes
|
||||
|
||||
# import bs4
|
||||
# BeautifulSoup = bs4.BeautifulSoup
|
||||
|
|
@ -55,14 +58,14 @@ class HtmlProcessor:
|
|||
|
||||
# str() instead of unicode() rather than figure out how to fix
|
||||
# ancient mobi.py code.
|
||||
assembled_text = str(self._soup)
|
||||
assembled_text = unicode(self._soup)
|
||||
|
||||
del self._soup # shouldn't touch this anymore
|
||||
for anchor_num, original_ref in self._anchor_references:
|
||||
ref = urllib.unquote(original_ref[1:]) # remove leading '#'
|
||||
ref = unquote(original_ref[1:]) # remove leading '#'
|
||||
# Find the position of ref in the utf-8 document.
|
||||
# TODO(chatham): Using regexes and looking for name= would be better.
|
||||
newpos = assembled_text.rfind(ref.encode('utf-8'))
|
||||
newpos = assembled_text.rfind(ref) # .encode('utf-8')
|
||||
if newpos == -1:
|
||||
print >>sys.stderr, 'Could not find anchor "%s"' % original_ref
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -21,11 +21,14 @@ import os.path
|
|||
import datetime
|
||||
import string
|
||||
import six
|
||||
from six import StringIO
|
||||
import zipfile
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
import logging
|
||||
|
||||
# py2 vs py3 transition
|
||||
from six import text_type as unicode
|
||||
from six import BytesIO # StringIO under py2
|
||||
|
||||
from ..configurable import Configurable
|
||||
from ..htmlcleanup import removeEntities, removeAllEntities, stripHTML
|
||||
|
||||
|
|
@ -179,7 +182,7 @@ class BaseStoryWriter(Configurable):
|
|||
logger.info("Save directly to file: %s" % outfilename)
|
||||
if self.getConfig('make_directories'):
|
||||
path=""
|
||||
outputdirs = os.path.dirname(outfilename).split('/')
|
||||
outputdirs = os.path.dirname(unicode(outfilename)).split('/')
|
||||
for dir in outputdirs:
|
||||
path+=dir+"/"
|
||||
if not os.path.exists(path):
|
||||
|
|
@ -213,7 +216,7 @@ class BaseStoryWriter(Configurable):
|
|||
# above, it will only
|
||||
# fetch once.
|
||||
if self.getConfig('zip_output'):
|
||||
out = StringIO()
|
||||
out = BytesIO()
|
||||
self.zipout = ZipFile(outstream, 'w', compression=ZIP_DEFLATED)
|
||||
self.writeStoryImpl(out)
|
||||
self.zipout.writestr(self.getBaseFileName(),out.getvalue())
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2017 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,14 +15,19 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import string
|
||||
from six import StringIO
|
||||
import zipfile
|
||||
from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
|
||||
import urllib
|
||||
import re
|
||||
|
||||
# py2 vs py3 transition
|
||||
from six import text_type as unicode
|
||||
from six import string_types as basestring
|
||||
from six import BytesIO # StringIO under py2
|
||||
|
||||
## XML isn't as forgiving as HTML, so rather than generate as strings,
|
||||
## use DOM to generate the XML files.
|
||||
from xml.dom.minidom import parse, parseString, getDOMImplementation
|
||||
|
|
@ -299,11 +304,11 @@ div { margin: 0pt; padding: 0pt; }
|
|||
def writeStoryImpl(self, out):
|
||||
|
||||
## Python 2.5 ZipFile is rather more primitive than later
|
||||
## versions. It can operate on a file, or on a StringIO, but
|
||||
## versions. It can operate on a file, or on a BytesIO, but
|
||||
## not on an open stream. OTOH, I suspect we would have had
|
||||
## problems with closing and opening again to change the
|
||||
## compression type anyway.
|
||||
zipio = StringIO()
|
||||
zipio = BytesIO()
|
||||
|
||||
## mimetype must be first file and uncompressed. Python 2.5
|
||||
## ZipFile can't change compression type file-by-file, so we
|
||||
|
|
@ -518,8 +523,8 @@ div { margin: 0pt; padding: 0pt; }
|
|||
COVER = string.Template(self.getConfig("cover_content"))
|
||||
else:
|
||||
COVER = self.EPUB_COVER
|
||||
coverIO = StringIO()
|
||||
coverIO.write(COVER.substitute(dict(self.story.getAllMetadata().items()+{'coverimg':self.story.cover}.items())))
|
||||
coverIO = BytesIO()
|
||||
coverIO.write(COVER.substitute(dict(list(self.story.getAllMetadata().items())+list({'coverimg':self.story.cover}.items()))))
|
||||
|
||||
if self.getConfig("include_titlepage"):
|
||||
items.append(("title_page","OEBPS/title_page.xhtml","application/xhtml+xml","Title Page"))
|
||||
|
|
@ -577,8 +582,10 @@ div { margin: 0pt; padding: 0pt; }
|
|||
contentxml = contentdom.toxml(encoding='utf-8')
|
||||
|
||||
# tweak for brain damaged Nook STR. Nook insists on name before content.
|
||||
contentxml = contentxml.replace('<meta content="%s" name="cover"/>'%coverimgid,
|
||||
'<meta name="cover" content="%s"/>'%coverimgid)
|
||||
contentxml = unicode(contentxml).replace('<meta content="%s" name="cover"/>'%coverimgid,
|
||||
'<meta name="cover" content="%s"/>'%coverimgid)
|
||||
|
||||
|
||||
outputepub.writestr("content.opf",contentxml)
|
||||
|
||||
contentdom.unlink()
|
||||
|
|
@ -655,7 +662,7 @@ div { margin: 0pt; padding: 0pt; }
|
|||
outputepub.writestr("OEBPS/cover.xhtml",coverIO.getvalue())
|
||||
coverIO.close()
|
||||
|
||||
titlepageIO = StringIO()
|
||||
titlepageIO = BytesIO()
|
||||
self.writeTitlePage(out=titlepageIO,
|
||||
START=TITLE_PAGE_START,
|
||||
ENTRY=TITLE_ENTRY,
|
||||
|
|
@ -667,7 +674,7 @@ div { margin: 0pt; padding: 0pt; }
|
|||
titlepageIO.close()
|
||||
|
||||
# write toc page.
|
||||
tocpageIO = StringIO()
|
||||
tocpageIO = BytesIO()
|
||||
self.writeTOCPage(tocpageIO,
|
||||
self.EPUB_TOC_PAGE_START,
|
||||
self.EPUB_TOC_ENTRY,
|
||||
|
|
@ -678,7 +685,7 @@ div { margin: 0pt; padding: 0pt; }
|
|||
|
||||
if dologpage:
|
||||
# write log page.
|
||||
logpageIO = StringIO()
|
||||
logpageIO = BytesIO()
|
||||
self.writeLogPage(logpageIO)
|
||||
outputepub.writestr("OEBPS/log_page.xhtml",logpageIO.getvalue())
|
||||
logpageIO.close()
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2011 Fanficdownloader team, 2015 FanFicFare team
|
||||
# Copyright 2011 Fanficdownloader team, 2018 FanFicFare team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -15,15 +15,21 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
from __future__ import absolute_import
|
||||
import logging
|
||||
import string
|
||||
from six import StringIO
|
||||
|
||||
from .base_writer import *
|
||||
from ..htmlcleanup import stripHTML
|
||||
from ..mobi import Converter
|
||||
from ..exceptions import FailedToWriteOutput
|
||||
|
||||
# py2 vs py3 transition
|
||||
from six import text_type as unicode
|
||||
from six import string_types as basestring
|
||||
from six import binary_type as bytes
|
||||
from six import BytesIO # StringIO under py2
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MobiWriter(BaseStoryWriter):
|
||||
|
|
@ -128,7 +134,7 @@ ${value}<br />
|
|||
NO_TITLE_ENTRY = self.MOBI_NO_TITLE_ENTRY
|
||||
TITLE_PAGE_END = self.MOBI_TITLE_PAGE_END
|
||||
|
||||
titlepageIO = StringIO.StringIO()
|
||||
titlepageIO = BytesIO()
|
||||
self.writeTitlePage(out=titlepageIO,
|
||||
START=TITLE_PAGE_START,
|
||||
ENTRY=TITLE_ENTRY,
|
||||
|
|
@ -142,7 +148,7 @@ ${value}<br />
|
|||
## MOBI always has a TOC injected by mobi.py because there's
|
||||
## no meta-data TOC.
|
||||
# # write toc page.
|
||||
# tocpageIO = StringIO.StringIO()
|
||||
# tocpageIO = BytesIO()
|
||||
# self.writeTOCPage(tocpageIO,
|
||||
# self.MOBI_TOC_PAGE_START,
|
||||
# self.MOBI_TOC_ENTRY,
|
||||
|
|
|
|||
Loading…
Reference in a new issue