mirror of
https://github.com/JimmXinu/FanFicFare.git
synced 2025-12-15 21:32:28 +01:00
Improved metadata for epubcheck; fixed a problem I introduced with &gt;/&lt;.
This commit is contained in:
parent
9434431c7b
commit
773ff3c03c
2 changed files with 25 additions and 19 deletions
|
|
@ -32,7 +32,7 @@ CONTAINER = '''<?xml version="1.0"?>
|
|||
|
||||
CONTENT_START = '''<?xml version="1.0"?>
|
||||
<package version="2.0" xmlns="http://www.idpf.org/2007/opf"
|
||||
unique-identifier="BookId-Epub-%s">
|
||||
unique-identifier="BookID">
|
||||
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:opf="http://www.idpf.org/2007/opf">
|
||||
<dc:title>%s</dc:title>
|
||||
|
|
@ -41,7 +41,7 @@ CONTENT_START = '''<?xml version="1.0"?>
|
|||
<dc:rights></dc:rights>
|
||||
<dc:subject>fanfiction</dc:subject>
|
||||
<dc:publisher>sgzmd</dc:publisher>
|
||||
<dc:identifier id="BookId">urn:uuid:sigizmund.com062820072147132</dc:identifier>
|
||||
<dc:identifier id="BookID">%s</dc:identifier>
|
||||
</metadata>
|
||||
<manifest>
|
||||
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
|
||||
|
|
@ -114,8 +114,7 @@ acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
|
|||
acceptable_attributes = ['href']
|
||||
|
||||
entities = { '–' : ' - ', '—' : ' - ', '”' : '"', '“' : '"', '’' : '\'',
|
||||
'‘' : '\'', '"' : '"', '…' : '...', '&' : '&', '£' : '£', ' ' : ' ',
|
||||
'<' : '<', '>' : '>' }
|
||||
'‘' : '\'', '"' : '"', '…' : '...', '&' : '&', '£' : '£', ' ' : ' ' }
|
||||
|
||||
FB2_PROLOGUE = '<FictionBook>'
|
||||
FB2_DESCRIPTION = '''<description>
|
||||
|
|
|
|||
37
output.py
37
output.py
|
|
@ -173,20 +173,22 @@ class EPubFanficWriter(FanficWriter):
|
|||
# for the pound symbol in constants.py
|
||||
text = text.replace(e, v.decode('utf-8'))
|
||||
|
||||
text = text.replace('&', '&amp;')
|
||||
# &lt; &gt; and &amp; are the only html entities allowed in xhtml.
|
||||
text = text.replace('&', '&amp;').replace('&amp;lt;', '&lt;').replace('&amp;gt;', '&gt;')
|
||||
|
||||
return text
|
||||
|
||||
def writeChapter(self, title, text):
|
||||
logging.debug("Writing chapter: %s" % title)
|
||||
try:
|
||||
fileName = base64.b64encode(title).replace('/', '_') + ".xhtml"
|
||||
fileName = base64.b64encode(title) + ".xhtml"
|
||||
except UnicodeEncodeError, e:
|
||||
fileName = base64.b64encode(title.encode('utf-8')).replace('/', '_') + ".xhtml"
|
||||
# title = cgi.esca#title.decode('utf-8')
|
||||
# sha = hashlib.sha224(title)
|
||||
# fileName = sha.hexdigest() + ".xhtml"
|
||||
#fileName = cgi.escape(title) + '.xhtml'
|
||||
fileName = base64.b64encode(title.encode('utf-8')) + ".xhtml"
|
||||
# Base64 can include +, / and =, which XML technically doesn't like
|
||||
# in its id attributes. _ and - are okay and not otherwise used in Base64.
|
||||
# The = for padding is superfluous
|
||||
fileName = fileName.replace('/', '_').replace('+', '-').replace('=','')
|
||||
|
||||
filePath = self.directory + "/OEBPS/" + fileName
|
||||
|
||||
fn = 'OEBPS/' + fileName
|
||||
|
|
@ -208,13 +210,14 @@ class EPubFanficWriter(FanficWriter):
|
|||
|
||||
allPs = self.soup.findAll(recursive=True)
|
||||
for p in allPs:
|
||||
if p.string != None and (len(p.string.strip()) == 0 or p.string.strip() == ' ' ) :
|
||||
if p.string != None and len(p.string.strip()) == 0 :
|
||||
p.extract()
|
||||
|
||||
allBrs = self.soup.findAll(recursive=True, name = ['div'])
|
||||
for br in allBrs:
|
||||
if (br.string != None and len(br.string.strip()) != 0) or (br.contents != None):
|
||||
br.name = 'p'
|
||||
|
||||
# xhtml doesn't like <p> nesting in <p>, so leave divs.
|
||||
# allBrs = self.soup.findAll(recursive=True, name = ['div'])
|
||||
# for br in allBrs:
|
||||
# if (br.string != None and len(br.string.strip()) != 0) or (br.contents != None):
|
||||
# br.name = 'p'
|
||||
|
||||
# cleanup(self.soup )
|
||||
|
||||
|
|
@ -243,17 +246,21 @@ class EPubFanficWriter(FanficWriter):
|
|||
opfFilePath = "OEBPS/content.opf"
|
||||
|
||||
# opf = open(opfFilePath, 'w')
|
||||
self._writeFile(opfFilePath, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName))
|
||||
self._writeFile(opfFilePath, CONTENT_START % (self.storyTitle, self.authorName, uuid.uuid4().urn))
|
||||
# print >> opf, CONTENT_START % (uuid.uuid4().urn, self.storyTitle, self.authorName)
|
||||
|
||||
ids = []
|
||||
|
||||
i = 0
|
||||
i = 1
|
||||
for t,f in self.chapters:
|
||||
try:
|
||||
chapterId = base64.b64encode(t)
|
||||
except UnicodeEncodeError, e:
|
||||
chapterId = base64.b64encode(t.encode('utf-8'))
|
||||
# Base64 can include +, / and =, which XML technically doesn't like
|
||||
# in its id attributes. _ and - are okay and not otherwise used in Base64.
|
||||
# The = for padding is superfluous
|
||||
chapterId = chapterId.replace('/', '_').replace('+', '-').replace('=','')
|
||||
|
||||
self._writeFile(tocFilePath, TOC_ITEM % (chapterId, i, cgi.escape(t), f))
|
||||
self._writeFile(opfFilePath, CONTENT_ITEM % (chapterId, f))
|
||||
|
|
|
|||
Loading…
Reference in a new issue