mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-04-21 07:13:20 +02:00
IGN:...
This commit is contained in:
commit
280561dc2b
1 changed file with 5 additions and 18 deletions
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
import sys, struct, cStringIO, os
|
||||
import functools
|
||||
import re
|
||||
from calibre.ebooks.lit import LitError
|
||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
|
|
@ -93,33 +94,21 @@ def read_utf8_char(bytes, pos):
|
|||
return unichr(c), pos+elsize
|
||||
|
||||
class UnBinary(object):
|
||||
# Matches an ampersand that does NOT start a character or entity reference
# (decimal ``&#38;``, hexadecimal ``&#x26;`` or named ``&amp;``), i.e. one
# that still has to be escaped.
# The hyphen is placed last in the name character class so it is a literal:
# written as ``.-_`` it formed a range from '.' to '_' that let characters
# such as ';', '<' and '>' pass as entity-name characters while rejecting
# '-' itself.
AMPERSAND_RE = re.compile(
    r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9._:-]+);)')
|
||||
|
||||
def __init__(self, bin, manifest, map=OPF_MAP):
    """Convert the binary markup in `bin` to text, stored in `self.raw`.

    `map` is a 3-tuple of (tag map, attribute map, tag-to-attribute map);
    `self.opf` is true only when the default `OPF_MAP` object itself is
    used. The conversion writes UTF-8 bytes into `self.buf`; the result
    is left-stripped, decoded and finally has its ampersands escaped.
    """
    self.manifest = manifest
    tags, attrs, tags_to_attrs = map
    self.tag_map = tags
    self.attr_map = attrs
    self.tag_to_attr_map = tags_to_attrs
    self.opf = (map is OPF_MAP)
    self.bin = bin
    # Output buffer plus the buffer positions of '&' characters written
    # to it by binary_to_text().
    self.buf = cStringIO.StringIO()
    self.ampersands = []
    self.binary_to_text()
    encoded = self.buf.getvalue()
    self.raw = encoded.lstrip().decode('utf-8')
    self.escape_ampersands()
|
||||
|
||||
def escape_ampersands(self):
    """Escape every bare ampersand in `self.raw` as ``&amp;``.

    An ampersand is left alone when `AMPERSAND_RE` recognises it as the
    start of a character or entity reference; all others are rewritten,
    so calling this method more than once is harmless.

    The positions collected in `self.ampersands` are deliberately not
    used: they are byte offsets into the UTF-8 output buffer, whereas
    `self.raw` is the left-stripped, decoded text, so indexing with them
    can overwrite a character that is not an ampersand at all.
    """
    # The replacement must be the escaped form; substituting a plain '&'
    # would leave the text unchanged.
    self.raw = self.AMPERSAND_RE.sub('&amp;', self.raw)
|
||||
|
||||
def item_path(self, internal_id):
|
||||
try:
|
||||
|
|
@ -148,8 +137,6 @@ def binary_to_text(self, base=0, depth=0):
|
|||
continue
|
||||
elif c == '\v':
|
||||
c = '\n'
|
||||
elif c == '&':
|
||||
self.ampersands.append(self.buf.tell()-1)
|
||||
self.buf.write(c.encode('utf-8'))
|
||||
|
||||
elif state == 'get flags':
|
||||
|
|
|
|||
Loading…
Reference in a new issue