mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-01-06 06:45:31 +01:00
Fix bug 2687: Text encoding bug.
This commit is contained in:
parent
cf42eb60ca
commit
2d39bceb64
9 changed files with 10 additions and 11 deletions
|
|
@ -30,7 +30,7 @@ def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
|||
|
||||
out_stream.seek(0)
|
||||
out_stream.truncate()
|
||||
out_stream.write(fb2_content.encode('utf-8'))
|
||||
out_stream.write(fb2_content.encode('utf-8', 'replace'))
|
||||
|
||||
if close:
|
||||
out_stream.close()
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@
|
|||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.ebooks.pdb.ereader.writer import Writer
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
|
||||
class EREADEROutput(OutputFormatPlugin):
|
||||
|
||||
|
|
|
|||
|
|
@ -73,9 +73,9 @@ def section_data(self, number):
|
|||
|
||||
def decompress_text(self, number):
|
||||
if self.header_record.version == 2:
|
||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
if self.header_record.version == 10:
|
||||
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
def get_image(self, number):
|
||||
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ def section_data(self, number):
|
|||
return self.sections[number]
|
||||
|
||||
def decompress_text(self, number):
|
||||
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
def get_image(self, number):
|
||||
name = None
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ def decompress_text(self, number):
|
|||
if self.header_record.compression == 1:
|
||||
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
if self.header_record.compression == 2:
|
||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
return ''
|
||||
|
||||
def extract_content(self, output_dir):
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ def section_data(self, number):
|
|||
def decompress_text(self, number):
|
||||
if number == 1:
|
||||
self.uncompressor = zlib.decompressobj()
|
||||
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
def extract_content(self, output_dir):
|
||||
txt = ''
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ def process_pml(self, pml_path, html_path):
|
|||
|
||||
self.log.debug('Converting PML to HTML...')
|
||||
html = pml_to_html(pml_stream.read().decode(ienc))
|
||||
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8') + '</body></html>')
|
||||
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8', 'replace') + '</body></html>')
|
||||
|
||||
if pclose:
|
||||
pml_stream.close()
|
||||
|
|
|
|||
|
|
@ -84,9 +84,9 @@ def get_text(self, toc_item, output_dir):
|
|||
|
||||
for size in chunck_sizes:
|
||||
cm_chunck = self.stream.read(size)
|
||||
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
else:
|
||||
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
|
||||
html.write(output.encode('utf-8'))
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ def convert(self, stream, options, file_ext, log,
|
|||
if options.input_encoding:
|
||||
ienc = options.input_encoding
|
||||
log.debug('Reading text from file...')
|
||||
txt = stream.read().decode(ienc)
|
||||
txt = stream.read().decode(ienc, 'replace')
|
||||
|
||||
log.debug('Running text though markdown conversion...')
|
||||
try:
|
||||
|
|
|
|||
Loading…
Reference in a new issue