mirror of
git://github.com/kovidgoyal/calibre.git
synced 2026-05-05 03:03:56 +02:00
pdf input to txt output giving correct output
This commit is contained in:
parent
a9a74acbde
commit
754923ce07
2 changed files with 3 additions and 3 deletions
|
|
@@ -25,7 +25,7 @@ def convert(self, stream, options, file_ext, log,
|
|||
html = pdftohtml(stream.name)
|
||||
|
||||
with open('index.html', 'wb') as index:
|
||||
index.write(html.encode('utf-8'))
|
||||
index.write(html)
|
||||
|
||||
#mi = metadata_from_formats([stream.name])
|
||||
mi = MetaInformation(_('Unknown'), _('Unknown'))
|
||||
|
|
|
|||
|
|
@@ -68,8 +68,8 @@ def pdftohtml(pdf_path):
|
|||
raise DRMError()
|
||||
|
||||
with open(index, 'rb') as i:
|
||||
raw = i.read().decode('latin-1')
|
||||
raw = i.read()
|
||||
if not '<br' in raw[:4000]:
|
||||
raise ConversionError(os.path.basename(pdf_path) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
|
||||
|
||||
return u'<!-- created by calibre\'s pdftohtml -->\n' + raw
|
||||
return '<!-- created by calibre\'s pdftohtml -->\n' + raw
|
||||
|
|
|
|||
Loading…
Reference in a new issue