RocketBook (rb) input.

This commit is contained in:
John Schember 2009-05-23 11:55:53 -04:00
parent 9b890e279d
commit 503b697653
5 changed files with 168 additions and 2 deletions

View file

@ -321,6 +321,7 @@ def set_metadata(self, stream, mi, type):
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.fb2.output import FB2Output
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rb.input import RBInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.comic.input import ComicInput
@ -351,7 +352,7 @@ def set_metadata(self, stream, mi, type):
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput, RBInput]
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
EB600, JETBOOK, BEBOOK, BEBOOK_MINI]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View file

@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
# Magic bytes expected at the very start of a RocketBook file; the reader
# compares the first 14 bytes of the stream against this value.
HEADER = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
class RocketBookError(Exception):
    """Raised when a RocketBook (.rb) file cannot be read."""

View file

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.ebooks.rb.reader import Reader
from calibre.customize.conversion import InputFormatPlugin
class RBInput(InputFormatPlugin):
    """Conversion input plugin for RocketBook (.rb) files."""

    name = 'RB Input'
    author = 'John Schember'
    description = 'Convert RB files to HTML'
    file_types = set(['rb'])

    def convert(self, stream, options, file_ext, log, accelerators):
        """Unpack *stream* into the current directory and return the OPF path.

        :param stream: Open RocketBook file to read from.
        :param options: Conversion options; only ``input_encoding`` is used.
        :param file_ext: Extension of the input file (unused here).
        :param log: Logger handed on to the reader.
        :param accelerators: Unused here.
        :return: Whatever ``Reader.extract_content`` returns, i.e. the path
            of the generated OPF file.
        """
        rb_reader = Reader(stream, log, options.input_encoding)
        # Everything is extracted into the current working directory.
        return rb_reader.extract_content(os.getcwd())

View file

@ -0,0 +1,131 @@
import os.path
import zlib
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import struct
from urllib import unquote as urlunquote
from calibre import CurrentDir
from calibre.ebooks.rb import HEADER
from calibre.ebooks.rb import RocketBookError
from calibre.ebooks.metadata.rb import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
class RBToc(list):
    """List holding the table-of-contents entries of a RocketBook file."""

    class Item(object):
        """A single table-of-contents entry.

        :param name: Name of the entry; also used as the output file name.
        :param size: Size value recorded for the entry.
        :param offset: Position of the entry's data within the stream.
        :param flags: Flags value recorded for the entry.
        """

        def __init__(self, name='', size=0, offset=0, flags=0):
            self.name, self.size = name, size
            self.offset, self.flags = offset, flags
class Reader(object):
    """Read a RocketBook (.rb) stream and unpack its pages and images.

    On construction the header is validated, the metadata is read and the
    table of contents is loaded. ``extract_content`` then writes the HTML
    pages and PNG images to disk and creates an OPF describing them.
    """

    def __init__(self, stream, log, encoding=None):
        """
        :param stream: Seekable binary stream of the RocketBook file.
        :param log: Logger object (stored, not used by this class itself).
        :param encoding: Encoding of the stored text; ``None`` selects
            cp1252.
        :raises RocketBookError: If the header is invalid or the recorded
            file size does not match the real size.
        """
        self.stream = stream
        self.log = log
        self.encoding = encoding

        # Validate before anything else touches the stream.
        self.verify_file()

        self.mi = get_metadata(self.stream)
        self.toc = self.get_toc()

    def read_i32(self):
        """Read 4 bytes at the current position as a little-endian
        unsigned integer."""
        return struct.unpack('<I', self.stream.read(4))[0]

    def verify_file(self):
        """Check the magic header and the file size recorded in the header.

        :raises RocketBookError: If either check fails.
        """
        self.stream.seek(0)
        if self.stream.read(14) != HEADER:
            # Not every stream has a ``name`` (e.g. in-memory streams);
            # fall back to a placeholder so the intended error is raised
            # rather than an AttributeError.
            stream_name = getattr(self.stream, 'name', '<unknown>')
            raise RocketBookError('Could not read file: %s. Does not contain a valid RocketBook Header.' % stream_name)

        # The expected total file size is stored at offset 28.
        self.stream.seek(28)
        size = self.read_i32()
        self.stream.seek(0, os.SEEK_END)
        real_size = self.stream.tell()
        if size != real_size:
            raise RocketBookError('File is corrupt. The file size recorded in the header does not match the actual file size.')

    def get_toc(self):
        """Load the table of contents.

        The offset of the table is stored at offset 24. The table starts
        with the number of entries; every entry is a 32 byte NUL padded,
        URL-quoted name followed by three integers: size, offset, flags.

        :return: An ``RBToc`` with one ``RBToc.Item`` per entry.
        """
        self.stream.seek(24)
        toc_offset = self.read_i32()

        self.stream.seek(toc_offset)
        pages = self.read_i32()

        toc = RBToc()
        for _ in range(pages):
            name = urlunquote(self.stream.read(32).strip('\x00'))
            size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32()
            toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))

        return toc

    def get_text(self, toc_item, output_dir):
        """Decompress a text entry and write it to *output_dir* as UTF-8.

        Only entries whose flags equal 8 are handled; their data is a chunk
        count, the uncompressed size, one size per chunk, and then the zlib
        compressed chunks themselves.

        :param toc_item: Table-of-contents entry to extract.
        :param output_dir: Directory the file is written to.
        :return: ``True`` if the file was written, ``False`` if the entry
            was skipped because of its flags.
        """
        if toc_item.flags != 8:
            return False

        self.stream.seek(toc_item.offset)
        count = self.read_i32()
        self.read_i32()  # Uncompressed size (not needed).
        chunk_sizes = [self.read_i32() for _ in range(count)]

        # Join the raw bytes first and decode once: decoding chunk by chunk
        # fails when a multi-byte character straddles a chunk boundary.
        raw = b''.join(zlib.decompress(self.stream.read(size)) for size in chunk_sizes)
        encoding = 'cp1252' if self.encoding is None else self.encoding
        text = raw.decode(encoding)

        # NOTE(review): the name comes straight from the file's table of
        # contents and is joined to output_dir unchecked.
        with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
            html.write(text.encode('utf-8'))
        return True

    def get_image(self, toc_item, output_dir):
        """Copy an image entry verbatim into *output_dir*.

        Only entries whose flags equal 0 are handled.

        :param toc_item: Table-of-contents entry to extract.
        :param output_dir: Directory the file is written to.
        :return: ``True`` if the file was written, ``False`` if the entry
            was skipped because of its flags.
        """
        if toc_item.flags != 0:
            return False

        self.stream.seek(toc_item.offset)
        data = self.stream.read(toc_item.size)

        # NOTE(review): the name comes straight from the file's table of
        # contents and is joined to output_dir unchecked.
        with open(os.path.join(output_dir, toc_item.name), 'wb') as img:
            img.write(data)
        return True

    def extract_content(self, output_dir):
        """Write all HTML pages and PNG images to *output_dir*.

        Entries are selected by the ending of their name ('html' / 'png',
        case-insensitive). Only files that were actually written are listed
        in the OPF, so the manifest never points at missing files.

        :param output_dir: Directory to extract into.
        :return: Path of the created OPF file.
        """
        html = []
        images = []

        for item in self.toc:
            lowered = item.name.lower()
            if lowered.endswith('html'):
                if self.get_text(item, output_dir):
                    html.append(item.name)
            if lowered.endswith('png'):
                if self.get_image(item, output_dir):
                    images.append(item.name)

        return self.create_opf(output_dir, html, images)

    def create_opf(self, output_dir, pages, images):
        """Create ``metadata.opf`` in *output_dir*.

        :param output_dir: Directory the OPF is written to.
        :param pages: Names of the HTML pages; they form the spine and are
            part of the manifest.
        :param images: Names of the images; manifest only.
        :return: Path of the written OPF file.
        """
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = [(entry, None) for entry in pages + images]

            opf.create_manifest(manifest)
            opf.create_spine(pages)
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')

View file

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'