Update to brotlidecpy v1.0.3

This commit is contained in:
Jim Miller 2021-02-03 08:45:53 -06:00
parent a3ff446a3d
commit cb27cb64b6
3 changed files with 84 additions and 243 deletions

View file

@ -1,6 +1,6 @@
from __future__ import absolute_import
__version__ = "1.0.0"
__version__ = "1.0.3"
# noinspection PyUnresolvedReferences
from .decode import brotli_decompress_buffer as decompress

View file

@ -2,95 +2,62 @@
# Distributed under MIT license.
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
BROTLI_READ_SIZE = 4096
BROTLI_IBUF_SIZE = (2 * BROTLI_READ_SIZE + 32)
BROTLI_IBUF_MASK = (2 * BROTLI_READ_SIZE - 1)
kBitMask = [
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
]
class BrotliBitReader:
def __init__(self, input_stream):
self.buf_ = bytearray([0] * BROTLI_IBUF_SIZE)
self.input_ = input_stream # input stream
self.buf_ptr_ = 0 # next input will write here
self.val_ = 0 # pre-fetched bits
self.pos_ = 0 # byte position in stream
self.bit_pos_ = 0 # current bit-reading position in val_
self.bit_end_pos_ = 0 # bit-reading end position from LSB of val_
self.eos_ = 0 # input stream is finished
self.reset()
"""Wrap a bytes buffer to enable reading 0 < n <=24 bits at a time, or transfer of arbitrary number of bytes"""
READ_SIZE = BROTLI_READ_SIZE
IBUF_MASK = BROTLI_IBUF_MASK
kBitMask = [
0x000000, 0x000001, 0x000003, 0x000007, 0x00000f, 0x00001f, 0x00003f, 0x00007f,
0x0000ff, 0x0001ff, 0x0003ff, 0x0007ff, 0x000fff, 0x001fff, 0x003fff, 0x007fff,
0x00ffff, 0x01ffff, 0x03ffff, 0x07ffff, 0x0fffff, 0x1fffff, 0x3fffff, 0x7fffff,
0xffffff
]
def __init__(self, input_buffer):
self.buf_ = bytearray(input_buffer)
self.buf_len_ = len(input_buffer)
self.pos_ = 0 # byte position in stream
self.bit_pos_ = 0 # current bit-reading position in current byte (number bits already read from byte, 0-7)
def reset(self):
self.buf_ptr_ = 0 # next input will write here
self.val_ = 0 # pre-fetched bits
self.pos_ = 0 # byte position in stream
self.bit_pos_ = 0 # current bit-reading position in val_
self.bit_end_pos_ = 0 # bit-reading end position from LSB of val_
self.eos_ = 0 # input stream is finished
"""Reset an initialized BrotliBitReader to start of input buffer"""
self.pos_ = 0
self.bit_pos_ = 0
self.read_more_input()
for i in range(0, 4):
self.val_ |= self.buf_[self.pos_] << (8 * i)
self.pos_ += 1
return self.bit_end_pos_ > 0
def read_more_input(self):
""" Fills up the input ringbuffer by calling the input callback.
Does nothing if there are at least 32 bytes present after current position.
Returns 0 if either:
- the input callback returned an error, or
- there is no more input and the position is past the end of the stream.
After encountering the end of the input stream, 32 additional zero bytes are
copied to the ringbuffer, therefore it is safe to call this function after
every 32 bytes of input is read"""
if self.bit_end_pos_ > 256:
return
elif self.eos_:
if self.bit_pos_ > self.bit_end_pos_:
raise Exception('Unexpected end of input %s %s' % (self.bit_pos_, self.bit_end_pos_))
else:
dst = self.buf_ptr_
bytes_read = self.input_.readinto(memoryview(self.buf_)[dst:dst+BROTLI_READ_SIZE])
if bytes_read < 0:
raise Exception('Unexpected end of input')
if bytes_read < BROTLI_READ_SIZE:
self.eos_ = 1
# Store 32 bytes of zero after the stream end
for p in range(0, 32):
self.buf_[dst + bytes_read + p] = 0
if dst == 0:
# Copy the head of the ringbuffer to the slack region
for p in range(0, 32):
self.buf_[(BROTLI_READ_SIZE << 1) + p] = self.buf_[p]
self.buf_ptr_ = BROTLI_READ_SIZE
else:
self.buf_ptr_ = 0
self.bit_end_pos_ += bytes_read << 3
def fill_bit_window(self):
"""Guarantees that there are at least 24 bits in the buffer"""
while self.bit_pos_ >= 8:
self.val_ >>= 8
self.val_ |= self.buf_[self.pos_ & BROTLI_IBUF_MASK] << 24
self.pos_ += 1
self.bit_pos_ -= 8
self.bit_end_pos_ -= 8
def read_bits(self, n_bits):
if 32 - self.bit_pos_ < n_bits:
self.fill_bit_window()
val = ((self.val_ >> self.bit_pos_) & kBitMask[n_bits])
self.bit_pos_ += n_bits
def read_bits(self, n_bits, bits_to_skip=None):
    """Return the next n_bits of input as an unsigned integer, optionally advancing the read position.

    The input is treated as a little-endian byte stream: within the value returned, bits taken from
    earlier bytes are less significant than bits taken from later bytes.

    n_bits: number of bits to read from the input buffer. Pass None or 0 to seek ahead ignoring the value.
            Any non-negative width is accepted (the mask is computed rather than looked up in a table).
    bits_to_skip: number of bits to advance in the input buffer. Defaults to n_bits if it is None;
            pass 0 to peek at the next n_bits without advancing. It may differ from n_bits.
    Returns: the next n_bits from the buffer as an integer, or 0 if n_bits is None or 0.
            Bits that lie past the end of the buffer read as 0.
    Raises: ValueError if n_bits is negative.
    """
    val = 0
    if bits_to_skip is None:
        bits_to_skip = n_bits
    if n_bits:
        if n_bits < 0:
            raise ValueError('n_bits must not be negative: %s' % n_bits)
        buf = self.buf_
        bit_offset = self.bit_pos_
        first = self.pos_
        # Whole bytes needed to cover the bits already consumed in the current byte plus n_bits.
        # Clamping to the buffer length leaves the missing high bits at 0, which simulates the
        # zero padding after the end of input that the decoder relies on.
        last = min(first + ((bit_offset + n_bits + 7) >> 3), self.buf_len_)
        shift = 0
        for buf_pos in range(first, last):
            val |= buf[buf_pos] << shift
            shift += 8
        # Drop the bits of the first byte that were already read, then keep exactly n_bits.
        val = (val >> bit_offset) & ((1 << n_bits) - 1)
    if bits_to_skip:
        next_in_bits = self.bit_pos_ + bits_to_skip
        self.bit_pos_ = next_in_bits & 7
        self.pos_ += next_in_bits >> 3
    return val
def copy_bytes(self, dest_buffer, dest_pos, n_bytes):
    """Copy n_bytes raw bytes from the input buffer into dest_buffer starting at dest_pos.

    If the reader is part-way through a byte, the remainder of that byte is skipped first so the
    copy starts on a byte boundary. Call with n_bytes == 0 to just skip to the next byte boundary;
    dest_buffer is not touched in that case.

    dest_buffer: writable buffer (it is wrapped in a memoryview) that receives the bytes
    dest_pos: index in dest_buffer of the first byte written
    n_bytes: number of bytes to copy; values <= 0 copy nothing
    Raises: ValueError if fewer than n_bytes bytes remain in the input. The reader position is
            left at the byte boundary and nothing is written in that case.
    """
    if self.bit_pos_ != 0:
        self.bit_pos_ = 0
        self.pos_ += 1
    if n_bytes > 0:
        new_pos = self.pos_ + n_bytes
        # Check explicitly: a short source slice would otherwise surface as an obscure memoryview
        # size-mismatch error, or be silently accepted if the destination slice is short as well.
        if new_pos > self.buf_len_:
            raise ValueError('Unexpected end of input: need %s bytes at position %s but buffer length is %s' % (
                n_bytes, self.pos_, self.buf_len_))
        memoryview(dest_buffer)[dest_pos:dest_pos + n_bytes] = self.buf_[self.pos_:new_pos]
        self.pos_ = new_pos

View file

@ -8,7 +8,6 @@ from .bit_reader import BrotliBitReader
from .dictionary import BrotliDictionary
from .context import Context
from .transform import Transform, kNumTransforms
from io import BytesIO
kDefaultCodeLength = 8
kCodeLengthRepeatCode = 16
@ -105,15 +104,15 @@ def decode_meta_block_length(br):
def read_symbol(table, index, br):
"""Decodes the next Huffman code from bit-stream."""
br.fill_bit_window()
index += (br.val_ >> br.bit_pos_) & HUFFMAN_TABLE_MASK
"""Decodes the next Huffman code from bit-stream. table is array of nodes in a huffman tree, index points to root"""
x_bits = br.read_bits(16, 0) # The C reference version assumes 15 is the max needed and uses 16 in this function
index += (x_bits & HUFFMAN_TABLE_MASK)
nbits = table[index].bits - HUFFMAN_TABLE_BITS
skip = 0
if nbits > 0:
br.bit_pos_ += HUFFMAN_TABLE_BITS
index += table[index].value
index += (br.val_ >> br.bit_pos_) & ((1 << nbits) - 1)
br.bit_pos_ += table[index].bits
skip = HUFFMAN_TABLE_BITS
index += table[index].value + ((x_bits >> HUFFMAN_TABLE_BITS) & br.kBitMask[nbits])
br.read_bits(None, skip + table[index].bits)
return table[index].value
@ -130,10 +129,8 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length
while (symbol < num_symbols) and (space > 0):
p = 0
br.read_more_input()
br.fill_bit_window()
p += (br.val_ >> br.bit_pos_) & 31
br.bit_pos_ += table[p].bits
p += br.read_bits(5, 0)
br.read_bits(None, table[p].bits)
code_len = table[p].value & 0xff
if code_len < kCodeLengthRepeatCode:
repeat = 0
@ -141,7 +138,7 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length
symbol += 1
if code_len != 0:
prev_code_len = code_len
space -= 32768 >> code_len
space -= 0x8000 >> code_len
else:
extra_bits = code_len - 14
new_len = 0
@ -177,8 +174,6 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length
def read_huffman_code(alphabet_size, tables, table, br):
code_lengths = bytearray([0] * alphabet_size)
br.read_more_input()
# simple_code_or_skip is used as follows:
# 1 for simple code
# 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths
@ -228,9 +223,8 @@ def read_huffman_code(alphabet_size, tables, table, br):
break
code_len_idx = kCodeLengthCodeOrder[i]
p = 0
br.fill_bit_window()
p += (br.val_ >> br.bit_pos_) & 15
br.bit_pos_ += huff[p].bits
p += br.read_bits(4, 0)
br.read_bits(None, huff[p].bits)
v = huff[p].value
code_length_code_lengths[code_len_idx] = v
if v != 0:
@ -298,8 +292,6 @@ class HuffmanTreeGroup:
class DecodeContextMap:
def __init__(self, context_map_size, br):
max_run_length_prefix = 0
br.read_more_input()
self.num_huff_trees = decode_var_len_uint8(br) + 1
self.context_map = bytearray([0] * context_map_size)
@ -316,7 +308,6 @@ class DecodeContextMap:
i = 0
while i < context_map_size:
br.read_more_input()
code = read_symbol(table, 0, br)
if code == 0:
self.context_map[i] = 0
@ -351,99 +342,19 @@ def decode_block_type(max_block_type, trees, tree_type, block_types, ring_buffer
indexes[index] += 1
def copy_uncompressed_block_to_output(output, length, pos, ringbuffer, ringbuffer_mask, br):
rb_size = ringbuffer_mask + 1
rb_pos = pos & ringbuffer_mask
br_pos = br.pos_ & BrotliBitReader.IBUF_MASK
# For short lengths copy byte-by-byte
if (length < 8) or (br.bit_pos_ + (length << 3) < br.bit_end_pos_):
for i in range(0, length):
br.read_more_input()
ringbuffer[rb_pos] = br.read_bits(8)
rb_pos += 1
if rb_pos == rb_size:
output.write(ringbuffer[:rb_size])
rb_pos = 0
return
if br.bit_end_pos_ < 32:
raise Exception('[copy_uncompressed_block_to_output] br.bit_end_pos_ < 32')
# Copy remaining 0-4 bytes from br.val_ to ringbuffer.
while br.bit_pos_ < 32:
ringbuffer[rb_pos] = (br.val_ >> br.bit_pos_)
br.bit_pos_ += 8
rb_pos += 1
length -= 1
# Copy remaining bytes from br.buf_ to ringbuffer.
num_bytes = (br.bit_end_pos_ - br.bit_pos_) >> 3
if br_pos + num_bytes > BrotliBitReader.IBUF_MASK:
tail = BrotliBitReader.IBUF_MASK + 1 - br_pos
for x in range(0, tail):
ringbuffer[rb_pos + x] = br.buf_[br_pos + x]
num_bytes -= tail
rb_pos += tail
length -= tail
br_pos = 0
for x in range(0, num_bytes):
ringbuffer[rb_pos + x] = br.buf_[br_pos + x]
rb_pos += num_bytes
length -= num_bytes
# If we wrote past the logical end of the ringbuffer, copy the tail of the
# ringbuffer to its beginning and flush the ringbuffer to the output.
if rb_pos >= rb_size:
output.write(ringbuffer[:rb_size])
rb_pos -= rb_size
for x in range(0, rb_pos):
ringbuffer[x] = ringbuffer[rb_size + x]
# If we have more to copy than the remaining size of the ringbuffer, then we first
# fill the ringbuffer from the input and then flush the ringbuffer to the output
while rb_pos + length >= rb_size:
num_bytes = rb_size - rb_pos
if br.input_.readinto(memoryview(ringbuffer)[rb_pos:rb_pos+num_bytes]) < num_bytes:
raise Exception('[copy_uncompressed_block_to_output] not enough bytes')
output.write(ringbuffer[:rb_size])
length -= num_bytes
rb_pos = 0
# Copy straight from the input onto the ringbuffer. The ringbuffer will be flushed to the output at a later time.
if br.input_.readinto(memoryview(ringbuffer)[rb_pos:rb_pos+length]) < length:
raise Exception('[copy_uncompressed_block_to_output] not enough bytes')
# Restore the state of the bit reader.
br.reset()
def copy_uncompressed_block_to_output(length, pos, output_buffer, br):
"""Copy length raw bytes from the bit reader br into output_buffer, starting at index pos.
This is only called when the input is on a byte boundary. The copy is delegated to br.copy_bytes.
"""
br.copy_bytes(output_buffer, pos, length)
def jump_to_byte_boundary(br):
"""Advances the bit reader position to the next byte boundary and verifies that any skipped bits are set to zero"""
new_bit_pos = (br.bit_pos_ + 7) & ~7
pad_bits = br.read_bits(new_bit_pos - br.bit_pos_)
return pad_bits == 0
def brotli_decompressed_size(input_buffer):
with BytesIO(input_buffer) as input_stream:
br = BrotliBitReader(input_stream)
decode_window_bits(br)
out = decode_meta_block_length(br)
return out.meta_block_length
"""Advances the bit reader position if needed to put it on a byte boundary"""
br.copy_bytes(b'', 0, 0)
def brotli_decompress_buffer(input_buffer):
with BytesIO(input_buffer) as input_stream:
with BytesIO() as output_stream:
brotli_decompress(input_stream, output_stream)
return output_stream.getvalue()
def brotli_decompress(input_stream, output_stream):
br = BrotliBitReader(input_buffer)
output_buffer = bytearray([])
pos = 0
input_end = 0
max_distance = 0
@ -452,24 +363,10 @@ def brotli_decompress(input_stream, output_stream):
dist_rb_idx = 0
hgroup = [HuffmanTreeGroup(0, 0), HuffmanTreeGroup(0, 0), HuffmanTreeGroup(0, 0)]
# We need the slack region for the following reasons:
# - always doing two 8-byte copies for fast backward copying
# - transforms
# - flushing the input ringbuffer when decoding uncompressed blocks
_ring_buffer_write_ahead_slack = 128 + BrotliBitReader.READ_SIZE
br = BrotliBitReader(input_stream)
# Decode window size.
window_bits = decode_window_bits(br)
max_backward_distance = (1 << window_bits) - 16
ringbuffer_size = 1 << window_bits
ringbuffer_mask = ringbuffer_size - 1
ringbuffer = bytearray(
[0] * (ringbuffer_size + _ring_buffer_write_ahead_slack + BrotliDictionary.maxDictionaryWordLength))
ringbuffer_end = ringbuffer_size
block_type_trees = [HuffmanCode(0, 0) for _ in range(0, 3 * HUFFMAN_MAX_TABLE_SIZE)]
block_len_trees = [HuffmanCode(0, 0) for _ in range(0, 3 * HUFFMAN_MAX_TABLE_SIZE)]
@ -484,8 +381,6 @@ def brotli_decompress(input_stream, output_stream):
hgroup[i].codes = None
hgroup[i].huff_trees = None
br.read_more_input()
_out = decode_meta_block_length(br)
meta_block_remaining_len = _out.meta_block_length
input_end = _out.input_end
@ -495,7 +390,6 @@ def brotli_decompress(input_stream, output_stream):
jump_to_byte_boundary(br)
while meta_block_remaining_len > 0:
br.read_more_input()
# Read one byte and ignore it
br.read_bits(8)
meta_block_remaining_len -= 1
@ -504,10 +398,11 @@ def brotli_decompress(input_stream, output_stream):
if meta_block_remaining_len == 0:
continue
if len(output_buffer) < (pos + meta_block_remaining_len):
output_buffer.extend(bytearray([0] * meta_block_remaining_len))
if is_uncompressed:
br.bit_pos_ = (br.bit_pos_ + 7) & ~7
copy_uncompressed_block_to_output(output_stream, meta_block_remaining_len, pos, ringbuffer,
ringbuffer_mask, br)
copy_uncompressed_block_to_output(meta_block_remaining_len, pos, output_buffer, br)
pos += meta_block_remaining_len
continue
@ -519,8 +414,6 @@ def brotli_decompress(input_stream, output_stream):
block_length[i] = read_block_length(block_len_trees, i * HUFFMAN_MAX_TABLE_SIZE, br)
block_type_rb_index[i] = 1
br.read_more_input()
distance_postfix_bits = br.read_bits(2)
num_direct_distance_codes = NUM_DISTANCE_SHORT_CODES + (br.read_bits(4) << distance_postfix_bits)
distance_postfix_mask = (1 << distance_postfix_bits) - 1
@ -528,7 +421,6 @@ def brotli_decompress(input_stream, output_stream):
context_modes = bytearray([0] * num_block_types[0])
for i in range(0, num_block_types[0]):
br.read_more_input()
context_modes[i] = (br.read_bits(2) << 1)
_o1 = DecodeContextMap(num_block_types[0] << kLiteralContextBits, br)
@ -555,8 +447,6 @@ def brotli_decompress(input_stream, output_stream):
while meta_block_remaining_len > 0:
br.read_more_input()
if block_length[1] == 0:
decode_block_type(num_block_types[1], block_type_trees, 1, block_type, block_type_rb,
block_type_rb_index, br)
@ -575,11 +465,9 @@ def brotli_decompress(input_stream, output_stream):
kInsertLengthPrefixCode[insert_code].nbits)
copy_length = kCopyLengthPrefixCode[copy_code].offset + br.read_bits(
kCopyLengthPrefixCode[copy_code].nbits)
prev_byte1 = ringbuffer[pos - 1 & ringbuffer_mask]
prev_byte2 = ringbuffer[pos - 2 & ringbuffer_mask]
prev_byte1 = output_buffer[pos - 1]
prev_byte2 = output_buffer[pos - 2]
for j in range(0, insert_length):
br.read_more_input()
if block_length[0] == 0:
decode_block_type(num_block_types[0], block_type_trees, 0, block_type, block_type_rb,
block_type_rb_index, br)
@ -595,16 +483,13 @@ def brotli_decompress(input_stream, output_stream):
block_length[0] -= 1
prev_byte2 = prev_byte1
prev_byte1 = read_symbol(hgroup[0].codes, hgroup[0].huff_trees[literal_huff_tree_index], br)
ringbuffer[pos & ringbuffer_mask] = prev_byte1
if (pos & ringbuffer_mask) == ringbuffer_mask:
output_stream.write(ringbuffer[:ringbuffer_size])
output_buffer[pos] = prev_byte1
pos += 1
meta_block_remaining_len -= insert_length
if meta_block_remaining_len <= 0:
break
if distance_code < 0:
br.read_more_input()
if block_length[2] == 0:
decode_block_type(num_block_types[2], block_type_trees, 2, block_type, block_type_rb,
block_type_rb_index, br)
@ -634,7 +519,7 @@ def brotli_decompress(input_stream, output_stream):
else:
max_distance = max_backward_distance
copy_dst = pos & ringbuffer_mask
copy_dst = pos
if distance > max_distance:
if BrotliDictionary.minDictionaryWordLength <= copy_length <= BrotliDictionary.maxDictionaryWordLength:
@ -646,16 +531,11 @@ def brotli_decompress(input_stream, output_stream):
transform_idx = word_id >> shift
offset += word_idx * copy_length
if transform_idx < kNumTransforms:
length = Transform.transformDictionaryWord(ringbuffer, copy_dst, offset, copy_length,
length = Transform.transformDictionaryWord(output_buffer, copy_dst, offset, copy_length,
transform_idx)
copy_dst += length
pos += length
meta_block_remaining_len -= length
if copy_dst >= ringbuffer_end:
output_stream.write(ringbuffer[:ringbuffer_size])
for _x in range(0, copy_dst - ringbuffer_end):
ringbuffer[_x] = ringbuffer[ringbuffer_end + _x]
else:
raise Exception("Invalid backward reference. pos: %s distance: %s len: %s bytes left: %s" % (
pos, distance, copy_length, meta_block_remaining_len))
@ -671,14 +551,8 @@ def brotli_decompress(input_stream, output_stream):
raise Exception("Invalid backward reference. pos: %s distance: %s len: %s bytes left: %s" % (
pos, distance, copy_length, meta_block_remaining_len))
for j in range(0, copy_length):
ringbuffer[pos & ringbuffer_mask] = ringbuffer[(pos - distance) & ringbuffer_mask]
if (pos & ringbuffer_mask) == ringbuffer_mask:
output_stream.write(ringbuffer[:ringbuffer_size])
for j in range(0, copy_length): # don't try to optimize with a slice. source and dest may overlap
output_buffer[pos] = output_buffer[pos - distance]
pos += 1
meta_block_remaining_len -= 1
# Protect pos from overflow, wrap it around at every GB of input data
pos &= 0x3fffffff
output_stream.write(ringbuffer[:pos & ringbuffer_mask])
return output_buffer