From 32db6e20365fd7e7f3ced2b5b948ae94fe30c4e2 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Mon, 1 Feb 2021 10:39:12 -0600 Subject: [PATCH] Update to brotlidecpy v1.0.1 --- included_dependencies/brotlidecpy/__init__.py | 2 +- .../brotlidecpy/bit_reader.py | 120 ++++-------- included_dependencies/brotlidecpy/decode.py | 182 +++--------------- 3 files changed, 72 insertions(+), 232 deletions(-) diff --git a/included_dependencies/brotlidecpy/__init__.py b/included_dependencies/brotlidecpy/__init__.py index 2585ffc1..8e10a688 100644 --- a/included_dependencies/brotlidecpy/__init__.py +++ b/included_dependencies/brotlidecpy/__init__.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -__version__ = "1.0.0" +__version__ = "1.0.1" # noinspection PyUnresolvedReferences from .decode import brotli_decompress_buffer as decompress diff --git a/included_dependencies/brotlidecpy/bit_reader.py b/included_dependencies/brotlidecpy/bit_reader.py index b68699ae..8126de9f 100644 --- a/included_dependencies/brotlidecpy/bit_reader.py +++ b/included_dependencies/brotlidecpy/bit_reader.py @@ -2,95 +2,57 @@ # Distributed under MIT license. # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -BROTLI_READ_SIZE = 4096 -BROTLI_IBUF_SIZE = (2 * BROTLI_READ_SIZE + 32) -BROTLI_IBUF_MASK = (2 * BROTLI_READ_SIZE - 1) kBitMask = [ - 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, - 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215 + 0x000000, 0x000001, 0x000003, 0x000007, 0x00000f, 0x00001f, 0x00003f, 0x00007f, + 0x0000ff, 0x0001ff, 0x0003ff, 0x0007ff, 0x000fff, 0x001fff, 0x003fff, 0x007fff, + 0x00ffff, 0x01ffff, 0x03ffff, 0x07ffff, 0x0fffff, 0x1fffff, 0x3fffff, 0x7fffff, + 0xffffff ] class BrotliBitReader: - def __init__(self, input_stream): - self.buf_ = bytearray([0] * BROTLI_IBUF_SIZE) - self.input_ = input_stream # input stream - self.buf_ptr_ = 0 # next input will write here - self.val_ = 0 # pre-fetched bits + """Wrap a bytes buffer to enable reading 0 < n <=24 bits at a time, or transfer of arbitrary number of bytes""" + def __init__(self, input_buffer): + self.buf_ = bytearray(input_buffer) + self.buf_len_ = len(input_buffer) self.pos_ = 0 # byte position in stream - self.bit_pos_ = 0 # current bit-reading position in val_ - self.bit_end_pos_ = 0 # bit-reading end position from LSB of val_ - self.eos_ = 0 # input stream is finished - self.reset() - - READ_SIZE = BROTLI_READ_SIZE - IBUF_MASK = BROTLI_IBUF_MASK + self.bit_pos_ = 0 # current bit-reading position in current byte (number bits already read from byte, 0-7) def reset(self): - self.buf_ptr_ = 0 # next input will write here - self.val_ = 0 # pre-fetched bits - self.pos_ = 0 # byte position in stream - self.bit_pos_ = 0 # current bit-reading position in val_ - self.bit_end_pos_ = 0 # bit-reading end position from LSB of val_ - self.eos_ = 0 # input stream is finished + """Reset an initialized BrotliBitReader to start of input buffer""" + self.pos_ = 0 + self.bit_pos_ = 0 - self.read_more_input() - for i in range(0, 4): - self.val_ |= self.buf_[self.pos_] << (8 * i) - self.pos_ += 1 - return self.bit_end_pos_ > 0 + def peek_bits(self, n_bits): + """Get value a n_bits unsigned integer treating input as little-endian byte stream, without advancing pointer""" + val = 0 + bytes_shift = 0 + buf_pos = self.pos_ + bit_pos_when_done = n_bits + self.bit_pos_ + while bytes_shift < bit_pos_when_done: + if buf_pos >= self.buf_len_: + break # if hit end of buffer, this simulates zero padding after end, which is correct + val |= self.buf_[buf_pos] << bytes_shift + bytes_shift += 8 + buf_pos += 1 + return (val >> self.bit_pos_) & kBitMask[n_bits] - def read_more_input(self): - """ Fills up the input ringbuffer by calling the input callback. - - Does nothing if there are at least 32 bytes present after current position. - - Returns 0 if either: - - the input callback returned an error, or - - there is no more input and the position is past the end of the stream. - - After encountering the end of the input stream, 32 additional zero bytes are - copied to the ringbuffer, therefore it is safe to call this function after - every 32 bytes of input is read""" - if self.bit_end_pos_ > 256: - return - elif self.eos_: - if self.bit_pos_ > self.bit_end_pos_: - raise Exception('Unexpected end of input %s %s' % (self.bit_pos_, self.bit_end_pos_)) - else: - dst = self.buf_ptr_ - bytes_read = self.input_.readinto(memoryview(self.buf_)[dst:dst+BROTLI_READ_SIZE]) - if bytes_read < 0: - raise Exception('Unexpected end of input') - - if bytes_read < BROTLI_READ_SIZE: - self.eos_ = 1 - # Store 32 bytes of zero after the stream end - for p in range(0, 32): - self.buf_[dst + bytes_read + p] = 0 - - if dst == 0: - # Copy the head of the ringbuffer to the slack region - for p in range(0, 32): - self.buf_[(BROTLI_READ_SIZE << 1) + p] = self.buf_[p] - self.buf_ptr_ = BROTLI_READ_SIZE - else: - self.buf_ptr_ = 0 - - self.bit_end_pos_ += bytes_read << 3 - - def fill_bit_window(self): - """Guarantees that there are at least 24 bits in the buffer""" - while self.bit_pos_ >= 8: - self.val_ >>= 8 - self.val_ |= self.buf_[self.pos_ & BROTLI_IBUF_MASK] << 24 - self.pos_ += 1 - self.bit_pos_ -= 8 - self.bit_end_pos_ -= 8 + def skip_bits(self, n_bits): + next_in_bits = self.bit_pos_ + n_bits + self.bit_pos_ = next_in_bits & 7 + self.pos_ += next_in_bits >> 3 def read_bits(self, n_bits): - if 32 - self.bit_pos_ < n_bits: - self.fill_bit_window() - val = ((self.val_ >> self.bit_pos_) & kBitMask[n_bits]) - self.bit_pos_ += n_bits + val = self.peek_bits(n_bits) + self.skip_bits(n_bits) return val + + def copy_bytes(self, dest_buffer, dest_pos, n_bytes): + """Copy bytes from input buffer. This will first skip to next byte boundary if not already on one""" + if self.bit_pos_ != 0: + self.bit_pos_ = 0 + self.pos_ += 1 + if n_bytes > 0: # call with n_bytes == 0 to just skip to next byte boundary + new_pos = self.pos_ + n_bytes + memoryview(dest_buffer)[dest_pos:dest_pos+n_bytes] = self.buf_[self.pos_:new_pos] + self.pos_ = new_pos diff --git a/included_dependencies/brotlidecpy/decode.py b/included_dependencies/brotlidecpy/decode.py index 860e7956..133acf27 100644 --- a/included_dependencies/brotlidecpy/decode.py +++ b/included_dependencies/brotlidecpy/decode.py @@ -8,7 +8,6 @@ from .bit_reader import BrotliBitReader from .dictionary import BrotliDictionary from .context import Context from .transform import Transform, kNumTransforms -from io import BytesIO kDefaultCodeLength = 8 kCodeLengthRepeatCode = 16 @@ -106,14 +105,13 @@ def decode_meta_block_length(br): def read_symbol(table, index, br): """Decodes the next Huffman code from bit-stream.""" - br.fill_bit_window() - index += (br.val_ >> br.bit_pos_) & HUFFMAN_TABLE_MASK + index += br.peek_bits(HUFFMAN_TABLE_BITS) nbits = table[index].bits - HUFFMAN_TABLE_BITS if nbits > 0: - br.bit_pos_ += HUFFMAN_TABLE_BITS + br.skip_bits(HUFFMAN_TABLE_BITS) index += table[index].value - index += (br.val_ >> br.bit_pos_) & ((1 << nbits) - 1) - br.bit_pos_ += table[index].bits + index += br.peek_bits(nbits) + br.skip_bits(table[index].bits) return table[index].value @@ -130,10 +128,8 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length while (symbol < num_symbols) and (space > 0): p = 0 - br.read_more_input() - br.fill_bit_window() - p += (br.val_ >> br.bit_pos_) & 31 - br.bit_pos_ += table[p].bits + p += br.peek_bits(5) + br.skip_bits(table[p].bits) code_len = table[p].value & 0xff if code_len < kCodeLengthRepeatCode: repeat = 0 @@ -141,7 +137,7 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length symbol += 1 if code_len != 0: prev_code_len = code_len - space -= 32768 >> code_len + space -= 0x8000 >> code_len else: extra_bits = code_len - 14 new_len = 0 @@ -177,8 +173,6 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length def read_huffman_code(alphabet_size, tables, table, br): code_lengths = bytearray([0] * alphabet_size) - br.read_more_input() - # simple_code_or_skip is used as follows: # 1 for simple code # 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths @@ -228,9 +222,8 @@ def read_huffman_code(alphabet_size, tables, table, br): break code_len_idx = kCodeLengthCodeOrder[i] p = 0 - br.fill_bit_window() - p += (br.val_ >> br.bit_pos_) & 15 - br.bit_pos_ += huff[p].bits + p += br.peek_bits(4) + br.skip_bits(huff[p].bits) v = huff[p].value code_length_code_lengths[code_len_idx] = v if v != 0: @@ -298,8 +291,6 @@ class HuffmanTreeGroup: class DecodeContextMap: def __init__(self, context_map_size, br): max_run_length_prefix = 0 - br.read_more_input() - self.num_huff_trees = decode_var_len_uint8(br) + 1 self.context_map = bytearray([0] * context_map_size) @@ -316,7 +307,6 @@ class DecodeContextMap: i = 0 while i < context_map_size: - br.read_more_input() code = read_symbol(table, 0, br) if code == 0: self.context_map[i] = 0 @@ -351,99 +341,28 @@ def decode_block_type(max_block_type, trees, tree_type, block_types, ring_buffer indexes[index] += 1 -def copy_uncompressed_block_to_output(output, length, pos, ringbuffer, ringbuffer_mask, br): - rb_size = ringbuffer_mask + 1 - rb_pos = pos & ringbuffer_mask - br_pos = br.pos_ & BrotliBitReader.IBUF_MASK - - # For short lengths copy byte-by-byte - if (length < 8) or (br.bit_pos_ + (length << 3) < br.bit_end_pos_): - for i in range(0, length): - br.read_more_input() - ringbuffer[rb_pos] = br.read_bits(8) - rb_pos += 1 - if rb_pos == rb_size: - output.write(ringbuffer[:rb_size]) - rb_pos = 0 - return - - if br.bit_end_pos_ < 32: - raise Exception('[copy_uncompressed_block_to_output] br.bit_end_pos_ < 32') - - # Copy remaining 0-4 bytes from br.val_ to ringbuffer. - while br.bit_pos_ < 32: - ringbuffer[rb_pos] = (br.val_ >> br.bit_pos_) - br.bit_pos_ += 8 - rb_pos += 1 - length -= 1 - - # Copy remaining bytes from br.buf_ to ringbuffer. - num_bytes = (br.bit_end_pos_ - br.bit_pos_) >> 3 - if br_pos + num_bytes > BrotliBitReader.IBUF_MASK: - tail = BrotliBitReader.IBUF_MASK + 1 - br_pos - for x in range(0, tail): - ringbuffer[rb_pos + x] = br.buf_[br_pos + x] - - num_bytes -= tail - rb_pos += tail - length -= tail - br_pos = 0 - - for x in range(0, num_bytes): - ringbuffer[rb_pos + x] = br.buf_[br_pos + x] - - rb_pos += num_bytes - length -= num_bytes - - # If we wrote past the logical end of the ringbuffer, copy the tail of the - # ringbuffer to its beginning and flush the ringbuffer to the output. - if rb_pos >= rb_size: - output.write(ringbuffer[:rb_size]) - rb_pos -= rb_size - for x in range(0, rb_pos): - ringbuffer[x] = ringbuffer[rb_size + x] - - # If we have more to copy than the remaining size of the ringbuffer, then we first - # fill the ringbuffer from the input and then flush the ringbuffer to the output - while rb_pos + length >= rb_size: - num_bytes = rb_size - rb_pos - if br.input_.readinto(memoryview(ringbuffer)[rb_pos:rb_pos+num_bytes]) < num_bytes: - raise Exception('[copy_uncompressed_block_to_output] not enough bytes') - output.write(ringbuffer[:rb_size]) - length -= num_bytes - rb_pos = 0 - - # Copy straight from the input onto the ringbuffer. The ringbuffer will be flushed to the output at a later time. - if br.input_.readinto(memoryview(ringbuffer)[rb_pos:rb_pos+length]) < length: - raise Exception('[copy_uncompressed_block_to_output] not enough bytes') - - # Restore the state of the bit reader. - br.reset() +def copy_uncompressed_block_to_output(length, pos, output_buffer, br): + """This only is called when input is on a byte boundary. Copy length raw bytes from input to output[pos]""" + br.copy_bytes(output_buffer, pos, length) def jump_to_byte_boundary(br): - """Advances the bit reader position to the next byte boundary and verifies that any skipped bits are set to zero""" - new_bit_pos = (br.bit_pos_ + 7) & ~7 - pad_bits = br.read_bits(new_bit_pos - br.bit_pos_) - return pad_bits == 0 - - -def brotli_decompressed_size(input_buffer): - with BytesIO(input_buffer) as input_stream: - br = BrotliBitReader(input_stream) - decode_window_bits(br) - out = decode_meta_block_length(br) - return out.meta_block_length + """Advances the bit reader position if needed to put it on a byte boundary""" + br.copy_bytes(b'', 0, 0) def brotli_decompress_buffer(input_buffer): - with BytesIO(input_buffer) as input_stream: - with BytesIO() as output_stream: - brotli_decompress(input_stream, output_stream) - return output_stream.getvalue() + br = BrotliBitReader(input_buffer) + decode_window_bits(br) + out = decode_meta_block_length(br) + decompressed_size = out.meta_block_length + output_buffer = bytearray([0] * decompressed_size) + br.reset() + brotli_decompress_br_to_buffer(br, output_buffer) + return output_buffer -def brotli_decompress(input_stream, output_stream): +def brotli_decompress_br_to_buffer(br, output_buffer): pos = 0 input_end = 0 max_distance = 0 @@ -452,24 +371,10 @@ def brotli_decompress(input_stream, output_stream): dist_rb_idx = 0 hgroup = [HuffmanTreeGroup(0, 0), HuffmanTreeGroup(0, 0), HuffmanTreeGroup(0, 0)] - # We need the slack region for the following reasons: - # - always doing two 8-byte copies for fast backward copying - # - transforms - # - flushing the input ringbuffer when decoding uncompressed blocks - _ring_buffer_write_ahead_slack = 128 + BrotliBitReader.READ_SIZE - - br = BrotliBitReader(input_stream) - # Decode window size. window_bits = decode_window_bits(br) max_backward_distance = (1 << window_bits) - 16 - ringbuffer_size = 1 << window_bits - ringbuffer_mask = ringbuffer_size - 1 - ringbuffer = bytearray( - [0] * (ringbuffer_size + _ring_buffer_write_ahead_slack + BrotliDictionary.maxDictionaryWordLength)) - ringbuffer_end = ringbuffer_size - block_type_trees = [HuffmanCode(0, 0) for _ in range(0, 3 * HUFFMAN_MAX_TABLE_SIZE)] block_len_trees = [HuffmanCode(0, 0) for _ in range(0, 3 * HUFFMAN_MAX_TABLE_SIZE)] @@ -484,8 +389,6 @@ def brotli_decompress(input_stream, output_stream): hgroup[i].codes = None hgroup[i].huff_trees = None - br.read_more_input() - _out = decode_meta_block_length(br) meta_block_remaining_len = _out.meta_block_length input_end = _out.input_end @@ -495,7 +398,6 @@ def brotli_decompress(input_stream, output_stream): jump_to_byte_boundary(br) while meta_block_remaining_len > 0: - br.read_more_input() # Read one byte and ignore it br.read_bits(8) meta_block_remaining_len -= 1 @@ -505,9 +407,7 @@ def brotli_decompress(input_stream, output_stream): continue if is_uncompressed: - br.bit_pos_ = (br.bit_pos_ + 7) & ~7 - copy_uncompressed_block_to_output(output_stream, meta_block_remaining_len, pos, ringbuffer, - ringbuffer_mask, br) + copy_uncompressed_block_to_output(meta_block_remaining_len, pos, output_buffer, br) pos += meta_block_remaining_len continue @@ -519,8 +419,6 @@ def brotli_decompress(input_stream, output_stream): block_length[i] = read_block_length(block_len_trees, i * HUFFMAN_MAX_TABLE_SIZE, br) block_type_rb_index[i] = 1 - br.read_more_input() - distance_postfix_bits = br.read_bits(2) num_direct_distance_codes = NUM_DISTANCE_SHORT_CODES + (br.read_bits(4) << distance_postfix_bits) distance_postfix_mask = (1 << distance_postfix_bits) - 1 @@ -528,7 +426,6 @@ def brotli_decompress(input_stream, output_stream): context_modes = bytearray([0] * num_block_types[0]) for i in range(0, num_block_types[0]): - br.read_more_input() context_modes[i] = (br.read_bits(2) << 1) _o1 = DecodeContextMap(num_block_types[0] << kLiteralContextBits, br) @@ -555,8 +452,6 @@ def brotli_decompress(input_stream, output_stream): while meta_block_remaining_len > 0: - br.read_more_input() - if block_length[1] == 0: decode_block_type(num_block_types[1], block_type_trees, 1, block_type, block_type_rb, block_type_rb_index, br) @@ -575,11 +470,9 @@ def brotli_decompress(input_stream, output_stream): kInsertLengthPrefixCode[insert_code].nbits) copy_length = kCopyLengthPrefixCode[copy_code].offset + br.read_bits( kCopyLengthPrefixCode[copy_code].nbits) - prev_byte1 = ringbuffer[pos - 1 & ringbuffer_mask] - prev_byte2 = ringbuffer[pos - 2 & ringbuffer_mask] + prev_byte1 = output_buffer[pos - 1] + prev_byte2 = output_buffer[pos - 2] for j in range(0, insert_length): - br.read_more_input() - if block_length[0] == 0: decode_block_type(num_block_types[0], block_type_trees, 0, block_type, block_type_rb, block_type_rb_index, br) @@ -595,16 +488,13 @@ def brotli_decompress(input_stream, output_stream): block_length[0] -= 1 prev_byte2 = prev_byte1 prev_byte1 = read_symbol(hgroup[0].codes, hgroup[0].huff_trees[literal_huff_tree_index], br) - ringbuffer[pos & ringbuffer_mask] = prev_byte1 - if (pos & ringbuffer_mask) == ringbuffer_mask: - output_stream.write(ringbuffer[:ringbuffer_size]) + output_buffer[pos] = prev_byte1 pos += 1 meta_block_remaining_len -= insert_length if meta_block_remaining_len <= 0: break if distance_code < 0: - br.read_more_input() if block_length[2] == 0: decode_block_type(num_block_types[2], block_type_trees, 2, block_type, block_type_rb, block_type_rb_index, br) @@ -634,7 +524,7 @@ def brotli_decompress(input_stream, output_stream): else: max_distance = max_backward_distance - copy_dst = pos & ringbuffer_mask + copy_dst = pos if distance > max_distance: if BrotliDictionary.minDictionaryWordLength <= copy_length <= BrotliDictionary.maxDictionaryWordLength: @@ -646,16 +536,11 @@ def brotli_decompress(input_stream, output_stream): transform_idx = word_id >> shift offset += word_idx * copy_length if transform_idx < kNumTransforms: - length = Transform.transformDictionaryWord(ringbuffer, copy_dst, offset, copy_length, + length = Transform.transformDictionaryWord(output_buffer, copy_dst, offset, copy_length, transform_idx) copy_dst += length pos += length meta_block_remaining_len -= length - if copy_dst >= ringbuffer_end: - output_stream.write(ringbuffer[:ringbuffer_size]) - - for _x in range(0, copy_dst - ringbuffer_end): - ringbuffer[_x] = ringbuffer[ringbuffer_end + _x] else: raise Exception("Invalid backward reference. pos: %s distance: %s len: %s bytes left: %s" % ( pos, distance, copy_length, meta_block_remaining_len)) @@ -672,13 +557,6 @@ def brotli_decompress(input_stream, output_stream): pos, distance, copy_length, meta_block_remaining_len)) for j in range(0, copy_length): - ringbuffer[pos & ringbuffer_mask] = ringbuffer[(pos - distance) & ringbuffer_mask] - if (pos & ringbuffer_mask) == ringbuffer_mask: - output_stream.write(ringbuffer[:ringbuffer_size]) + output_buffer[pos] = output_buffer[pos - distance] pos += 1 meta_block_remaining_len -= 1 - - # Protect pos from overflow, wrap it around at every GB of input data - pos &= 0x3fffffff - - output_stream.write(ringbuffer[:pos & ringbuffer_mask])