From 0b9b066c18bd1a209ef018036afdfdb8e1a7eae8 Mon Sep 17 00:00:00 2001 From: Jim Miller Date: Tue, 2 Feb 2021 16:38:13 -0600 Subject: [PATCH] Revert brotlidecpy back v1.0.0. --- included_dependencies/brotlidecpy/__init__.py | 2 +- .../brotlidecpy/bit_reader.py | 137 ++++++++----- included_dependencies/brotlidecpy/decode.py | 187 ++++++++++++++---- 3 files changed, 240 insertions(+), 86 deletions(-) diff --git a/included_dependencies/brotlidecpy/__init__.py b/included_dependencies/brotlidecpy/__init__.py index 595239e8..2585ffc1 100644 --- a/included_dependencies/brotlidecpy/__init__.py +++ b/included_dependencies/brotlidecpy/__init__.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -__version__ = "1.0.2" +__version__ = "1.0.0" # noinspection PyUnresolvedReferences from .decode import brotli_decompress_buffer as decompress diff --git a/included_dependencies/brotlidecpy/bit_reader.py b/included_dependencies/brotlidecpy/bit_reader.py index 6dd641a8..b68699ae 100644 --- a/included_dependencies/brotlidecpy/bit_reader.py +++ b/included_dependencies/brotlidecpy/bit_reader.py @@ -2,62 +2,95 @@ # Distributed under MIT license. # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +BROTLI_READ_SIZE = 4096 +BROTLI_IBUF_SIZE = (2 * BROTLI_READ_SIZE + 32) +BROTLI_IBUF_MASK = (2 * BROTLI_READ_SIZE - 1) +kBitMask = [ + 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, + 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215 +] + class BrotliBitReader: - """Wrap a bytes buffer to enable reading 0 < n <=24 bits at a time, or transfer of arbitrary number of bytes""" - - kBitMask = [ - 0x000000, 0x000001, 0x000003, 0x000007, 0x00000f, 0x00001f, 0x00003f, 0x00007f, - 0x0000ff, 0x0001ff, 0x0003ff, 0x0007ff, 0x000fff, 0x001fff, 0x003fff, 0x007fff, - 0x00ffff, 0x01ffff, 0x03ffff, 0x07ffff, 0x0fffff, 0x1fffff, 0x3fffff, 0x7fffff, - 0xffffff - ] - - def __init__(self, input_buffer): - self.buf_ = bytearray(input_buffer) - self.buf_len_ = len(input_buffer) + def __init__(self, input_stream): + self.buf_ = bytearray([0] * BROTLI_IBUF_SIZE) + self.input_ = input_stream # input stream + self.buf_ptr_ = 0 # next input will write here + self.val_ = 0 # pre-fetched bits self.pos_ = 0 # byte position in stream - self.bit_pos_ = 0 # current bit-reading position in current byte (number bits already read from byte, 0-7) + self.bit_pos_ = 0 # current bit-reading position in val_ + self.bit_end_pos_ = 0 # bit-reading end position from LSB of val_ + self.eos_ = 0 # input stream is finished + self.reset() + + READ_SIZE = BROTLI_READ_SIZE + IBUF_MASK = BROTLI_IBUF_MASK def reset(self): - """Reset an initialized BrotliBitReader to start of input buffer""" - self.pos_ = 0 - self.bit_pos_ = 0 + self.buf_ptr_ = 0 # next input will write here + self.val_ = 0 # pre-fetched bits + self.pos_ = 0 # byte position in stream + self.bit_pos_ = 0 # current bit-reading position in val_ + self.bit_end_pos_ = 0 # bit-reading end position from LSB of val_ + self.eos_ = 0 # input stream is finished - def read_bits(self, n_bits, bits_to_skip=None): - """Get n_bits unsigned integer treating input as little-endian byte stream, maybe advancing input buffer pointer - n_bits: is number of bits to read from input buffer. Set to None or 0 to seek ahead ignoring the value - bits_to_skip: number of bits to advance in input_buffer, defaults to n_bits if it is None - pass in 0 to peek at the next n_bits of value without advancing - It is ok to have n_bits and bits_to_skip be different non-zero values if that is what is wanted - Returns: the next n_bits from the buffer as a little-endian integer, 0 if n_bits is None or 0 - """ - val = 0 - if bits_to_skip is None: - bits_to_skip = n_bits - if n_bits: - bytes_shift = 0 - buf_pos = self.pos_ - bit_pos_when_done = n_bits + self.bit_pos_ - while bytes_shift < bit_pos_when_done: - if buf_pos >= self.buf_len_: - break # if hit end of buffer, this simulates zero padding after end, which is correct - val |= self.buf_[buf_pos] << bytes_shift - bytes_shift += 8 - buf_pos += 1 - val = (val >> self.bit_pos_) & self.kBitMask[n_bits] - if bits_to_skip: - next_in_bits = self.bit_pos_ + bits_to_skip - self.bit_pos_ = next_in_bits & 7 - self.pos_ += next_in_bits >> 3 - return val - - def copy_bytes(self, dest_buffer, dest_pos, n_bytes): - """Copy bytes from input buffer. This will first skip to next byte boundary if not already on one""" - if self.bit_pos_ != 0: - self.bit_pos_ = 0 + self.read_more_input() + for i in range(0, 4): + self.val_ |= self.buf_[self.pos_] << (8 * i) self.pos_ += 1 - if n_bytes > 0: # call with n_bytes == 0 to just skip to next byte boundary - new_pos = self.pos_ + n_bytes - memoryview(dest_buffer)[dest_pos:dest_pos+n_bytes] = self.buf_[self.pos_:new_pos] - self.pos_ = new_pos + return self.bit_end_pos_ > 0 + + def read_more_input(self): + """ Fills up the input ringbuffer by calling the input callback. + + Does nothing if there are at least 32 bytes present after current position. + + Returns 0 if either: + - the input callback returned an error, or + - there is no more input and the position is past the end of the stream. + + After encountering the end of the input stream, 32 additional zero bytes are + copied to the ringbuffer, therefore it is safe to call this function after + every 32 bytes of input is read""" + if self.bit_end_pos_ > 256: + return + elif self.eos_: + if self.bit_pos_ > self.bit_end_pos_: + raise Exception('Unexpected end of input %s %s' % (self.bit_pos_, self.bit_end_pos_)) + else: + dst = self.buf_ptr_ + bytes_read = self.input_.readinto(memoryview(self.buf_)[dst:dst+BROTLI_READ_SIZE]) + if bytes_read < 0: + raise Exception('Unexpected end of input') + + if bytes_read < BROTLI_READ_SIZE: + self.eos_ = 1 + # Store 32 bytes of zero after the stream end + for p in range(0, 32): + self.buf_[dst + bytes_read + p] = 0 + + if dst == 0: + # Copy the head of the ringbuffer to the slack region + for p in range(0, 32): + self.buf_[(BROTLI_READ_SIZE << 1) + p] = self.buf_[p] + self.buf_ptr_ = BROTLI_READ_SIZE + else: + self.buf_ptr_ = 0 + + self.bit_end_pos_ += bytes_read << 3 + + def fill_bit_window(self): + """Guarantees that there are at least 24 bits in the buffer""" + while self.bit_pos_ >= 8: + self.val_ >>= 8 + self.val_ |= self.buf_[self.pos_ & BROTLI_IBUF_MASK] << 24 + self.pos_ += 1 + self.bit_pos_ -= 8 + self.bit_end_pos_ -= 8 + + def read_bits(self, n_bits): + if 32 - self.bit_pos_ < n_bits: + self.fill_bit_window() + val = ((self.val_ >> self.bit_pos_) & kBitMask[n_bits]) + self.bit_pos_ += n_bits + return val diff --git a/included_dependencies/brotlidecpy/decode.py b/included_dependencies/brotlidecpy/decode.py index 5ea2dd8a..860e7956 100644 --- a/included_dependencies/brotlidecpy/decode.py +++ b/included_dependencies/brotlidecpy/decode.py @@ -8,6 +8,7 @@ from .bit_reader import BrotliBitReader from .dictionary import BrotliDictionary from .context import Context from .transform import Transform, kNumTransforms +from io import BytesIO kDefaultCodeLength = 8 kCodeLengthRepeatCode = 16 @@ -104,15 +105,15 @@ def decode_meta_block_length(br): def read_symbol(table, index, br): - """Decodes the next Huffman code from bit-stream. table is array of nodes in a huffman tree, index points to root""" - x_bits = br.read_bits(16, 0) # The C reference version assumes 15 is the max needed and uses 16 in this function - index += (x_bits & HUFFMAN_TABLE_MASK) + """Decodes the next Huffman code from bit-stream.""" + br.fill_bit_window() + index += (br.val_ >> br.bit_pos_) & HUFFMAN_TABLE_MASK nbits = table[index].bits - HUFFMAN_TABLE_BITS - skip = 0 if nbits > 0: - skip = HUFFMAN_TABLE_BITS - index += table[index].value + ((x_bits >> HUFFMAN_TABLE_BITS) & br.kBitMask[nbits]) - br.read_bits(None, skip + table[index].bits) + br.bit_pos_ += HUFFMAN_TABLE_BITS + index += table[index].value + index += (br.val_ >> br.bit_pos_) & ((1 << nbits) - 1) + br.bit_pos_ += table[index].bits return table[index].value @@ -129,8 +130,10 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length while (symbol < num_symbols) and (space > 0): p = 0 - p += br.read_bits(5, 0) - br.read_bits(None, table[p].bits) + br.read_more_input() + br.fill_bit_window() + p += (br.val_ >> br.bit_pos_) & 31 + br.bit_pos_ += table[p].bits code_len = table[p].value & 0xff if code_len < kCodeLengthRepeatCode: repeat = 0 @@ -138,7 +141,7 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length symbol += 1 if code_len != 0: prev_code_len = code_len - space -= 0x8000 >> code_len + space -= 32768 >> code_len else: extra_bits = code_len - 14 new_len = 0 @@ -174,6 +177,8 @@ def read_huffman_code_lengths(code_length_code_lengths, num_symbols, code_length def read_huffman_code(alphabet_size, tables, table, br): code_lengths = bytearray([0] * alphabet_size) + br.read_more_input() + # simple_code_or_skip is used as follows: # 1 for simple code # 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths @@ -223,8 +228,9 @@ def read_huffman_code(alphabet_size, tables, table, br): break code_len_idx = kCodeLengthCodeOrder[i] p = 0 - p += br.read_bits(4, 0) - br.read_bits(None, huff[p].bits) + br.fill_bit_window() + p += (br.val_ >> br.bit_pos_) & 15 + br.bit_pos_ += huff[p].bits v = huff[p].value code_length_code_lengths[code_len_idx] = v if v != 0: @@ -292,6 +298,8 @@ class HuffmanTreeGroup: class DecodeContextMap: def __init__(self, context_map_size, br): max_run_length_prefix = 0 + br.read_more_input() + self.num_huff_trees = decode_var_len_uint8(br) + 1 self.context_map = bytearray([0] * context_map_size) @@ -308,6 +316,7 @@ class DecodeContextMap: i = 0 while i < context_map_size: + br.read_more_input() code = read_symbol(table, 0, br) if code == 0: self.context_map[i] = 0 @@ -342,28 +351,99 @@ def decode_block_type(max_block_type, trees, tree_type, block_types, ring_buffer indexes[index] += 1 -def copy_uncompressed_block_to_output(length, pos, output_buffer, br): - """This only is called when input is on a byte boundary. Copy length raw bytes from input to output[pos]""" - br.copy_bytes(output_buffer, pos, length) +def copy_uncompressed_block_to_output(output, length, pos, ringbuffer, ringbuffer_mask, br): + rb_size = ringbuffer_mask + 1 + rb_pos = pos & ringbuffer_mask + br_pos = br.pos_ & BrotliBitReader.IBUF_MASK + + # For short lengths copy byte-by-byte + if (length < 8) or (br.bit_pos_ + (length << 3) < br.bit_end_pos_): + for i in range(0, length): + br.read_more_input() + ringbuffer[rb_pos] = br.read_bits(8) + rb_pos += 1 + if rb_pos == rb_size: + output.write(ringbuffer[:rb_size]) + rb_pos = 0 + return + + if br.bit_end_pos_ < 32: + raise Exception('[copy_uncompressed_block_to_output] br.bit_end_pos_ < 32') + + # Copy remaining 0-4 bytes from br.val_ to ringbuffer. + while br.bit_pos_ < 32: + ringbuffer[rb_pos] = (br.val_ >> br.bit_pos_) + br.bit_pos_ += 8 + rb_pos += 1 + length -= 1 + + # Copy remaining bytes from br.buf_ to ringbuffer. + num_bytes = (br.bit_end_pos_ - br.bit_pos_) >> 3 + if br_pos + num_bytes > BrotliBitReader.IBUF_MASK: + tail = BrotliBitReader.IBUF_MASK + 1 - br_pos + for x in range(0, tail): + ringbuffer[rb_pos + x] = br.buf_[br_pos + x] + + num_bytes -= tail + rb_pos += tail + length -= tail + br_pos = 0 + + for x in range(0, num_bytes): + ringbuffer[rb_pos + x] = br.buf_[br_pos + x] + + rb_pos += num_bytes + length -= num_bytes + + # If we wrote past the logical end of the ringbuffer, copy the tail of the + # ringbuffer to its beginning and flush the ringbuffer to the output. + if rb_pos >= rb_size: + output.write(ringbuffer[:rb_size]) + rb_pos -= rb_size + for x in range(0, rb_pos): + ringbuffer[x] = ringbuffer[rb_size + x] + + # If we have more to copy than the remaining size of the ringbuffer, then we first + # fill the ringbuffer from the input and then flush the ringbuffer to the output + while rb_pos + length >= rb_size: + num_bytes = rb_size - rb_pos + if br.input_.readinto(memoryview(ringbuffer)[rb_pos:rb_pos+num_bytes]) < num_bytes: + raise Exception('[copy_uncompressed_block_to_output] not enough bytes') + output.write(ringbuffer[:rb_size]) + length -= num_bytes + rb_pos = 0 + + # Copy straight from the input onto the ringbuffer. The ringbuffer will be flushed to the output at a later time. + if br.input_.readinto(memoryview(ringbuffer)[rb_pos:rb_pos+length]) < length: + raise Exception('[copy_uncompressed_block_to_output] not enough bytes') + + # Restore the state of the bit reader. + br.reset() def jump_to_byte_boundary(br): - """Advances the bit reader position if needed to put it on a byte boundary""" - br.copy_bytes(b'', 0, 0) + """Advances the bit reader position to the next byte boundary and verifies that any skipped bits are set to zero""" + new_bit_pos = (br.bit_pos_ + 7) & ~7 + pad_bits = br.read_bits(new_bit_pos - br.bit_pos_) + return pad_bits == 0 + + +def brotli_decompressed_size(input_buffer): + with BytesIO(input_buffer) as input_stream: + br = BrotliBitReader(input_stream) + decode_window_bits(br) + out = decode_meta_block_length(br) + return out.meta_block_length def brotli_decompress_buffer(input_buffer): - br = BrotliBitReader(input_buffer) - decode_window_bits(br) - out = decode_meta_block_length(br) - decompressed_size = out.meta_block_length - output_buffer = bytearray([0] * decompressed_size) - br.reset() - brotli_decompress_br_to_buffer(br, output_buffer) - return output_buffer + with BytesIO(input_buffer) as input_stream: + with BytesIO() as output_stream: + brotli_decompress(input_stream, output_stream) + return output_stream.getvalue() -def brotli_decompress_br_to_buffer(br, output_buffer): +def brotli_decompress(input_stream, output_stream): pos = 0 input_end = 0 max_distance = 0 @@ -372,10 +452,24 @@ def brotli_decompress_br_to_buffer(br, output_buffer): dist_rb_idx = 0 hgroup = [HuffmanTreeGroup(0, 0), HuffmanTreeGroup(0, 0), HuffmanTreeGroup(0, 0)] + # We need the slack region for the following reasons: + # - always doing two 8-byte copies for fast backward copying + # - transforms + # - flushing the input ringbuffer when decoding uncompressed blocks + _ring_buffer_write_ahead_slack = 128 + BrotliBitReader.READ_SIZE + + br = BrotliBitReader(input_stream) + # Decode window size. window_bits = decode_window_bits(br) max_backward_distance = (1 << window_bits) - 16 + ringbuffer_size = 1 << window_bits + ringbuffer_mask = ringbuffer_size - 1 + ringbuffer = bytearray( + [0] * (ringbuffer_size + _ring_buffer_write_ahead_slack + BrotliDictionary.maxDictionaryWordLength)) + ringbuffer_end = ringbuffer_size + block_type_trees = [HuffmanCode(0, 0) for _ in range(0, 3 * HUFFMAN_MAX_TABLE_SIZE)] block_len_trees = [HuffmanCode(0, 0) for _ in range(0, 3 * HUFFMAN_MAX_TABLE_SIZE)] @@ -390,6 +484,8 @@ def brotli_decompress_br_to_buffer(br, output_buffer): hgroup[i].codes = None hgroup[i].huff_trees = None + br.read_more_input() + _out = decode_meta_block_length(br) meta_block_remaining_len = _out.meta_block_length input_end = _out.input_end @@ -399,6 +495,7 @@ def brotli_decompress_br_to_buffer(br, output_buffer): jump_to_byte_boundary(br) while meta_block_remaining_len > 0: + br.read_more_input() # Read one byte and ignore it br.read_bits(8) meta_block_remaining_len -= 1 @@ -408,7 +505,9 @@ def brotli_decompress_br_to_buffer(br, output_buffer): continue if is_uncompressed: - copy_uncompressed_block_to_output(meta_block_remaining_len, pos, output_buffer, br) + br.bit_pos_ = (br.bit_pos_ + 7) & ~7 + copy_uncompressed_block_to_output(output_stream, meta_block_remaining_len, pos, ringbuffer, + ringbuffer_mask, br) pos += meta_block_remaining_len continue @@ -420,6 +519,8 @@ def brotli_decompress_br_to_buffer(br, output_buffer): block_length[i] = read_block_length(block_len_trees, i * HUFFMAN_MAX_TABLE_SIZE, br) block_type_rb_index[i] = 1 + br.read_more_input() + distance_postfix_bits = br.read_bits(2) num_direct_distance_codes = NUM_DISTANCE_SHORT_CODES + (br.read_bits(4) << distance_postfix_bits) distance_postfix_mask = (1 << distance_postfix_bits) - 1 @@ -427,6 +528,7 @@ def brotli_decompress_br_to_buffer(br, output_buffer): context_modes = bytearray([0] * num_block_types[0]) for i in range(0, num_block_types[0]): + br.read_more_input() context_modes[i] = (br.read_bits(2) << 1) _o1 = DecodeContextMap(num_block_types[0] << kLiteralContextBits, br) @@ -453,6 +555,8 @@ def brotli_decompress_br_to_buffer(br, output_buffer): while meta_block_remaining_len > 0: + br.read_more_input() + if block_length[1] == 0: decode_block_type(num_block_types[1], block_type_trees, 1, block_type, block_type_rb, block_type_rb_index, br) @@ -471,9 +575,11 @@ def brotli_decompress_br_to_buffer(br, output_buffer): kInsertLengthPrefixCode[insert_code].nbits) copy_length = kCopyLengthPrefixCode[copy_code].offset + br.read_bits( kCopyLengthPrefixCode[copy_code].nbits) - prev_byte1 = output_buffer[pos - 1] - prev_byte2 = output_buffer[pos - 2] + prev_byte1 = ringbuffer[pos - 1 & ringbuffer_mask] + prev_byte2 = ringbuffer[pos - 2 & ringbuffer_mask] for j in range(0, insert_length): + br.read_more_input() + if block_length[0] == 0: decode_block_type(num_block_types[0], block_type_trees, 0, block_type, block_type_rb, block_type_rb_index, br) @@ -489,13 +595,16 @@ def brotli_decompress_br_to_buffer(br, output_buffer): block_length[0] -= 1 prev_byte2 = prev_byte1 prev_byte1 = read_symbol(hgroup[0].codes, hgroup[0].huff_trees[literal_huff_tree_index], br) - output_buffer[pos] = prev_byte1 + ringbuffer[pos & ringbuffer_mask] = prev_byte1 + if (pos & ringbuffer_mask) == ringbuffer_mask: + output_stream.write(ringbuffer[:ringbuffer_size]) pos += 1 meta_block_remaining_len -= insert_length if meta_block_remaining_len <= 0: break if distance_code < 0: + br.read_more_input() if block_length[2] == 0: decode_block_type(num_block_types[2], block_type_trees, 2, block_type, block_type_rb, block_type_rb_index, br) @@ -525,7 +634,7 @@ def brotli_decompress_br_to_buffer(br, output_buffer): else: max_distance = max_backward_distance - copy_dst = pos + copy_dst = pos & ringbuffer_mask if distance > max_distance: if BrotliDictionary.minDictionaryWordLength <= copy_length <= BrotliDictionary.maxDictionaryWordLength: @@ -537,11 +646,16 @@ def brotli_decompress_br_to_buffer(br, output_buffer): transform_idx = word_id >> shift offset += word_idx * copy_length if transform_idx < kNumTransforms: - length = Transform.transformDictionaryWord(output_buffer, copy_dst, offset, copy_length, + length = Transform.transformDictionaryWord(ringbuffer, copy_dst, offset, copy_length, transform_idx) copy_dst += length pos += length meta_block_remaining_len -= length + if copy_dst >= ringbuffer_end: + output_stream.write(ringbuffer[:ringbuffer_size]) + + for _x in range(0, copy_dst - ringbuffer_end): + ringbuffer[_x] = ringbuffer[ringbuffer_end + _x] else: raise Exception("Invalid backward reference. pos: %s distance: %s len: %s bytes left: %s" % ( pos, distance, copy_length, meta_block_remaining_len)) @@ -558,6 +672,13 @@ def brotli_decompress_br_to_buffer(br, output_buffer): pos, distance, copy_length, meta_block_remaining_len)) for j in range(0, copy_length): - output_buffer[pos] = output_buffer[pos - distance] + ringbuffer[pos & ringbuffer_mask] = ringbuffer[(pos - distance) & ringbuffer_mask] + if (pos & ringbuffer_mask) == ringbuffer_mask: + output_stream.write(ringbuffer[:ringbuffer_size]) pos += 1 meta_block_remaining_len -= 1 + + # Protect pos from overflow, wrap it around at every GB of input data + pos &= 0x3fffffff + + output_stream.write(ringbuffer[:pos & ringbuffer_mask])