From 76f36fc4194147984bd007b365fcf49d4bdfd764 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Tue, 26 Jan 2021 22:06:01 +0100 Subject: support multiple deflate blocks. this library is now fully RFC 1950 and RFC 1951 compliant --- README.md | 24 ++++++++++++--------- src/inflate.c | 67 ++++++++++++++++++++++++++++++++++++++--------------------- src/inflate.h | 1 + 3 files changed, 58 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 20c9a35..e4b4c45 100644 --- a/README.md +++ b/README.md @@ -87,16 +87,20 @@ checksum in `inflate_zlib`. ## Compliance -The code *almost* complies with RFC 1950 (decompression only), with the -following exceptions. - -* Unless compiled with `-DDEFLATE_CHECKSUM`, zlib-deflate-nostdlib does not - verify the ADLER32 checksum embedded into zlib-compressed data. - -The code *almost* complies with RFC 1951, with the following exceptions. - -* zlib-deflate-nostdlib does not yet support compressed items consisting of - more than one deflate block. I intend to fix this. +`inflate` is fully compliant with RFC 1951 for data with a decompressed size +of up to 65 kB. + +When compiled with `-DDEFLATE_CHECKSUM`, `inflate_zlib` is fully compliant with +RFC 1950 (decompression only) for data with a decompressed size of up to 65 kB. +By default (without `-DDEFLATE_CHECKSUM`), it does not verify the ADLER32 +checksum embedded into zlib-compressed data and is therefore not compliant with +RFC 1950. + +For files larger than 65 kB, you only need to change some size arguments to +`uint32_t`. However, if you are decompressing files of that size, you probably +have more RAM than this library is designed for. In that case, you may be +better off with [udeflate](https://github.com/jlublin/udeflate), +[uzlib](https://github.com/pfalcon/uzlib), or similar. ## Requirements and Performance diff --git a/src/inflate.c b/src/inflate.c index 725d6b0..16e557c 100644 --- a/src/inflate.c +++ b/src/inflate.c @@ -184,6 +184,12 @@ static void deflate_build_alphabet(uint8_t * lengths, uint16_t size, } } +/* + * This function trades speed for low memory requirements. Instead of building + * an actual huffman tree (at a cost of about 650 Bytes of RAM), we iterate + * through the code lengths whenever we have found a huffman code. This is + * very slow, but memory-efficient. + */ static uint16_t deflate_huff(uint8_t * lengths, uint16_t size, uint8_t * bl_count, uint16_t * next_code) { @@ -199,6 +205,7 @@ static uint16_t deflate_huff(uint8_t * lengths, uint16_t size, } uint8_t len_pos = next_bits; uint8_t cur_pos = next_code[num_bits]; + // This is slow, but memory-efficient for (uint16_t i = 0; i < size; i++) { if (lengths[i] == num_bits) { if (cur_pos == len_pos) { @@ -229,6 +236,8 @@ static int8_t deflate_huffman(uint8_t * ll_lengths, uint16_t ll_size, deflate_output_now++; } else if (code == 256) { return 0; + } else if (code == 65535) { + return DEFLATE_ERR_HUFFMAN; } else { uint16_t len_val = deflate_length_offsets[code - 257]; uint8_t extra_bits = deflate_length_bits[code - 257]; @@ -261,7 +270,10 @@ static int8_t deflate_huffman(uint8_t * ll_lengths, uint16_t ll_size, static int8_t deflate_uncompressed() { - deflate_input_now++; + if (deflate_bit_offset) { + deflate_input_now++; + deflate_bit_offset = 0; + } uint16_t len = ((uint16_t) deflate_input_now[1] << 8) + deflate_input_now[0]; uint16_t nlen = @@ -331,7 +343,8 @@ static int8_t deflate_dynamic_huffman() uint16_t items_processed = 0; while (items_processed < hlit + hdist) { uint8_t code = - deflate_huff(deflate_hc_lengths, 19, deflate_bl_count_ll, + deflate_huff(deflate_hc_lengths, sizeof(deflate_hc_lengths), + deflate_bl_count_ll, deflate_next_code_ll); if (code == 16) { uint8_t copy_count = 3 + deflate_get_bits(2); @@ -370,36 +383,42 @@ static int8_t deflate_dynamic_huffman() int16_t inflate(unsigned char *input_buf, uint16_t input_len, unsigned char *output_buf, uint16_t output_len) { - //uint8_t is_final = input_buf[0] & 0x01; - uint8_t block_type = (input_buf[0] & 0x06) >> 1; - int8_t ret; - deflate_input_now = input_buf; deflate_input_end = input_buf + input_len; - deflate_bit_offset = 3; + deflate_bit_offset = 0; deflate_output_now = output_buf; deflate_output_end = output_buf + output_len; - switch (block_type) { - case 0: - ret = deflate_uncompressed(); - break; - case 1: - ret = deflate_static_huffman(); - break; - case 2: - ret = deflate_dynamic_huffman(); - break; - default: - return DEFLATE_ERR_BLOCK; - } + while (1) { + uint8_t block_type = deflate_get_bits(3); + uint8_t is_final = block_type & 0x01; + int8_t ret; + + block_type >>= 1; + + switch (block_type) { + case 0: + ret = deflate_uncompressed(); + break; + case 1: + ret = deflate_static_huffman(); + break; + case 2: + ret = deflate_dynamic_huffman(); + break; + default: + return DEFLATE_ERR_BLOCK; + } - if (ret < 0) { - return ret; - } + if (ret < 0) { + return ret; + } - return deflate_output_now - output_buf; + if (is_final) { + return deflate_output_now - output_buf; + } + } } int16_t inflate_zlib(unsigned char *input_buf, uint16_t input_len, diff --git a/src/inflate.h b/src/inflate.h index 575be4a..af2b4f7 100644 --- a/src/inflate.h +++ b/src/inflate.h @@ -16,6 +16,7 @@ #define DEFLATE_ERR_OUTPUT_LENGTH (-6) #define DEFLATE_ERR_FCHECK (-7) #define DEFLATE_ERR_NLEN (-8) +#define DEFLATE_ERR_HUFFMAN (-9) int16_t inflate(unsigned char *input_buf, uint16_t input_len, unsigned char *output_buf, uint16_t output_len); -- cgit v1.2.3