diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/app/deflatetest/Makefile.inc | 2 | ||||
-rw-r--r-- | src/app/deflatetest/main.cc | 11 | ||||
-rw-r--r-- | src/lib/deflate.cc | 496 | ||||
-rw-r--r-- | src/lib/udeflate.cc | 492 |
4 files changed, 500 insertions, 501 deletions
diff --git a/src/app/deflatetest/Makefile.inc b/src/app/deflatetest/Makefile.inc index 84a6aff..c70a04d 100644 --- a/src/app/deflatetest/Makefile.inc +++ b/src/app/deflatetest/Makefile.inc @@ -10,4 +10,4 @@ ifdef app override arch_drivers += ,counter endif -CXX_TARGETS += src/lib/udeflate.cc +CXX_TARGETS += src/lib/deflate.cc diff --git a/src/app/deflatetest/main.cc b/src/app/deflatetest/main.cc index 516a6a0..1fb394d 100644 --- a/src/app/deflatetest/main.cc +++ b/src/app/deflatetest/main.cc @@ -9,13 +9,8 @@ #include "driver/uptime.h" #include "driver/counter.h" -#include "lib/udeflate.h" +#include "lib/deflate.h" -#define UDEFLATE_ERR_LENGTH (-1) -#define UDEFLATE_ERR_METHOD (-2) -#define UDEFLATE_ERR_FDICT (-3) -#define UDEFLATE_ERR_BLOCK (-4) -#define UDEFLATE_ERR_CHECKSUM (-5) /* // bad apple 0042.png @@ -56,9 +51,9 @@ int main(void) for (uint8_t i = 0; i < 5; i++) { counter.start(); - int8_t ret = udeflate_zlib((unsigned char*)deflate_input, sizeof(deflate_input), deflate_output, sizeof(deflate_output)); + int8_t ret = deflate_zlib((unsigned char*)deflate_input, sizeof(deflate_input), deflate_output, sizeof(deflate_output)); counter.stop(); - kout << "udeflate returned " << ret << endl; + kout << "deflate returned " << ret << endl; kout << "Output: " << (char*)deflate_output << endl; kout << "took " << counter.value << "/" << counter.overflow << " cycles" << endl; } diff --git a/src/lib/deflate.cc b/src/lib/deflate.cc new file mode 100644 index 0000000..078dcbc --- /dev/null +++ b/src/lib/deflate.cc @@ -0,0 +1,496 @@ +/* + * Copyright 2021 Daniel Friesel + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "lib/deflate.h" + +#ifdef DEFLATE_DEBUG +#include "driver/stdout.h" +#endif + +/* + * The compressed (deflated) input data. + */ +unsigned char *deflate_input_now; +unsigned char *deflate_input_end; + +/* + * The decompressed (inflated) output stream. 
+ */ +unsigned char *deflate_output_now; +unsigned char *deflate_output_end; + +/* + * The current bit offset in the input stream, if any. + * + * Deflate streams are read from least to most significant bit. + * An offset of 1 indicates that the least significant bit is skipped + * (i.e., only bits 7, 6, 5, 4, 3, 2, and 1 are read). + */ +uint8_t deflate_bit_offset = 0; + +/* + * Base lengths for length codes (code 257 to 285). + * Code 257 corresponds to a copy of 3 bytes, etc. + */ +uint16_t const deflate_length_offsets[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258 +}; + +/* + * Extra bits for length codes (code 257 to 285). + * Code 257 has no extra bits, code 265 has 1 extra bit + * (and indicates a length of 11 or 12 depending on its value), etc. + */ +uint8_t const deflate_length_bits[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 5, 0 +}; + +// can also be expressed as (index < 4 || index == 28) ? 0 : (index-4) >> 2 + +/* + * Base distances for distance codes (code 0 to 29). + * Code 0 indicates a distance of 1, etc. + */ +uint16_t const deflate_distance_offsets[] = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, + 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 +}; + +/* + * Extra bits for distance codes (code 0 to 29). + * Code 0 has no extra bits, code 4 has 1 bit, etc. + */ +uint8_t const deflate_distance_bits[] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, + 10, 11, 11, 12, 12, 13, 13 +}; + +// can also be expressed as index < 2 ? 0 : (index-2) >> 1 + +/* + * In block type 2 (dynamic huffman codes), the code lengths of literal/length + * and distance alphabet are themselves stored as huffman codes. To save space + * in case only a few code lengths are used, the code length codes are stored + * in the following order. 
This allows a few bits to be saved if some code + * lengths are unused and the unused code lengths are at the end of the list. + */ +uint8_t const deflate_hclen_index[] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + +/* + * Code lengths of the "code length" code (see above). + */ +uint8_t deflate_hc_lengths[19]; + +/* + * Code lengths of the literal/length and distance alphabets. + */ +uint8_t deflate_lld_lengths[318]; + +/* + * Assumptions: + * * huffman code length is limited to 11 bits + * * there are no more than 255 huffman codes with the same length + * + * Rationale: longer huffman codes might appear when handling large data + * sets. We don't do that; instead, we expect the uncompressed source to + * be no more than a few kB of data. + */ + +/* + * Bit length counts and next code entries for Literal/Length alphabet. + * Combined with the code lengths in deflate_lld_lengths, these make up the + * Literal/Length alphabet. See the algorithm in RFC 1951 section 3.2.2 for + * details. + * + * These variables are also used for the "code length" alphabet while the + * header of a dynamic huffman block is being decoded. + */ +uint8_t deflate_bl_count_ll[12]; +uint16_t deflate_next_code_ll[12]; + +/* + * Bit length counts and next code entries for Distance alphabet. 
+ */ +uint8_t deflate_bl_count_d[12]; +uint16_t deflate_next_code_d[12]; + +static uint16_t deflate_rev_word(uint16_t word, uint8_t bits) +{ + uint16_t ret = 0; + uint16_t mask = 1; + for (uint16_t rmask = 1 << (bits - 1); rmask > 0; rmask >>= 1) { + if (word & rmask) { + ret |= mask; + } + mask <<= 1; + } + return ret; +} + +static uint8_t deflate_bitmask(uint8_t bit_count) +{ + return (1 << bit_count) - 1; +} + +static uint16_t deflate_get_word() +{ + uint16_t ret = 0; + ret |= (deflate_input_now[0] >> deflate_bit_offset); + ret |= (uint16_t) deflate_input_now[1] << (8 - deflate_bit_offset); + if (deflate_bit_offset) { + ret |= + (uint16_t) (deflate_input_now[2] & + deflate_bitmask(deflate_bit_offset)) << (16 - + deflate_bit_offset); + } +#ifdef DEFLATE_DEBUG + kout << "get_word = " << bin << ret << dec << endl; +#endif + return ret; +} + +static uint16_t deflate_get_bits(uint8_t num_bits) +{ + uint16_t ret = deflate_get_word(); + deflate_bit_offset += num_bits; + while (deflate_bit_offset >= 8) { + deflate_input_now++; + deflate_bit_offset -= 8; + } + return ret & deflate_bitmask(num_bits); +} + +static void deflate_build_alphabet(uint8_t * lengths, uint16_t size, + uint8_t * bl_count, uint16_t * next_code) +{ + uint16_t i; + uint16_t code = 0; + uint16_t max_len = 0; + for (i = 0; i < 12; i++) { + bl_count[i] = 0; + } + + for (i = 0; i < size; i++) { + if (lengths[i]) { + bl_count[lengths[i]]++; + } + if (lengths[i] > max_len) { + max_len = lengths[i]; + } + } + + for (i = 1; i < max_len + 1; i++) { + code = (code + bl_count[i - 1]) << 1; + next_code[i] = code; + } + +#ifdef DEFLATE_DEBUG + for (i = 0; i < 12; i++) { + kout << "bl_count[" << i << "] = " << bl_count[i] << endl; + } + for (i = 0; i < 12; i++) { + kout << "next_code[" << i << "] = " << next_code[i] << endl; + } +#endif +} + +static uint16_t deflate_huff(uint8_t * lengths, uint16_t size, + uint8_t * bl_count, uint16_t * next_code) +{ + uint16_t next_word = deflate_get_word(); + for (uint8_t 
num_bits = 1; num_bits < 12; num_bits++) { + uint16_t next_bits = deflate_rev_word(next_word, num_bits); + if (bl_count[num_bits] && next_bits >= next_code[num_bits] + && next_bits < next_code[num_bits] + bl_count[num_bits]) { +#ifdef DEFLATE_DEBUG + kout << "found huffman code, length = " << num_bits << + endl; +#endif + deflate_bit_offset += num_bits; + while (deflate_bit_offset >= 8) { + deflate_input_now++; + deflate_bit_offset -= 8; + } + uint8_t len_pos = next_bits; + uint8_t cur_pos = next_code[num_bits]; + for (uint16_t i = 0; i < size; i++) { + if (lengths[i] == num_bits) { + if (cur_pos == len_pos) { + return i; + } + cur_pos++; + } + } + } + } + return 65535; +} + +static int8_t deflate_huffman(uint8_t * ll_lengths, uint16_t ll_size, + uint8_t * d_lengths, uint8_t d_size) +{ + uint16_t code; + uint16_t dcode; + while (1) { + code = + deflate_huff(ll_lengths, ll_size, deflate_bl_count_ll, + deflate_next_code_ll); +#ifdef DEFLATE_DEBUG + kout << "code " << code << endl; +#endif + if (code < 256) { + if (deflate_output_now == deflate_output_end) { + return DEFLATE_ERR_OUTPUT_LENGTH; + } + *deflate_output_now = code; + deflate_output_now++; + } else if (code == 256) { + return 0; + } else { + uint16_t len_val = deflate_length_offsets[code - 257]; + uint8_t extra_bits = deflate_length_bits[code - 257]; + if (extra_bits) { + len_val += deflate_get_bits(extra_bits); + } + dcode = + deflate_huff(d_lengths, d_size, + deflate_bl_count_d, + deflate_next_code_d); + uint16_t dist_val = deflate_distance_offsets[dcode]; + extra_bits = deflate_distance_bits[dcode]; + if (extra_bits) { + dist_val += deflate_get_bits(extra_bits); + } + while (len_val--) { + if (deflate_output_now == deflate_output_end) { + return DEFLATE_ERR_OUTPUT_LENGTH; + } + deflate_output_now[0] = + deflate_output_now[-dist_val]; + deflate_output_now++; + } + } + if (deflate_input_now >= deflate_input_end - 4) { + return DEFLATE_ERR_INPUT_LENGTH; + } + } +} + +static int8_t deflate_uncompressed() +{ 
+ deflate_input_now++; + uint16_t len = + ((uint16_t) deflate_input_now[1] << 8) + deflate_input_now[0]; + uint16_t nlen = + ((uint16_t) deflate_input_now[3] << 8) + deflate_input_now[2]; + if (len & nlen) { + return DEFLATE_ERR_NLEN; + } + deflate_input_now += 4; + if (deflate_input_now + len >= deflate_input_end) { + return DEFLATE_ERR_INPUT_LENGTH; + } + if (deflate_output_now + len >= deflate_output_end) { + return DEFLATE_ERR_OUTPUT_LENGTH; + } + for (uint16_t i = 0; i < len; i++) { + *(deflate_output_now++) = *(deflate_input_now++); + } + return 0; +} + +static int8_t deflate_static_huffman() +{ + uint16_t i; + for (i = 0; i <= 143; i++) { + deflate_lld_lengths[i] = 8; + } + for (i = 144; i <= 255; i++) { + deflate_lld_lengths[i] = 9; + } + for (i = 256; i <= 279; i++) { + deflate_lld_lengths[i] = 7; + } + for (i = 280; i <= 285; i++) { + deflate_lld_lengths[i] = 8; + } + for (i = 286; i <= 286 + 29; i++) { + deflate_lld_lengths[i] = 5; + } + + deflate_build_alphabet(deflate_lld_lengths, 286, deflate_bl_count_ll, + deflate_next_code_ll); + deflate_build_alphabet(deflate_lld_lengths + 286, 29, + deflate_bl_count_d, deflate_next_code_d); + return deflate_huffman(deflate_lld_lengths, 286, + deflate_lld_lengths + 286, 29); +} + +static int8_t deflate_dynamic_huffman() +{ + uint8_t i; + uint16_t hlit = 257 + deflate_get_bits(5); + uint8_t hdist = 1 + deflate_get_bits(5); + uint8_t hclen = 4 + deflate_get_bits(4); + +#ifdef DEFLATE_DEBUG + kout << "hlit=" << hlit << endl; + kout << "hdist=" << hdist << endl; + kout << "hclen=" << hclen << endl; +#endif + + for (i = 0; i < hclen; i++) { + deflate_hc_lengths[deflate_hclen_index[i]] = + deflate_get_bits(3); + } + for (i = hclen; i < sizeof(deflate_hc_lengths); i++) { + deflate_hc_lengths[deflate_hclen_index[i]] = 0; + } + + deflate_build_alphabet(deflate_hc_lengths, + sizeof(deflate_hc_lengths), + deflate_bl_count_ll, deflate_next_code_ll); + + uint16_t items_processed = 0; + while (items_processed < hlit + hdist) { + 
uint8_t code = + deflate_huff(deflate_hc_lengths, 19, deflate_bl_count_ll, + deflate_next_code_ll); +#ifdef DEFLATE_DEBUG + kout << "code = " << code << endl; +#endif + if (code == 16) { + uint8_t copy_count = 3 + deflate_get_bits(2); + for (uint8_t i = 0; i < copy_count; i++) { + deflate_lld_lengths[items_processed] = + deflate_lld_lengths[items_processed - 1]; + items_processed++; + } + } else if (code == 17) { + uint8_t null_count = 3 + deflate_get_bits(3); + for (uint8_t i = 0; i < null_count; i++) { + deflate_lld_lengths[items_processed] = 0; + items_processed++; + } + } else if (code == 18) { + uint8_t null_count = 11 + deflate_get_bits(7); + for (uint8_t i = 0; i < null_count; i++) { + deflate_lld_lengths[items_processed] = 0; + items_processed++; + } + } else { + deflate_lld_lengths[items_processed] = code; + items_processed++; + } + } + + deflate_build_alphabet(deflate_lld_lengths, hlit, + deflate_bl_count_ll, deflate_next_code_ll); + deflate_build_alphabet(deflate_lld_lengths + hlit, hdist, + deflate_bl_count_d, deflate_next_code_d); + + return deflate_huffman(deflate_lld_lengths, hlit, + deflate_lld_lengths + hlit, hdist); +} + +int8_t deflate(unsigned char *input_buf, uint16_t input_len, + unsigned char *output_buf, uint16_t output_len) +{ + uint8_t is_final = input_buf[0] & 0x01; + uint8_t block_type = (input_buf[0] & 0x06) >> 1; +#ifdef DEFLATE_DEBUG + kout << "is_final=" << is_final << " block_type=" << block_type << endl; +#endif + + deflate_input_now = input_buf; + deflate_input_end = input_buf + input_len; + deflate_bit_offset = 3; + + deflate_output_now = output_buf; + deflate_output_end = output_buf + output_len; + + if (block_type == 0) { + return deflate_uncompressed(); + } + if (block_type == 1) { + return deflate_static_huffman(); + } + if (block_type == 2) { + return deflate_dynamic_huffman(); + } + + return DEFLATE_ERR_BLOCK; +} + +int8_t deflate_zlib(unsigned char *input_buf, uint16_t input_len, + unsigned char *output_buf, uint16_t 
output_len) +{ + if (input_len < 4) { + return DEFLATE_ERR_INPUT_LENGTH; + } + uint8_t zlib_method = input_buf[0] & 0x0f; + uint8_t zlib_flags = input_buf[1]; + +#ifdef DEFLATE_DEBUG + kout << "zlib_method=" << zlib_method << endl; + kout << "zlib_window_size=" << ((uint16_t) 1 << + (8 + + ((input_buf[0] & 0xf0) >> 4))) << endl; +#endif + + if (zlib_method != 8) { + return DEFLATE_ERR_METHOD; + } + + if (zlib_flags & 0x20) { + return DEFLATE_ERR_FDICT; + } + + if ((((uint16_t) input_buf[0] << 8) | input_buf[1]) % 31) { + return DEFLATE_ERR_FCHECK; + } + + uint8_t ret = + deflate(input_buf + 2, input_len - 2, output_buf, output_len); + +#ifdef DEFLATE_CHECKSUM + if (ret == 0) { + uint16_t deflate_s1 = 1; + uint16_t deflate_s2 = 0; + + deflate_output_end = deflate_output_now; + for (deflate_output_now = output_buf; + deflate_output_now < deflate_output_end; + deflate_output_now++) { + deflate_s1 = + ((uint32_t) deflate_s1 + + (uint32_t) (*deflate_output_now)) % 65521; + deflate_s2 = + ((uint32_t) deflate_s2 + + (uint32_t) deflate_s1) % 65521; + } + + if (deflate_bit_offset) { + deflate_input_now++; + } + + if ((deflate_s2 != + (((uint16_t) deflate_input_now[0] << 8) | (uint16_t) + deflate_input_now[1])) + || (deflate_s1 != + (((uint16_t) deflate_input_now[2] << 8) | (uint16_t) + deflate_input_now[3]))) { + return DEFLATE_ERR_CHECKSUM; + } + } +#endif + + return ret; +} diff --git a/src/lib/udeflate.cc b/src/lib/udeflate.cc deleted file mode 100644 index c3a051d..0000000 --- a/src/lib/udeflate.cc +++ /dev/null @@ -1,492 +0,0 @@ -/* - * Copyright 2021 Daniel Friesel - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include "lib/udeflate.h" - -/* - * The compressed (inflated) input data. - */ -unsigned char *udeflate_input_now; -unsigned char *udeflate_input_end; - -/* - * The decompressed (deflated) output stream. - */ -unsigned char *udeflate_output_now; -unsigned char *udeflate_output_end; - -/* - * The current bit offset in the input stream, if any. 
- * - * Deflate streams are read from least to most significant bit. - * An offset of 1 indicates that the least significant bit is skipped - * (i.e., only bits 7, 6, 5, 4, 3, 2, and 1 are read). - */ -uint8_t udeflate_bit_offset = 0; - -/* - * Base lengths for length codes (code 257 to 285). - * Code 257 corresponds to a copy of 3 bytes, etc. - */ -uint16_t const udeflate_length_offsets[] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, - 67, 83, 99, 115, 131, 163, 195, 227, 258 -}; - -/* - * Extra bits for length codes (code 257 to 285). - * Code 257 has no extra bits, code 265 has 1 extra bit - * (and indicates a length of 11 or 12 depending on its value), etc. - */ -uint8_t const udeflate_length_bits[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, - 5, 5, 5, 5, 0 -}; - -// can also be expressed as (index < 4 || index == 28) ? 0 : (index-4) >> 2 - -/* - * Base distances for distance codes (code 0 to 29). - * Code 0 indicates a distance of 1, etc. - */ -uint16_t const udeflate_distance_offsets[] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, - 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 -}; - -/* - * Extra bits for distance codes (code 0 to 29). - * Code 0 has no extra bits, code 4 has 1 bit, etc. - */ -uint8_t const udeflate_distance_bits[] = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, - 10, 11, 11, 12, 12, 13, 13 -}; - -// can also be expressed as index < 2 ? 0 : (index-2) >> 1 - -/* - * In block type 2 (dynamic huffman codes), the code lengths of literal/length - * and distance alphabet are themselves stored as huffman codes. To save space - * in case only a few code lengths are used, the code length codes are stored - * in the following order. This allows a few bits to be saved if some code - * lengths are unused and the unused code lengths are at the end of the list. 
- */ -uint8_t const udeflate_hclen_index[] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 -}; - -/* - * Code lengths of the "code length" code (see above). - */ -uint8_t udeflate_hc_lengths[19]; - -/* - * Code lengths of the literal/length and distance alphabets. - */ -uint8_t udeflate_lld_lengths[318]; - -/* - * Assumptions: - * * huffman code length is limited to 11 bits - * * there are no more than 255 huffman codes with the same length - * - * Rationale: longer huffman codes might appear when handling large data - * sets. We don't do that; instead, we expect the uncompressed source to - * be no more than a few kB of data. - */ - -/* - * Bit length counts and next code entries for Literal/Length alphabet. - * Combined with the code lengths in udeflate_lld_lengths, these make up the - * Literal/Length alphabet. See the algorithm in RFC 1951 section 3.2.2 for - * details. - * - * In udeflate, these variables are also used for the huffman alphabet in - * dynamic huffman blocks. - */ -uint8_t udeflate_bl_count_ll[12]; -uint16_t udeflate_next_code_ll[12]; - -/* - * Bit length counts and next code entries for Distance alphabet. 
- */ -uint8_t udeflate_bl_count_d[12]; -uint16_t udeflate_next_code_d[12]; - -static uint16_t udeflate_rev_word(uint16_t word, uint8_t bits) -{ - uint16_t ret = 0; - uint16_t mask = 1; - for (uint16_t rmask = 1 << (bits - 1); rmask > 0; rmask >>= 1) { - if (word & rmask) { - ret |= mask; - } - mask <<= 1; - } - return ret; -} - -static uint8_t udeflate_bitmask(uint8_t bit_count) -{ - return (1 << bit_count) - 1; -} - -static uint16_t udeflate_get_word() -{ - uint16_t ret = 0; - ret |= (udeflate_input_now[0] >> udeflate_bit_offset); - ret |= (uint16_t) udeflate_input_now[1] << (8 - udeflate_bit_offset); - if (udeflate_bit_offset) { - ret |= - (uint16_t) (udeflate_input_now[2] & - udeflate_bitmask(udeflate_bit_offset)) << (16 - - udeflate_bit_offset); - } -#ifdef UDEFLATE_DEBUG - kout << "get_word = " << bin << ret << dec << endl; -#endif - return ret; -} - -static uint16_t udeflate_get_bits(uint8_t num_bits) -{ - uint16_t ret = udeflate_get_word(); - udeflate_bit_offset += num_bits; - while (udeflate_bit_offset >= 8) { - udeflate_input_now++; - udeflate_bit_offset -= 8; - } - return ret & udeflate_bitmask(num_bits); -} - -static void udeflate_build_alphabet(uint8_t * lengths, uint16_t size, - uint8_t * bl_count, uint16_t * next_code) -{ - uint16_t i; - uint16_t code = 0; - uint16_t max_len = 0; - for (i = 0; i < 12; i++) { - bl_count[i] = 0; - } - - for (i = 0; i < size; i++) { - if (lengths[i]) { - bl_count[lengths[i]]++; - } - if (lengths[i] > max_len) { - max_len = lengths[i]; - } - } - - for (i = 1; i < max_len + 1; i++) { - code = (code + bl_count[i - 1]) << 1; - next_code[i] = code; - } - -#ifdef UDEFLATE_DEBUG - for (i = 0; i < 12; i++) { - kout << "bl_count[" << i << "] = " << bl_count[i] << endl; - } - for (i = 0; i < 12; i++) { - kout << "next_code[" << i << "] = " << next_code[i] << endl; - } -#endif -} - -static uint16_t udeflate_huff(uint8_t * lengths, uint16_t size, - uint8_t * bl_count, uint16_t * next_code) -{ - uint16_t next_word = 
udeflate_get_word(); - for (uint8_t num_bits = 1; num_bits < 12; num_bits++) { - uint16_t next_bits = udeflate_rev_word(next_word, num_bits); - if (bl_count[num_bits] && next_bits >= next_code[num_bits] - && next_bits < next_code[num_bits] + bl_count[num_bits]) { -#ifdef UDEFLATE_DEBUG - kout << "found huffman code, length = " << num_bits << - endl; -#endif - udeflate_bit_offset += num_bits; - while (udeflate_bit_offset >= 8) { - udeflate_input_now++; - udeflate_bit_offset -= 8; - } - uint8_t len_pos = next_bits; - uint8_t cur_pos = next_code[num_bits]; - for (uint16_t i = 0; i < size; i++) { - if (lengths[i] == num_bits) { - if (cur_pos == len_pos) { - return i; - } - cur_pos++; - } - } - } - } - return 65535; -} - -static int8_t udeflate_huffman(uint8_t * ll_lengths, uint16_t ll_size, - uint8_t * d_lengths, uint8_t d_size) -{ - uint16_t code; - uint16_t dcode; - while (1) { - code = - udeflate_huff(ll_lengths, ll_size, udeflate_bl_count_ll, - udeflate_next_code_ll); -#ifdef UDEFLATE_DEBUG - kout << "code " << code << endl; -#endif - if (code < 256) { - if (udeflate_output_now == udeflate_output_end) { - return UDEFLATE_ERR_OUTPUT_LENGTH; - } - *udeflate_output_now = code; - udeflate_output_now++; - } else if (code == 256) { - return 0; - } else { - uint16_t len_val = udeflate_length_offsets[code - 257]; - uint8_t extra_bits = udeflate_length_bits[code - 257]; - if (extra_bits) { - len_val += udeflate_get_bits(extra_bits); - } - dcode = - udeflate_huff(d_lengths, d_size, - udeflate_bl_count_d, - udeflate_next_code_d); - uint16_t dist_val = udeflate_distance_offsets[dcode]; - extra_bits = udeflate_distance_bits[dcode]; - if (extra_bits) { - dist_val += udeflate_get_bits(extra_bits); - } - while (len_val--) { - if (udeflate_output_now == udeflate_output_end) { - return UDEFLATE_ERR_OUTPUT_LENGTH; - } - udeflate_output_now[0] = - udeflate_output_now[-dist_val]; - udeflate_output_now++; - } - } - if (udeflate_input_now >= udeflate_input_end - 4) { - return 
UDEFLATE_ERR_INPUT_LENGTH; - } - } -} - -static int8_t udeflate_uncompressed() -{ - udeflate_input_now++; - uint16_t len = - ((uint16_t) udeflate_input_now[1] << 8) + udeflate_input_now[0]; - uint16_t nlen = - ((uint16_t) udeflate_input_now[3] << 8) + udeflate_input_now[2]; - if (len & nlen) { - return UDEFLATE_ERR_NLEN; - } - udeflate_input_now += 4; - if (udeflate_input_now + len >= udeflate_input_end) { - return UDEFLATE_ERR_INPUT_LENGTH; - } - if (udeflate_output_now + len >= udeflate_output_end) { - return UDEFLATE_ERR_OUTPUT_LENGTH; - } - for (uint16_t i = 0; i < len; i++) { - *(udeflate_output_now++) = *(udeflate_input_now++); - } - return 0; -} - -static int8_t udeflate_static_huffman() -{ - uint16_t i; - for (i = 0; i <= 143; i++) { - udeflate_lld_lengths[i] = 8; - } - for (i = 144; i <= 255; i++) { - udeflate_lld_lengths[i] = 9; - } - for (i = 256; i <= 279; i++) { - udeflate_lld_lengths[i] = 7; - } - for (i = 280; i <= 285; i++) { - udeflate_lld_lengths[i] = 8; - } - for (i = 286; i <= 286 + 29; i++) { - udeflate_lld_lengths[i] = 5; - } - - udeflate_build_alphabet(udeflate_lld_lengths, 286, udeflate_bl_count_ll, - udeflate_next_code_ll); - udeflate_build_alphabet(udeflate_lld_lengths + 286, 29, - udeflate_bl_count_d, udeflate_next_code_d); - return udeflate_huffman(udeflate_lld_lengths, 286, - udeflate_lld_lengths + 286, 29); -} - -static int8_t udeflate_dynamic_huffman() -{ - uint8_t i; - uint16_t hlit = 257 + udeflate_get_bits(5); - uint8_t hdist = 1 + udeflate_get_bits(5); - uint8_t hclen = 4 + udeflate_get_bits(4); - -#ifdef UDEFLATE_DEBUG - kout << "hlit=" << hlit << endl; - kout << "hdist=" << hdist << endl; - kout << "hclen=" << hclen << endl; -#endif - - for (i = 0; i < hclen; i++) { - udeflate_hc_lengths[udeflate_hclen_index[i]] = - udeflate_get_bits(3); - } - for (i = hclen; i < sizeof(udeflate_hc_lengths); i++) { - udeflate_hc_lengths[udeflate_hclen_index[i]] = 0; - } - - udeflate_build_alphabet(udeflate_hc_lengths, - 
sizeof(udeflate_hc_lengths), - udeflate_bl_count_ll, udeflate_next_code_ll); - - uint16_t items_processed = 0; - while (items_processed < hlit + hdist) { - uint8_t code = - udeflate_huff(udeflate_hc_lengths, 19, udeflate_bl_count_ll, - udeflate_next_code_ll); -#ifdef UDEFLATE_DEBUG - kout << "code = " << code << endl; -#endif - if (code == 16) { - uint8_t copy_count = 3 + udeflate_get_bits(2); - for (uint8_t i = 0; i < copy_count; i++) { - udeflate_lld_lengths[items_processed] = - udeflate_lld_lengths[items_processed - 1]; - items_processed++; - } - } else if (code == 17) { - uint8_t null_count = 3 + udeflate_get_bits(3); - for (uint8_t i = 0; i < null_count; i++) { - udeflate_lld_lengths[items_processed] = 0; - items_processed++; - } - } else if (code == 18) { - uint8_t null_count = 11 + udeflate_get_bits(7); - for (uint8_t i = 0; i < null_count; i++) { - udeflate_lld_lengths[items_processed] = 0; - items_processed++; - } - } else { - udeflate_lld_lengths[items_processed] = code; - items_processed++; - } - } - - udeflate_build_alphabet(udeflate_lld_lengths, hlit, - udeflate_bl_count_ll, udeflate_next_code_ll); - udeflate_build_alphabet(udeflate_lld_lengths + hlit, hdist, - udeflate_bl_count_d, udeflate_next_code_d); - - return udeflate_huffman(udeflate_lld_lengths, hlit, - udeflate_lld_lengths + hlit, hdist); -} - -int8_t udeflate(unsigned char *input_buf, uint16_t input_len, - unsigned char *output_buf, uint16_t output_len) -{ - uint8_t is_final = input_buf[0] & 0x01; - uint8_t block_type = (input_buf[0] & 0x06) >> 1; -#ifdef UDEFLATE_DEBUG - kout << "is_final=" << is_final << " block_type=" << block_type << endl; -#endif - - udeflate_input_now = input_buf; - udeflate_input_end = input_buf + input_len; - udeflate_bit_offset = 3; - - udeflate_output_now = output_buf; - udeflate_output_end = output_buf + output_len; - - if (block_type == 0) { - return udeflate_uncompressed(); - } - if (block_type == 1) { - return udeflate_static_huffman(); - } - if (block_type == 
2) { - return udeflate_dynamic_huffman(); - } - - return UDEFLATE_ERR_BLOCK; -} - -int8_t udeflate_zlib(unsigned char *input_buf, uint16_t input_len, - unsigned char *output_buf, uint16_t output_len) -{ - if (input_len < 4) { - return UDEFLATE_ERR_INPUT_LENGTH; - } - uint8_t zlib_method = input_buf[0] & 0x0f; - uint8_t zlib_flags = input_buf[1]; - -#ifdef UDEFLATE_DEBUG - kout << "zlib_method=" << zlib_method << endl; - kout << "zlib_window_size=" << ((uint16_t) 1 << - (8 + - ((input_buf[0] & 0xf0) >> 4))) << endl; -#endif - - if (zlib_method != 8) { - return UDEFLATE_ERR_METHOD; - } - - if (zlib_flags & 0x20) { - return UDEFLATE_ERR_FDICT; - } - - if ((((uint16_t) input_buf[0] << 8) | input_buf[1]) % 31) { - return UDEFLATE_ERR_FCHECK; - } - - uint8_t ret = - udeflate(input_buf + 2, input_len - 2, output_buf, output_len); - -#ifdef UDEFLATE_CHECKSUM - if (ret == 0) { - uint16_t udeflate_s1 = 1; - uint16_t udeflate_s2 = 0; - - udeflate_output_end = udeflate_output_now; - for (udeflate_output_now = output_buf; - udeflate_output_now < udeflate_output_end; - udeflate_output_now++) { - udeflate_s1 = - ((uint32_t) udeflate_s1 + - (uint32_t) (*udeflate_output_now)) % 65521; - udeflate_s2 = - ((uint32_t) udeflate_s2 + - (uint32_t) udeflate_s1) % 65521; - } - - if (udeflate_bit_offset) { - udeflate_input_now++; - } - - if ((udeflate_s2 != - (((uint16_t) udeflate_input_now[0] << 8) | (uint16_t) - udeflate_input_now[1])) - || (udeflate_s1 != - (((uint16_t) udeflate_input_now[2] << 8) | (uint16_t) - udeflate_input_now[3]))) { - return UDEFLATE_ERR_CHECKSUM; - } - } -#endif - - return ret; -} |