From 79eb1dc8436e55c11936d923512b4c480a90fe85 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Thu, 21 Jan 2021 21:15:07 +0100 Subject: Initial Commit --- .reuse/dep5 | 5 + COPYING | 22 +++ LICENSES/BSD-2-Clause.txt | 22 +++ LICENSES/CC0-1.0.txt | 121 ++++++++++++ README.md | 113 +++++++++++ src/deflate.c | 465 ++++++++++++++++++++++++++++++++++++++++++++++ src/deflate.h | 23 +++ 7 files changed, 771 insertions(+) create mode 100644 .reuse/dep5 create mode 100644 COPYING create mode 100644 LICENSES/BSD-2-Clause.txt create mode 100644 LICENSES/CC0-1.0.txt create mode 100644 README.md create mode 100644 src/deflate.c create mode 100644 src/deflate.h diff --git a/.reuse/dep5 b/.reuse/dep5 new file mode 100644 index 0000000..227e8e5 --- /dev/null +++ b/.reuse/dep5 @@ -0,0 +1,5 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ + +Files: README.md +Copyright: 2021 Daniel Friesel +License: CC0-1.0 diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..baa80b5 --- /dev/null +++ b/COPYING @@ -0,0 +1,22 @@ +Copyright (c) All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/BSD-2-Clause.txt b/LICENSES/BSD-2-Clause.txt new file mode 100644 index 0000000..baa80b5 --- /dev/null +++ b/LICENSES/BSD-2-Clause.txt @@ -0,0 +1,22 @@ +Copyright (c) All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/LICENSES/CC0-1.0.txt b/LICENSES/CC0-1.0.txt new file mode 100644 index 0000000..0e259d4 --- /dev/null +++ b/LICENSES/CC0-1.0.txt @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. 
+ +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/README.md b/README.md new file mode 100644 index 0000000..3422584 --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ +**zlib-deflate-nostdlib** provides a zlib decompressor (RFC 1950) and deflate +reader (RFC 1951) suitable for 8- and 16-bit microcontrollers. It works +fine on MCUs as small as ATMega328P (used, for example, in the Arduino Nano) +and MSP430FR5994. It is compatible with both C (tested with c99) and C++ +(tested with c++14). Apart from type definitions for (u)int8\_t, (u)int16\_t, +and (u)int32\_t, which are typically provided by stdint.h, it has no external +dependencies. + +zlib-deflate-nostdlib is focused on a low memory footprint. It is not optimized +for speed. Right now, the implementation is naive, but usable. See below for +the current status and TODOs. Be aware that this library has not been +extensively tested yet. + +## Usage + +Embed `deflate.c` and `deflate.h` into your project. You can rename `deflate.c` +to `deflate.cc` and/or compile it with g++ instead of gcc, if you like. Use +`deflate_zlib(input, input_len, output, output_len)` to decompress zlib data, +and `deflate(input, input_len, output, output_len)` to decompress deflate data +without zlib header. + +input and output must be `unsigned char *`, input\_len and output\_len are +expected to be unsigned 16-bit integers. Both functions return the number of +bytes written to `output`, or a negative value on error. 
+ +Example for zlib decompression (RFC 1950): + +``` +#include "deflate.h" + +unsigned char deflate_input[] = { /* some compressed data, e.g.: */ + 120, 156, 243, 72, 205, 201, 201, 215, 81, 8, 207, 47, 202, 73, 177, 87, + 240, 64, 226, 41, 2, 0, 128, 125, 9, 17 +}; + +unsigned char deflate_output[128]; + +// within some function +{ + int16_t out_bytes = deflate_zlib(deflate_input, sizeof(deflate_input), + deflate_output, sizeof(deflate_output)); + if (out_bytes < 0) { + // error + } else { + // success. deflate_output contains "Hello, World? Hello, World!" + // out_bytes contains the number of bytes written to deflate_output + } +} + +``` + +Decompressing deflate (RFC 1951) data works as follows: + +``` +#include "deflate.h" + +unsigned char deflate_input[] = { /* some compressed data, e.g.: */ + 243, 72, 205, 201, 201, 215, 81, 8, 207, 47, 202, 73, 177, 87, + 240, 64, 226, 41, 2, 0 +}; + +unsigned char deflate_output[128]; + +// within some function +{ + int16_t out_bytes = deflate(deflate_input, sizeof(deflate_input), + deflate_output, sizeof(deflate_output)); + if (out_bytes < 0) { + // error + } else { + // success. deflate_output contains "Hello, World? Hello, World!" + // out_bytes contains the number of bytes written to deflate_output + } +} + +``` + +## Compilation flags + +Compile with `-DDEFLATE_CHECKSUM` to enable verification of the zlib ADLER32 +checksum in `deflate_zlib`. + +## Compliance + +The code *almost* complies with RFC 1950 (decompression only), with the +following exceptions. + +* Unless compiled with `-DDEFLATE_CHECKSUM`, zlib-deflate-nostdlib does not + verify the ADLER32 checksum embedded into zlib-compressed data. + +The code *almost* complies with RFC 1951, with the following exceptions. + +* zlib-deflate-nostdlib assumes that Huffman codes are limited to a length + of 11 bits and that there are no more than 255 codes per length. 
/*
 * README.md, "Compliance" section (continued from the preceding text):
 *
 *   This appears to be a reasonable assumption for embedded devices, whose
 *   decompression abilities are limited by the amount of RAM anyways. I have
 *   not yet determined whether longer Huffman codes can appear in practice
 *   or not, and if so, under which conditions.
 * * zlib-deflate-nostdlib does not yet support compressed items consisting
 *   of more than one deflate block. I intend to fix this.
 *
 * ## Requirements
 *
 * RAM usage excludes the space needed for input and output buffer. Numbers
 * rounded up to the next multiple of 64B for ROM and 16B for RAM to account
 * for variations in compiler optimizations. (Figures as stated by the author
 * for the initial commit.)
 *
 * | Architecture | ROM | RAM |
 * | :--- | ---: | ---: |
 * | 8-bit ATMega328P | 1584 B | 624 B |
 * | 16-bit MSP430FR5994 | 2304 B | 432 B |
 * | 20-bit MSP430FR5994 | 2608 B | 432 B |
 * | 32-bit STM32F446RE (ARM Cortex M3) | 1744 B | 432 B |
 */

/* ===================================================================== */
/* File: src/deflate.h (new file, mode 100644)                           */
/* ===================================================================== */

/*
 * zlib-deflate-nostdlib
 *
 * Copyright 2021 Daniel Friesel
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#ifndef DEFLATE_H
#define DEFLATE_H

#include <stdint.h>

/* Input ends before the compressed data (or the zlib trailer) is complete. */
#define DEFLATE_ERR_INPUT_LENGTH (-1)
/* zlib header: compression method is not 8 (deflate). */
#define DEFLATE_ERR_METHOD (-2)
/* zlib header: a preset dictionary is requested, which is not supported. */
#define DEFLATE_ERR_FDICT (-3)
/* Reserved block type, or block contents that cannot be decoded. */
#define DEFLATE_ERR_BLOCK (-4)
/* ADLER32 mismatch (only reported when built with -DDEFLATE_CHECKSUM). */
#define DEFLATE_ERR_CHECKSUM (-5)
/* The output buffer is too small for the decompressed data. */
#define DEFLATE_ERR_OUTPUT_LENGTH (-6)
/* zlib header: the FCHECK header checksum is wrong. */
#define DEFLATE_ERR_FCHECK (-7)
/* Stored block: NLEN is not the one's complement of LEN. */
#define DEFLATE_ERR_NLEN (-8)

/*
 * Decompress a raw deflate stream (RFC 1951) consisting of a single block.
 *
 * input_buf / input_len:   compressed data and its length in bytes.
 * output_buf / output_len: destination buffer and its capacity in bytes.
 *
 * Returns the number of bytes written to output_buf, or one of the negative
 * DEFLATE_ERR_* codes. As the return type is int16_t, at most 32767 bytes of
 * output_buf are used.
 */
int16_t deflate(unsigned char *input_buf, uint16_t input_len,
		unsigned char *output_buf, uint16_t output_len);

/*
 * Decompress a zlib stream (RFC 1950): two header bytes, a deflate stream,
 * and a four-byte ADLER32 trailer. The trailer is only verified when
 * compiled with -DDEFLATE_CHECKSUM. Parameters and return value are the same
 * as for deflate().
 */
int16_t deflate_zlib(unsigned char *input_buf, uint16_t input_len,
		     unsigned char *output_buf, uint16_t output_len);

#endif /* DEFLATE_H */

/* ===================================================================== */
/* File: src/deflate.c (new file, mode 100644)                           */
/* ===================================================================== */

/*
 * zlib-deflate-nostdlib
 *
 * Copyright 2021 Daniel Friesel
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

/*
 * When this file is built on its own, pull in the declarations from the
 * header. The guard skips the include if the header text is already part of
 * the translation unit.
 */
#ifndef DEFLATE_H
#include "lib/deflate.h"
#endif

/*
 * Size of the bit length tables. Index 0 is unused, so code lengths of
 * 1 to 11 bits are supported.
 */
#define DEFLATE_MAX_BITS 12

/* Returned by deflate_huff if the next input bits match no code. */
#define DEFLATE_NO_SYMBOL 65535

/*
 * The compressed (deflated) input data: next byte to read and end of buffer.
 */
unsigned char *deflate_input_now;
unsigned char *deflate_input_end;

/*
 * The decompressed (inflated) output stream: next byte to write and end of
 * buffer.
 */
unsigned char *deflate_output_now;
unsigned char *deflate_output_end;

/*
 * Start of the output buffer. Back-references must not reach below it.
 */
static unsigned char *deflate_output_start;

/*
 * The current bit offset in the input stream, if any.
 *
 * Deflate streams are read from least to most significant bit.
 * An offset of 1 indicates that the least significant bit is skipped
 * (i.e., only bits 7, 6, 5, 4, 3, 2, and 1 are read).
 * Always in the range 0 to 7.
 */
uint8_t deflate_bit_offset = 0;

/*
 * Set as soon as more bits have been consumed than the input contains.
 */
static uint8_t deflate_truncated;

/*
 * Base lengths for length codes (code 257 to 285).
 * Code 257 corresponds to a copy of 3 bytes, etc.
 */
uint16_t const deflate_length_offsets[] = {
	3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
	67, 83, 99, 115, 131, 163, 195, 227, 258
};

/*
 * Extra bits for length codes (code 257 to 285).
 * Code 257 has no extra bits, code 265 has 1 extra bit
 * (and indicates a length of 11 or 12 depending on its value), etc.
 */
uint8_t const deflate_length_bits[] = {
	0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4,
	5, 5, 5, 5, 0
};

// can also be expressed as (index < 4 || index == 28) ? 0 : (index-4) >> 2

/*
 * Base distances for distance codes (code 0 to 29).
 * Code 0 indicates a distance of 1, etc.
 */
uint16_t const deflate_distance_offsets[] = {
	1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385,
	513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
};

/*
 * Extra bits for distance codes (code 0 to 29).
 * Code 0 has no extra bits, code 4 has 1 bit, etc.
 */
uint8_t const deflate_distance_bits[] = {
	0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
	10, 11, 11, 12, 12, 13, 13
};

// can also be expressed as index < 2 ? 0 : (index-2) >> 1

/*
 * In block type 2 (dynamic huffman codes), the code lengths of literal/length
 * and distance alphabet are themselves stored as huffman codes. To save space
 * in case only a few code lengths are used, the code length codes are stored
 * in the following order. This allows a few bits to be saved if some code
 * lengths are unused and the unused code lengths are at the end of the list.
 */
uint8_t const deflate_hclen_index[] = {
	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};

/*
 * Code lengths of the "code length" code (see above).
 */
uint8_t deflate_hc_lengths[19];

/*
 * Code lengths of the literal/length and distance alphabets.
 * Sized for the fixed huffman tables: 288 literal/length code lengths
 * followed by 30 distance code lengths.
 */
uint8_t deflate_lld_lengths[318];

/*
 * Assumptions:
 * * huffman code length is limited to 11 bits
 * * there are no more than 255 huffman codes with the same length
 *
 * Rationale: longer huffman codes might appear when handling large data
 * sets. We don't do that; instead, we expect the uncompressed source to
 * be no more than a few kB of data.
 *
 * Blocks that violate these assumptions are rejected with
 * DEFLATE_ERR_BLOCK.
 */

/*
 * Bit length counts and next code entries for Literal/Length alphabet.
 * Combined with the code lengths in deflate_lld_lengths, these make up the
 * Literal/Length alphabet. See the algorithm in RFC 1951 section 3.2.2 for
 * details.
 *
 * In deflate, these variables are also used for the huffman alphabet in
 * dynamic huffman blocks.
 */
uint8_t deflate_bl_count_ll[DEFLATE_MAX_BITS];
uint16_t deflate_next_code_ll[DEFLATE_MAX_BITS];

/*
 * Bit length counts and next code entries for Distance alphabet.
 */
uint8_t deflate_bl_count_d[DEFLATE_MAX_BITS];
uint16_t deflate_next_code_d[DEFLATE_MAX_BITS];

/*
 * Reverse the order of the lowest `bits` bits of `word`. Huffman codes are
 * stored most significant bit first, whereas the input is read least
 * significant bit first.
 */
static uint16_t deflate_rev_word(uint16_t word, uint8_t bits)
{
	uint16_t ret = 0;
	while (bits--) {
		ret = (uint16_t) ((ret << 1) | (word & 1));
		word >>= 1;
	}
	return ret;
}

/*
 * Mask selecting the lowest `bit_count` bits (bit_count <= 15). The result
 * is 16 bits wide, as extra-bit fields are up to 13 bits long.
 */
static uint16_t deflate_bitmask(uint8_t bit_count)
{
	return (uint16_t) ((1u << bit_count) - 1u);
}

/*
 * Input byte `ahead` bytes after the current position, or 0 if that
 * position is outside the input buffer.
 */
static uint8_t deflate_peek_byte(uint8_t ahead)
{
	if ((uint16_t) (deflate_input_end - deflate_input_now) <= ahead) {
		return 0;
	}
	return deflate_input_now[ahead];
}

/*
 * Advance the input position by num_bits bits. The byte pointer never moves
 * past deflate_input_end; consuming bits that do not exist sets
 * deflate_truncated instead.
 */
static void deflate_skip_bits(uint8_t num_bits)
{
	uint8_t offset = deflate_bit_offset + num_bits;
	while (offset >= 8 && deflate_input_now != deflate_input_end) {
		deflate_input_now++;
		offset -= 8;
	}
	if (offset && deflate_input_now == deflate_input_end) {
		deflate_truncated = 1;
		offset = 0;	/* keep the bit offset below 8 */
	}
	deflate_bit_offset = offset;
}

/*
 * Return the next 16 input bits without consuming them. Bits beyond the end
 * of the input read as zero.
 */
static uint16_t deflate_get_word(void)
{
	uint16_t ret = 0;
	ret |= (deflate_peek_byte(0) >> deflate_bit_offset);
	ret |= (uint16_t) deflate_peek_byte(1) << (8 - deflate_bit_offset);
	if (deflate_bit_offset) {
		ret |=
		    (uint16_t) (deflate_peek_byte(2) &
				deflate_bitmask(deflate_bit_offset))
		    << (16 - deflate_bit_offset);
	}
	return ret;
}

/*
 * Consume num_bits bits (at most 13) and return them as an integer whose
 * least significant bit is the first bit read.
 */
static uint16_t deflate_get_bits(uint8_t num_bits)
{
	uint16_t ret = deflate_get_word();
	deflate_skip_bits(num_bits);
	return ret & deflate_bitmask(num_bits);
}

/*
 * Compute bl_count and next_code (RFC 1951 section 3.2.2) for the `size`
 * code lengths in `lengths`.
 *
 * Returns 0 on success, or DEFLATE_ERR_BLOCK if a code length exceeds 11
 * bits or more than 255 codes share one length.
 */
static int8_t deflate_build_alphabet(const uint8_t * lengths, uint16_t size,
				     uint8_t * bl_count, uint16_t * next_code)
{
	uint16_t i;
	uint16_t code = 0;

	for (i = 0; i < DEFLATE_MAX_BITS; i++) {
		bl_count[i] = 0;
		next_code[i] = 0;
	}

	for (i = 0; i < size; i++) {
		uint8_t len = lengths[i];
		if (len == 0) {
			continue;	/* symbol is not part of the code */
		}
		if (len >= DEFLATE_MAX_BITS || bl_count[len] == 255) {
			return DEFLATE_ERR_BLOCK;
		}
		bl_count[len]++;
	}

	for (i = 1; i < DEFLATE_MAX_BITS; i++) {
		code = (uint16_t) ((code + bl_count[i - 1]) << 1);
		next_code[i] = code;
	}
	return 0;
}

/*
 * Decode one huffman symbol from the input.
 *
 * Returns the symbol index, or DEFLATE_NO_SYMBOL (without consuming any
 * input) if the next bits do not match a code of the alphabet.
 */
static uint16_t deflate_huff(const uint8_t * lengths, uint16_t size,
			     const uint8_t * bl_count,
			     const uint16_t * next_code)
{
	uint16_t next_word = deflate_get_word();
	for (uint8_t num_bits = 1; num_bits < DEFLATE_MAX_BITS; num_bits++) {
		if (!bl_count[num_bits]) {
			continue;
		}
		uint16_t next_bits = deflate_rev_word(next_word, num_bits);
		if (next_bits < next_code[num_bits]) {
			continue;
		}
		/* Position of the code among all codes of this length. */
		uint16_t rank = next_bits - next_code[num_bits];
		if (rank >= bl_count[num_bits]) {
			continue;
		}
		/* Codes of equal length are assigned in symbol order. */
		for (uint16_t i = 0; i < size; i++) {
			if (lengths[i] == num_bits) {
				if (rank == 0) {
					deflate_skip_bits(num_bits);
					return i;
				}
				rank--;
			}
		}
	}
	return DEFLATE_NO_SYMBOL;
}

/*
 * Decode the symbols of a huffman-compressed block until its end-of-block
 * symbol. The alphabets must have been set up with deflate_build_alphabet.
 *
 * Returns 0 on success or a negative DEFLATE_ERR_* code.
 */
static int8_t deflate_huffman(uint8_t * ll_lengths, uint16_t ll_size,
			      uint8_t * d_lengths, uint8_t d_size)
{
	while (1) {
		uint16_t code =
		    deflate_huff(ll_lengths, ll_size, deflate_bl_count_ll,
				 deflate_next_code_ll);
		if (deflate_truncated) {
			return DEFLATE_ERR_INPUT_LENGTH;
		}
		if (code < 256) {
			/* literal byte */
			if (deflate_output_now == deflate_output_end) {
				return DEFLATE_ERR_OUTPUT_LENGTH;
			}
			*deflate_output_now = (unsigned char) code;
			deflate_output_now++;
			continue;
		}
		if (code == 256) {
			/* end of block */
			return 0;
		}
		if (code > 285) {
			/* 286, 287, or no matching code at all */
			return DEFLATE_ERR_BLOCK;
		}

		/* <length, distance> pair: copy already decoded output */
		uint16_t len_val = deflate_length_offsets[code - 257];
		uint8_t extra_bits = deflate_length_bits[code - 257];
		if (extra_bits) {
			len_val += deflate_get_bits(extra_bits);
		}
		uint16_t dcode =
		    deflate_huff(d_lengths, d_size, deflate_bl_count_d,
				 deflate_next_code_d);
		if (dcode > 29) {
			return DEFLATE_ERR_BLOCK;
		}
		uint16_t dist_val = deflate_distance_offsets[dcode];
		extra_bits = deflate_distance_bits[dcode];
		if (extra_bits) {
			dist_val += deflate_get_bits(extra_bits);
		}
		if (deflate_truncated) {
			return DEFLATE_ERR_INPUT_LENGTH;
		}
		if (dist_val >
		    (uint16_t) (deflate_output_now - deflate_output_start)) {
			/* reference to data before the start of the output */
			return DEFLATE_ERR_BLOCK;
		}
		/* Byte-wise copy: source and destination may overlap. */
		while (len_val--) {
			if (deflate_output_now == deflate_output_end) {
				return DEFLATE_ERR_OUTPUT_LENGTH;
			}
			*deflate_output_now = *(deflate_output_now - dist_val);
			deflate_output_now++;
		}
	}
}

/*
 * Handle a stored (uncompressed) block: skip to the next byte boundary,
 * validate LEN/NLEN and copy LEN bytes to the output.
 */
static int8_t deflate_uncompressed(void)
{
	if (deflate_bit_offset) {
		deflate_skip_bits(8 - deflate_bit_offset);
	}
	if (deflate_truncated
	    || (uint16_t) (deflate_input_end - deflate_input_now) < 4) {
		return DEFLATE_ERR_INPUT_LENGTH;
	}
	uint16_t len =
	    ((uint16_t) deflate_input_now[1] << 8) + deflate_input_now[0];
	uint16_t nlen =
	    ((uint16_t) deflate_input_now[3] << 8) + deflate_input_now[2];
	if (len != (uint16_t) ~ nlen) {
		return DEFLATE_ERR_NLEN;
	}
	deflate_input_now += 4;
	if (len > (uint16_t) (deflate_input_end - deflate_input_now)) {
		return DEFLATE_ERR_INPUT_LENGTH;
	}
	if (len > (uint16_t) (deflate_output_end - deflate_output_now)) {
		return DEFLATE_ERR_OUTPUT_LENGTH;
	}
	for (uint16_t i = 0; i < len; i++) {
		*(deflate_output_now++) = *(deflate_input_now++);
	}
	return 0;
}

/*
 * Handle a block compressed with the fixed huffman codes (RFC 1951 section
 * 3.2.6). Literal/length symbols 286 and 287 and all 30 distance symbols
 * take part in the code construction.
 */
static int8_t deflate_static_huffman(void)
{
	uint16_t i;
	for (i = 0; i <= 143; i++) {
		deflate_lld_lengths[i] = 8;
	}
	for (i = 144; i <= 255; i++) {
		deflate_lld_lengths[i] = 9;
	}
	for (i = 256; i <= 279; i++) {
		deflate_lld_lengths[i] = 7;
	}
	for (i = 280; i <= 287; i++) {
		deflate_lld_lengths[i] = 8;
	}
	for (i = 288; i < 288 + 30; i++) {
		deflate_lld_lengths[i] = 5;
	}

	if (deflate_build_alphabet(deflate_lld_lengths, 288,
				   deflate_bl_count_ll,
				   deflate_next_code_ll) < 0
	    || deflate_build_alphabet(deflate_lld_lengths + 288, 30,
				      deflate_bl_count_d,
				      deflate_next_code_d) < 0) {
		return DEFLATE_ERR_BLOCK;
	}
	return deflate_huffman(deflate_lld_lengths, 288,
			       deflate_lld_lengths + 288, 30);
}

/*
 * Handle a block compressed with dynamic huffman codes (RFC 1951 section
 * 3.2.7): read the code length alphabet, use it to decode the code lengths
 * of the literal/length and distance alphabets, then decode the data.
 */
static int8_t deflate_dynamic_huffman(void)
{
	uint8_t i;
	uint16_t hlit = 257 + deflate_get_bits(5);
	uint8_t hdist = 1 + deflate_get_bits(5);
	uint8_t hclen = 4 + deflate_get_bits(4);

	/* Only symbols 0..285 and distance codes 0..29 are defined. */
	if (hlit > 286 || hdist > 30) {
		return DEFLATE_ERR_BLOCK;
	}
	uint16_t total = hlit + hdist;

	for (i = 0; i < sizeof(deflate_hc_lengths); i++) {
		deflate_hc_lengths[deflate_hclen_index[i]] =
		    (i < hclen) ? deflate_get_bits(3) : 0;
	}
	if (deflate_truncated) {
		return DEFLATE_ERR_INPUT_LENGTH;
	}

	if (deflate_build_alphabet(deflate_hc_lengths,
				   sizeof(deflate_hc_lengths),
				   deflate_bl_count_ll,
				   deflate_next_code_ll) < 0) {
		return DEFLATE_ERR_BLOCK;
	}

	uint16_t items_processed = 0;
	while (items_processed < total) {
		uint16_t code =
		    deflate_huff(deflate_hc_lengths, 19, deflate_bl_count_ll,
				 deflate_next_code_ll);
		uint8_t value = 0;
		uint8_t repeat = 1;
		if (code < 16) {
			/* literal code length */
			value = (uint8_t) code;
		} else if (code == 16) {
			/* repeat the previous code length 3 to 6 times */
			if (items_processed == 0) {
				return DEFLATE_ERR_BLOCK;
			}
			value = deflate_lld_lengths[items_processed - 1];
			repeat = 3 + deflate_get_bits(2);
		} else if (code == 17) {
			/* 3 to 10 unused symbols */
			repeat = 3 + deflate_get_bits(3);
		} else if (code == 18) {
			/* 11 to 138 unused symbols */
			repeat = 11 + deflate_get_bits(7);
		} else {
			return DEFLATE_ERR_BLOCK;
		}
		if (deflate_truncated) {
			return DEFLATE_ERR_INPUT_LENGTH;
		}
		if (repeat > total - items_processed) {
			return DEFLATE_ERR_BLOCK;
		}
		while (repeat--) {
			deflate_lld_lengths[items_processed] = value;
			items_processed++;
		}
	}

	if (deflate_build_alphabet(deflate_lld_lengths, hlit,
				   deflate_bl_count_ll,
				   deflate_next_code_ll) < 0
	    || deflate_build_alphabet(deflate_lld_lengths + hlit, hdist,
				      deflate_bl_count_d,
				      deflate_next_code_d) < 0) {
		return DEFLATE_ERR_BLOCK;
	}

	return deflate_huffman(deflate_lld_lengths, hlit,
			       deflate_lld_lengths + hlit, hdist);
}

/*
 * Decompress a raw deflate stream (RFC 1951). Only the first block of the
 * stream is decoded; its BFINAL flag is not evaluated.
 *
 * Returns the number of bytes written to output_buf or a negative
 * DEFLATE_ERR_* code.
 */
int16_t deflate(unsigned char *input_buf, uint16_t input_len,
		unsigned char *output_buf, uint16_t output_len)
{
	int8_t ret;

	if (input_len < 1) {
		return DEFLATE_ERR_INPUT_LENGTH;
	}
	/* Larger byte counts cannot be reported via the return value. */
	if (output_len > INT16_MAX) {
		output_len = INT16_MAX;
	}

	/* Block header: bit 0 is BFINAL, bits 1 and 2 are BTYPE. */
	uint8_t block_type = (input_buf[0] & 0x06) >> 1;

	deflate_input_now = input_buf;
	deflate_input_end = input_buf + input_len;
	deflate_bit_offset = 3;
	deflate_truncated = 0;

	deflate_output_start = output_buf;
	deflate_output_now = output_buf;
	deflate_output_end = output_buf + output_len;

	switch (block_type) {
	case 0:
		ret = deflate_uncompressed();
		break;
	case 1:
		ret = deflate_static_huffman();
		break;
	case 2:
		ret = deflate_dynamic_huffman();
		break;
	default:
		return DEFLATE_ERR_BLOCK;
	}

	if (ret < 0) {
		return ret;
	}

	return (int16_t) (deflate_output_now - output_buf);
}

/*
 * Decompress a zlib stream (RFC 1950): validate the two header bytes,
 * decompress the embedded deflate stream and check that the four-byte
 * ADLER32 trailer is present. The checksum itself is only verified when
 * compiled with -DDEFLATE_CHECKSUM.
 *
 * Returns the number of bytes written to output_buf or a negative
 * DEFLATE_ERR_* code.
 */
int16_t deflate_zlib(unsigned char *input_buf, uint16_t input_len,
		     unsigned char *output_buf, uint16_t output_len)
{
	/* two header bytes plus four trailer bytes */
	if (input_len < 6) {
		return DEFLATE_ERR_INPUT_LENGTH;
	}
	uint8_t zlib_method = input_buf[0] & 0x0f;
	uint8_t zlib_flags = input_buf[1];

	if (zlib_method != 8) {
		return DEFLATE_ERR_METHOD;
	}

	if (zlib_flags & 0x20) {
		return DEFLATE_ERR_FDICT;
	}

	if ((((uint16_t) input_buf[0] << 8) | input_buf[1]) % 31) {
		return DEFLATE_ERR_FCHECK;
	}

	int16_t ret =
	    deflate(input_buf + 2, input_len - 2, output_buf, output_len);
	if (ret < 0) {
		return ret;
	}

	/* The trailer starts at the first byte boundary after the data. */
	const unsigned char *trailer = deflate_input_now;
	if (deflate_bit_offset) {
		trailer++;
	}
	if ((uint16_t) (deflate_input_end - trailer) < 4) {
		return DEFLATE_ERR_INPUT_LENGTH;
	}
#ifdef DEFLATE_CHECKSUM
	{
		/* ADLER32 is stored big-endian: s2 first, then s1. */
		uint16_t deflate_s1 = 1;
		uint16_t deflate_s2 = 0;
		const unsigned char *pos;

		for (pos = output_buf; pos < deflate_output_now; pos++) {
			deflate_s1 =
			    (uint16_t) (((uint32_t) deflate_s1 +
					 (uint32_t) (*pos)) % 65521);
			deflate_s2 =
			    (uint16_t) (((uint32_t) deflate_s2 +
					 (uint32_t) deflate_s1) % 65521);
		}

		if ((deflate_s2 !=
		     (((uint16_t) trailer[0] << 8) | (uint16_t) trailer[1]))
		    || (deflate_s1 !=
			(((uint16_t) trailer[2] << 8) | (uint16_t)
			 trailer[3]))) {
			return DEFLATE_ERR_CHECKSUM;
		}
	}
#endif

	return ret;
}

/* End of patch ("Initial Commit"); original trailer: -- cgit v1.2.3 */