summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Daniel Friesel <derf@finalrewind.org> 2021-01-26 21:56:39 +0100
committer: Daniel Friesel <derf@finalrewind.org> 2021-01-26 21:58:07 +0100
commit: 1fa976457d60d98643567d3c2bfc2a4ed3e19817 (patch)
tree: 258c822498bda272f829d350a6c3ee2d196fe77a
parent: d83d6c1797dcb4a9239149f59b055585e539bb7e (diff)
huffman codes may be up to 16 bit long
-rw-r--r--  README.md      6
-rw-r--r--  src/inflate.c  35
2 files changed, 16 insertions, 25 deletions
diff --git a/README.md b/README.md
index 65dfbe2..20c9a35 100644
--- a/README.md
+++ b/README.md
@@ -95,12 +95,6 @@ following exceptions.
The code *almost* complies with RFC 1951, with the following exceptions.
-* zlib-deflate-nostdlib assumes that Huffman codes are limited to a length
- of 12 bits and that there are no more than 255 codes per length. This appears
- to be a reasonable assumption for embedded devices, whose decompression
- abilities are limited by the amount of RAM anyways. I have not yet determined
- whether longer Huffman codes can appear in practice or not, and if so, under
- which conditions.
* zlib-deflate-nostdlib does not yet support compressed items consisting of
more than one deflate block. I intend to fix this.
diff --git a/src/inflate.c b/src/inflate.c
index 9fa7d8d..725d6b0 100644
--- a/src/inflate.c
+++ b/src/inflate.c
@@ -88,36 +88,33 @@ uint8_t deflate_hc_lengths[19];
/*
* Code lengths of the literal/length and distance alphabets.
+ * Sized for up to 286 literal/length codes plus up to 32 distance codes.
*/
uint8_t deflate_lld_lengths[318];
/*
- * Assumptions:
- * * huffman code length is limited to 11 bits
- * * there are no more than 255 huffman codes with the same length
- *
- * Rationale: longer huffman codes might appear when handling large data
- * sets. We don't do that; instead, we expect the uncompressed source to
- * be no more than a few kB of data.
- */
-
-/*
* Bit length counts and next code entries for Literal/Length alphabet.
* Combined with the code lengths in deflate_lld_lengths, these make up the
* Literal/Length alphabet. See the algorithm in RFC 1951 section 3.2.2 for
* details.
*
- * In deflate, these variables are also used for the huffman alphabet in
- * dynamic huffman blocks.
+ * Assumption: There are no more than 255 huffman codes with the same length.
+ * As the largest alphabet (the literal/length alphabet) contains just 288
+ * codes in total, this should be reasonable.
+ *
+ * These variables are also used for the huffman alphabet in dynamic huffman
+ * blocks.
*/
-uint8_t deflate_bl_count_ll[12];
-uint16_t deflate_next_code_ll[12];
+uint8_t deflate_bl_count_ll[16];
+uint16_t deflate_next_code_ll[16];
/*
- * Bit length counts and next code entries for Distance alphabet.
+ * Bit length counts and next code entries for Distance alphabet. Note that,
+ * even though there are just 30 different distance codes, individual
+ * codes may be up to 15 bits long (array index = code length, 0..15).
*/
-uint8_t deflate_bl_count_d[12];
-uint16_t deflate_next_code_d[12];
+uint8_t deflate_bl_count_d[16];
+uint16_t deflate_next_code_d[16];
static uint16_t deflate_rev_word(uint16_t word, uint8_t bits)
{
@@ -168,7 +165,7 @@ static void deflate_build_alphabet(uint8_t * lengths, uint16_t size,
uint16_t i;
uint16_t code = 0;
uint16_t max_len = 0;
- for (i = 0; i < 12; i++) {
+ for (i = 0; i < 16; i++) {
bl_count[i] = 0;
}
@@ -191,7 +188,7 @@ static uint16_t deflate_huff(uint8_t * lengths, uint16_t size,
uint8_t * bl_count, uint16_t * next_code)
{
uint16_t next_word = deflate_get_word();
- for (uint8_t num_bits = 1; num_bits < 12; num_bits++) {
+ for (uint8_t num_bits = 1; num_bits < 16; num_bits++) {
uint16_t next_bits = deflate_rev_word(next_word, num_bits);
if (bl_count[num_bits] && next_bits >= next_code[num_bits]
&& next_bits < next_code[num_bits] + bl_count[num_bits]) {