2 * Decompression code for LZ77+Huffman. This encoding is used by
3 * Microsoft in various file formats and protocols including SMB3.
7 * Initial code from Samba re-licensed with Samuel's permission.
8 * Copyright (C) Samuel Cabrero 2017
10 * Glib-ification, extra error-checking and WS integration
11 * Copyright (C) Aurélien Aptel 2019
13 * Wireshark - Network traffic analyzer
14 * By Gerald Combs <gerald@wireshark.org>
15 * Copyright 1998 Gerald Combs
17 * SPDX-License-Identifier: GPL-2.0-or-later
21 #include <stdlib.h> /* qsort */
22 #include <epan/exceptions.h>
23 #include <epan/tvbuff.h>
24 #include <epan/wmem_scopes.h>
26 #define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */
28 #define TREE_SIZE 1024
29 #define ENCODED_TREE_SIZE 256
30 #define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE)
39 * Represents a node in a Huffman prefix code tree
41 struct prefix_code_node
{
42 /* Stores the symbol encoded by this node in the prefix code tree */
45 /* Indicates whether this node is a leaf in the tree */
49 * Points to the node's two children. Values are indexes in
50 * the tree node array. The value -1 is used to indicate that
51 * a particular child does not exist
57 * Represent information about a Huffman-encoded symbol
59 struct prefix_code_symbol
{
60 /* Stores the symbol */
63 /* Stores the symbol’s Huffman prefix code length */
68 * Represent a byte array as a bit string from which individual bits can
73 const struct input
*input
;
75 /* The index in source from which the next set of bits will be pulled
76 * when the bits in mask have been consumed */
77 uint32_t bitstring_index
;
79 /* Stores the next bits to be consumed in the bit string */
82 /* Stores the number of bits in mask that remain to be consumed */
87 struct prefix_code_node
*root
;
88 struct prefix_code_node nodes
[TREE_SIZE
];
91 static bool is_node_valid(struct hf_tree
*tree
, struct prefix_code_node
*node
)
93 return (node
&& node
>= tree
->nodes
&& node
< tree
->nodes
+ TREE_SIZE
);
97 * Links a symbol's prefix_code_node into its correct position in a Huffman
100 static int prefix_code_tree_add_leaf(struct hf_tree
*tree
,
106 struct prefix_code_node
*node
= &tree
->nodes
[0];
107 uint32_t i
= leaf_index
+ 1;
108 uint32_t child_index
;
110 if (leaf_index
>= TREE_SIZE
)
115 child_index
= (mask
>> bits
) & 1;
116 if (node
->child
[child_index
] < 0) {
119 node
->child
[child_index
] = i
;
120 tree
->nodes
[i
].leaf
= false;
123 node
= tree
->nodes
+ node
->child
[child_index
];
124 if (!is_node_valid(tree
, node
))
128 node
->child
[mask
& 1] = leaf_index
;
135 * Determines the sort order of one prefix_code_symbol relative to another
137 static int compare_symbols(const void *ve1
, const void *ve2
)
139 const struct prefix_code_symbol
*e1
= (const struct prefix_code_symbol
*)ve1
;
140 const struct prefix_code_symbol
*e2
= (const struct prefix_code_symbol
*)ve2
;
142 if (e1
->length
< e2
->length
)
144 else if (e1
->length
> e2
->length
)
146 else if (e1
->symbol
< e2
->symbol
)
148 else if (e1
->symbol
> e2
->symbol
)
155 * Rebuilds the Huffman prefix code tree that will be used to decode symbols
156 * during decompression
158 static int PrefixCodeTreeRebuild( struct hf_tree
*tree
,
159 const struct input
*input
)
161 struct prefix_code_symbol symbolInfo
[SYMBOL_INFO_SIZE
];
162 uint32_t i
, j
, mask
, bits
;
165 for (i
= 0; i
< TREE_SIZE
; i
++) {
166 tree
->nodes
[i
].symbol
= 0;
167 tree
->nodes
[i
].leaf
= false;
168 tree
->nodes
[i
].child
[0] = -1;
169 tree
->nodes
[i
].child
[1] = -1;
172 if (input
->size
< ENCODED_TREE_SIZE
)
175 for (i
= 0; i
< ENCODED_TREE_SIZE
; i
++) {
176 symbolInfo
[2*i
].symbol
= 2*i
;
177 symbolInfo
[2*i
].length
= tvb_get_uint8(input
->tvb
, input
->offset
+i
) & 15;
178 symbolInfo
[2*i
+1].symbol
= 2*i
+1;
179 symbolInfo
[2*i
+1].length
= tvb_get_uint8(input
->tvb
, input
->offset
+i
) >> 4;
182 qsort(symbolInfo
, SYMBOL_INFO_SIZE
, sizeof(symbolInfo
[0]), compare_symbols
);
185 while (i
< SYMBOL_INFO_SIZE
&& symbolInfo
[i
].length
== 0) {
192 tree
->root
= &tree
->nodes
[0];
193 tree
->root
->leaf
= false;
196 for (; i
< 512; i
++) {
197 //ws_assert(j < TREE_SIZE);
198 if (j
>= TREE_SIZE
) {
201 tree
->nodes
[j
].symbol
= symbolInfo
[i
].symbol
;
202 tree
->nodes
[j
].leaf
= true;
203 mask
<<= symbolInfo
[i
].length
- bits
;
204 bits
= symbolInfo
[i
].length
;
205 rc
= prefix_code_tree_add_leaf(tree
, j
, mask
, bits
, &j
);
215 * Initializes a bitstream data structure
217 static void bitstring_init(struct bitstring
*bstr
,
218 const struct input
*input
,
219 uint32_t bitstring_index
)
221 bstr
->mask
= tvb_get_letohs(input
->tvb
, input
->offset
+bitstring_index
);
222 bstr
->mask
<<= sizeof(bstr
->mask
) * 8 - 16;
223 bitstring_index
+= 2;
225 bstr
->mask
+= tvb_get_letohs(input
->tvb
, input
->offset
+bitstring_index
);
226 bitstring_index
+= 2;
230 bstr
->bitstring_index
= bitstring_index
;
234 * Returns the next n bits from the front of a bit string.
236 static uint32_t bitstring_lookup(struct bitstring
*bstr
, uint32_t n
)
238 if (n
== 0 || bstr
->bits
< 0 || n
> (uint32_t)bstr
->bits
) {
241 return bstr
->mask
>> (sizeof(bstr
->mask
) * 8 - n
);
245 * Advances the bit string's cursor by n bits.
247 static void bitstring_skip(struct bitstring
*bstr
, uint32_t n
)
249 bstr
->mask
= bstr
->mask
<< n
;
250 bstr
->bits
= bstr
->bits
- n
;
252 if (bstr
->bits
< 16) {
253 bstr
->mask
+= tvb_get_letohs(bstr
->input
->tvb
,
254 bstr
->input
->offset
+ bstr
->bitstring_index
)
255 << (16 - bstr
->bits
);
256 bstr
->bitstring_index
= bstr
->bitstring_index
+ 2;
257 bstr
->bits
= bstr
->bits
+ 16;
262 * Returns the symbol encoded by the next prefix code in a bit string.
264 static int prefix_code_tree_decode_symbol(struct hf_tree
*tree
,
265 struct bitstring
*bstr
,
266 uint32_t *out_symbol
)
269 struct prefix_code_node
*node
= tree
->root
;
272 bit
= bitstring_lookup(bstr
, 1);
273 bitstring_skip(bstr
, 1);
274 node
= tree
->nodes
+ node
->child
[bit
];
275 if (!is_node_valid(tree
, node
))
277 } while (node
->leaf
== false);
279 *out_symbol
= node
->symbol
;
283 static bool do_uncompress(struct input
*input
,
288 int32_t match_offset
;
290 struct hf_tree tree
= {0};
291 struct bitstring bstr
= {0};
296 if (!input
->size
|| input
->size
> MAX_INPUT_SIZE
)
299 rc
= PrefixCodeTreeRebuild(&tree
, input
);
303 bitstring_init(&bstr
, input
, ENCODED_TREE_SIZE
);
306 rc
= prefix_code_tree_decode_symbol(&tree
, &bstr
, &symbol
);
311 uint8_t v
= symbol
& 0xFF;
312 wmem_array_append_one(obuf
, v
);
316 return bstr
.bitstring_index
== bstr
.input
->size
;
318 symbol
= symbol
- 256;
319 length
= symbol
& 0xF;
320 symbol
= symbol
>> 4;
322 match_offset
= (1U << symbol
) + bitstring_lookup(&bstr
, symbol
);
326 if (bstr
.bitstring_index
>= bstr
.input
->size
)
328 length
= tvb_get_uint8(bstr
.input
->tvb
,
329 bstr
.input
->offset
+bstr
.bitstring_index
) + 15;
330 bstr
.bitstring_index
+= 1;
333 if (bstr
.bitstring_index
+1 >= bstr
.input
->size
)
335 length
= tvb_get_letohs(bstr
.input
->tvb
, bstr
.input
->offset
+bstr
.bitstring_index
);
336 bstr
.bitstring_index
+= 2;
340 bitstring_skip(&bstr
, symbol
);
345 unsigned elem_count
= wmem_array_get_count(obuf
)+match_offset
;
347 if (wmem_array_try_index(obuf
, elem_count
, &byte
))
349 wmem_array_append_one(obuf
, byte
);
351 } while (length
!= 0);
358 tvb_uncompress_lz77huff(tvbuff_t
*tvb
,
363 wmem_allocator_t
*pool
;
366 struct input input
= {
372 pool
= wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE
);
373 obuf
= wmem_array_sized_new(pool
, 1, input_size
*2);
376 ok
= do_uncompress(&input
, obuf
);
384 * Cannot pass a tvb free callback that frees the wmem
385 * pool, so we make an extra copy that uses bare
386 * pointers. This could be optimized if tvb API had a
387 * free pool callback of some sort.
389 unsigned size
= wmem_array_get_count(obuf
);
390 uint8_t *p
= (uint8_t *)g_malloc(size
);
391 memcpy(p
, wmem_array_get_raw(obuf
), size
);
392 out
= tvb_new_real_data(p
, size
, size
);
393 tvb_set_free_cb(out
, g_free
);
398 wmem_destroy_allocator(pool
);
404 tvb_child_uncompress_lz77huff(tvbuff_t
*parent
, tvbuff_t
*tvb
, const int offset
, int in_size
)
406 tvbuff_t
*new_tvb
= tvb_uncompress_lz77huff(tvb
, offset
, in_size
);
408 tvb_set_child_real_data_tvbuff(parent
, new_tvb
);
413 * Editor modelines - https://www.wireshark.org/tools/modelines.html
418 * indent-tabs-mode: t
421 * vi: set shiftwidth=8 tabstop=8 noexpandtab:
422 * :indentSize=8:tabSize=8:noTabs=false: