regen pidl all: rm epan/dissectors/pidl/*-stamp; pushd epan/dissectors/pidl/ && make...
[wireshark-sm.git] / epan / tvbuff_lz77huff.c
blobb50026250f38d031db6b28fca5d38e456d86ca69
1 /*
2 * Decompression code for LZ77+Huffman. This encoding is used by
3 * Microsoft in various file formats and protocols including SMB3.
5 * See MS-XCA.
7 * Initial code from Samba re-licensed with Samuel's permission.
8 * Copyright (C) Samuel Cabrero 2017
10 * Glib-ification, extra error-checking and WS integration
11 * Copyright (C) Aurélien Aptel 2019
13 * Wireshark - Network traffic analyzer
14 * By Gerald Combs <gerald@wireshark.org>
15 * Copyright 1998 Gerald Combs
17 * SPDX-License-Identifier: GPL-2.0-or-later
20 #include <glib.h>
21 #include <stdlib.h> /* qsort */
22 #include <epan/exceptions.h>
23 #include <epan/tvbuff.h>
24 #include <epan/wmem_scopes.h>
26 #define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */
28 #define TREE_SIZE 1024
29 #define ENCODED_TREE_SIZE 256
30 #define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE)
32 struct input {
33 tvbuff_t *tvb;
34 int offset;
35 size_t size;
38 /**
39 * Represents a node in a Huffman prefix code tree
41 struct prefix_code_node {
42 /* Stores the symbol encoded by this node in the prefix code tree */
43 uint16_t symbol;
45 /* Indicates whether this node is a leaf in the tree */
46 uint8_t leaf;
49 * Points to the node's two children. Values are indexes in
50 * the tree node array. The value -1 is used to indicate that
51 * a particular child does not exist
53 int16_t child[2];
56 /**
57 * Represent information about a Huffman-encoded symbol
59 struct prefix_code_symbol {
60 /* Stores the symbol */
61 uint16_t symbol;
63 /* Stores the symbol’s Huffman prefix code length */
64 uint16_t length;
67 /**
68 * Represent a byte array as a bit string from which individual bits can
69 * be read
71 struct bitstring {
72 /* The byte array */
73 const struct input *input;
75 /* The index in source from which the next set of bits will be pulled
76 * when the bits in mask have been consumed */
77 uint32_t bitstring_index;
79 /* Stores the next bits to be consumed in the bit string */
80 uint32_t mask;
82 /* Stores the number of bits in mask that remain to be consumed */
83 int32_t bits;
86 struct hf_tree {
87 struct prefix_code_node *root;
88 struct prefix_code_node nodes[TREE_SIZE];
91 static bool is_node_valid(struct hf_tree *tree, struct prefix_code_node *node)
93 return (node && node >= tree->nodes && node < tree->nodes + TREE_SIZE);
96 /**
97 * Links a symbol's prefix_code_node into its correct position in a Huffman
98 * prefix code tree
100 static int prefix_code_tree_add_leaf(struct hf_tree *tree,
101 uint32_t leaf_index,
102 uint32_t mask,
103 uint32_t bits,
104 uint32_t *out_index)
106 struct prefix_code_node *node = &tree->nodes[0];
107 uint32_t i = leaf_index + 1;
108 uint32_t child_index;
110 if (leaf_index >= TREE_SIZE)
111 return -1;
113 while (bits > 1) {
114 bits = bits - 1;
115 child_index = (mask >> bits) & 1;
116 if (node->child[child_index] < 0) {
117 if (i >= TREE_SIZE)
118 return -1;
119 node->child[child_index] = i;
120 tree->nodes[i].leaf = false;
121 i = i + 1;
123 node = tree->nodes + node->child[child_index];
124 if (!is_node_valid(tree, node))
125 return -1;
128 node->child[mask & 1] = leaf_index;
130 *out_index = i;
131 return 0;
135 * Determines the sort order of one prefix_code_symbol relative to another
137 static int compare_symbols(const void *ve1, const void *ve2)
139 const struct prefix_code_symbol *e1 = (const struct prefix_code_symbol *)ve1;
140 const struct prefix_code_symbol *e2 = (const struct prefix_code_symbol *)ve2;
142 if (e1->length < e2->length)
143 return -1;
144 else if (e1->length > e2->length)
145 return 1;
146 else if (e1->symbol < e2->symbol)
147 return -1;
148 else if (e1->symbol > e2->symbol)
149 return 1;
150 else
151 return 0;
155 * Rebuilds the Huffman prefix code tree that will be used to decode symbols
156 * during decompression
158 static int PrefixCodeTreeRebuild( struct hf_tree *tree,
159 const struct input *input)
161 struct prefix_code_symbol symbolInfo[SYMBOL_INFO_SIZE];
162 uint32_t i, j, mask, bits;
163 int rc;
165 for (i = 0; i < TREE_SIZE; i++) {
166 tree->nodes[i].symbol = 0;
167 tree->nodes[i].leaf = false;
168 tree->nodes[i].child[0] = -1;
169 tree->nodes[i].child[1] = -1;
172 if (input->size < ENCODED_TREE_SIZE)
173 return -1;
175 for (i = 0; i < ENCODED_TREE_SIZE; i++) {
176 symbolInfo[2*i].symbol = 2*i;
177 symbolInfo[2*i].length = tvb_get_uint8(input->tvb, input->offset+i) & 15;
178 symbolInfo[2*i+1].symbol = 2*i+1;
179 symbolInfo[2*i+1].length = tvb_get_uint8(input->tvb, input->offset+i) >> 4;
182 qsort(symbolInfo, SYMBOL_INFO_SIZE, sizeof(symbolInfo[0]), compare_symbols);
184 i = 0;
185 while (i < SYMBOL_INFO_SIZE && symbolInfo[i].length == 0) {
186 i = i + 1;
189 mask = 0;
190 bits = 1;
192 tree->root = &tree->nodes[0];
193 tree->root->leaf = false;
195 j = 1;
196 for (; i < 512; i++) {
197 //ws_assert(j < TREE_SIZE);
198 if (j >= TREE_SIZE) {
199 return -1;
201 tree->nodes[j].symbol = symbolInfo[i].symbol;
202 tree->nodes[j].leaf = true;
203 mask <<= symbolInfo[i].length - bits;
204 bits = symbolInfo[i].length;
205 rc = prefix_code_tree_add_leaf(tree, j, mask, bits, &j);
206 if (rc)
207 return rc;
208 mask += 1;
211 return 0;
215 * Initializes a bitstream data structure
217 static void bitstring_init(struct bitstring *bstr,
218 const struct input *input,
219 uint32_t bitstring_index)
221 bstr->mask = tvb_get_letohs(input->tvb, input->offset+bitstring_index);
222 bstr->mask <<= sizeof(bstr->mask) * 8 - 16;
223 bitstring_index += 2;
225 bstr->mask += tvb_get_letohs(input->tvb, input->offset+bitstring_index);
226 bitstring_index += 2;
228 bstr->bits = 32;
229 bstr->input = input;
230 bstr->bitstring_index = bitstring_index;
234 * Returns the next n bits from the front of a bit string.
236 static uint32_t bitstring_lookup(struct bitstring *bstr, uint32_t n)
238 if (n == 0 || bstr->bits < 0 || n > (uint32_t)bstr->bits) {
239 return 0;
241 return bstr->mask >> (sizeof(bstr->mask) * 8 - n);
245 * Advances the bit string's cursor by n bits.
247 static void bitstring_skip(struct bitstring *bstr, uint32_t n)
249 bstr->mask = bstr->mask << n;
250 bstr->bits = bstr->bits - n;
252 if (bstr->bits < 16) {
253 bstr->mask += tvb_get_letohs(bstr->input->tvb,
254 bstr->input->offset + bstr->bitstring_index)
255 << (16 - bstr->bits);
256 bstr->bitstring_index = bstr->bitstring_index + 2;
257 bstr->bits = bstr->bits + 16;
262 * Returns the symbol encoded by the next prefix code in a bit string.
264 static int prefix_code_tree_decode_symbol(struct hf_tree *tree,
265 struct bitstring *bstr,
266 uint32_t *out_symbol)
268 uint32_t bit;
269 struct prefix_code_node *node = tree->root;
271 do {
272 bit = bitstring_lookup(bstr, 1);
273 bitstring_skip(bstr, 1);
274 node = tree->nodes + node->child[bit];
275 if (!is_node_valid(tree, node))
276 return -1;
277 } while (node->leaf == false);
279 *out_symbol = node->symbol;
280 return 0;
283 static bool do_uncompress(struct input *input,
284 wmem_array_t *obuf)
286 uint32_t symbol;
287 uint32_t length;
288 int32_t match_offset;
289 int rc;
290 struct hf_tree tree = {0};
291 struct bitstring bstr = {0};
293 if (!input->tvb)
294 return false;
296 if (!input->size || input->size > MAX_INPUT_SIZE)
297 return false;
299 rc = PrefixCodeTreeRebuild(&tree, input);
300 if (rc)
301 return false;
303 bitstring_init(&bstr, input, ENCODED_TREE_SIZE);
305 while (1) {
306 rc = prefix_code_tree_decode_symbol(&tree, &bstr, &symbol);
307 if (rc < 0)
308 return false;
310 if (symbol < 256) {
311 uint8_t v = symbol & 0xFF;
312 wmem_array_append_one(obuf, v);
313 } else {
314 if (symbol == 256) {
315 /* EOF symbol */
316 return bstr.bitstring_index == bstr.input->size;
318 symbol = symbol - 256;
319 length = symbol & 0xF;
320 symbol = symbol >> 4;
322 match_offset = (1U << symbol) + bitstring_lookup(&bstr, symbol);
323 match_offset *= -1;
325 if (length == 15) {
326 if (bstr.bitstring_index >= bstr.input->size)
327 return false;
328 length = tvb_get_uint8(bstr.input->tvb,
329 bstr.input->offset+bstr.bitstring_index) + 15;
330 bstr.bitstring_index += 1;
332 if (length == 270) {
333 if (bstr.bitstring_index+1 >= bstr.input->size)
334 return false;
335 length = tvb_get_letohs(bstr.input->tvb, bstr.input->offset+bstr.bitstring_index);
336 bstr.bitstring_index += 2;
340 bitstring_skip(&bstr, symbol);
342 length += 3;
343 do {
344 uint8_t byte;
345 unsigned elem_count = wmem_array_get_count(obuf)+match_offset;
347 if (wmem_array_try_index(obuf, elem_count, &byte))
348 return false;
349 wmem_array_append_one(obuf, byte);
350 length--;
351 } while (length != 0);
354 return true;
357 tvbuff_t *
358 tvb_uncompress_lz77huff(tvbuff_t *tvb,
359 const int offset,
360 int input_size)
362 volatile bool ok;
363 wmem_allocator_t *pool;
364 wmem_array_t *obuf;
365 tvbuff_t *out;
366 struct input input = {
367 .tvb = tvb,
368 .offset = offset,
369 .size = input_size
372 pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE);
373 obuf = wmem_array_sized_new(pool, 1, input_size*2);
375 TRY {
376 ok = do_uncompress(&input, obuf);
377 } CATCH_ALL {
378 ok = false;
380 ENDTRY;
382 if (ok) {
384 * Cannot pass a tvb free callback that frees the wmem
385 * pool, so we make an extra copy that uses bare
386 * pointers. This could be optimized if tvb API had a
387 * free pool callback of some sort.
389 unsigned size = wmem_array_get_count(obuf);
390 uint8_t *p = (uint8_t *)g_malloc(size);
391 memcpy(p, wmem_array_get_raw(obuf), size);
392 out = tvb_new_real_data(p, size, size);
393 tvb_set_free_cb(out, g_free);
394 } else {
395 out = NULL;
398 wmem_destroy_allocator(pool);
400 return out;
403 tvbuff_t *
404 tvb_child_uncompress_lz77huff(tvbuff_t *parent, tvbuff_t *tvb, const int offset, int in_size)
406 tvbuff_t *new_tvb = tvb_uncompress_lz77huff(tvb, offset, in_size);
407 if (new_tvb)
408 tvb_set_child_real_data_tvbuff(parent, new_tvb);
409 return new_tvb;
413 * Editor modelines - https://www.wireshark.org/tools/modelines.html
415 * Local variables:
416 * c-basic-offset: 8
417 * tab-width: 8
418 * indent-tabs-mode: t
419 * End:
421 * vi: set shiftwidth=8 tabstop=8 noexpandtab:
422 * :indentSize=8:tabSize=8:noTabs=false: