regen pidl all: rm epan/dissectors/pidl/*-stamp; pushd epan/dissectors/pidl/ && make...
[wireshark-sm.git] / epan / tvbuff.c
blob637e6a69ac80e154e50568c7e60cc6f09ab3dbb8
1 /* tvbuff.c
3 * Testy, Virtual(-izable) Buffer of uint8_t*'s
5 * "Testy" -- the buffer gets mad when an attempt to access data
6 * beyond the bounds of the buffer. An exception is thrown.
8 * "Virtual" -- the buffer can have its own data, can use a subset of
9 * the data of a backing tvbuff, or can be a composite of
10 * other tvbuffs.
12 * Copyright (c) 2000 by Gilbert Ramirez <gram@alumni.rice.edu>
14 * Code to convert IEEE floating point formats to native floating point
15 * derived from code Copyright (c) Ashok Narayanan, 2000
17 * Wireshark - Network traffic analyzer
18 * By Gerald Combs <gerald@wireshark.org>
19 * Copyright 1998 Gerald Combs
21 * SPDX-License-Identifier: GPL-2.0-or-later
24 #include "config.h"
26 #include <string.h>
27 #include <stdio.h>
28 #include <errno.h>
30 #include <glib.h>
32 #include "wsutil/pint.h"
33 #include "wsutil/sign_ext.h"
34 #include "wsutil/strtoi.h"
35 #include "wsutil/unicode-utils.h"
36 #include "wsutil/nstime.h"
37 #include "wsutil/time_util.h"
38 #include <wsutil/ws_assert.h>
39 #include "tvbuff.h"
40 #include "tvbuff-int.h"
41 #include "strutil.h"
42 #include "to_str.h"
43 #include "charsets.h"
44 #include "proto.h" /* XXX - only used for DISSECTOR_ASSERT, probably a new header file? */
45 #include "exceptions.h"
47 #include <time.h>
49 static uint64_t
50 _tvb_get_bits64(tvbuff_t *tvb, unsigned bit_offset, const int total_no_of_bits);
52 static uint64_t
53 _tvb_get_bits64_le(tvbuff_t *tvb, unsigned bit_offset, const int total_no_of_bits);
55 static inline int
56 _tvb_captured_length_remaining(const tvbuff_t *tvb, const int offset);
58 static inline const uint8_t*
59 ensure_contiguous(tvbuff_t *tvb, const int offset, const int length);
61 static inline uint8_t *
62 tvb_get_raw_string(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int length);
64 tvbuff_t *
65 tvb_new(const struct tvb_ops *ops)
67 tvbuff_t *tvb;
68 size_t size = ops->tvb_size;
70 ws_assert(size >= sizeof(*tvb));
72 tvb = (tvbuff_t *) g_slice_alloc(size);
74 tvb->next = NULL;
75 tvb->ops = ops;
76 tvb->initialized = false;
77 tvb->flags = 0;
78 tvb->length = 0;
79 tvb->reported_length = 0;
80 tvb->contained_length = 0;
81 tvb->real_data = NULL;
82 tvb->raw_offset = -1;
83 tvb->ds_tvb = NULL;
85 return tvb;
88 static void
89 tvb_free_internal(tvbuff_t *tvb)
91 size_t size;
93 DISSECTOR_ASSERT(tvb);
95 if (tvb->ops->tvb_free)
96 tvb->ops->tvb_free(tvb);
98 size = tvb->ops->tvb_size;
100 g_slice_free1(size, tvb);
103 /* XXX: just call tvb_free_chain();
104 * Not removed so that existing dissectors using tvb_free() need not be changed.
105 * I'd argue that existing calls to tvb_free() should have actually beeen
106 * calls to tvb_free_chain() although the calls were OK as long as no
107 * subsets, etc had been created on the tvb. */
108 void
109 tvb_free(tvbuff_t *tvb)
111 tvb_free_chain(tvb);
114 void
115 tvb_free_chain(tvbuff_t *tvb)
117 tvbuff_t *next_tvb;
118 DISSECTOR_ASSERT(tvb);
119 while (tvb) {
120 next_tvb = tvb->next;
121 tvb_free_internal(tvb);
122 tvb = next_tvb;
126 tvbuff_t *
127 tvb_new_chain(tvbuff_t *parent, tvbuff_t *backing)
129 tvbuff_t *tvb = tvb_new_proxy(backing);
131 tvb_add_to_chain(parent, tvb);
132 return tvb;
135 void
136 tvb_add_to_chain(tvbuff_t *parent, tvbuff_t *child)
138 tvbuff_t *tmp;
140 DISSECTOR_ASSERT(parent);
141 DISSECTOR_ASSERT(child);
143 while (child) {
144 tmp = child;
145 child = child->next;
147 tmp->next = parent->next;
148 parent->next = tmp;
153 * Check whether that offset goes more than one byte past the
154 * end of the buffer.
156 * If not, return 0; otherwise, return exception
158 static inline int
159 validate_offset(const tvbuff_t *tvb, const unsigned abs_offset)
161 if (G_LIKELY(abs_offset <= tvb->length)) {
162 /* It's OK. */
163 return 0;
167 * It's not OK, but why? Which boundaries is it
168 * past?
170 if (abs_offset <= tvb->contained_length) {
172 * It's past the captured length, but not past
173 * the reported end of any parent tvbuffs from
174 * which this is constructed, or the reported
175 * end of this tvbuff, so it's out of bounds
176 * solely because we're past the end of the
177 * captured data.
179 return BoundsError;
183 * There's some actual packet boundary, not just the
184 * artificial boundary imposed by packet slicing, that
185 * we're past.
188 if (tvb->flags & TVBUFF_FRAGMENT) {
190 * This tvbuff is the first fragment of a larger
191 * packet that hasn't been reassembled, so we
192 * assume that's the source of the problem - if
193 * we'd reassembled the packet, we wouldn't have
194 * gone past the end.
196 * That might not be true, but for at least
197 * some forms of reassembly, such as IP
198 * reassembly, you don't know how big the
199 * reassembled packet is unless you reassemble
200 * it, so, in those cases, we can't determine
201 * whether we would have gone past the end
202 * had we reassembled the packet.
204 return FragmentBoundsError;
207 /* OK, we're not an unreassembled fragment (that we know of). */
208 if (abs_offset <= tvb->reported_length) {
210 * We're within the bounds of what this tvbuff
211 * purportedly contains, based on some length
212 * value, but we're not within the bounds of
213 * something from which this tvbuff was
214 * extracted, so that length value ran past
215 * the end of some parent tvbuff.
217 return ContainedBoundsError;
221 * OK, it looks as if we ran past the claimed length
222 * of data.
224 return ReportedBoundsError;
227 static inline int
228 compute_offset(const tvbuff_t *tvb, const int offset, unsigned *offset_ptr)
230 if (offset >= 0) {
231 /* Positive offset - relative to the beginning of the packet. */
232 if (G_LIKELY((unsigned) offset <= tvb->length)) {
233 *offset_ptr = offset;
234 } else if ((unsigned) offset <= tvb->contained_length) {
235 return BoundsError;
236 } else if (tvb->flags & TVBUFF_FRAGMENT) {
237 return FragmentBoundsError;
238 } else if ((unsigned) offset <= tvb->reported_length) {
239 return ContainedBoundsError;
240 } else {
241 return ReportedBoundsError;
244 else {
245 /* Negative offset - relative to the end of the packet. */
246 if (G_LIKELY((unsigned) -offset <= tvb->length)) {
247 *offset_ptr = tvb->length + offset;
248 } else if ((unsigned) -offset <= tvb->contained_length) {
249 return BoundsError;
250 } else if (tvb->flags & TVBUFF_FRAGMENT) {
251 return FragmentBoundsError;
252 } else if ((unsigned) -offset <= tvb->reported_length) {
253 return ContainedBoundsError;
254 } else {
255 return ReportedBoundsError;
259 return 0;
262 static inline int
263 compute_offset_and_remaining(const tvbuff_t *tvb, const int offset, unsigned *offset_ptr, unsigned *rem_len)
265 int exception;
267 exception = compute_offset(tvb, offset, offset_ptr);
268 if (!exception)
269 *rem_len = tvb->length - *offset_ptr;
271 return exception;
274 /* Computes the absolute offset and length based on a possibly-negative offset
275 * and a length that is possible -1 (which means "to the end of the data").
276 * Returns integer indicating whether the offset is in bounds (0) or
277 * not (exception number). The integer ptrs are modified with the new offset,
278 * captured (available) length, and contained length (amount that's present
279 * in the parent tvbuff based on its reported length).
280 * No exception is thrown; on success, we return 0, otherwise we return an
281 * exception for the caller to throw if appropriate.
283 * XXX - we return success (0), if the offset is positive and right
284 * after the end of the tvbuff (i.e., equal to the length). We do this
285 * so that a dissector constructing a subset tvbuff for the next protocol
286 * will get a zero-length tvbuff, not an exception, if there's no data
287 * left for the next protocol - we want the next protocol to be the one
288 * that gets an exception, so the error is reported as an error in that
289 * protocol rather than the containing protocol. */
290 static inline int
291 check_offset_length_no_exception(const tvbuff_t *tvb,
292 const int offset, int const length_val,
293 unsigned *offset_ptr, unsigned *length_ptr)
295 unsigned end_offset;
296 int exception;
298 DISSECTOR_ASSERT(offset_ptr);
299 DISSECTOR_ASSERT(length_ptr);
301 /* Compute the offset */
302 exception = compute_offset(tvb, offset, offset_ptr);
303 if (exception)
304 return exception;
306 if (length_val < -1) {
307 /* XXX - ReportedBoundsError? */
308 return BoundsError;
311 /* Compute the length */
312 if (length_val == -1)
313 *length_ptr = tvb->length - *offset_ptr;
314 else
315 *length_ptr = length_val;
318 * Compute the offset of the first byte past the length.
320 end_offset = *offset_ptr + *length_ptr;
323 * Check for an overflow
325 if (end_offset < *offset_ptr)
326 return BoundsError;
328 return validate_offset(tvb, end_offset);
331 /* Checks (+/-) offset and length and throws an exception if
332 * either is out of bounds. Sets integer ptrs to the new offset
333 * and length. */
334 static inline void
335 check_offset_length(const tvbuff_t *tvb,
336 const int offset, int const length_val,
337 unsigned *offset_ptr, unsigned *length_ptr)
339 int exception;
341 exception = check_offset_length_no_exception(tvb, offset, length_val, offset_ptr, length_ptr);
342 if (exception)
343 THROW(exception);
346 void
347 tvb_check_offset_length(const tvbuff_t *tvb,
348 const int offset, int const length_val,
349 unsigned *offset_ptr, unsigned *length_ptr)
351 check_offset_length(tvb, offset, length_val, offset_ptr, length_ptr);
/*
 * Masks that keep the N most significant bits of an octet, indexed by
 * N.  Index 0 keeps the whole octet.
 */
static const unsigned char left_aligned_bitmask[] = {
	[0] = 0xff,
	[1] = 0x80,
	[2] = 0xc0,
	[3] = 0xe0,
	[4] = 0xf0,
	[5] = 0xf8,
	[6] = 0xfc,
	[7] = 0xfe
};
365 tvbuff_t *
366 tvb_new_octet_aligned(tvbuff_t *tvb, uint32_t bit_offset, int32_t no_of_bits)
368 tvbuff_t *sub_tvb = NULL;
369 uint32_t byte_offset;
370 int32_t datalen, i;
371 uint8_t left, right, remaining_bits, *buf;
372 const uint8_t *data;
374 DISSECTOR_ASSERT(tvb && tvb->initialized);
376 byte_offset = bit_offset >> 3;
377 left = bit_offset % 8; /* for left-shifting */
378 right = 8 - left; /* for right-shifting */
380 if (no_of_bits == -1) {
381 datalen = _tvb_captured_length_remaining(tvb, byte_offset);
382 remaining_bits = 0;
383 } else {
384 datalen = no_of_bits >> 3;
385 remaining_bits = no_of_bits % 8;
386 if (remaining_bits) {
387 datalen++;
391 /* already aligned -> shortcut */
392 if ((left == 0) && (remaining_bits == 0)) {
393 return tvb_new_subset_length_caplen(tvb, byte_offset, datalen, datalen);
396 DISSECTOR_ASSERT(datalen>0);
398 /* if at least one trailing byte is available, we must use the content
399 * of that byte for the last shift (i.e. tvb_get_ptr() must use datalen + 1
400 * if non extra byte is available, the last shifted byte requires
401 * special treatment
403 if (_tvb_captured_length_remaining(tvb, byte_offset) > datalen) {
404 data = ensure_contiguous(tvb, byte_offset, datalen + 1); /* tvb_get_ptr */
406 /* Do this allocation AFTER tvb_get_ptr() (which could throw an exception) */
407 buf = (uint8_t *)g_malloc(datalen);
409 /* shift tvb data bit_offset bits to the left */
410 for (i = 0; i < datalen; i++)
411 buf[i] = (data[i] << left) | (data[i+1] >> right);
412 } else {
413 data = ensure_contiguous(tvb, byte_offset, datalen); /* tvb_get_ptr() */
415 /* Do this allocation AFTER tvb_get_ptr() (which could throw an exception) */
416 buf = (uint8_t *)g_malloc(datalen);
418 /* shift tvb data bit_offset bits to the left */
419 for (i = 0; i < (datalen-1); i++)
420 buf[i] = (data[i] << left) | (data[i+1] >> right);
421 buf[datalen-1] = data[datalen-1] << left; /* set last octet */
423 buf[datalen-1] &= left_aligned_bitmask[remaining_bits];
425 sub_tvb = tvb_new_child_real_data(tvb, buf, datalen, datalen);
426 tvb_set_free_cb(sub_tvb, g_free);
428 return sub_tvb;
431 tvbuff_t *
432 tvb_new_octet_right_aligned(tvbuff_t *tvb, uint32_t bit_offset, int32_t no_of_bits)
434 tvbuff_t *sub_tvb = NULL;
435 uint32_t byte_offset;
436 int src_len, dst_len, i;
437 uint8_t left, right, remaining_bits, *buf;
438 const uint8_t *data;
440 DISSECTOR_ASSERT(tvb && tvb->initialized);
442 byte_offset = bit_offset / 8;
443 /* right shift to put bits in place and discard least significant bits */
444 right = bit_offset % 8;
445 /* left shift to get most significant bits from next octet */
446 left = 8 - right;
448 if (no_of_bits == -1) {
449 dst_len = _tvb_captured_length_remaining(tvb, byte_offset);
450 remaining_bits = 0;
451 } else {
452 dst_len = no_of_bits / 8;
453 remaining_bits = no_of_bits % 8;
454 if (remaining_bits) {
455 dst_len++;
459 /* already aligned -> shortcut */
460 if ((right == 0) && (remaining_bits == 0)) {
461 return tvb_new_subset_length_caplen(tvb, byte_offset, dst_len, dst_len);
464 DISSECTOR_ASSERT(dst_len>0);
466 if (_tvb_captured_length_remaining(tvb, byte_offset) > dst_len) {
467 /* last octet will get data from trailing octet */
468 src_len = dst_len + 1;
469 } else {
470 /* last octet will be zero padded */
471 src_len = dst_len;
474 data = ensure_contiguous(tvb, byte_offset, src_len); /* tvb_get_ptr */
476 /* Do this allocation AFTER tvb_get_ptr() (which could throw an exception) */
477 buf = (uint8_t *)g_malloc(dst_len);
479 for (i = 0; i < (dst_len - 1); i++)
480 buf[i] = (data[i] >> right) | (data[i+1] << left);
482 /* Special handling for last octet */
483 buf[i] = (data[i] >> right);
484 /* Shift most significant bits from trailing octet if available */
485 if (src_len > dst_len)
486 buf[i] |= (data[i+1] << left);
487 /* Preserve only remaining bits in last octet if not multiple of 8 */
488 if (remaining_bits)
489 buf[i] &= ((1 << remaining_bits) - 1);
491 sub_tvb = tvb_new_child_real_data(tvb, buf, dst_len, dst_len);
492 tvb_set_free_cb(sub_tvb, g_free);
494 return sub_tvb;
497 static tvbuff_t *
498 tvb_generic_clone_offset_len(tvbuff_t *tvb, unsigned offset, unsigned len)
500 tvbuff_t *cloned_tvb;
501 uint8_t *data;
503 DISSECTOR_ASSERT(tvb_bytes_exist(tvb, offset, len));
505 data = (uint8_t *) g_malloc(len);
507 tvb_memcpy(tvb, data, offset, len);
509 cloned_tvb = tvb_new_real_data(data, len, len);
510 tvb_set_free_cb(cloned_tvb, g_free);
512 return cloned_tvb;
515 tvbuff_t *
516 tvb_clone_offset_len(tvbuff_t *tvb, unsigned offset, unsigned len)
518 if (tvb->ops->tvb_clone) {
519 tvbuff_t *cloned_tvb;
521 cloned_tvb = tvb->ops->tvb_clone(tvb, offset, len);
522 if (cloned_tvb)
523 return cloned_tvb;
526 return tvb_generic_clone_offset_len(tvb, offset, len);
529 tvbuff_t *
530 tvb_clone(tvbuff_t *tvb)
532 return tvb_clone_offset_len(tvb, 0, tvb->length);
535 unsigned
536 tvb_captured_length(const tvbuff_t *tvb)
538 DISSECTOR_ASSERT(tvb && tvb->initialized);
540 return tvb->length;
543 /* For tvbuff internal use */
544 static inline int
545 _tvb_captured_length_remaining(const tvbuff_t *tvb, const int offset)
547 unsigned abs_offset = 0, rem_length;
548 int exception;
550 exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &rem_length);
551 if (exception)
552 return 0;
554 return rem_length;
558 tvb_captured_length_remaining(const tvbuff_t *tvb, const int offset)
560 unsigned abs_offset = 0, rem_length;
561 int exception;
563 DISSECTOR_ASSERT(tvb && tvb->initialized);
565 exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &rem_length);
566 if (exception)
567 return 0;
569 return rem_length;
572 unsigned
573 tvb_ensure_captured_length_remaining(const tvbuff_t *tvb, const int offset)
575 unsigned abs_offset = 0, rem_length = 0;
576 int exception;
578 DISSECTOR_ASSERT(tvb && tvb->initialized);
580 exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &rem_length);
581 if (exception)
582 THROW(exception);
584 if (rem_length == 0) {
586 * This routine ensures there's at least one byte available.
587 * There aren't any bytes available, so throw the appropriate
588 * exception.
590 if (abs_offset < tvb->contained_length) {
591 THROW(BoundsError);
592 } else if (tvb->flags & TVBUFF_FRAGMENT) {
593 THROW(FragmentBoundsError);
594 } else if (abs_offset < tvb->reported_length) {
595 THROW(ContainedBoundsError);
596 } else {
597 THROW(ReportedBoundsError);
600 return rem_length;
603 /* Validates that 'length' bytes are available starting from
604 * offset (pos/neg). Does not throw an exception. */
605 bool
606 tvb_bytes_exist(const tvbuff_t *tvb, const int offset, const int length)
608 unsigned abs_offset = 0, abs_length;
609 int exception;
611 DISSECTOR_ASSERT(tvb && tvb->initialized);
614 * Negative lengths are not possible and indicate a bug (e.g. arithmetic
615 * error or an overly large value from packet data).
617 if (length < 0)
618 return false;
620 exception = check_offset_length_no_exception(tvb, offset, length, &abs_offset, &abs_length);
621 if (exception)
622 return false;
624 return true;
627 /* Validates that 'length' bytes, where 'length' is a 64-bit unsigned
628 * integer, are available starting from offset (pos/neg). Throws an
629 * exception if they aren't. */
630 void
631 tvb_ensure_bytes_exist64(const tvbuff_t *tvb, const int offset, const uint64_t length)
634 * Make sure the value fits in a signed integer; if not, assume
635 * that means that it's too big.
637 if (length > INT_MAX) {
638 THROW(ReportedBoundsError);
641 /* OK, now cast it and try it with tvb_ensure_bytes_exist(). */
642 tvb_ensure_bytes_exist(tvb, offset, (int)length);
645 /* Validates that 'length' bytes are available starting from
646 * offset (pos/neg). Throws an exception if they aren't. */
647 void
648 tvb_ensure_bytes_exist(const tvbuff_t *tvb, const int offset, const int length)
650 unsigned real_offset, end_offset;
652 DISSECTOR_ASSERT(tvb && tvb->initialized);
655 * -1 doesn't mean "until end of buffer", as that's pointless
656 * for this routine. We must treat it as a Really Large Positive
657 * Number, so that we throw an exception; we throw
658 * ReportedBoundsError, as if it were past even the end of a
659 * reassembled packet, and past the end of even the data we
660 * didn't capture.
662 * We do the same with other negative lengths.
664 if (length < 0) {
665 THROW(ReportedBoundsError);
668 /* XXX: Below this point could be replaced with a call to
669 * check_offset_length with no functional change, however this is a
670 * *very* hot path and check_offset_length is not well-optimized for
671 * this case, so we eat some code duplication for a lot of speedup. */
673 if (offset >= 0) {
674 /* Positive offset - relative to the beginning of the packet. */
675 if (G_LIKELY((unsigned) offset <= tvb->length)) {
676 real_offset = offset;
677 } else if ((unsigned) offset <= tvb->contained_length) {
678 THROW(BoundsError);
679 } else if (tvb->flags & TVBUFF_FRAGMENT) {
680 THROW(FragmentBoundsError);
681 } else if ((unsigned) offset <= tvb->reported_length) {
682 THROW(ContainedBoundsError);
683 } else {
684 THROW(ReportedBoundsError);
687 else {
688 /* Negative offset - relative to the end of the packet. */
689 if (G_LIKELY((unsigned) -offset <= tvb->length)) {
690 real_offset = tvb->length + offset;
691 } else if ((unsigned) -offset <= tvb->contained_length) {
692 THROW(BoundsError);
693 } else if (tvb->flags & TVBUFF_FRAGMENT) {
694 THROW(FragmentBoundsError);
695 } else if ((unsigned) -offset <= tvb->reported_length) {
696 THROW(ContainedBoundsError);
697 } else {
698 THROW(ReportedBoundsError);
703 * Compute the offset of the first byte past the length.
705 end_offset = real_offset + length;
708 * Check for an overflow
710 if (end_offset < real_offset)
711 THROW(BoundsError);
713 if (G_LIKELY(end_offset <= tvb->length))
714 return;
715 else if (end_offset <= tvb->contained_length)
716 THROW(BoundsError);
717 else if (tvb->flags & TVBUFF_FRAGMENT)
718 THROW(FragmentBoundsError);
719 else if (end_offset <= tvb->reported_length)
720 THROW(ContainedBoundsError);
721 else
722 THROW(ReportedBoundsError);
725 bool
726 tvb_offset_exists(const tvbuff_t *tvb, const int offset)
728 unsigned abs_offset = 0;
729 int exception;
731 DISSECTOR_ASSERT(tvb && tvb->initialized);
733 exception = compute_offset(tvb, offset, &abs_offset);
734 if (exception)
735 return false;
737 /* compute_offset only throws an exception on >, not >= because of the
738 * comment above check_offset_length_no_exception, but here we want the
739 * opposite behaviour so we check ourselves... */
740 return abs_offset < tvb->length;
743 unsigned
744 tvb_reported_length(const tvbuff_t *tvb)
746 DISSECTOR_ASSERT(tvb && tvb->initialized);
748 return tvb->reported_length;
752 tvb_reported_length_remaining(const tvbuff_t *tvb, const int offset)
754 unsigned abs_offset = 0;
755 int exception;
757 DISSECTOR_ASSERT(tvb && tvb->initialized);
759 exception = compute_offset(tvb, offset, &abs_offset);
760 if (exception)
761 return 0;
763 if (tvb->reported_length >= abs_offset)
764 return tvb->reported_length - abs_offset;
765 else
766 return 0;
769 unsigned
770 tvb_ensure_reported_length_remaining(const tvbuff_t *tvb, const int offset)
772 unsigned abs_offset = 0;
773 int exception;
775 DISSECTOR_ASSERT(tvb && tvb->initialized);
777 exception = compute_offset(tvb, offset, &abs_offset);
778 if (exception)
779 THROW(exception);
781 if (tvb->reported_length >= abs_offset)
782 return tvb->reported_length - abs_offset;
783 else
784 THROW(ReportedBoundsError);
787 /* Set the reported length of a tvbuff to a given value; used for protocols
788 * whose headers contain an explicit length and where the calling
789 * dissector's payload may include padding as well as the packet for
790 * this protocol.
791 * Also adjusts the available and contained length. */
792 void
793 tvb_set_reported_length(tvbuff_t *tvb, const unsigned reported_length)
795 DISSECTOR_ASSERT(tvb && tvb->initialized);
797 if (reported_length > tvb->reported_length)
798 THROW(ReportedBoundsError);
800 tvb->reported_length = reported_length;
801 if (reported_length < tvb->length)
802 tvb->length = reported_length;
803 if (reported_length < tvb->contained_length)
804 tvb->contained_length = reported_length;
807 /* Repair a tvbuff where the captured length is greater than the
808 * reported length; such a tvbuff makes no sense, as it's impossible
809 * to capture more data than is in the packet.
811 void
812 tvb_fix_reported_length(tvbuff_t *tvb)
814 DISSECTOR_ASSERT(tvb && tvb->initialized);
815 DISSECTOR_ASSERT(tvb->reported_length < tvb->length);
817 tvb->reported_length = tvb->length;
818 if (tvb->contained_length < tvb->length)
819 tvb->contained_length = tvb->length;
822 unsigned
823 tvb_offset_from_real_beginning_counter(const tvbuff_t *tvb, const unsigned counter)
825 if (tvb->ops->tvb_offset)
826 return tvb->ops->tvb_offset(tvb, counter);
828 DISSECTOR_ASSERT_NOT_REACHED();
829 return 0;
832 unsigned
833 tvb_offset_from_real_beginning(const tvbuff_t *tvb)
835 return tvb_offset_from_real_beginning_counter(tvb, 0);
838 static inline const uint8_t*
839 ensure_contiguous_no_exception(tvbuff_t *tvb, const int offset, const int length, int *pexception)
841 unsigned abs_offset = 0, abs_length = 0;
842 int exception;
844 exception = check_offset_length_no_exception(tvb, offset, length, &abs_offset, &abs_length);
845 if (exception) {
846 if (pexception)
847 *pexception = exception;
848 return NULL;
852 * Special case: if the caller (e.g. tvb_get_ptr) requested no data,
853 * then it is acceptable to have an empty tvb (!tvb->real_data).
855 if (length == 0) {
856 return NULL;
860 * We know that all the data is present in the tvbuff, so
861 * no exceptions should be thrown.
863 if (tvb->real_data)
864 return tvb->real_data + abs_offset;
866 if (tvb->ops->tvb_get_ptr)
867 return tvb->ops->tvb_get_ptr(tvb, abs_offset, abs_length);
869 DISSECTOR_ASSERT_NOT_REACHED();
870 return NULL;
873 static inline const uint8_t*
874 ensure_contiguous(tvbuff_t *tvb, const int offset, const int length)
876 int exception = 0;
877 const uint8_t *p;
879 p = ensure_contiguous_no_exception(tvb, offset, length, &exception);
880 if (p == NULL && length != 0) {
881 DISSECTOR_ASSERT(exception > 0);
882 THROW(exception);
884 return p;
887 static inline const uint8_t*
888 fast_ensure_contiguous(tvbuff_t *tvb, const int offset, const unsigned length)
890 unsigned end_offset;
891 unsigned u_offset;
893 DISSECTOR_ASSERT(tvb && tvb->initialized);
894 /* We don't check for overflow in this fast path so we only handle simple types */
895 DISSECTOR_ASSERT(length <= 8);
897 if (offset < 0 || !tvb->real_data) {
898 return ensure_contiguous(tvb, offset, length);
901 u_offset = offset;
902 end_offset = u_offset + length;
904 if (G_LIKELY(end_offset <= tvb->length)) {
905 return tvb->real_data + u_offset;
906 } else if (end_offset <= tvb->contained_length) {
907 THROW(BoundsError);
908 } else if (tvb->flags & TVBUFF_FRAGMENT) {
909 THROW(FragmentBoundsError);
910 } else if (end_offset <= tvb->reported_length) {
911 THROW(ContainedBoundsError);
912 } else {
913 THROW(ReportedBoundsError);
915 /* not reached */
916 return NULL;
921 /************** ACCESSORS **************/
923 void *
924 tvb_memcpy(tvbuff_t *tvb, void *target, const int offset, size_t length)
926 unsigned abs_offset = 0, abs_length = 0;
928 DISSECTOR_ASSERT(tvb && tvb->initialized);
931 * XXX - we should eliminate the "length = -1 means 'to the end
932 * of the tvbuff'" convention, and use other means to achieve
933 * that; this would let us eliminate a bunch of checks for
934 * negative lengths in cases where the protocol has a 32-bit
935 * length field.
937 * Allowing -1 but throwing an assertion on other negative
938 * lengths is a bit more work with the length being a size_t;
939 * instead, we check for a length <= 2^31-1.
941 DISSECTOR_ASSERT(length <= 0x7FFFFFFF);
942 check_offset_length(tvb, offset, (int) length, &abs_offset, &abs_length);
944 if (target && tvb->real_data) {
945 return memcpy(target, tvb->real_data + abs_offset, abs_length);
948 if (target && tvb->ops->tvb_memcpy)
949 return tvb->ops->tvb_memcpy(tvb, target, abs_offset, abs_length);
952 * If the length is 0, there's nothing to do.
953 * (tvb->real_data could be null if it's allocated with
954 * a size of length.)
956 if (length != 0) {
958 * XXX, fallback to slower method
960 DISSECTOR_ASSERT_NOT_REACHED();
962 return NULL;
967 * XXX - this doesn't treat a length of -1 as an error.
968 * If it did, this could replace some code that calls
969 * "tvb_ensure_bytes_exist()" and then allocates a buffer and copies
970 * data to it.
972 * "composite_get_ptr()" depends on -1 not being
973 * an error; does anything else depend on this routine treating -1 as
974 * meaning "to the end of the buffer"?
976 * If scope is NULL, memory is allocated with g_malloc() and user must
977 * explicitly free it with g_free().
978 * If scope is not NULL, memory is allocated with the corresponding pool
979 * lifetime.
981 void *
982 tvb_memdup(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, size_t length)
984 unsigned abs_offset = 0, abs_length = 0;
985 void *duped;
987 DISSECTOR_ASSERT(tvb && tvb->initialized);
989 check_offset_length(tvb, offset, (int) length, &abs_offset, &abs_length);
991 if (abs_length == 0)
992 return NULL;
994 duped = wmem_alloc(scope, abs_length);
995 return tvb_memcpy(tvb, duped, abs_offset, abs_length);
1000 const uint8_t*
1001 tvb_get_ptr(tvbuff_t *tvb, const int offset, const int length)
1003 return ensure_contiguous(tvb, offset, length);
1006 /* ---------------- */
1007 uint8_t
1008 tvb_get_uint8(tvbuff_t *tvb, const int offset)
1010 const uint8_t *ptr;
1012 ptr = fast_ensure_contiguous(tvb, offset, 1);
1013 return *ptr;
1016 int8_t
1017 tvb_get_int8(tvbuff_t *tvb, const int offset)
1019 const uint8_t *ptr;
1021 ptr = fast_ensure_contiguous(tvb, offset, 1);
1022 return *ptr;
1025 uint16_t
1026 tvb_get_ntohs(tvbuff_t *tvb, const int offset)
1028 const uint8_t *ptr;
1030 ptr = fast_ensure_contiguous(tvb, offset, 2);
1031 return pntoh16(ptr);
1034 int16_t
1035 tvb_get_ntohis(tvbuff_t *tvb, const int offset)
1037 const uint8_t *ptr;
1039 ptr = fast_ensure_contiguous(tvb, offset, 2);
1040 return pntoh16(ptr);
1043 uint32_t
1044 tvb_get_ntoh24(tvbuff_t *tvb, const int offset)
1046 const uint8_t *ptr;
1048 ptr = fast_ensure_contiguous(tvb, offset, 3);
1049 return pntoh24(ptr);
1052 int32_t
1053 tvb_get_ntohi24(tvbuff_t *tvb, const int offset)
1055 uint32_t ret;
1057 ret = ws_sign_ext32(tvb_get_ntoh24(tvb, offset), 24);
1059 return (int32_t)ret;
1062 uint32_t
1063 tvb_get_ntohl(tvbuff_t *tvb, const int offset)
1065 const uint8_t *ptr;
1067 ptr = fast_ensure_contiguous(tvb, offset, 4);
1068 return pntoh32(ptr);
1071 int32_t
1072 tvb_get_ntohil(tvbuff_t *tvb, const int offset)
1074 const uint8_t *ptr;
1076 ptr = fast_ensure_contiguous(tvb, offset, 4);
1077 return pntoh32(ptr);
1080 uint64_t
1081 tvb_get_ntoh40(tvbuff_t *tvb, const int offset)
1083 const uint8_t *ptr;
1085 ptr = fast_ensure_contiguous(tvb, offset, 5);
1086 return pntoh40(ptr);
1089 int64_t
1090 tvb_get_ntohi40(tvbuff_t *tvb, const int offset)
1092 uint64_t ret;
1094 ret = ws_sign_ext64(tvb_get_ntoh40(tvb, offset), 40);
1096 return (int64_t)ret;
1099 uint64_t
1100 tvb_get_ntoh48(tvbuff_t *tvb, const int offset)
1102 const uint8_t *ptr;
1104 ptr = fast_ensure_contiguous(tvb, offset, 6);
1105 return pntoh48(ptr);
1108 int64_t
1109 tvb_get_ntohi48(tvbuff_t *tvb, const int offset)
1111 uint64_t ret;
1113 ret = ws_sign_ext64(tvb_get_ntoh48(tvb, offset), 48);
1115 return (int64_t)ret;
1118 uint64_t
1119 tvb_get_ntoh56(tvbuff_t *tvb, const int offset)
1121 const uint8_t *ptr;
1123 ptr = fast_ensure_contiguous(tvb, offset, 7);
1124 return pntoh56(ptr);
1127 int64_t
1128 tvb_get_ntohi56(tvbuff_t *tvb, const int offset)
1130 uint64_t ret;
1132 ret = ws_sign_ext64(tvb_get_ntoh56(tvb, offset), 56);
1134 return (int64_t)ret;
1137 uint64_t
1138 tvb_get_ntoh64(tvbuff_t *tvb, const int offset)
1140 const uint8_t *ptr;
1142 ptr = fast_ensure_contiguous(tvb, offset, 8);
1143 return pntoh64(ptr);
1146 int64_t
1147 tvb_get_ntohi64(tvbuff_t *tvb, const int offset)
1149 const uint8_t *ptr;
1151 ptr = fast_ensure_contiguous(tvb, offset, 8);
1152 return pntoh64(ptr);
1155 uint16_t
1156 tvb_get_uint16(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1157 if (encoding & ENC_LITTLE_ENDIAN) {
1158 return tvb_get_letohs(tvb, offset);
1159 } else {
1160 return tvb_get_ntohs(tvb, offset);
1164 int16_t
1165 tvb_get_int16(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1166 if (encoding & ENC_LITTLE_ENDIAN) {
1167 return tvb_get_letohis(tvb, offset);
1168 } else {
1169 return tvb_get_ntohis(tvb, offset);
1173 uint32_t
1174 tvb_get_uint24(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1175 if (encoding & ENC_LITTLE_ENDIAN) {
1176 return tvb_get_letoh24(tvb, offset);
1177 } else {
1178 return tvb_get_ntoh24(tvb, offset);
1182 int32_t
1183 tvb_get_int24(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1184 if (encoding & ENC_LITTLE_ENDIAN) {
1185 return tvb_get_letohi24(tvb, offset);
1186 } else {
1187 return tvb_get_ntohi24(tvb, offset);
1191 uint32_t
1192 tvb_get_uint32(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1193 if (encoding & ENC_LITTLE_ENDIAN) {
1194 return tvb_get_letohl(tvb, offset);
1195 } else {
1196 return tvb_get_ntohl(tvb, offset);
1200 int32_t
1201 tvb_get_int32(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1202 if (encoding & ENC_LITTLE_ENDIAN) {
1203 return tvb_get_letohil(tvb, offset);
1204 } else {
1205 return tvb_get_ntohil(tvb, offset);
1209 uint64_t
1210 tvb_get_uint40(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1211 if (encoding & ENC_LITTLE_ENDIAN) {
1212 return tvb_get_letoh40(tvb, offset);
1213 } else {
1214 return tvb_get_ntoh40(tvb, offset);
1218 int64_t
1219 tvb_get_int40(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1220 if (encoding & ENC_LITTLE_ENDIAN) {
1221 return tvb_get_letohi40(tvb, offset);
1222 } else {
1223 return tvb_get_ntohi40(tvb, offset);
1227 uint64_t
1228 tvb_get_uint48(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1229 if (encoding & ENC_LITTLE_ENDIAN) {
1230 return tvb_get_letoh48(tvb, offset);
1231 } else {
1232 return tvb_get_ntoh48(tvb, offset);
1236 int64_t
1237 tvb_get_int48(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1238 if (encoding & ENC_LITTLE_ENDIAN) {
1239 return tvb_get_letohi48(tvb, offset);
1240 } else {
1241 return tvb_get_ntohi48(tvb, offset);
1245 uint64_t
1246 tvb_get_uint56(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1247 if (encoding & ENC_LITTLE_ENDIAN) {
1248 return tvb_get_letoh56(tvb, offset);
1249 } else {
1250 return tvb_get_ntoh56(tvb, offset);
1254 int64_t
1255 tvb_get_int56(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1256 if (encoding & ENC_LITTLE_ENDIAN) {
1257 return tvb_get_letohi56(tvb, offset);
1258 } else {
1259 return tvb_get_ntohi56(tvb, offset);
1263 uint64_t
1264 tvb_get_uint64(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1265 if (encoding & ENC_LITTLE_ENDIAN) {
1266 return tvb_get_letoh64(tvb, offset);
1267 } else {
1268 return tvb_get_ntoh64(tvb, offset);
1272 uint64_t
1273 tvb_get_uint64_with_length(tvbuff_t *tvb, const int offset, unsigned length, const unsigned encoding)
1275 uint64_t value;
1277 switch (length) {
1279 case 1:
1280 value = tvb_get_uint8(tvb, offset);
1281 break;
1283 case 2:
1284 value = (encoding & ENC_LITTLE_ENDIAN) ? tvb_get_letohs(tvb, offset)
1285 : tvb_get_ntohs(tvb, offset);
1286 break;
1288 case 3:
1289 value = (encoding & ENC_LITTLE_ENDIAN) ? tvb_get_letoh24(tvb, offset)
1290 : tvb_get_ntoh24(tvb, offset);
1291 break;
1293 case 4:
1294 value = (encoding & ENC_LITTLE_ENDIAN) ? tvb_get_letohl(tvb, offset)
1295 : tvb_get_ntohl(tvb, offset);
1296 break;
1298 case 5:
1299 value = (encoding & ENC_LITTLE_ENDIAN) ? tvb_get_letoh40(tvb, offset)
1300 : tvb_get_ntoh40(tvb, offset);
1301 break;
1303 case 6:
1304 value = (encoding & ENC_LITTLE_ENDIAN) ? tvb_get_letoh48(tvb, offset)
1305 : tvb_get_ntoh48(tvb, offset);
1306 break;
1308 case 7:
1309 value = (encoding & ENC_LITTLE_ENDIAN) ? tvb_get_letoh56(tvb, offset)
1310 : tvb_get_ntoh56(tvb, offset);
1311 break;
1313 case 8:
1314 value = (encoding & ENC_LITTLE_ENDIAN) ? tvb_get_letoh64(tvb, offset)
1315 : tvb_get_ntoh64(tvb, offset);
1316 break;
1318 default:
1319 if (length < 1) {
1320 value = 0;
1321 } else {
1322 value = (encoding & ENC_LITTLE_ENDIAN) ? tvb_get_letoh64(tvb, offset)
1323 : tvb_get_ntoh64(tvb, offset);
1325 break;
1327 return value;
1330 int64_t
1331 tvb_get_int64(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1332 if (encoding & ENC_LITTLE_ENDIAN) {
1333 return tvb_get_letohi64(tvb, offset);
1334 } else {
1335 return tvb_get_ntohi64(tvb, offset);
1339 float
1340 tvb_get_ieee_float(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1341 if (encoding & ENC_LITTLE_ENDIAN) {
1342 return tvb_get_letohieee_float(tvb, offset);
1343 } else {
1344 return tvb_get_ntohieee_float(tvb, offset);
1348 double
1349 tvb_get_ieee_double(tvbuff_t *tvb, const int offset, const unsigned encoding) {
1350 if (encoding & ENC_LITTLE_ENDIAN) {
1351 return tvb_get_letohieee_double(tvb, offset);
1352 } else {
1353 return tvb_get_ntohieee_double(tvb, offset);
1358 * Stuff for IEEE float handling on platforms that don't have IEEE
1359 * format as the native floating-point format.
1361 * For now, we treat only the VAX as such a platform.
1363 * XXX - other non-IEEE boxes that can run UN*X include some Crays,
1364 * and possibly other machines. However, I don't know whether there
1365 * are any other machines that could run Wireshark and that don't use
1366 * IEEE format. As far as I know, all of the main current and past
1367 * commercial microprocessor families on which OSes that support
1368 * Wireshark can run use IEEE format (x86, ARM, 68k, SPARC, MIPS,
1369 * PA-RISC, Alpha, IA-64, and so on), and it appears that the official
1370 * Linux port to System/390 and zArchitecture uses IEEE format floating-
1371 * point rather than IBM hex floating-point (not a huge surprise), so
1372 * I'm not sure that leaves any 32-bit or larger UN*X or Windows boxes,
1373 * other than VAXes, that don't use IEEE format. If you're not running
1374 * UN*X or Windows, the floating-point format is probably going to be
1375 * the least of your problems in a port.
1378 #if defined(vax)
1380 #include <math.h>
/*
 * Single-precision.
 */
#define IEEE_SP_NUMBER_WIDTH	32	/* bits in number */
#define IEEE_SP_EXP_WIDTH	8	/* bits in exponent */
#define IEEE_SP_MANTISSA_WIDTH	23	/* IEEE_SP_NUMBER_WIDTH - 1 - IEEE_SP_EXP_WIDTH */

#define IEEE_SP_SIGN_MASK	0x80000000
#define IEEE_SP_EXPONENT_MASK	0x7F800000
#define IEEE_SP_MANTISSA_MASK	0x007FFFFF
#define IEEE_SP_INFINITY	IEEE_SP_EXPONENT_MASK

#define IEEE_SP_IMPLIED_BIT (1 << IEEE_SP_MANTISSA_WIDTH)
#define IEEE_SP_INFINITE ((1 << IEEE_SP_EXP_WIDTH) - 1)
#define IEEE_SP_BIAS ((1 << (IEEE_SP_EXP_WIDTH - 1)) - 1)

/*
 * Report whether the 32-bit pattern "w" encodes +0 or -0, i.e. whether
 * every bit other than the sign bit is clear.  Returns non-zero if so.
 */
static int
ieee_float_is_zero(const uint32_t w)
{
	const uint32_t magnitude_bits = w & ~IEEE_SP_SIGN_MASK;

	return magnitude_bits == 0;
}
1404 static float
1405 get_ieee_float(const uint32_t w)
1407 long sign;
1408 long exponent;
1409 long mantissa;
1411 sign = w & IEEE_SP_SIGN_MASK;
1412 exponent = w & IEEE_SP_EXPONENT_MASK;
1413 mantissa = w & IEEE_SP_MANTISSA_MASK;
1415 if (ieee_float_is_zero(w)) {
1416 /* number is zero, unnormalized, or not-a-number */
1417 return 0.0;
1419 #if 0
1421 * XXX - how to handle this?
1423 if (IEEE_SP_INFINITY == exponent) {
1425 * number is positive or negative infinity, or a special value
1427 return (sign? MINUS_INFINITY: PLUS_INFINITY);
1429 #endif
1431 exponent = ((exponent >> IEEE_SP_MANTISSA_WIDTH) - IEEE_SP_BIAS) -
1432 IEEE_SP_MANTISSA_WIDTH;
1433 mantissa |= IEEE_SP_IMPLIED_BIT;
1435 if (sign)
1436 return -mantissa * pow(2, exponent);
1437 else
1438 return mantissa * pow(2, exponent);
/*
 * Double-precision.
 * We assume that if you don't have IEEE floating-point, you have a
 * compiler that understands 64-bit integral quantities.
 */
#define IEEE_DP_NUMBER_WIDTH	64	/* bits in number */
#define IEEE_DP_EXP_WIDTH	11	/* bits in exponent */
#define IEEE_DP_MANTISSA_WIDTH	52	/* IEEE_DP_NUMBER_WIDTH - 1 - IEEE_DP_EXP_WIDTH */

#define IEEE_DP_SIGN_MASK	INT64_C(0x8000000000000000)
#define IEEE_DP_EXPONENT_MASK	INT64_C(0x7FF0000000000000)
#define IEEE_DP_MANTISSA_MASK	INT64_C(0x000FFFFFFFFFFFFF)
#define IEEE_DP_INFINITY	IEEE_DP_EXPONENT_MASK

#define IEEE_DP_IMPLIED_BIT (INT64_C(1) << IEEE_DP_MANTISSA_WIDTH)
#define IEEE_DP_INFINITE ((1 << IEEE_DP_EXP_WIDTH) - 1)
#define IEEE_DP_BIAS ((1 << (IEEE_DP_EXP_WIDTH - 1)) - 1)

/*
 * Report whether the 64-bit pattern "w" encodes +0 or -0, i.e. whether
 * every bit other than the sign bit is clear.  Returns non-zero if so.
 *
 * The double-precision sign mask must be used here: masking with the
 * single-precision mask only examines the low 31 bits, which makes
 * values such as 1.0 (0x3FF0000000000000) look like zero.
 */
static int
ieee_double_is_zero(const uint64_t w)
{
	return ((w & ~(uint64_t)IEEE_DP_SIGN_MASK) == 0);
}
1465 static double
1466 get_ieee_double(const uint64_t w)
1468 int64_t sign;
1469 int64_t exponent;
1470 int64_t mantissa;
1472 sign = w & IEEE_DP_SIGN_MASK;
1473 exponent = w & IEEE_DP_EXPONENT_MASK;
1474 mantissa = w & IEEE_DP_MANTISSA_MASK;
1476 if (ieee_double_is_zero(w)) {
1477 /* number is zero, unnormalized, or not-a-number */
1478 return 0.0;
1480 #if 0
1482 * XXX - how to handle this?
1484 if (IEEE_DP_INFINITY == exponent) {
1486 * number is positive or negative infinity, or a special value
1488 return (sign? MINUS_INFINITY: PLUS_INFINITY);
1490 #endif
1492 exponent = ((exponent >> IEEE_DP_MANTISSA_WIDTH) - IEEE_DP_BIAS) -
1493 IEEE_DP_MANTISSA_WIDTH;
1494 mantissa |= IEEE_DP_IMPLIED_BIT;
1496 if (sign)
1497 return -mantissa * pow(2, exponent);
1498 else
1499 return mantissa * pow(2, exponent);
1501 #endif
1504 * Fetches an IEEE single-precision floating-point number, in
1505 * big-endian form, and returns a "float".
1507 * XXX - should this be "double", in case there are IEEE single-
1508 * precision numbers that won't fit in some platform's native
1509 * "float" format?
1511 float
1512 tvb_get_ntohieee_float(tvbuff_t *tvb, const int offset)
1514 #if defined(vax)
1515 return get_ieee_float(tvb_get_ntohl(tvb, offset));
1516 #else
1517 union {
1518 float f;
1519 uint32_t w;
1520 } ieee_fp_union;
1522 ieee_fp_union.w = tvb_get_ntohl(tvb, offset);
1523 return ieee_fp_union.f;
1524 #endif
1528 * Fetches an IEEE double-precision floating-point number, in
1529 * big-endian form, and returns a "double".
1531 double
1532 tvb_get_ntohieee_double(tvbuff_t *tvb, const int offset)
1534 #if defined(vax)
1535 union {
1536 uint32_t w[2];
1537 uint64_t dw;
1538 } ieee_fp_union;
1539 #else
1540 union {
1541 double d;
1542 uint32_t w[2];
1543 } ieee_fp_union;
1544 #endif
1546 #if G_BYTE_ORDER == G_BIG_ENDIAN
1547 ieee_fp_union.w[0] = tvb_get_ntohl(tvb, offset);
1548 ieee_fp_union.w[1] = tvb_get_ntohl(tvb, offset+4);
1549 #else
1550 ieee_fp_union.w[0] = tvb_get_ntohl(tvb, offset+4);
1551 ieee_fp_union.w[1] = tvb_get_ntohl(tvb, offset);
1552 #endif
1553 #if defined(vax)
1554 return get_ieee_double(ieee_fp_union.dw);
1555 #else
1556 return ieee_fp_union.d;
1557 #endif
1560 uint16_t
1561 tvb_get_letohs(tvbuff_t *tvb, const int offset)
1563 const uint8_t *ptr;
1565 ptr = fast_ensure_contiguous(tvb, offset, 2);
1566 return pletoh16(ptr);
1569 int16_t
1570 tvb_get_letohis(tvbuff_t *tvb, const int offset)
1572 const uint8_t *ptr;
1574 ptr = fast_ensure_contiguous(tvb, offset, 2);
1575 return pletoh16(ptr);
1578 uint32_t
1579 tvb_get_letoh24(tvbuff_t *tvb, const int offset)
1581 const uint8_t *ptr;
1583 ptr = fast_ensure_contiguous(tvb, offset, 3);
1584 return pletoh24(ptr);
1587 int32_t
1588 tvb_get_letohi24(tvbuff_t *tvb, const int offset)
1590 uint32_t ret;
1592 ret = ws_sign_ext32(tvb_get_letoh24(tvb, offset), 24);
1594 return (int32_t)ret;
1597 uint32_t
1598 tvb_get_letohl(tvbuff_t *tvb, const int offset)
1600 const uint8_t *ptr;
1602 ptr = fast_ensure_contiguous(tvb, offset, 4);
1603 return pletoh32(ptr);
1606 int32_t
1607 tvb_get_letohil(tvbuff_t *tvb, const int offset)
1609 const uint8_t *ptr;
1611 ptr = fast_ensure_contiguous(tvb, offset, 4);
1612 return pletoh32(ptr);
1615 uint64_t
1616 tvb_get_letoh40(tvbuff_t *tvb, const int offset)
1618 const uint8_t *ptr;
1620 ptr = fast_ensure_contiguous(tvb, offset, 5);
1621 return pletoh40(ptr);
1624 int64_t
1625 tvb_get_letohi40(tvbuff_t *tvb, const int offset)
1627 uint64_t ret;
1629 ret = ws_sign_ext64(tvb_get_letoh40(tvb, offset), 40);
1631 return (int64_t)ret;
1634 uint64_t
1635 tvb_get_letoh48(tvbuff_t *tvb, const int offset)
1637 const uint8_t *ptr;
1639 ptr = fast_ensure_contiguous(tvb, offset, 6);
1640 return pletoh48(ptr);
1643 int64_t
1644 tvb_get_letohi48(tvbuff_t *tvb, const int offset)
1646 uint64_t ret;
1648 ret = ws_sign_ext64(tvb_get_letoh48(tvb, offset), 48);
1650 return (int64_t)ret;
1653 uint64_t
1654 tvb_get_letoh56(tvbuff_t *tvb, const int offset)
1656 const uint8_t *ptr;
1658 ptr = fast_ensure_contiguous(tvb, offset, 7);
1659 return pletoh56(ptr);
1662 int64_t
1663 tvb_get_letohi56(tvbuff_t *tvb, const int offset)
1665 uint64_t ret;
1667 ret = ws_sign_ext64(tvb_get_letoh56(tvb, offset), 56);
1669 return (int64_t)ret;
1672 uint64_t
1673 tvb_get_letoh64(tvbuff_t *tvb, const int offset)
1675 const uint8_t *ptr;
1677 ptr = fast_ensure_contiguous(tvb, offset, 8);
1678 return pletoh64(ptr);
1681 int64_t
1682 tvb_get_letohi64(tvbuff_t *tvb, const int offset)
1684 const uint8_t *ptr;
1686 ptr = fast_ensure_contiguous(tvb, offset, 8);
1687 return pletoh64(ptr);
1691 * Fetches an IEEE single-precision floating-point number, in
1692 * little-endian form, and returns a "float".
1694 * XXX - should this be "double", in case there are IEEE single-
1695 * precision numbers that won't fit in some platform's native
1696 * "float" format?
1698 float
1699 tvb_get_letohieee_float(tvbuff_t *tvb, const int offset)
1701 #if defined(vax)
1702 return get_ieee_float(tvb_get_letohl(tvb, offset));
1703 #else
1704 union {
1705 float f;
1706 uint32_t w;
1707 } ieee_fp_union;
1709 ieee_fp_union.w = tvb_get_letohl(tvb, offset);
1710 return ieee_fp_union.f;
1711 #endif
1715 * Fetches an IEEE double-precision floating-point number, in
1716 * little-endian form, and returns a "double".
1718 double
1719 tvb_get_letohieee_double(tvbuff_t *tvb, const int offset)
1721 #if defined(vax)
1722 union {
1723 uint32_t w[2];
1724 uint64_t dw;
1725 } ieee_fp_union;
1726 #else
1727 union {
1728 double d;
1729 uint32_t w[2];
1730 } ieee_fp_union;
1731 #endif
1733 #if G_BYTE_ORDER == G_BIG_ENDIAN
1734 ieee_fp_union.w[0] = tvb_get_letohl(tvb, offset+4);
1735 ieee_fp_union.w[1] = tvb_get_letohl(tvb, offset);
1736 #else
1737 ieee_fp_union.w[0] = tvb_get_letohl(tvb, offset);
1738 ieee_fp_union.w[1] = tvb_get_letohl(tvb, offset+4);
1739 #endif
1740 #if defined(vax)
1741 return get_ieee_double(ieee_fp_union.dw);
1742 #else
1743 return ieee_fp_union.d;
1744 #endif
1747 /* This function is a slight misnomer. It accepts all encodings that are
1748 * ASCII "enough", which means encodings that are the same as US-ASCII
1749 * for textual representations of dates and hex bytes; i.e., the same
1750 * for the hex digits and Z (in practice, all alphanumerics), and the
1751 * four separators ':' '-' '.' and ' '
1752 * That means that any encoding that keeps the ISO/IEC 646 invariant
1753 * characters the same (including the T.61 8 bit encoding and multibyte
1754 * encodings like EUC-KR and GB18030) are OK, even if they replace characters
1755 * like '$' '#' and '\' with national variants, but not encodings like UTF-16
1756 * that include extra null bytes.
1757 * For our current purposes, the unpacked GSM 7-bit default alphabet (but not
1758 * all National Language Shift Tables) also satisfies this requirement, but
1759 * note that it does *not* keep all ISO/IEC 646 invariant characters the same.
1760 * If this internal function gets used for additional purposes than currently,
1761 * the set of encodings that it accepts could change.
1762 * */
1763 static inline void
1764 validate_single_byte_ascii_encoding(const unsigned encoding)
1766 const unsigned enc = encoding & ~ENC_CHARENCODING_MASK;
1768 switch (enc) {
1769 case ENC_UTF_16:
1770 case ENC_UCS_2:
1771 case ENC_UCS_4:
1772 case ENC_3GPP_TS_23_038_7BITS_PACKED:
1773 case ENC_ASCII_7BITS:
1774 case ENC_EBCDIC:
1775 case ENC_EBCDIC_CP037:
1776 case ENC_EBCDIC_CP500:
1777 case ENC_BCD_DIGITS_0_9:
1778 case ENC_KEYPAD_ABC_TBCD:
1779 case ENC_KEYPAD_BC_TBCD:
1780 case ENC_ETSI_TS_102_221_ANNEX_A:
1781 case ENC_APN_STR:
1782 case ENC_DECT_STANDARD_4BITS_TBCD:
1783 REPORT_DISSECTOR_BUG("Invalid string encoding type passed to tvb_get_string_XXX");
1784 break;
1785 default:
1786 break;
1788 /* make sure something valid was set */
1789 if (enc == 0)
1790 REPORT_DISSECTOR_BUG("No string encoding type passed to tvb_get_string_XXX");
1793 GByteArray*
1794 tvb_get_string_bytes(tvbuff_t *tvb, const int offset, const int length,
1795 const unsigned encoding, GByteArray *bytes, int *endoff)
1797 char *ptr;
1798 const char *begin;
1799 const char *end = NULL;
1800 GByteArray *retval = NULL;
1802 validate_single_byte_ascii_encoding(encoding);
1804 ptr = (char*) tvb_get_raw_string(NULL, tvb, offset, length);
1805 begin = ptr;
1807 if (endoff) *endoff = offset;
1809 while (*begin == ' ') begin++;
1811 if (*begin && bytes) {
1812 if (hex_str_to_bytes_encoding(begin, bytes, &end, encoding, false)) {
1813 if (bytes->len > 0) {
1814 if (endoff) *endoff = offset + (int)(end - ptr);
1815 retval = bytes;
1820 wmem_free(NULL, ptr);
1822 return retval;
/*
 * Map a three-letter English month abbreviation ("Jan" .. "Dec") to a
 * zero-based month number stored in "*tm_mon".
 *
 * The comparison is case-sensitive and covers 4 bytes, i.e. the three
 * letters plus the terminating NUL, so "name" must point to at least
 * 4 readable bytes.  Returns true on a match; on failure "*tm_mon" is
 * left untouched and false is returned.
 */
static bool
parse_month_name(const char *name, int *tm_mon)
{
	static const char month_abbrevs[][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	const int n_months = (int)(sizeof month_abbrevs / sizeof month_abbrevs[0]);
	int idx = 0;

	while (idx < n_months && memcmp(month_abbrevs[idx], name, 4) != 0)
		idx++;

	if (idx == n_months)
		return false;

	*tm_mon = idx;
	return true;
}
1840 * Is the character a WSP character, as per RFC 5234? (space or tab).
1842 #define IS_WSP(c) ((c) == ' ' || (c) == '\t')
1844 /* support hex-encoded time values? */
1845 nstime_t*
1846 tvb_get_string_time(tvbuff_t *tvb, const int offset, const int length,
1847 const unsigned encoding, nstime_t *ns, int *endoff)
1849 char *begin;
1850 const char *ptr;
1851 const char *end = NULL;
1852 int num_chars = 0;
1853 int utc_offset = 0;
1855 validate_single_byte_ascii_encoding(encoding);
1857 DISSECTOR_ASSERT(ns);
1859 begin = (char*) tvb_get_raw_string(NULL, tvb, offset, length);
1860 ptr = begin;
1862 while (IS_WSP(*ptr))
1863 ptr++;
1865 if (*ptr) {
1866 if ((encoding & ENC_ISO_8601_DATE_TIME) == ENC_ISO_8601_DATE_TIME) {
1867 if (!(end = iso8601_to_nstime(ns, ptr, ISO8601_DATETIME))) {
1870 goto fail;
1872 } else if ((encoding & ENC_ISO_8601_DATE_TIME_BASIC) == ENC_ISO_8601_DATE_TIME_BASIC) {
1873 if (!(end = iso8601_to_nstime(ns, ptr, ISO8601_DATETIME_BASIC))) {
1876 goto fail;
1878 } else {
1879 struct tm tm;
1881 memset(&tm, 0, sizeof(tm));
1882 tm.tm_isdst = -1;
1883 ns->secs = 0;
1884 ns->nsecs = 0;
1886 /* note: sscanf is known to be inconsistent across platforms with respect
1887 to whether a %n is counted as a return value or not, so we have to use
1888 '>=' a lot */
1889 if (encoding & ENC_ISO_8601_DATE) {
1890 /* 2014-04-07 */
1891 if (sscanf(ptr, "%d-%d-%d%n",
1892 &tm.tm_year,
1893 &tm.tm_mon,
1894 &tm.tm_mday,
1895 &num_chars) >= 3)
1897 end = ptr + num_chars;
1898 tm.tm_mon--;
1899 if (tm.tm_year > 1900) tm.tm_year -= 1900;
1900 } else {
1901 goto fail;
1904 else if (encoding & ENC_ISO_8601_TIME) {
1905 /* 2014-04-07 */
1906 if (sscanf(ptr, "%d:%d:%d%n",
1907 &tm.tm_hour,
1908 &tm.tm_min,
1909 &tm.tm_sec,
1910 &num_chars) >= 2)
1912 /* what should we do about day/month/year? */
1913 /* setting it to "now" for now */
1914 time_t time_now = time(NULL);
1915 struct tm *tm_now = gmtime(&time_now);
1916 if (tm_now != NULL) {
1917 tm.tm_year = tm_now->tm_year;
1918 tm.tm_mon = tm_now->tm_mon;
1919 tm.tm_mday = tm_now->tm_mday;
1920 } else {
1921 /* The second before the Epoch */
1922 tm.tm_year = 69;
1923 tm.tm_mon = 12;
1924 tm.tm_mday = 31;
1926 end = ptr + num_chars;
1927 } else {
1928 goto fail;
1931 else if (encoding & ENC_IMF_DATE_TIME) {
1933 * Match [dow,] day month year hh:mm[:ss] with
1934 * two-digit years (RFC 822) or four-digit
1935 * years (RFCs 1123, 2822, 5822). Skip
1936 * the day of week since it is locale
1937 * dependent and does not affect the resulting
1938 * date anyway.
1940 if (g_ascii_isalpha(ptr[0]) && g_ascii_isalpha(ptr[1]) && g_ascii_isalpha(ptr[2]) && ptr[3] == ',')
1941 ptr += 4; /* Skip day of week. */
1944 * Parse the day-of-month and month
1945 * name.
1947 char month_name[4] = { 0 };
1949 if (sscanf(ptr, "%d %3s%n",
1950 &tm.tm_mday,
1951 month_name,
1952 &num_chars) < 2)
1954 /* Not matched. */
1955 goto fail;
1957 if (!parse_month_name(month_name, &tm.tm_mon)) {
1958 goto fail;
1960 ptr += num_chars;
1961 while (IS_WSP(*ptr))
1962 ptr++;
1965 * Scan the year. Treat 2-digit years
1966 * differently from 4-digit years.
1968 uint32_t year;
1969 const char *yearendp;
1971 if (!ws_strtou32(ptr, &yearendp, &year)) {
1972 goto fail;
1974 if (!IS_WSP(*yearendp)) {
1975 /* Not followed by WSP. */
1976 goto fail;
1978 if (yearendp - ptr < 2) {
1979 /* 1-digit year. Error. */
1980 goto fail;
1982 if (yearendp - ptr == 2) {
1984 * 2-digit year.
1986 * Match RFC 2822/RFC 5322 behavior;
1987 * add 2000 to years from 0 to
1988 * 49 and 1900 to uears from 50
1989 * to 99.
1991 if (year <= 49) {
1992 year += 2000;
1993 } else {
1994 year += 1900;
1996 } else if (yearendp - ptr == 3) {
1998 * 3-digit year.
2000 * Match RFC 2822/RFC 5322 behavior;
2001 * add 1900 to the year.
2003 year += 1900;
2005 tm.tm_year = year - 1900;
2006 ptr = yearendp;
2007 while (IS_WSP(*ptr))
2008 ptr++;
2010 /* Parse the time. */
2011 if (sscanf(ptr, "%d:%d%n:%d%n",
2012 &tm.tm_hour,
2013 &tm.tm_min,
2014 &num_chars,
2015 &tm.tm_sec,
2016 &num_chars) < 2)
2018 goto fail;
2020 ptr += num_chars;
2021 while (IS_WSP(*ptr))
2022 ptr++;
2025 * Parse the time zone.
2026 * Check for obs-zone values first.
2028 if (g_ascii_strncasecmp(ptr, "UT", 2) == 0)
2030 ptr += 2;
2032 else if (g_ascii_strncasecmp(ptr, "GMT", 3) == 0)
2034 ptr += 3;
2036 else
2038 char sign;
2039 int off_hr;
2040 int off_min;
2042 if (sscanf(ptr, "%c%2d%2d%n",
2043 &sign,
2044 &off_hr,
2045 &off_min,
2046 &num_chars) < 3)
2048 goto fail;
2052 * If sign is '+', there's a positive
2053 * UTC offset.
2055 * If sign is '-', there's a negative
2056 * UTC offset.
2058 * Otherwise, that's an invalid UTC
2059 * offset string.
2061 if (sign == '+')
2062 utc_offset += (off_hr * 3600) + (off_min * 60);
2063 else if (sign == '-')
2064 utc_offset -= (off_hr * 3600) + (off_min * 60);
2065 else {
2066 /* Sign must be + or - */
2067 goto fail;
2069 ptr += num_chars;
2071 end = ptr;
2073 ns->secs = mktime_utc(&tm);
2074 if (ns->secs == (time_t)-1 && errno != 0) {
2075 goto fail;
2077 ns->secs += utc_offset;
2079 } else {
2080 /* Empty string */
2081 goto fail;
2084 if (endoff)
2085 *endoff = (int)(offset + (end - begin));
2086 wmem_free(NULL, begin);
2087 return ns;
2089 fail:
2090 wmem_free(NULL, begin);
2091 return NULL;
2094 /* Fetch an IPv4 address, in network byte order.
2095 * We do *not* convert them to host byte order; we leave them in
2096 * network byte order. */
2097 uint32_t
2098 tvb_get_ipv4(tvbuff_t *tvb, const int offset)
2100 const uint8_t *ptr;
2101 uint32_t addr;
2103 ptr = fast_ensure_contiguous(tvb, offset, sizeof(uint32_t));
2104 memcpy(&addr, ptr, sizeof addr);
2105 return addr;
2108 /* Fetch an IPv6 address. */
2109 void
2110 tvb_get_ipv6(tvbuff_t *tvb, const int offset, ws_in6_addr *addr)
2112 const uint8_t *ptr;
2114 ptr = ensure_contiguous(tvb, offset, sizeof(*addr));
2115 memcpy(addr, ptr, sizeof *addr);
2119 * These routines return the length of the address in bytes on success
2120 * and -1 if the prefix length is too long.
2123 tvb_get_ipv4_addr_with_prefix_len(tvbuff_t *tvb, int offset, ws_in4_addr *addr,
2124 uint32_t prefix_len)
2126 uint8_t addr_len;
2128 if (prefix_len > 32)
2129 return -1;
2131 addr_len = (prefix_len + 7) / 8;
2132 *addr = 0;
2133 tvb_memcpy(tvb, addr, offset, addr_len);
2134 if (prefix_len % 8)
2135 ((uint8_t*)addr)[addr_len - 1] &= ((0xff00 >> (prefix_len % 8)) & 0xff);
2136 return addr_len;
2140 * These routines return the length of the address in bytes on success
2141 * and -1 if the prefix length is too long.
2144 tvb_get_ipv6_addr_with_prefix_len(tvbuff_t *tvb, int offset, ws_in6_addr *addr,
2145 uint32_t prefix_len)
2147 uint32_t addr_len;
2149 if (prefix_len > 128)
2150 return -1;
2152 addr_len = (prefix_len + 7) / 8;
2153 memset(addr->bytes, 0, 16);
2154 tvb_memcpy(tvb, addr->bytes, offset, addr_len);
2155 if (prefix_len % 8) {
2156 addr->bytes[addr_len - 1] &=
2157 ((0xff00 >> (prefix_len % 8)) & 0xff);
2160 return addr_len;
2163 /* Fetch a GUID. */
2164 void
2165 tvb_get_ntohguid(tvbuff_t *tvb, const int offset, e_guid_t *guid)
2167 const uint8_t *ptr = ensure_contiguous(tvb, offset, GUID_LEN);
2169 guid->data1 = pntoh32(ptr + 0);
2170 guid->data2 = pntoh16(ptr + 4);
2171 guid->data3 = pntoh16(ptr + 6);
2172 memcpy(guid->data4, ptr + 8, sizeof guid->data4);
2175 void
2176 tvb_get_letohguid(tvbuff_t *tvb, const int offset, e_guid_t *guid)
2178 const uint8_t *ptr = ensure_contiguous(tvb, offset, GUID_LEN);
2180 guid->data1 = pletoh32(ptr + 0);
2181 guid->data2 = pletoh16(ptr + 4);
2182 guid->data3 = pletoh16(ptr + 6);
2183 memcpy(guid->data4, ptr + 8, sizeof guid->data4);
2187 * NOTE: to support code written when proto_tree_add_item() took a
2188 * bool as its last argument, with false meaning "big-endian"
2189 * and true meaning "little-endian", we treat any non-zero value of
2190 * "encoding" as meaning "little-endian".
2192 void
2193 tvb_get_guid(tvbuff_t *tvb, const int offset, e_guid_t *guid, const unsigned encoding)
2195 if (encoding) {
2196 tvb_get_letohguid(tvb, offset, guid);
2197 } else {
2198 tvb_get_ntohguid(tvb, offset, guid);
/*
 * bit_mask8[n] has the n least significant bits set, for n = 0 .. 8.
 */
static const uint8_t bit_mask8[] = {
	0x00, 0x01, 0x03, 0x07,
	0x0f, 0x1f, 0x3f, 0x7f,
	0xff
};
2215 /* Get a variable amount of bits
2217 * Return a byte array with bit limited data.
2218 * When encoding is ENC_BIG_ENDIAN, the data is aligned to the left.
2219 * When encoding is ENC_LITTLE_ENDIAN, the data is aligned to the right.
2221 uint8_t *
2222 tvb_get_bits_array(wmem_allocator_t *scope, tvbuff_t *tvb, const int bit_offset,
2223 size_t no_of_bits, size_t *data_length, const unsigned encoding)
2225 tvbuff_t *sub_tvb;
2226 if (encoding & ENC_LITTLE_ENDIAN) {
2227 sub_tvb = tvb_new_octet_right_aligned(tvb, bit_offset, (int32_t) no_of_bits);
2228 } else {
2229 sub_tvb = tvb_new_octet_aligned(tvb, bit_offset, (int32_t) no_of_bits);
2231 *data_length = tvb_reported_length(sub_tvb);
2232 return (uint8_t*)tvb_memdup(scope, sub_tvb, 0, *data_length);
2235 /* Get 1 - 8 bits */
2236 uint8_t
2237 tvb_get_bits8(tvbuff_t *tvb, unsigned bit_offset, const int no_of_bits)
2239 return (uint8_t)_tvb_get_bits64(tvb, bit_offset, no_of_bits);
2242 /* Get 1 - 16 bits */
2243 uint16_t
2244 tvb_get_bits16(tvbuff_t *tvb, unsigned bit_offset, const int no_of_bits, const unsigned encoding)
2246 return (uint16_t)tvb_get_bits64(tvb, bit_offset, no_of_bits, encoding);
2249 /* Get 1 - 32 bits */
2250 uint32_t
2251 tvb_get_bits32(tvbuff_t *tvb, unsigned bit_offset, const int no_of_bits, const unsigned encoding)
2253 return (uint32_t)tvb_get_bits64(tvb, bit_offset, no_of_bits, encoding);
2256 /* Get 1 - 64 bits */
2257 uint64_t
2258 tvb_get_bits64(tvbuff_t *tvb, unsigned bit_offset, const int no_of_bits, const unsigned encoding)
2260 /* encoding determines bit numbering within octet array */
2261 if (encoding & ENC_LITTLE_ENDIAN) {
2262 return _tvb_get_bits64_le(tvb, bit_offset, no_of_bits);
2263 } else {
2264 return _tvb_get_bits64(tvb, bit_offset, no_of_bits);
2269 * This function will dissect a sequence of bits that does not need to be byte aligned; the bits
2270 * set will be shown in the tree as ..10 10.. and the integer value returned if return_value is set.
2271 * Offset should be given in bits from the start of the tvb.
2272 * Bits within octet are numbered from MSB (0) to LSB (7). Bit at bit_offset is return value most significant bit.
2273 * The function tolerates requests for more than 64 bits, but will only return the least significant 64 bits.
2275 static uint64_t
2276 _tvb_get_bits64(tvbuff_t *tvb, unsigned bit_offset, const int total_no_of_bits)
2278 uint64_t value;
2279 unsigned octet_offset = bit_offset >> 3;
2280 uint8_t required_bits_in_first_octet = 8 - (bit_offset % 8);
2282 if(required_bits_in_first_octet > total_no_of_bits)
2284 /* the required bits don't extend to the end of the first octet */
2285 uint8_t right_shift = required_bits_in_first_octet - total_no_of_bits;
2286 value = (tvb_get_uint8(tvb, octet_offset) >> right_shift) & bit_mask8[total_no_of_bits % 8];
2288 else
2290 uint8_t remaining_bit_length = total_no_of_bits;
2292 /* get the bits up to the first octet boundary */
2293 value = 0;
2294 required_bits_in_first_octet %= 8;
2295 if(required_bits_in_first_octet != 0)
2297 value = tvb_get_uint8(tvb, octet_offset) & bit_mask8[required_bits_in_first_octet];
2298 remaining_bit_length -= required_bits_in_first_octet;
2299 octet_offset ++;
2301 /* take the biggest words, shorts or octets that we can */
2302 while (remaining_bit_length > 7)
2304 switch (remaining_bit_length >> 4)
2306 case 0:
2307 /* 8 - 15 bits. (note that 0 - 7 would have dropped out of the while() loop) */
2308 value <<= 8;
2309 value += tvb_get_uint8(tvb, octet_offset);
2310 remaining_bit_length -= 8;
2311 octet_offset ++;
2312 break;
2314 case 1:
2315 /* 16 - 31 bits */
2316 value <<= 16;
2317 value += tvb_get_ntohs(tvb, octet_offset);
2318 remaining_bit_length -= 16;
2319 octet_offset += 2;
2320 break;
2322 case 2:
2323 case 3:
2324 /* 32 - 63 bits */
2325 value <<= 32;
2326 value += tvb_get_ntohl(tvb, octet_offset);
2327 remaining_bit_length -= 32;
2328 octet_offset += 4;
2329 break;
2331 default:
2332 /* 64 bits (or more???) */
2333 value = tvb_get_ntoh64(tvb, octet_offset);
2334 remaining_bit_length -= 64;
2335 octet_offset += 8;
2336 break;
2339 /* get bits from any partial octet at the tail */
2340 if(remaining_bit_length)
2342 value <<= remaining_bit_length;
2343 value += (tvb_get_uint8(tvb, octet_offset) >> (8 - remaining_bit_length));
2346 return value;
2350 * Offset should be given in bits from the start of the tvb.
2351 * Bits within octet are numbered from LSB (0) to MSB (7). Bit at bit_offset is return value least significant bit.
2352 * The function tolerates requests for more than 64 bits, but will only return the least significant 64 bits.
2354 static uint64_t
2355 _tvb_get_bits64_le(tvbuff_t *tvb, unsigned bit_offset, const int total_no_of_bits)
2357 uint64_t value = 0;
2358 unsigned octet_offset = bit_offset / 8;
2359 int remaining_bits = total_no_of_bits;
2360 int shift = 0;
2362 if (remaining_bits > 64)
2364 remaining_bits = 64;
2367 if (bit_offset % 8)
2369 /* not aligned, extract bits from first octet */
2370 shift = 8 - (bit_offset % 8);
2371 value = tvb_get_uint8(tvb, octet_offset) >> (bit_offset % 8);
2372 if (shift > remaining_bits)
2374 /* keep only the requested bits */
2375 value &= (UINT64_C(1) << remaining_bits) - 1;
2376 remaining_bits = 0;
2378 else
2380 remaining_bits -= shift;
2382 octet_offset++;
2385 while (remaining_bits > 0)
2387 /* take the biggest words, shorts or octets that we can */
2388 if (remaining_bits >= 32)
2390 value |= ((uint64_t)tvb_get_letohl(tvb, octet_offset) << shift);
2391 shift += 32;
2392 remaining_bits -= 32;
2393 octet_offset += 4;
2395 else if (remaining_bits >= 16)
2397 value |= ((uint64_t)tvb_get_letohs(tvb, octet_offset) << shift);
2398 shift += 16;
2399 remaining_bits -= 16;
2400 octet_offset += 2;
2402 else if (remaining_bits >= 8)
2404 value |= ((uint64_t)tvb_get_uint8(tvb, octet_offset) << shift);
2405 shift += 8;
2406 remaining_bits -= 8;
2407 octet_offset += 1;
2409 else
2411 unsigned mask = (1 << remaining_bits) - 1;
2412 value |= (((uint64_t)tvb_get_uint8(tvb, octet_offset) & mask) << shift);
2413 shift += remaining_bits;
2414 remaining_bits = 0;
2415 octet_offset += 1;
2418 return value;
2421 /* Get 1 - 32 bits (should be deprecated as same as tvb_get_bits32??) */
2422 uint32_t
2423 tvb_get_bits(tvbuff_t *tvb, const unsigned bit_offset, const int no_of_bits, const unsigned encoding)
2425 return (uint32_t)tvb_get_bits64(tvb, bit_offset, no_of_bits, encoding);
2428 static int
2429 tvb_find_uint8_generic(tvbuff_t *tvb, unsigned abs_offset, unsigned limit, uint8_t needle)
2431 const uint8_t *ptr;
2432 const uint8_t *result;
2434 ptr = ensure_contiguous(tvb, abs_offset, limit); /* tvb_get_ptr() */
2435 if (!ptr)
2436 return -1;
2438 result = (const uint8_t *) memchr(ptr, needle, limit);
2439 if (!result)
2440 return -1;
2442 return (int) ((result - ptr) + abs_offset);
2445 /* Find first occurrence of needle in tvbuff, starting at offset. Searches
2446 * at most maxlength number of bytes; if maxlength is -1, searches to
2447 * end of tvbuff.
2448 * Returns the offset of the found needle, or -1 if not found.
2449 * Will not throw an exception, even if maxlength exceeds boundary of tvbuff;
2450 * in that case, -1 will be returned if the boundary is reached before
2451 * finding needle. */
2453 tvb_find_uint8(tvbuff_t *tvb, const int offset, const int maxlength, const uint8_t needle)
2455 const uint8_t *result;
2456 unsigned abs_offset = 0;
2457 unsigned limit = 0;
2458 int exception;
2460 DISSECTOR_ASSERT(tvb && tvb->initialized);
2462 exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &limit);
2463 if (exception)
2464 THROW(exception);
2466 /* Only search to end of tvbuff, w/o throwing exception. */
2467 if (maxlength >= 0 && limit > (unsigned) maxlength) {
2468 /* Maximum length doesn't go past end of tvbuff; search
2469 to that value. */
2470 limit = (unsigned) maxlength;
2473 /* If we have real data, perform our search now. */
2474 if (tvb->real_data) {
2475 result = (const uint8_t *)memchr(tvb->real_data + abs_offset, needle, limit);
2476 if (result == NULL) {
2477 return -1;
2479 else {
2480 return (int) (result - tvb->real_data);
2484 if (tvb->ops->tvb_find_uint8)
2485 return tvb->ops->tvb_find_uint8(tvb, abs_offset, limit, needle);
2487 return tvb_find_uint8_generic(tvb, offset, limit, needle);
2490 /* Same as tvb_find_uint8() with 16bit needle. */
2492 tvb_find_uint16(tvbuff_t *tvb, const int offset, const int maxlength,
2493 const uint16_t needle)
2495 unsigned abs_offset = 0;
2496 unsigned limit = 0;
2497 int exception;
2499 exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &limit);
2500 if (exception)
2501 THROW(exception);
2503 /* Only search to end of tvbuff, w/o throwing exception. */
2504 if (maxlength >= 0 && limit > (unsigned) maxlength) {
2505 /* Maximum length doesn't go past end of tvbuff; search
2506 to that value. */
2507 limit = (unsigned) maxlength;
2510 const uint8_t needle1 = ((needle & 0xFF00) >> 8);
2511 const uint8_t needle2 = ((needle & 0x00FF) >> 0);
2512 unsigned searched_bytes = 0;
2513 unsigned pos = abs_offset;
2515 do {
2516 int offset1 =
2517 tvb_find_uint8(tvb, pos, limit - searched_bytes, needle1);
2518 int offset2 = -1;
2520 if (offset1 == -1) {
2521 return -1;
2524 searched_bytes = (unsigned)offset1 - abs_offset + 1;
2526 if (searched_bytes >= limit) {
2527 return -1;
2530 offset2 = tvb_find_uint8(tvb, offset1 + 1, 1, needle2);
2532 searched_bytes += 1;
2534 if (offset2 != -1) {
2535 if (searched_bytes > limit) {
2536 return -1;
2538 return offset1;
2541 pos = offset1 + 1;
2542 } while (searched_bytes < limit);
2544 return -1;
2547 static inline int
2548 tvb_ws_mempbrk_uint8_generic(tvbuff_t *tvb, unsigned abs_offset, unsigned limit, const ws_mempbrk_pattern* pattern, unsigned char *found_needle)
2550 const uint8_t *ptr;
2551 const uint8_t *result;
2553 ptr = ensure_contiguous(tvb, abs_offset, limit); /* tvb_get_ptr */
2554 if (!ptr)
2555 return -1;
2557 result = ws_mempbrk_exec(ptr, limit, pattern, found_needle);
2558 if (!result)
2559 return -1;
2561 return (int) ((result - ptr) + abs_offset);
2565 /* Find first occurrence of any of the pattern chars in tvbuff, starting at offset.
2566 * Searches at most maxlength number of bytes; if maxlength is -1, searches
2567 * to end of tvbuff.
2568 * Returns the offset of the found needle, or -1 if not found.
2569 * Will not throw an exception, even if maxlength exceeds boundary of tvbuff;
2570 * in that case, -1 will be returned if the boundary is reached before
2571 * finding needle. */
2573 tvb_ws_mempbrk_pattern_uint8(tvbuff_t *tvb, const int offset, const int maxlength,
2574 const ws_mempbrk_pattern* pattern, unsigned char *found_needle)
2576 const uint8_t *result;
2577 unsigned abs_offset = 0;
2578 unsigned limit = 0;
2579 int exception;
2581 DISSECTOR_ASSERT(tvb && tvb->initialized);
2583 exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &limit);
2584 if (exception)
2585 THROW(exception);
2587 /* Only search to end of tvbuff, w/o throwing exception. */
2588 if (limit > (unsigned) maxlength) {
2589 /* Maximum length doesn't go past end of tvbuff; search
2590 to that value. */
2591 limit = maxlength;
2594 /* If we have real data, perform our search now. */
2595 if (tvb->real_data) {
2596 result = ws_mempbrk_exec(tvb->real_data + abs_offset, limit, pattern, found_needle);
2597 if (result == NULL) {
2598 return -1;
2600 else {
2601 return (int) (result - tvb->real_data);
2605 if (tvb->ops->tvb_ws_mempbrk_pattern_uint8)
2606 return tvb->ops->tvb_ws_mempbrk_pattern_uint8(tvb, abs_offset, limit, pattern, found_needle);
2608 return tvb_ws_mempbrk_uint8_generic(tvb, abs_offset, limit, pattern, found_needle);
2611 /* Find size of stringz (NUL-terminated string) by looking for terminating
2612 * NUL. The size of the string includes the terminating NUL.
2614 * If the NUL isn't found, it throws the appropriate exception.
2616 unsigned
2617 tvb_strsize(tvbuff_t *tvb, const int offset)
2619 unsigned abs_offset = 0, junk_length;
2620 int nul_offset;
2622 DISSECTOR_ASSERT(tvb && tvb->initialized);
2624 check_offset_length(tvb, offset, 0, &abs_offset, &junk_length);
2625 nul_offset = tvb_find_uint8(tvb, abs_offset, -1, 0);
2626 if (nul_offset == -1) {
2628 * OK, we hit the end of the tvbuff, so we should throw
2629 * an exception.
2631 if (tvb->length < tvb->contained_length) {
2632 THROW(BoundsError);
2633 } else if (tvb->flags & TVBUFF_FRAGMENT) {
2634 THROW(FragmentBoundsError);
2635 } else if (tvb->length < tvb->reported_length) {
2636 THROW(ContainedBoundsError);
2637 } else {
2638 THROW(ReportedBoundsError);
2641 return (nul_offset - abs_offset) + 1;
2644 /* UTF-16/UCS-2 version of tvb_strsize */
2645 /* Returns number of bytes including the (two-bytes) null terminator */
2646 unsigned
2647 tvb_unicode_strsize(tvbuff_t *tvb, const int offset)
2649 unsigned i = 0;
2650 gunichar2 uchar;
2652 DISSECTOR_ASSERT(tvb && tvb->initialized);
2654 do {
2655 /* Endianness doesn't matter when looking for null */
2656 uchar = tvb_get_ntohs(tvb, offset + i);
2657 i += 2;
2658 } while(uchar != 0);
2660 return i;
2663 /* Find length of string by looking for end of string ('\0'), up to
2664 * 'maxlength' characters'; if 'maxlength' is -1, searches to end
2665 * of tvbuff.
2666 * Returns -1 if 'maxlength' reached before finding EOS. */
2668 tvb_strnlen(tvbuff_t *tvb, const int offset, const unsigned maxlength)
2670 int result_offset;
2671 unsigned abs_offset = 0, junk_length;
2673 DISSECTOR_ASSERT(tvb && tvb->initialized);
2675 check_offset_length(tvb, offset, 0, &abs_offset, &junk_length);
2677 result_offset = tvb_find_uint8(tvb, abs_offset, maxlength, 0);
2679 if (result_offset == -1) {
2680 return -1;
2682 else {
2683 return result_offset - abs_offset;
2688 * Implement strneql etc
2692 * Call strncmp after checking if enough chars left, returning 0 if
2693 * it returns 0 (meaning "equal") and -1 otherwise, otherwise return -1.
2696 tvb_strneql(tvbuff_t *tvb, const int offset, const char *str, const size_t size)
2698 const uint8_t *ptr;
2700 ptr = ensure_contiguous_no_exception(tvb, offset, (int)size, NULL);
2702 if (ptr) {
2703 int cmp = strncmp((const char *)ptr, str, size);
2706 * Return 0 if equal, -1 otherwise.
2708 return (cmp == 0 ? 0 : -1);
2709 } else {
2711 * Not enough characters in the tvbuff to match the
2712 * string.
2714 return -1;
2719 * Call g_ascii_strncasecmp after checking if enough chars left, returning
2720 * 0 if it returns 0 (meaning "equal") and -1 otherwise, otherwise return -1.
2723 tvb_strncaseeql(tvbuff_t *tvb, const int offset, const char *str, const size_t size)
2725 const uint8_t *ptr;
2727 ptr = ensure_contiguous_no_exception(tvb, offset, (int)size, NULL);
2729 if (ptr) {
2730 int cmp = g_ascii_strncasecmp((const char *)ptr, str, size);
2733 * Return 0 if equal, -1 otherwise.
2735 return (cmp == 0 ? 0 : -1);
2736 } else {
2738 * Not enough characters in the tvbuff to match the
2739 * string.
2741 return -1;
2746 * Check that the tvbuff contains at least size bytes, starting at
2747 * offset, and that those bytes are equal to str. Return 0 for success
2748 * and -1 for error. This function does not throw an exception.
2751 tvb_memeql(tvbuff_t *tvb, const int offset, const uint8_t *str, size_t size)
2753 const uint8_t *ptr;
2755 ptr = ensure_contiguous_no_exception(tvb, offset, (int) size, NULL);
2757 if (ptr) {
2758 int cmp = memcmp(ptr, str, size);
2761 * Return 0 if equal, -1 otherwise.
2763 return (cmp == 0 ? 0 : -1);
2764 } else {
2766 * Not enough characters in the tvbuff to match the
2767 * string.
2769 return -1;
2774 * Format the data in the tvb from offset for size.
2776 char *
2777 tvb_format_text(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int size)
2779 const uint8_t *ptr;
2780 int len;
2782 len = (size > 0) ? size : 0;
2784 ptr = ensure_contiguous(tvb, offset, size);
2785 return format_text(scope, ptr, len);
2789 * Format the data in the tvb from offset for length ...
2791 char *
2792 tvb_format_text_wsp(wmem_allocator_t* allocator, tvbuff_t *tvb, const int offset, const int size)
2794 const uint8_t *ptr;
2795 int len;
2797 len = (size > 0) ? size : 0;
2799 ptr = ensure_contiguous(tvb, offset, size);
2800 return format_text_wsp(allocator, ptr, len);
2804 * Like "tvb_format_text()", but for null-padded strings; don't show
2805 * the null padding characters as "\000".
2807 char *
2808 tvb_format_stringzpad(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int size)
2810 const uint8_t *ptr, *p;
2811 int len;
2812 int stringlen;
2814 len = (size > 0) ? size : 0;
2816 ptr = ensure_contiguous(tvb, offset, size);
2817 for (p = ptr, stringlen = 0; stringlen < len && *p != '\0'; p++, stringlen++)
2819 return format_text(scope, ptr, stringlen);
2823 * Like "tvb_format_text_wsp()", but for null-padded strings; don't show
2824 * the null padding characters as "\000".
2826 char *
2827 tvb_format_stringzpad_wsp(wmem_allocator_t* allocator, tvbuff_t *tvb, const int offset, const int size)
2829 const uint8_t *ptr, *p;
2830 int len;
2831 int stringlen;
2833 len = (size > 0) ? size : 0;
2835 ptr = ensure_contiguous(tvb, offset, size);
2836 for (p = ptr, stringlen = 0; stringlen < len && *p != '\0'; p++, stringlen++)
2838 return format_text_wsp(allocator, ptr, stringlen);
/*
 * All string functions below take a scope as an argument.
 *
 * If scope is NULL, memory is allocated with g_malloc() and the user
 * must explicitly free it with g_free().
 * If scope is not NULL, memory is allocated with the corresponding pool
 * lifetime.
 *
 * All functions throw an exception if the tvbuff ends before the string
 * does.
 */
2855 * Given a wmem scope, a tvbuff, an offset, and a length, treat the string
2856 * of bytes referred to by the tvbuff, offset, and length as an ASCII string,
2857 * with all bytes with the high-order bit set being invalid, and return a
2858 * pointer to a UTF-8 string, allocated using the wmem scope.
2860 * Octets with the highest bit set will be converted to the Unicode
2861 * REPLACEMENT CHARACTER.
2863 static uint8_t *
2864 tvb_get_ascii_string(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length)
2866 const uint8_t *ptr;
2868 ptr = ensure_contiguous(tvb, offset, length);
2869 return get_ascii_string(scope, ptr, length);
2873 * Given a wmem scope, a tvbuff, an offset, a length, and a translation table,
2874 * treat the string of bytes referred to by the tvbuff, offset, and length
2875 * as a string encoded using one octet per character, with octets with the
2876 * high-order bit clear being mapped by the translation table to 2-byte
2877 * Unicode Basic Multilingual Plane characters (including REPLACEMENT
2878 * CHARACTER) and octets with the high-order bit set being mapped to
2879 * REPLACEMENT CHARACTER, and return a pointer to a UTF-8 string,
2880 * allocated using the wmem scope.
2882 * Octets with the highest bit set will be converted to the Unicode
2883 * REPLACEMENT CHARACTER.
2885 static uint8_t *
2886 tvb_get_iso_646_string(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length, const gunichar2 table[0x80])
2888 const uint8_t *ptr;
2890 ptr = ensure_contiguous(tvb, offset, length);
2891 return get_iso_646_string(scope, ptr, length, table);
2895 * Given a wmem scope, a tvbuff, an offset, and a length, treat the string
2896 * of bytes referred to by the tvbuff, the offset. and the length as a UTF-8
2897 * string, and return a pointer to a UTF-8 string, allocated using the wmem
2898 * scope, with all ill-formed sequences replaced with the Unicode REPLACEMENT
2899 * CHARACTER according to the recommended "best practices" given in the Unicode
2900 * Standard and specified by W3C/WHATWG.
2902 * Note that in conformance with the Unicode Standard, this treats three
2903 * byte sequences corresponding to UTF-16 surrogate halves (paired or unpaired)
2904 * and two byte overlong encodings of 7-bit ASCII characters as invalid and
2905 * substitutes REPLACEMENT CHARACTER for them. Explicit support for nonstandard
2906 * derivative encoding formats (e.g. CESU-8, Java Modified UTF-8, WTF-8) could
2907 * be added later.
2909 static uint8_t *
2910 tvb_get_utf_8_string(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int length)
2912 const uint8_t *ptr;
2914 ptr = ensure_contiguous(tvb, offset, length);
2915 return get_utf_8_string(scope, ptr, length);
2919 * Given a wmem scope, a tvbuff, an offset, and a length, treat the string
2920 * of bytes referred to by the tvbuff, the offset, and the length as a
2921 * raw string, and return a pointer to that string, allocated using the
2922 * wmem scope. This means a null is appended at the end, but no replacement
2923 * checking is done otherwise, unlike tvb_get_utf_8_string().
2925 * Also, this one allows a length of -1 to mean get all, but does not
2926 * allow a negative offset.
2928 static inline uint8_t *
2929 tvb_get_raw_string(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int length)
2931 uint8_t *strbuf;
2932 int abs_length = length;
2934 DISSECTOR_ASSERT(offset >= 0);
2935 DISSECTOR_ASSERT(abs_length >= -1);
2937 if (abs_length < 0)
2938 abs_length = tvb->length - offset;
2940 tvb_ensure_bytes_exist(tvb, offset, abs_length);
2941 strbuf = (uint8_t *)wmem_alloc(scope, abs_length + 1);
2942 tvb_memcpy(tvb, strbuf, offset, abs_length);
2943 strbuf[abs_length] = '\0';
2944 return strbuf;
2948 * Given a wmem scope, a tvbuff, an offset, and a length, treat the string
2949 * of bytes referred to by the tvbuff, the offset, and the length as an
2950 * ISO 8859/1 string, and return a pointer to a UTF-8 string, allocated
2951 * using the wmem scope.
2953 static uint8_t *
2954 tvb_get_string_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length)
2956 const uint8_t *ptr;
2958 ptr = ensure_contiguous(tvb, offset, length);
2959 return get_8859_1_string(scope, ptr, length);
2963 * Given a wmem scope, a tvbuff, an offset, a length, and a translation
2964 * table, treat the string of bytes referred to by the tvbuff, the offset,
2965 * and the length as a string encoded using one octet per character, with
2966 * octets with the high-order bit clear being ASCII and octets with the
2967 * high-order bit set being mapped by the translation table to 2-byte
2968 * Unicode Basic Multilingual Plane characters (including REPLACEMENT
2969 * CHARACTER), and return a pointer to a UTF-8 string, allocated with the
2970 * wmem scope.
2972 static uint8_t *
2973 tvb_get_string_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length, const gunichar2 table[0x80])
2975 const uint8_t *ptr;
2977 ptr = ensure_contiguous(tvb, offset, length);
2978 return get_unichar2_string(scope, ptr, length, table);
2982 * Given a wmem scope, a tvbuff, an offset, a length, and an encoding
2983 * giving the byte order, treat the string of bytes referred to by the
2984 * tvbuff, the offset, and the length as a UCS-2 encoded string in
2985 * the byte order in question, containing characters from the Basic
2986 * Multilingual Plane (plane 0) of Unicode, and return a pointer to a
2987 * UTF-8 string, allocated with the wmem scope.
2989 * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN.
2991 * Specify length in bytes.
2993 * XXX - should map lead and trail surrogate values to REPLACEMENT
2994 * CHARACTERs (0xFFFD)?
2995 * XXX - if there are an odd number of bytes, should put a
2996 * REPLACEMENT CHARACTER at the end.
2998 static uint8_t *
2999 tvb_get_ucs_2_string(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int length, const unsigned encoding)
3001 const uint8_t *ptr;
3003 ptr = ensure_contiguous(tvb, offset, length);
3004 return get_ucs_2_string(scope, ptr, length, encoding);
3008 * Given a wmem scope, a tvbuff, an offset, a length, and an encoding
3009 * giving the byte order, treat the string of bytes referred to by the
3010 * tvbuff, the offset, and the length as a UTF-16 encoded string in
3011 * the byte order in question, and return a pointer to a UTF-8 string,
3012 * allocated with the wmem scope.
3014 * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN.
3016 * Specify length in bytes.
3018 * XXX - should map surrogate errors to REPLACEMENT CHARACTERs (0xFFFD).
3019 * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
3020 * XXX - if there are an odd number of bytes, should put a
3021 * REPLACEMENT CHARACTER at the end.
3023 static uint8_t *
3024 tvb_get_utf_16_string(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int length, const unsigned encoding)
3026 const uint8_t *ptr;
3028 ptr = ensure_contiguous(tvb, offset, length);
3029 return get_utf_16_string(scope, ptr, length, encoding);
3033 * Given a wmem scope, a tvbuff, an offset, a length, and an encoding
3034 * giving the byte order, treat the string of bytes referred to by the
3035 * tvbuff, the offset, and the length as a UCS-4 encoded string in
3036 * the byte order in question, and return a pointer to a UTF-8 string,
3037 * allocated with the wmem scope.
3039 * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN
3041 * Specify length in bytes
3043 * XXX - should map lead and trail surrogate values to a "substitute"
3044 * UTF-8 character?
3045 * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
3046 * XXX - if the number of bytes isn't a multiple of 4, should put a
3047 * REPLACEMENT CHARACTER at the end.
3049 static char *
3050 tvb_get_ucs_4_string(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int length, const unsigned encoding)
3052 const uint8_t *ptr;
3054 ptr = ensure_contiguous(tvb, offset, length);
3055 return get_ucs_4_string(scope, ptr, length, encoding);
3058 char *
3059 tvb_get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, tvbuff_t *tvb,
3060 const int bit_offset, int no_of_chars)
3062 int in_offset = bit_offset >> 3; /* Current pointer to the input buffer */
3063 int length = ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3;
3064 const uint8_t *ptr;
3066 DISSECTOR_ASSERT(tvb && tvb->initialized);
3068 ptr = ensure_contiguous(tvb, in_offset, length);
3069 return get_ts_23_038_7bits_string_packed(scope, ptr, bit_offset, no_of_chars);
3072 char *
3073 tvb_get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, tvbuff_t *tvb,
3074 const int offset, int length)
3076 const uint8_t *ptr;
3078 DISSECTOR_ASSERT(tvb && tvb->initialized);
3080 ptr = ensure_contiguous(tvb, offset, length);
3081 return get_ts_23_038_7bits_string_unpacked(scope, ptr, length);
3084 char *
3085 tvb_get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, tvbuff_t *tvb,
3086 const int offset, int length)
3088 const uint8_t *ptr;
3090 DISSECTOR_ASSERT(tvb && tvb->initialized);
3092 ptr = ensure_contiguous(tvb, offset, length);
3093 return get_etsi_ts_102_221_annex_a_string(scope, ptr, length);
3096 char *
3097 tvb_get_ascii_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
3098 const int bit_offset, int no_of_chars)
3100 int in_offset = bit_offset >> 3; /* Current pointer to the input buffer */
3101 int length = ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3;
3102 const uint8_t *ptr;
3104 DISSECTOR_ASSERT(tvb && tvb->initialized);
3106 ptr = ensure_contiguous(tvb, in_offset, length);
3107 return get_ascii_7bits_string(scope, ptr, bit_offset, no_of_chars);
3111 * Given a wmem scope, a tvbuff, an offset, a length, and a translation
3112 * table, treat the string of bytes referred to by the tvbuff, the offset,
3113 * and the length as a string encoded using one octet per character, with
3114 * octets being mapped by the translation table to 2-byte Unicode Basic
3115 * Multilingual Plane characters (including REPLACEMENT CHARACTER), and
3116 * return a pointer to a UTF-8 string, allocated with the wmem scope.
3118 static uint8_t *
3119 tvb_get_nonascii_unichar2_string(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length, const gunichar2 table[256])
3121 const uint8_t *ptr;
3123 ptr = ensure_contiguous(tvb, offset, length);
3124 return get_nonascii_unichar2_string(scope, ptr, length, table);
3128 * Given a wmem scope, a tvbuff, an offset, and a length, treat the bytes
3129 * referred to by the tvbuff, offset, and length as a GB18030 encoded string,
3130 * and return a pointer to a UTF-8 string, allocated with the wmem scope,
3131 * converted having substituted REPLACEMENT CHARACTER according to the
3132 * Unicode Standard 5.22 U+FFFD Substitution for Conversion.
3133 * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf )
3135 * As expected, this will also decode GBK and GB2312 strings.
3137 static uint8_t *
3138 tvb_get_gb18030_string(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length)
3140 const uint8_t *ptr;
3142 ptr = ensure_contiguous(tvb, offset, length);
3143 return get_gb18030_string(scope, ptr, length);
3147 * Given a wmem scope, a tvbuff, an offset, and a length, treat the bytes
3148 * referred to by the tvbuff, offset, and length as a EUC-KR encoded string,
3149 * and return a pointer to a UTF-8 string, allocated with the wmem scope,
3150 * converted having substituted REPLACEMENT CHARACTER according to the
3151 * Unicode Standard 5.22 U+FFFD Substitution for Conversion.
3152 * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf )
3154 static uint8_t *
3155 tvb_get_euc_kr_string(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length)
3157 const uint8_t *ptr;
3159 ptr = ensure_contiguous(tvb, offset, length);
3160 return get_euc_kr_string(scope, ptr, length);
3163 static uint8_t *
3164 tvb_get_t61_string(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length)
3166 const uint8_t *ptr;
3168 ptr = ensure_contiguous(tvb, offset, length);
3169 return get_t61_string(scope, ptr, length);
3173 * Encoding tables for BCD strings.
3175 static const dgt_set_t Dgt0_9_bcd = {
3177 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
3178 '0','1','2','3','4','5','6','7','8','9','?','?','?','?','?','?'
3182 static const dgt_set_t Dgt_keypad_abc_tbcd = {
3184 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
3185 '0','1','2','3','4','5','6','7','8','9','*','#','a','b','c','?'
3189 static const dgt_set_t Dgt_ansi_tbcd = {
3191 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
3192 '0','1','2','3','4','5','6','7','8','9','?','B','C','*','#','?'
3196 static const dgt_set_t Dgt_dect_standard_4bits_tbcd = {
3198 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
3199 '0','1','2','3','4','5','6','7','8','9','?',' ','?','?','?','?'
3203 static uint8_t *
3204 tvb_get_apn_string(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset,
3205 int length)
3207 wmem_strbuf_t *str;
3210 * This is a domain name.
3212 * 3GPP TS 23.003, section 19.4.2 "Fully Qualified Domain Names
3213 * (FQDNs)", subsection 19.4.2.1 "General", says:
3215 * The encoding of any identifier used as part of a Fully
3216 * Qualifed Domain Name (FQDN) shall follow the Name Syntax
3217 * defined in IETF RFC 2181 [18], IETF RFC 1035 [19] and
3218 * IETF RFC 1123 [20]. An FQDN consists of one or more
3219 * labels. Each label is coded as a one octet length field
3220 * followed by that number of octets coded as 8 bit ASCII
3221 * characters.
3223 * so this does not appear to use full-blown DNS compression -
3224 * the upper 2 bits of the length don't indicate that it's a
3225 * pointer or an extended label (RFC 2673).
3227 str = wmem_strbuf_new_sized(scope, length + 1);
3228 if (length > 0) {
3229 const uint8_t *ptr;
3231 ptr = ensure_contiguous(tvb, offset, length);
3233 for (;;) {
3234 unsigned label_len;
3237 * Process this label.
3239 label_len = *ptr;
3240 ptr++;
3241 length--;
3243 while (label_len != 0) {
3244 uint8_t ch;
3246 if (length == 0)
3247 goto end;
3249 ch = *ptr;
3250 if (ch < 0x80)
3251 wmem_strbuf_append_c(str, ch);
3252 else
3253 wmem_strbuf_append_unichar_repl(str);
3254 ptr++;
3255 label_len--;
3256 length--;
3259 if (length == 0)
3260 goto end;
3262 wmem_strbuf_append_c(str, '.');
3266 end:
3267 return (uint8_t *) wmem_strbuf_finalize(str);
3270 static uint8_t *
3271 tvb_get_dect_standard_8bits_string(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int length)
3273 const uint8_t *ptr;
3275 ptr = ensure_contiguous(tvb, offset, length);
3276 return get_dect_standard_8bits_string(scope, ptr, length);
3280 * Given a tvbuff, an offset, a length, and an encoding, allocate a
3281 * buffer big enough to hold a non-null-terminated string of that length
3282 * at that offset, plus a trailing '\0', copy into the buffer the
3283 * string as converted from the appropriate encoding to UTF-8, and
3284 * return a pointer to the string.
3286 uint8_t *
3287 tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset,
3288 const int length, const unsigned encoding)
3290 uint8_t *strptr;
3291 bool odd, skip_first;
3293 DISSECTOR_ASSERT(tvb && tvb->initialized);
3295 /* make sure length = -1 fails */
3296 if (length < 0) {
3297 THROW(ReportedBoundsError);
3300 switch (encoding & ENC_CHARENCODING_MASK) {
3302 case ENC_ASCII:
3303 default:
3305 * For now, we treat bogus values as meaning
3306 * "ASCII" rather than reporting an error,
3307 * for the benefit of old dissectors written
3308 * when the last argument to proto_tree_add_item()
3309 * was a bool for the byte order, not an
3310 * encoding value, and passed non-zero values
3311 * other than true to mean "little-endian".
3313 strptr = tvb_get_ascii_string(scope, tvb, offset, length);
3314 break;
3316 case ENC_UTF_8:
3317 strptr = tvb_get_utf_8_string(scope, tvb, offset, length);
3318 break;
3320 case ENC_UTF_16:
3321 strptr = tvb_get_utf_16_string(scope, tvb, offset, length,
3322 encoding & (ENC_LITTLE_ENDIAN|ENC_BOM));
3323 break;
3325 case ENC_UCS_2:
3326 strptr = tvb_get_ucs_2_string(scope, tvb, offset, length,
3327 encoding & (ENC_LITTLE_ENDIAN|ENC_BOM));
3328 break;
3330 case ENC_UCS_4:
3331 strptr = tvb_get_ucs_4_string(scope, tvb, offset, length,
3332 encoding & (ENC_LITTLE_ENDIAN|ENC_BOM));
3333 break;
3335 case ENC_ISO_8859_1:
3337 * ISO 8859-1 printable code point values are equal
3338 * to the equivalent Unicode code point value, so
3339 * no translation table is needed.
3341 strptr = tvb_get_string_8859_1(scope, tvb, offset, length);
3342 break;
3344 case ENC_ISO_8859_2:
3345 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_2);
3346 break;
3348 case ENC_ISO_8859_3:
3349 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_3);
3350 break;
3352 case ENC_ISO_8859_4:
3353 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_4);
3354 break;
3356 case ENC_ISO_8859_5:
3357 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_5);
3358 break;
3360 case ENC_ISO_8859_6:
3361 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_6);
3362 break;
3364 case ENC_ISO_8859_7:
3365 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_7);
3366 break;
3368 case ENC_ISO_8859_8:
3369 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_8);
3370 break;
3372 case ENC_ISO_8859_9:
3373 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_9);
3374 break;
3376 case ENC_ISO_8859_10:
3377 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_10);
3378 break;
3380 case ENC_ISO_8859_11:
3381 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_11);
3382 break;
3384 case ENC_ISO_8859_13:
3385 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_13);
3386 break;
3388 case ENC_ISO_8859_14:
3389 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_14);
3390 break;
3392 case ENC_ISO_8859_15:
3393 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_15);
3394 break;
3396 case ENC_ISO_8859_16:
3397 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_16);
3398 break;
3400 case ENC_WINDOWS_1250:
3401 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1250);
3402 break;
3404 case ENC_WINDOWS_1251:
3405 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1251);
3406 break;
3408 case ENC_WINDOWS_1252:
3409 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1252);
3410 break;
3412 case ENC_MAC_ROMAN:
3413 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_mac_roman);
3414 break;
3416 case ENC_CP437:
3417 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp437);
3418 break;
3420 case ENC_CP855:
3421 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp855);
3422 break;
3424 case ENC_CP866:
3425 strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp866);
3426 break;
3428 case ENC_ISO_646_BASIC:
3429 strptr = tvb_get_iso_646_string(scope, tvb, offset, length, charset_table_iso_646_basic);
3430 break;
3432 case ENC_3GPP_TS_23_038_7BITS_PACKED:
3434 int bit_offset = offset << 3;
3435 int no_of_chars = (length << 3) / 7;
3436 strptr = tvb_get_ts_23_038_7bits_string_packed(scope, tvb, bit_offset, no_of_chars);
3438 break;
3440 case ENC_ASCII_7BITS:
3442 int bit_offset = offset << 3;
3443 int no_of_chars = (length << 3) / 7;
3444 strptr = tvb_get_ascii_7bits_string(scope, tvb, bit_offset, no_of_chars);
3446 break;
3448 case ENC_EBCDIC:
3450 * "Common" EBCDIC, covering all characters with the
3451 * same code point in all Roman-alphabet EBCDIC code
3452 * pages.
3454 strptr = tvb_get_nonascii_unichar2_string(scope, tvb, offset, length, charset_table_ebcdic);
3455 break;
3457 case ENC_EBCDIC_CP037:
3459 * EBCDIC code page 037.
3461 strptr = tvb_get_nonascii_unichar2_string(scope, tvb, offset, length, charset_table_ebcdic_cp037);
3462 break;
3464 case ENC_EBCDIC_CP500:
3466 * EBCDIC code page 500.
3468 strptr = tvb_get_nonascii_unichar2_string(scope, tvb, offset, length, charset_table_ebcdic_cp500);
3469 break;
3471 case ENC_T61:
3472 strptr = tvb_get_t61_string(scope, tvb, offset, length);
3473 break;
3475 case ENC_BCD_DIGITS_0_9:
3477 * Packed BCD, with digits 0-9.
3479 odd = (encoding & ENC_BCD_ODD_NUM_DIG) >> 16;
3480 skip_first = (encoding & ENC_BCD_SKIP_FIRST) >> 17;
3481 strptr = tvb_get_bcd_string(scope, tvb, offset, length, &Dgt0_9_bcd, skip_first, odd, !(encoding & ENC_LITTLE_ENDIAN));
3482 break;
3484 case ENC_KEYPAD_ABC_TBCD:
3486 * Keypad-with-a/b/c "telephony BCD" - packed BCD, with
3487 * digits 0-9 and symbols *, #, a, b, and c.
3489 odd = (encoding & ENC_BCD_ODD_NUM_DIG) >> 16;
3490 skip_first = (encoding & ENC_BCD_SKIP_FIRST) >> 17;
3491 strptr = tvb_get_bcd_string(scope, tvb, offset, length, &Dgt_keypad_abc_tbcd, skip_first, odd, !(encoding & ENC_LITTLE_ENDIAN));
3492 break;
3494 case ENC_KEYPAD_BC_TBCD:
3496 * Keypad-with-B/C "telephony BCD" - packed BCD, with
3497 * digits 0-9 and symbols B, C, *, and #.
3499 odd = (encoding & ENC_BCD_ODD_NUM_DIG) >> 16;
3500 skip_first = (encoding & ENC_BCD_SKIP_FIRST) >> 17;
3501 strptr = tvb_get_bcd_string(scope, tvb, offset, length, &Dgt_ansi_tbcd, skip_first, odd, !(encoding & ENC_LITTLE_ENDIAN));
3502 break;
3504 case ENC_3GPP_TS_23_038_7BITS_UNPACKED:
3505 strptr = tvb_get_ts_23_038_7bits_string_unpacked(scope, tvb, offset, length);
3506 break;
3508 case ENC_ETSI_TS_102_221_ANNEX_A:
3509 strptr = tvb_get_etsi_ts_102_221_annex_a_string(scope, tvb, offset, length);
3510 break;
3512 case ENC_GB18030:
3513 strptr = tvb_get_gb18030_string(scope, tvb, offset, length);
3514 break;
3516 case ENC_EUC_KR:
3517 strptr = tvb_get_euc_kr_string(scope, tvb, offset, length);
3518 break;
3520 case ENC_APN_STR:
3521 strptr = tvb_get_apn_string(scope, tvb, offset, length);
3522 break;
3524 case ENC_DECT_STANDARD_8BITS:
3525 strptr = tvb_get_dect_standard_8bits_string(scope, tvb, offset, length);
3526 break;
3528 case ENC_DECT_STANDARD_4BITS_TBCD:
3530 * DECT standard 4bits "telephony BCD" - packed BCD, with
3531 * digits 0-9 and symbol SPACE for 0xb.
3533 odd = (encoding & ENC_BCD_ODD_NUM_DIG) >> 16;
3534 skip_first = (encoding & ENC_BCD_SKIP_FIRST) >> 17;
3535 strptr = tvb_get_bcd_string(scope, tvb, offset, length, &Dgt_dect_standard_4bits_tbcd, skip_first, odd, false);
3536 break;
3538 return strptr;
3542 * This is like tvb_get_string_enc(), except that it handles null-padded
3543 * strings.
3545 * Currently, string values are stored as UTF-8 null-terminated strings,
3546 * so nothing needs to be done differently for null-padded strings; we
3547 * could save a little memory by not storing the null padding.
3549 * If we ever store string values differently, in a fashion that doesn't
3550 * involve null termination, that might change.
3552 uint8_t *
3553 tvb_get_stringzpad(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset,
3554 const int length, const unsigned encoding)
3556 return tvb_get_string_enc(scope, tvb, offset, length, encoding);
3560 * These routines are like the above routines, except that they handle
3561 * null-terminated strings. They find the length of that string (and
3562 * throw an exception if the tvbuff ends before we find the null), and
3563 * also return through a pointer the length of the string, in bytes,
3564 * including the terminating null (the terminating null being 2 bytes
3565 * for UCS-2 and UTF-16, 4 bytes for UCS-4, and 1 byte for other
3566 * encodings).
3568 static uint8_t *
3569 tvb_get_ascii_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp)
3571 unsigned size;
3572 const uint8_t *ptr;
3574 size = tvb_strsize(tvb, offset);
3575 ptr = ensure_contiguous(tvb, offset, size);
3576 /* XXX, conversion between signed/unsigned integer */
3577 if (lengthp)
3578 *lengthp = size;
3579 return get_ascii_string(scope, ptr, size);
3582 static uint8_t *
3583 tvb_get_iso_646_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp, const gunichar2 table[0x80])
3585 unsigned size;
3586 const uint8_t *ptr;
3588 size = tvb_strsize(tvb, offset);
3589 ptr = ensure_contiguous(tvb, offset, size);
3590 /* XXX, conversion between signed/unsigned integer */
3591 if (lengthp)
3592 *lengthp = size;
3593 return get_iso_646_string(scope, ptr, size, table);
3596 static uint8_t *
3597 tvb_get_utf_8_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int *lengthp)
3599 unsigned size;
3600 const uint8_t *ptr;
3602 size = tvb_strsize(tvb, offset);
3603 ptr = ensure_contiguous(tvb, offset, size);
3604 /* XXX, conversion between signed/unsigned integer */
3605 if (lengthp)
3606 *lengthp = size;
3607 return get_utf_8_string(scope, ptr, size);
3610 static uint8_t *
3611 tvb_get_stringz_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp)
3613 unsigned size;
3614 const uint8_t *ptr;
3616 size = tvb_strsize(tvb, offset);
3617 ptr = ensure_contiguous(tvb, offset, size);
3618 /* XXX, conversion between signed/unsigned integer */
3619 if (lengthp)
3620 *lengthp = size;
3621 return get_8859_1_string(scope, ptr, size);
3624 static uint8_t *
3625 tvb_get_stringz_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp, const gunichar2 table[0x80])
3627 unsigned size;
3628 const uint8_t *ptr;
3630 size = tvb_strsize(tvb, offset);
3631 ptr = ensure_contiguous(tvb, offset, size);
3632 /* XXX, conversion between signed/unsigned integer */
3633 if (lengthp)
3634 *lengthp = size;
3635 return get_unichar2_string(scope, ptr, size, table);
3639 * Given a tvbuff and an offset, with the offset assumed to refer to
3640 * a null-terminated string, find the length of that string (and throw
3641 * an exception if the tvbuff ends before we find the null), ensure that
3642 * the TVB is flat, and return a pointer to the string (in the TVB).
3643 * Also return the length of the string (including the terminating null)
3644 * through a pointer.
3646 * As long as we aren't using composite TVBs, this saves the cycles used
3647 * (often unnecessariliy) in allocating a buffer and copying the string into
3648 * it. (If we do start using composite TVBs, we may want to replace this
3649 * function with the _ephemeral version.)
3651 const uint8_t *
3652 tvb_get_const_stringz(tvbuff_t *tvb, const int offset, int *lengthp)
3654 unsigned size;
3655 const uint8_t *strptr;
3657 size = tvb_strsize(tvb, offset);
3658 strptr = ensure_contiguous(tvb, offset, size);
3659 if (lengthp)
3660 *lengthp = size;
3661 return strptr;
3664 static char *
3665 tvb_get_ucs_2_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int *lengthp, const unsigned encoding)
3667 int size; /* Number of bytes in string */
3668 const uint8_t *ptr;
3670 size = tvb_unicode_strsize(tvb, offset);
3671 ptr = ensure_contiguous(tvb, offset, size);
3672 /* XXX, conversion between signed/unsigned integer */
3673 if (lengthp)
3674 *lengthp = size;
3675 return get_ucs_2_string(scope, ptr, size, encoding);
3678 static char *
3679 tvb_get_utf_16_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int *lengthp, const unsigned encoding)
3681 int size;
3682 const uint8_t *ptr;
3684 size = tvb_unicode_strsize(tvb, offset);
3685 ptr = ensure_contiguous(tvb, offset, size);
3686 /* XXX, conversion between signed/unsigned integer */
3687 if (lengthp)
3688 *lengthp = size;
3689 return get_utf_16_string(scope, ptr, size, encoding);
3692 static char *
3693 tvb_get_ucs_4_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int *lengthp, const unsigned encoding)
3695 int size;
3696 gunichar uchar;
3697 const uint8_t *ptr;
3699 size = 0;
3700 do {
3701 /* Endianness doesn't matter when looking for null */
3702 uchar = tvb_get_ntohl(tvb, offset + size);
3703 size += 4;
3704 } while(uchar != 0);
3706 ptr = ensure_contiguous(tvb, offset, size);
3707 /* XXX, conversion between signed/unsigned integer */
3708 if (lengthp)
3709 *lengthp = size;
3710 return get_ucs_4_string(scope, ptr, size, encoding);
3713 static uint8_t *
3714 tvb_get_nonascii_unichar2_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp, const gunichar2 table[256])
3716 unsigned size;
3717 const uint8_t *ptr;
3719 size = tvb_strsize(tvb, offset);
3720 ptr = ensure_contiguous(tvb, offset, size);
3721 /* XXX, conversion between signed/unsigned integer */
3722 if (lengthp)
3723 *lengthp = size;
3724 return get_nonascii_unichar2_string(scope, ptr, size, table);
3727 static uint8_t *
3728 tvb_get_t61_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp)
3730 unsigned size;
3731 const uint8_t *ptr;
3733 size = tvb_strsize(tvb, offset);
3734 ptr = ensure_contiguous(tvb, offset, size);
3735 /* XXX, conversion between signed/unsigned integer */
3736 if (lengthp)
3737 *lengthp = size;
3738 return get_t61_string(scope, ptr, size);
3741 static uint8_t *
3742 tvb_get_gb18030_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp)
3744 unsigned size;
3745 const uint8_t *ptr;
3747 size = tvb_strsize(tvb, offset);
3748 ptr = ensure_contiguous(tvb, offset, size);
3749 /* XXX, conversion between signed/unsigned integer */
3750 if (lengthp)
3751 *lengthp = size;
3752 return get_gb18030_string(scope, ptr, size);
3755 static uint8_t *
3756 tvb_get_euc_kr_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp)
3758 unsigned size;
3759 const uint8_t *ptr;
3761 size = tvb_strsize(tvb, offset);
3762 ptr = ensure_contiguous(tvb, offset, size);
3763 /* XXX, conversion between signed/unsigned integer */
3764 if (lengthp)
3765 *lengthp = size;
3766 return get_euc_kr_string(scope, ptr, size);
3769 static uint8_t *
3770 tvb_get_dect_standard_8bits_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, int offset, int *lengthp)
3772 unsigned size;
3773 const uint8_t *ptr;
3775 size = tvb_strsize(tvb, offset);
3776 ptr = ensure_contiguous(tvb, offset, size);
3777 /* XXX, conversion between signed/unsigned integer */
3778 if (lengthp)
3779 *lengthp = size;
3780 return get_t61_string(scope, ptr, size);
3783 uint8_t *
3784 tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int *lengthp, const unsigned encoding)
3786 uint8_t *strptr;
3788 DISSECTOR_ASSERT(tvb && tvb->initialized);
3790 switch (encoding & ENC_CHARENCODING_MASK) {
3792 case ENC_ASCII:
3793 default:
3795 * For now, we treat bogus values as meaning
3796 * "ASCII" rather than reporting an error,
3797 * for the benefit of old dissectors written
3798 * when the last argument to proto_tree_add_item()
3799 * was a bool for the byte order, not an
3800 * encoding value, and passed non-zero values
3801 * other than true to mean "little-endian".
3803 strptr = tvb_get_ascii_stringz(scope, tvb, offset, lengthp);
3804 break;
3806 case ENC_UTF_8:
3808 * XXX - should map all invalid UTF-8 sequences
3809 * to a "substitute" UTF-8 character.
3810 * XXX - should map code points > 10FFFF to REPLACEMENT
3811 * CHARACTERs.
3813 strptr = tvb_get_utf_8_stringz(scope, tvb, offset, lengthp);
3814 break;
3816 case ENC_UTF_16:
3817 strptr = tvb_get_utf_16_stringz(scope, tvb, offset, lengthp,
3818 encoding & (ENC_LITTLE_ENDIAN|ENC_BOM));
3819 break;
3821 case ENC_UCS_2:
3822 strptr = tvb_get_ucs_2_stringz(scope, tvb, offset, lengthp,
3823 encoding & (ENC_LITTLE_ENDIAN|ENC_BOM));
3824 break;
3826 case ENC_UCS_4:
3827 strptr = tvb_get_ucs_4_stringz(scope, tvb, offset, lengthp,
3828 encoding & (ENC_LITTLE_ENDIAN|ENC_BOM));
3829 break;
3831 case ENC_ISO_8859_1:
3833 * ISO 8859-1 printable code point values are equal
3834 * to the equivalent Unicode code point value, so
3835 * no translation table is needed.
3837 strptr = tvb_get_stringz_8859_1(scope, tvb, offset, lengthp);
3838 break;
3840 case ENC_ISO_8859_2:
3841 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_2);
3842 break;
3844 case ENC_ISO_8859_3:
3845 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_3);
3846 break;
3848 case ENC_ISO_8859_4:
3849 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_4);
3850 break;
3852 case ENC_ISO_8859_5:
3853 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_5);
3854 break;
3856 case ENC_ISO_8859_6:
3857 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_6);
3858 break;
3860 case ENC_ISO_8859_7:
3861 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_7);
3862 break;
3864 case ENC_ISO_8859_8:
3865 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_8);
3866 break;
3868 case ENC_ISO_8859_9:
3869 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_9);
3870 break;
3872 case ENC_ISO_8859_10:
3873 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_10);
3874 break;
3876 case ENC_ISO_8859_11:
3877 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_11);
3878 break;
3880 case ENC_ISO_8859_13:
3881 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_13);
3882 break;
3884 case ENC_ISO_8859_14:
3885 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_14);
3886 break;
3888 case ENC_ISO_8859_15:
3889 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_15);
3890 break;
3892 case ENC_ISO_8859_16:
3893 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_16);
3894 break;
3896 case ENC_WINDOWS_1250:
3897 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1250);
3898 break;
3900 case ENC_WINDOWS_1251:
3901 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1251);
3902 break;
3904 case ENC_WINDOWS_1252:
3905 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1252);
3906 break;
3908 case ENC_MAC_ROMAN:
3909 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_mac_roman);
3910 break;
3912 case ENC_CP437:
3913 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp437);
3914 break;
3916 case ENC_CP855:
3917 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp855);
3918 break;
3920 case ENC_CP866:
3921 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp866);
3922 break;
3924 case ENC_ISO_646_BASIC:
3925 strptr = tvb_get_iso_646_stringz(scope, tvb, offset, lengthp, charset_table_iso_646_basic);
3926 break;
3928 case ENC_3GPP_TS_23_038_7BITS_PACKED:
3929 case ENC_3GPP_TS_23_038_7BITS_UNPACKED:
3930 case ENC_ETSI_TS_102_221_ANNEX_A:
3931 REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings");
3932 break;
3934 case ENC_ASCII_7BITS:
3935 REPORT_DISSECTOR_BUG("tvb_get_stringz_enc function with ENC_ASCII_7BITS not implemented yet");
3936 break;
3938 case ENC_EBCDIC:
3940 * "Common" EBCDIC, covering all characters with the
3941 * same code point in all Roman-alphabet EBCDIC code
3942 * pages.
3944 strptr = tvb_get_nonascii_unichar2_stringz(scope, tvb, offset, lengthp, charset_table_ebcdic);
3945 break;
3947 case ENC_EBCDIC_CP037:
3949 * EBCDIC code page 037.
3951 strptr = tvb_get_nonascii_unichar2_stringz(scope, tvb, offset, lengthp, charset_table_ebcdic_cp037);
3952 break;
3954 case ENC_EBCDIC_CP500:
3956 * EBCDIC code page 500.
3958 strptr = tvb_get_nonascii_unichar2_stringz(scope, tvb, offset, lengthp, charset_table_ebcdic_cp500);
3959 break;
3961 case ENC_T61:
3962 strptr = tvb_get_t61_stringz(scope, tvb, offset, lengthp);
3963 break;
3965 case ENC_GB18030:
3966 strptr = tvb_get_gb18030_stringz(scope, tvb, offset, lengthp);
3967 break;
3969 case ENC_EUC_KR:
3970 strptr = tvb_get_euc_kr_stringz(scope, tvb, offset, lengthp);
3971 break;
3973 case ENC_DECT_STANDARD_8BITS:
3974 strptr = tvb_get_dect_standard_8bits_stringz(scope, tvb, offset, lengthp);
3975 break;
3978 return strptr;
3981 /* Looks for a stringz (NUL-terminated string) in tvbuff and copies
3982 * no more than bufsize number of bytes, including terminating NUL, to buffer.
3983 * Returns length of string (not including terminating NUL), or -1 if the string was
3984 * truncated in the buffer due to not having reached the terminating NUL.
3985 * In this way, it acts like snprintf().
3987 * bufsize MUST be greater than 0.
3989 * When processing a packet where the remaining number of bytes is less
3990 * than bufsize, an exception is not thrown if the end of the packet
3991 * is reached before the NUL is found. If no NUL is found before reaching
3992 * the end of the short packet, -1 is still returned, and the string
3993 * is truncated with a NUL, albeit not at buffer[bufsize - 1], but
3994 * at the correct spot, terminating the string.
3996 * *bytes_copied will contain the number of bytes actually copied,
3997 * including the terminating-NUL.
3999 static int
4000 _tvb_get_raw_bytes_as_stringz(tvbuff_t *tvb, const int offset, const unsigned bufsize, uint8_t* buffer, int *bytes_copied)
4002 int stringlen;
4003 unsigned abs_offset = 0;
4004 int limit, len = 0;
4005 bool decreased_max = false;
4007 /* Only read to end of tvbuff, w/o throwing exception. */
4008 check_offset_length(tvb, offset, -1, &abs_offset, &len);
4010 /* There must at least be room for the terminating NUL. */
4011 DISSECTOR_ASSERT(bufsize != 0);
4013 /* If there's no room for anything else, just return the NUL. */
4014 if (bufsize == 1) {
4015 buffer[0] = 0;
4016 *bytes_copied = 1;
4017 return 0;
4020 /* check_offset_length() won't throw an exception if we're
4021 * looking at the byte immediately after the end of the tvbuff. */
4022 if (len == 0) {
4023 THROW(ReportedBoundsError);
4026 /* This should not happen because check_offset_length() would
4027 * have already thrown an exception if 'offset' were out-of-bounds.
4029 DISSECTOR_ASSERT(len != -1);
4032 * If we've been passed a negative number, bufsize will
4033 * be huge.
4035 DISSECTOR_ASSERT(bufsize <= INT_MAX);
4037 if ((unsigned)len < bufsize) {
4038 limit = len;
4039 decreased_max = true;
4041 else {
4042 limit = bufsize;
4045 stringlen = tvb_strnlen(tvb, abs_offset, limit - 1);
4046 /* If NUL wasn't found, copy the data and return -1 */
4047 if (stringlen == -1) {
4048 tvb_memcpy(tvb, buffer, abs_offset, limit);
4049 if (decreased_max) {
4050 buffer[limit] = 0;
4051 /* Add 1 for the extra NUL that we set at buffer[limit],
4052 * pretending that it was copied as part of the string. */
4053 *bytes_copied = limit + 1;
4055 else {
4056 *bytes_copied = limit;
4058 return -1;
4061 /* Copy the string to buffer */
4062 tvb_memcpy(tvb, buffer, abs_offset, stringlen + 1);
4063 *bytes_copied = stringlen + 1;
4064 return stringlen;
4068 tvb_get_raw_bytes_as_stringz(tvbuff_t *tvb, const int offset, const unsigned bufsize, uint8_t* buffer)
4070 int len, bytes_copied;
4072 DISSECTOR_ASSERT(tvb && tvb->initialized);
4074 len = _tvb_get_raw_bytes_as_stringz(tvb, offset, bufsize, buffer, &bytes_copied);
4076 if (len == -1) {
4077 buffer[bufsize - 1] = 0;
4078 return bytes_copied - 1;
4080 else {
4081 return len;
4086 * Given a tvbuff, an offset into the tvbuff, a buffer, and a buffer size,
4087 * extract as many raw bytes from the tvbuff, starting at the offset,
4088 * as 1) are available in the tvbuff and 2) will fit in the buffer, leaving
4089 * room for a terminating NUL.
4092 tvb_get_raw_bytes_as_string(tvbuff_t *tvb, const int offset, char *buffer, size_t bufsize)
4094 int len = 0;
4096 DISSECTOR_ASSERT(tvb && tvb->initialized);
4098 /* There must be room for the string and the terminating NUL. */
4099 DISSECTOR_ASSERT(bufsize > 0);
4101 DISSECTOR_ASSERT(bufsize - 1 < INT_MAX);
4103 len = tvb_captured_length_remaining(tvb, offset);
4104 if (len <= 0) {
4105 buffer[0] = '\0';
4106 return 0;
4108 if (len > (int)(bufsize - 1))
4109 len = (int)(bufsize - 1);
4111 /* Copy the string to buffer */
4112 tvb_memcpy(tvb, buffer, offset, len);
4113 buffer[len] = '\0';
4114 return len;
4117 bool
4118 tvb_ascii_isprint(tvbuff_t *tvb, const int offset, const int length)
4120 const uint8_t* buf = tvb_get_ptr(tvb, offset, length);
4121 unsigned abs_offset, abs_length = length;
4123 if (length == -1) {
4124 /* tvb_get_ptr has already checked for exceptions. */
4125 compute_offset_and_remaining(tvb, offset, &abs_offset, &abs_length);
4127 for (unsigned i = 0; i < abs_length; i++, buf++)
4128 if (!g_ascii_isprint(*buf))
4129 return false;
4131 return true;
4134 bool
4135 tvb_utf_8_isprint(tvbuff_t *tvb, const int offset, const int length)
4137 const uint8_t* buf = tvb_get_ptr(tvb, offset, length);
4138 unsigned abs_offset, abs_length = length;
4140 if (length == -1) {
4141 /* tvb_get_ptr has already checked for exceptions. */
4142 compute_offset_and_remaining(tvb, offset, &abs_offset, &abs_length);
4145 return isprint_utf8_string(buf, abs_length);
4148 bool
4149 tvb_ascii_isdigit(tvbuff_t *tvb, const int offset, const int length)
4151 const uint8_t* buf = tvb_get_ptr(tvb, offset, length);
4152 unsigned abs_offset, abs_length = length;
4154 if (length == -1) {
4155 /* tvb_get_ptr has already checked for exceptions. */
4156 compute_offset_and_remaining(tvb, offset, &abs_offset, &abs_length);
4158 for (unsigned i = 0; i < abs_length; i++, buf++)
4159 if (!g_ascii_isdigit(*buf))
4160 return false;
4162 return true;
4165 static ws_mempbrk_pattern pbrk_crlf;
4167 * Given a tvbuff, an offset into the tvbuff, and a length that starts
4168 * at that offset (which may be -1 for "all the way to the end of the
4169 * tvbuff"), find the end of the (putative) line that starts at the
4170 * specified offset in the tvbuff, going no further than the specified
4171 * length.
4173 * Return the length of the line (not counting the line terminator at
4174 * the end), or, if we don't find a line terminator:
4176 * if "desegment" is true, return -1;
4178 * if "desegment" is false, return the amount of data remaining in
4179 * the buffer.
4181 * If "next_offset" is not NULL, set "*next_offset" to the offset of the
4182 * character past the line terminator, or past the end of the buffer if
4183 * we don't find a line terminator. (It's not set if we return -1.)
4186 tvb_find_line_end(tvbuff_t *tvb, const int offset, int len, int *next_offset, const bool desegment)
4188 int eob_offset;
4189 int eol_offset;
4190 int linelen;
4191 unsigned char found_needle = 0;
4192 static bool compiled = false;
4194 DISSECTOR_ASSERT(tvb && tvb->initialized);
4196 if (len == -1) {
4197 len = _tvb_captured_length_remaining(tvb, offset);
4198 /* if offset is past the end of the tvbuff, len is now 0 */
4201 eob_offset = offset + len;
4203 if (!compiled) {
4204 ws_mempbrk_compile(&pbrk_crlf, "\r\n");
4205 compiled = true;
4209 * Look either for a CR or an LF.
4211 eol_offset = tvb_ws_mempbrk_pattern_uint8(tvb, offset, len, &pbrk_crlf, &found_needle);
4212 if (eol_offset == -1) {
4214 * No CR or LF - line is presumably continued in next packet.
4216 if (desegment) {
4218 * Tell our caller we saw no EOL, so they can
4219 * try to desegment and get the entire line
4220 * into one tvbuff.
4222 return -1;
4223 } else {
4225 * Pretend the line runs to the end of the tvbuff.
4227 linelen = eob_offset - offset;
4228 if (next_offset)
4229 *next_offset = eob_offset;
4231 } else {
4233 * Find the number of bytes between the starting offset
4234 * and the CR or LF.
4236 linelen = eol_offset - offset;
4239 * Is it a CR?
4241 if (found_needle == '\r') {
4243 * Yes - is it followed by an LF?
4245 if (eol_offset + 1 >= eob_offset) {
4247 * Dunno - the next byte isn't in this
4248 * tvbuff.
4250 if (desegment) {
4252 * We'll return -1, although that
4253 * runs the risk that if the line
4254 * really *is* terminated with a CR,
4255 * we won't properly dissect this
4256 * tvbuff.
4258 * It's probably more likely that
4259 * the line ends with CR-LF than
4260 * that it ends with CR by itself.
4262 return -1;
4264 } else {
4266 * Well, we can at least look at the next
4267 * byte.
4269 if (tvb_get_uint8(tvb, eol_offset + 1) == '\n') {
4271 * It's an LF; skip over the CR.
4273 eol_offset++;
4279 * Return the offset of the character after the last
4280 * character in the line, skipping over the last character
4281 * in the line terminator.
4283 if (next_offset)
4284 *next_offset = eol_offset + 1;
4286 return linelen;
4289 static ws_mempbrk_pattern pbrk_crlf_dquote;
4291 * Given a tvbuff, an offset into the tvbuff, and a length that starts
4292 * at that offset (which may be -1 for "all the way to the end of the
4293 * tvbuff"), find the end of the (putative) line that starts at the
4294 * specified offset in the tvbuff, going no further than the specified
4295 * length.
4297 * However, treat quoted strings inside the buffer specially - don't
4298 * treat newlines in quoted strings as line terminators.
4300 * Return the length of the line (not counting the line terminator at
4301 * the end), or the amount of data remaining in the buffer if we don't
4302 * find a line terminator.
4304 * If "next_offset" is not NULL, set "*next_offset" to the offset of the
4305 * character past the line terminator, or past the end of the buffer if
4306 * we don't find a line terminator.
4309 tvb_find_line_end_unquoted(tvbuff_t *tvb, const int offset, int len, int *next_offset)
4311 int cur_offset, char_offset;
4312 bool is_quoted;
4313 unsigned char c = 0;
4314 int eob_offset;
4315 int linelen;
4316 static bool compiled = false;
4318 DISSECTOR_ASSERT(tvb && tvb->initialized);
4320 if (len == -1)
4321 len = _tvb_captured_length_remaining(tvb, offset);
4323 if (!compiled) {
4324 ws_mempbrk_compile(&pbrk_crlf_dquote, "\r\n\"");
4325 compiled = true;
4329 * XXX - what if "len" is still -1, meaning "offset is past the
4330 * end of the tvbuff"?
4332 eob_offset = offset + len;
4334 cur_offset = offset;
4335 is_quoted = false;
4336 for (;;) {
4338 * Is this part of the string quoted?
4340 if (is_quoted) {
4342 * Yes - look only for the terminating quote.
4344 char_offset = tvb_find_uint8(tvb, cur_offset, len,
4345 '"');
4346 } else {
4348 * Look either for a CR, an LF, or a '"'.
4350 char_offset = tvb_ws_mempbrk_pattern_uint8(tvb, cur_offset, len, &pbrk_crlf_dquote, &c);
4352 if (char_offset == -1) {
4354 * Not found - line is presumably continued in
4355 * next packet.
4356 * We pretend the line runs to the end of the tvbuff.
4358 linelen = eob_offset - offset;
4359 if (next_offset)
4360 *next_offset = eob_offset;
4361 break;
4364 if (is_quoted) {
4366 * We're processing a quoted string.
4367 * We only looked for ", so we know it's a ";
4368 * as we're processing a quoted string, it's a
4369 * closing quote.
4371 is_quoted = false;
4372 } else {
4374 * OK, what is it?
4376 if (c == '"') {
4378 * Un-quoted "; it begins a quoted
4379 * string.
4381 is_quoted = true;
4382 } else {
4384 * It's a CR or LF; we've found a line
4385 * terminator.
4387 * Find the number of bytes between the
4388 * starting offset and the CR or LF.
4390 linelen = char_offset - offset;
4393 * Is it a CR?
4395 if (c == '\r') {
4397 * Yes; is it followed by an LF?
4399 if (char_offset + 1 < eob_offset &&
4400 tvb_get_uint8(tvb, char_offset + 1)
4401 == '\n') {
4403 * Yes; skip over the CR.
4405 char_offset++;
4410 * Return the offset of the character after
4411 * the last character in the line, skipping
4412 * over the last character in the line
4413 * terminator, and quit.
4415 if (next_offset)
4416 *next_offset = char_offset + 1;
4417 break;
4422 * Step past the character we found.
4424 cur_offset = char_offset + 1;
4425 if (cur_offset >= eob_offset) {
4427 * The character we found was the last character
4428 * in the tvbuff - line is presumably continued in
4429 * next packet.
4430 * We pretend the line runs to the end of the tvbuff.
4432 linelen = eob_offset - offset;
4433 if (next_offset)
4434 *next_offset = eob_offset;
4435 break;
4438 return linelen;
4442 * Copied from the mgcp dissector. (This function should be moved to /epan )
4443 * tvb_skip_wsp - Returns the position in tvb of the first non-whitespace
4444 * character following offset or offset + maxlength -1 whichever
4445 * is smaller.
4447 * Parameters:
4448 * tvb - The tvbuff in which we are skipping whitespace.
4449 * offset - The offset in tvb from which we begin trying to skip whitespace.
4450 * maxlength - The maximum distance from offset that we may try to skip
4451 * whitespace.
4453 * Returns: The position in tvb of the first non-whitespace
4454 * character following offset or offset + maxlength -1 whichever
4455 * is smaller.
4458 tvb_skip_wsp(tvbuff_t *tvb, const int offset, const int maxlength)
4460 int counter;
4461 int end, tvb_len;
4462 uint8_t tempchar;
4464 DISSECTOR_ASSERT(tvb && tvb->initialized);
4466 /* Get the length remaining */
4467 /*tvb_len = tvb_captured_length(tvb);*/
4468 tvb_len = tvb->length;
4470 end = offset + maxlength;
4471 if (end >= tvb_len)
4473 end = tvb_len;
4476 /* Skip past spaces, tabs, CRs and LFs until run out or meet something else */
4477 for (counter = offset;
4478 counter < end &&
4479 ((tempchar = tvb_get_uint8(tvb,counter)) == ' ' ||
4480 tempchar == '\t' || tempchar == '\r' || tempchar == '\n');
4481 counter++);
4483 return (counter);
4487 tvb_skip_wsp_return(tvbuff_t *tvb, const int offset)
4489 int counter;
4490 uint8_t tempchar;
4492 DISSECTOR_ASSERT(tvb && tvb->initialized);
4494 for (counter = offset; counter > 0 &&
4495 ((tempchar = tvb_get_uint8(tvb,counter)) == ' ' ||
4496 tempchar == '\t' || tempchar == '\n' || tempchar == '\r'); counter--);
4497 counter++;
4499 return (counter);
4503 tvb_skip_uint8(tvbuff_t *tvb, int offset, const int maxlength, const uint8_t ch)
4505 int end, tvb_len;
4507 DISSECTOR_ASSERT(tvb && tvb->initialized);
4509 /* Get the length remaining */
4510 /*tvb_len = tvb_captured_length(tvb);*/
4511 tvb_len = tvb->length;
4513 end = offset + maxlength;
4514 if (end >= tvb_len)
4515 end = tvb_len;
4517 while (offset < end) {
4518 uint8_t tempch = tvb_get_uint8(tvb, offset);
4520 if (tempch != ch)
4521 break;
4522 offset++;
4525 return offset;
4528 static ws_mempbrk_pattern pbrk_whitespace;
4530 int tvb_get_token_len(tvbuff_t *tvb, const int offset, int len, int *next_offset, const bool desegment)
4532 int eob_offset;
4533 int eot_offset;
4534 int tokenlen;
4535 unsigned char found_needle = 0;
4536 static bool compiled = false;
4538 DISSECTOR_ASSERT(tvb && tvb->initialized);
4540 if (len == -1) {
4541 len = _tvb_captured_length_remaining(tvb, offset);
4542 /* if offset is past the end of the tvbuff, len is now 0 */
4545 eob_offset = offset + len;
4547 if (!compiled) {
4548 ws_mempbrk_compile(&pbrk_whitespace, " \r\n");
4549 compiled = true;
4553 * Look either for a space, CR, or LF.
4555 eot_offset = tvb_ws_mempbrk_pattern_uint8(tvb, offset, len, &pbrk_whitespace, &found_needle);
4556 if (eot_offset == -1) {
4558 * No space, CR or LF - token is presumably continued in next packet.
4560 if (desegment) {
4562 * Tell our caller we saw no whitespace, so they can
4563 * try to desegment and get the entire line
4564 * into one tvbuff.
4566 return -1;
4568 else {
4570 * Pretend the token runs to the end of the tvbuff.
4572 tokenlen = eob_offset - offset;
4573 if (next_offset)
4574 *next_offset = eob_offset;
4577 else {
4579 * Find the number of bytes between the starting offset
4580 * and the space, CR or LF.
4582 tokenlen = eot_offset - offset;
4585 * Return the offset of the character after the last
4586 * character in the line, skipping over the last character
4587 * in the line terminator.
4589 if (next_offset)
4590 *next_offset = eot_offset + 1;
4592 return tokenlen;
4596 * Format a bunch of data from a tvbuff as bytes, returning a pointer
4597 * to the string with the formatted data, with "punct" as a byte
4598 * separator.
4600 char *
4601 tvb_bytes_to_str_punct(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int len, const char punct)
4603 DISSECTOR_ASSERT(len > 0);
4604 return bytes_to_str_punct(scope, ensure_contiguous(tvb, offset, len), len, punct);
4608 * Given a wmem scope, a tvbuff, an offset, a length, an input digit
4609 * set, and a boolean indicator, fetch BCD-encoded digits from a
4610 * tvbuff starting from either the low or high half byte of the
4611 * first byte depending on the boolean indicator (true means "start
4612 * with the high half byte, ignoring the low half byte", and false
4613 * means "start with the low half byte and proceed to the high half
4614 * byte), formating the digits into characters according to the
4615 * input digit set, and return a pointer to a UTF-8 string, allocated
4616 * using the wmem scope. A nibble of 0xf is considered a 'filler'
4617 * and will end the conversion. Similarly if odd is set the last
4618 * high nibble will be omitted. (Note that if both skip_first and
4619 * odd are true, then both the first and last semi-octet are skipped,
4620 * i.e. an even number of nibbles are considered.)
4622 char *
4623 tvb_get_bcd_string(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, int len, const dgt_set_t *dgt, bool skip_first, bool odd, bool bigendian)
4625 const uint8_t *ptr;
4626 int i = 0;
4627 char *digit_str;
4628 uint8_t octet, nibble;
4630 DISSECTOR_ASSERT(tvb && tvb->initialized);
4632 if (len == -1) {
4634 * Run to the end of the captured data.
4636 * XXX - captured, or total?
4638 /*length = tvb_captured_length(tvb);*/
4639 len = tvb->length;
4640 if (len < offset) {
4641 return (char *)"";
4643 len -= offset;
4646 ptr = ensure_contiguous(tvb, offset, len);
4649 * XXX - map illegal digits (digits that map to 0) to REPLACEMENT
4650 * CHARACTER, and have all the tables in epan/tvbuff.c use 0 rather
4651 * than '?'?
4653 digit_str = (char *)wmem_alloc(scope, len*2 + 1);
4655 while (len > 0) {
4656 octet = *ptr;
4657 if (!skip_first) {
4658 if (bigendian) {
4659 nibble = (octet >> 4) & 0x0f;
4660 } else {
4661 nibble = octet & 0x0f;
4663 if (nibble == 0x0f) {
4665 * Stop digit.
4667 break;
4669 digit_str[i] = dgt->out[nibble];
4670 i++;
4672 skip_first = false;
4675 * unpack second value in byte
4677 if (bigendian) {
4678 nibble = octet & 0x0f;
4679 } else {
4680 nibble = octet >> 4;
4683 if (nibble == 0x0f) {
4685 * This is the stop digit or a filler digit. Ignore
4686 * it.
4688 break;
4690 if ((len == 1) && (odd == true )){
4691 /* Last octet, skip last high nibble in case of odd number of digits */
4692 break;
4694 digit_str[i] = dgt->out[nibble];
4695 i++;
4697 ptr++;
4698 len--;
4700 digit_str[i] = '\0';
4701 return digit_str;
4704 /* XXXX Fix me - needs odd indicator added */
4705 const char *
4706 tvb_bcd_dig_to_str(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int len, const dgt_set_t *dgt, bool skip_first)
4708 if (!dgt)
4709 dgt = &Dgt0_9_bcd;
4711 return tvb_get_bcd_string(scope, tvb, offset, len, dgt, skip_first, false, false);
4714 const char *
4715 tvb_bcd_dig_to_str_be(wmem_allocator_t *scope, tvbuff_t *tvb, const int offset, const int len, const dgt_set_t *dgt, bool skip_first)
4717 if (!dgt)
4718 dgt = &Dgt0_9_bcd;
4720 return tvb_get_bcd_string(scope, tvb, offset, len, dgt, skip_first, false, true);
4724 * Format a bunch of data from a tvbuff as bytes, returning a pointer
4725 * to the string with the formatted data.
4727 char *tvb_bytes_to_str(wmem_allocator_t *allocator, tvbuff_t *tvb,
4728 const int offset, const int len)
4730 DISSECTOR_ASSERT(len > 0);
4731 return bytes_to_str(allocator, ensure_contiguous(tvb, offset, len), len);
4734 /* Find a needle tvbuff within a haystack tvbuff. */
4736 tvb_find_tvb(tvbuff_t *haystack_tvb, tvbuff_t *needle_tvb, const int haystack_offset)
4738 unsigned haystack_abs_offset = 0, haystack_abs_length = 0;
4739 const uint8_t *haystack_data;
4740 const uint8_t *needle_data;
4741 const unsigned needle_len = needle_tvb->length;
4742 const uint8_t *location;
4744 DISSECTOR_ASSERT(haystack_tvb && haystack_tvb->initialized);
4746 if (haystack_tvb->length < 1 || needle_tvb->length < 1) {
4747 return -1;
4750 /* Get pointers to the tvbuffs' data. */
4751 haystack_data = ensure_contiguous(haystack_tvb, 0, -1);
4752 needle_data = ensure_contiguous(needle_tvb, 0, -1);
4754 check_offset_length(haystack_tvb, haystack_offset, -1,
4755 &haystack_abs_offset, &haystack_abs_length);
4757 location = ws_memmem(haystack_data + haystack_abs_offset, haystack_abs_length,
4758 needle_data, needle_len);
4760 if (location) {
4761 return (int) (location - haystack_data);
4764 return -1;
4768 tvb_raw_offset(tvbuff_t *tvb)
4770 return ((tvb->raw_offset==-1) ? (tvb->raw_offset = tvb_offset_from_real_beginning(tvb)) : tvb->raw_offset);
4773 void
4774 tvb_set_fragment(tvbuff_t *tvb)
4776 tvb->flags |= TVBUFF_FRAGMENT;
4779 struct tvbuff *
4780 tvb_get_ds_tvb(tvbuff_t *tvb)
4782 return(tvb->ds_tvb);
4785 unsigned
4786 tvb_get_varint(tvbuff_t *tvb, unsigned offset, unsigned maxlen, uint64_t *value, const unsigned encoding)
4788 *value = 0;
4790 switch (encoding & ENC_VARINT_MASK) {
4791 case ENC_VARINT_PROTOBUF:
4793 unsigned i;
4794 uint64_t b; /* current byte */
4796 for (i = 0; ((i < FT_VARINT_MAX_LEN) && (i < maxlen)); ++i) {
4797 b = tvb_get_uint8(tvb, offset++);
4798 *value |= ((b & 0x7F) << (i * 7)); /* add lower 7 bits to val */
4800 if (b < 0x80) {
4801 /* end successfully becauseof last byte's msb(most significant bit) is zero */
4802 return i + 1;
4805 break;
4808 case ENC_VARINT_ZIGZAG:
4810 unsigned i;
4811 uint64_t b; /* current byte */
4813 for (i = 0; ((i < FT_VARINT_MAX_LEN) && (i < maxlen)); ++i) {
4814 b = tvb_get_uint8(tvb, offset++);
4815 *value |= ((b & 0x7F) << (i * 7)); /* add lower 7 bits to val */
4817 if (b < 0x80) {
4818 /* end successfully becauseof last byte's msb(most significant bit) is zero */
4819 *value = (*value >> 1) ^ ((*value & 1) ? -1 : 0);
4820 return i + 1;
4823 break;
4826 case ENC_VARINT_SDNV:
4828 /* Decodes similar to protobuf but in MSByte order */
4829 unsigned i;
4830 uint64_t b; /* current byte */
4832 for (i = 0; ((i < FT_VARINT_MAX_LEN) && (i < maxlen)); ++i) {
4833 b = tvb_get_uint8(tvb, offset++);
4834 if ((i == 9) && (*value >= UINT64_C(1)<<(64-7))) {
4835 // guaranteed overflow, not valid SDNV
4836 return 0;
4838 *value <<= 7;
4839 *value |= (b & 0x7F); /* add lower 7 bits to val */
4841 if (b < 0x80) {
4842 /* end successfully because of last byte's msb(most significant bit) is zero */
4843 return i + 1;
4846 break;
4849 case ENC_VARINT_QUIC:
4851 /* calculate variable length */
4852 *value = tvb_get_uint8(tvb, offset);
4853 switch((*value) >> 6) {
4854 case 0: /* 0b00 => 1 byte length (6 bits Usable) */
4855 (*value) &= 0x3F;
4856 return 1;
4857 case 1: /* 0b01 => 2 bytes length (14 bits Usable) */
4858 *value = tvb_get_ntohs(tvb, offset) & 0x3FFF;
4859 return 2;
4860 case 2: /* 0b10 => 4 bytes length (30 bits Usable) */
4861 *value = tvb_get_ntohl(tvb, offset) & 0x3FFFFFFF;
4862 return 4;
4863 case 3: /* 0b11 => 8 bytes length (62 bits Usable) */
4864 *value = tvb_get_ntoh64(tvb, offset) & UINT64_C(0x3FFFFFFFFFFFFFFF);
4865 return 8;
4866 default: /* No Possible */
4867 ws_assert_not_reached();
4868 break;
4870 break;
4873 default:
4874 DISSECTOR_ASSERT_NOT_REACHED();
4877 return 0; /* 10 bytes scanned, but no bytes' msb is zero */
4881 * Editor modelines - https://www.wireshark.org/tools/modelines.html
4883 * Local variables:
4884 * c-basic-offset: 8
4885 * tab-width: 8
4886 * indent-tabs-mode: t
4887 * End:
4889 * vi: set shiftwidth=8 tabstop=8 noexpandtab:
4890 * :indentSize=8:tabSize=8:noTabs=false: