update epan/dissectors/pidl/drsuapi/drsuapi.idl from samba
[wireshark-sm.git] / wsutil / str_util.c
blob7606c81c6f31efd6cb03cb873d877c3a44a00784
1 /* str_util.c
2 * String utility routines
4 * Wireshark - Network traffic analyzer
5 * By Gerald Combs <gerald@wireshark.org>
6 * Copyright 1998 Gerald Combs
8 * SPDX-License-Identifier: GPL-2.0-or-later
9 */
11 #define _GNU_SOURCE
12 #include "config.h"
13 #include "str_util.h"
15 #include <string.h>
16 #include <locale.h>
17 #include <math.h>
19 #include <ws_codepoints.h>
21 #include <wsutil/to_str.h>
/* Upper-case hexadecimal digits, indexed by nibble value (0-15). */
static const char hex[16] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
27 char *
28 wmem_strconcat(wmem_allocator_t *allocator, const char *first, ...)
30 size_t len;
31 va_list args;
32 char *s;
33 char *concat;
34 char *ptr;
36 if (!first)
37 return NULL;
39 len = 1 + strlen(first);
40 va_start(args, first);
41 while ((s = va_arg(args, char*))) {
42 len += strlen(s);
44 va_end(args);
46 ptr = concat = (char *)wmem_alloc(allocator, len);
48 ptr = g_stpcpy(ptr, first);
49 va_start(args, first);
50 while ((s = va_arg(args, char*))) {
51 ptr = g_stpcpy(ptr, s);
53 va_end(args);
55 return concat;
58 char *
59 wmem_strjoin(wmem_allocator_t *allocator,
60 const char *separator, const char *first, ...)
62 size_t len;
63 va_list args;
64 size_t separator_len;
65 char *s;
66 char *concat;
67 char *ptr;
69 if (!first)
70 return NULL;
72 if (separator == NULL) {
73 separator = "";
76 separator_len = strlen (separator);
78 len = 1 + strlen(first); /* + 1 for null byte */
79 va_start(args, first);
80 while ((s = va_arg(args, char*))) {
81 len += (separator_len + strlen(s));
83 va_end(args);
85 ptr = concat = (char *)wmem_alloc(allocator, len);
86 ptr = g_stpcpy(ptr, first);
87 va_start(args, first);
88 while ((s = va_arg(args, char*))) {
89 ptr = g_stpcpy(ptr, separator);
90 ptr = g_stpcpy(ptr, s);
92 va_end(args);
94 return concat;
98 char *
99 wmem_strjoinv(wmem_allocator_t *allocator,
100 const char *separator, char **str_array)
102 char *string = NULL;
104 ws_return_val_if(!str_array, NULL);
106 if (separator == NULL) {
107 separator = "";
110 if (str_array[0]) {
111 int i;
112 char *ptr;
113 size_t len, separator_len;
115 separator_len = strlen(separator);
117 /* Get first part of length. Plus one for null byte. */
118 len = 1 + strlen(str_array[0]);
119 /* Get the full length, including the separators. */
120 for (i = 1; str_array[i] != NULL; i++) {
121 len += separator_len;
122 len += strlen(str_array[i]);
125 /* Allocate and build the string. */
126 string = (char *)wmem_alloc(allocator, len);
127 ptr = g_stpcpy(string, str_array[0]);
128 for (i = 1; str_array[i] != NULL; i++) {
129 ptr = g_stpcpy(ptr, separator);
130 ptr = g_stpcpy(ptr, str_array[i]);
132 } else {
133 string = wmem_strdup(allocator, "");
136 return string;
140 char **
141 wmem_strsplit(wmem_allocator_t *allocator, const char *src,
142 const char *delimiter, int max_tokens)
144 char *splitted;
145 char *s;
146 unsigned tokens;
147 unsigned sep_len;
148 unsigned i;
149 char **vec;
151 if (!src || !delimiter || !delimiter[0])
152 return NULL;
154 /* An empty string results in an empty vector. */
155 if (!src[0]) {
156 vec = wmem_new0(allocator, char *);
157 return vec;
160 splitted = wmem_strdup(allocator, src);
161 sep_len = (unsigned)strlen(delimiter);
163 if (max_tokens < 1)
164 max_tokens = INT_MAX;
166 /* Calculate the number of fields. */
167 s = splitted;
168 tokens = 1;
169 while (tokens < (unsigned)max_tokens && (s = strstr(s, delimiter))) {
170 s += sep_len;
171 tokens++;
174 vec = wmem_alloc_array(allocator, char *, tokens + 1);
176 /* Populate the array of string tokens. */
177 s = splitted;
178 vec[0] = s;
179 tokens = 1;
180 while (tokens < (unsigned)max_tokens && (s = strstr(s, delimiter))) {
181 for (i = 0; i < sep_len; i++)
182 s[i] = '\0';
183 s += sep_len;
184 vec[tokens] = s;
185 tokens++;
189 vec[tokens] = NULL;
191 return vec;
195 * wmem_ascii_strdown:
196 * based on g_ascii_strdown.
198 char*
199 wmem_ascii_strdown(wmem_allocator_t *allocator, const char *str, ssize_t len)
201 char *result, *s;
203 g_return_val_if_fail (str != NULL, NULL);
205 if (len < 0)
206 len = strlen (str);
208 result = wmem_strndup(allocator, str, len);
209 for (s = result; *s; s++)
210 *s = g_ascii_tolower (*s);
212 return result;
/*
 * Convert one hexadecimal digit character (either case) to its value.
 * Returns 0-15, or -1 if 'ch' is not a hexadecimal digit.
 */
int
ws_xton(char ch)
{
    static const char lower_digits[16] = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
    };
    static const char upper_digits[16] = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    for (int value = 0; value < 16; value++) {
        if (ch == lower_digits[value] || ch == upper_digits[value]) {
            return value;
        }
    }
    return -1;
}
/* Convert all ASCII letters to lower case, in place.  Returns 'str'. */
char *
ascii_strdown_inplace(char *str)
{
    size_t i;

    /* Same result as g_ascii_tolower() on each character, done by hand. */
    for (i = 0; str[i] != '\0'; i++) {
        if (g_ascii_isupper (str[i])) {
            str[i] = str[i] - 'A' + 'a';
        }
    }

    return str;
}
/* Convert all ASCII letters to upper case, in place.  Returns 'str'. */
char *
ascii_strup_inplace(char *str)
{
    size_t i;

    /* Same result as g_ascii_toupper() on each character, done by hand. */
    for (i = 0; str[i] != '\0'; i++) {
        if (g_ascii_islower (str[i])) {
            str[i] = str[i] - 'a' + 'A';
        }
    }

    return str;
}
/* Check if an entire NUL-terminated string is printable ASCII. */
bool
isprint_string(const char *str)
{
    const char *cursor;

    /* Walk up to the terminating NUL, bailing out at the first offender. */
    for (cursor = str; *cursor != '\0'; cursor++) {
        if (!g_ascii_isprint(*cursor)) {
            return false;
        }
    }

    /* Every character (possibly none) was printable. */
    return true;
}
/*
 * Check if 'length' bytes at 'str' are valid UTF-8 made up solely of
 * printable characters.
 */
bool
isprint_utf8_string(const char *str, const unsigned length)
{
    const char *const limit = str + length;
    const char *cursor;

    if (!g_utf8_validate(str, length, NULL)) {
        return false;
    }

    for (cursor = str; cursor < limit; cursor = g_utf8_next_char(cursor)) {
        /* This returns false for G_UNICODE_CONTROL | G_UNICODE_FORMAT |
         * G_UNICODE_UNASSIGNED | G_UNICODE_SURROGATE
         * XXX: Could it be ok to have certain format characters, e.g.
         * U+00AD SOFT HYPHEN? If so, format_text() should be changed too.
         */
        if (!g_unichar_isprint(g_utf8_get_char(cursor))) {
            return false;
        }
    }

    return true;
}
/* Check if an entire NUL-terminated string consists of ASCII digits. */
bool
isdigit_string(const unsigned char *str)
{
    const unsigned char *cursor;

    /* Walk up to the terminating NUL, bailing out at the first non-digit. */
    for (cursor = str; *cursor != '\0'; cursor++) {
        if (!g_ascii_isdigit(*cursor)) {
            return false;
        }
    }

    /* Every character (possibly none) was a digit. */
    return true;
}
/*
 * Find the first occurrence of 'needle' in 'haystack', comparing ASCII
 * letters case-insensitively.  Returns a pointer into 'haystack', or
 * NULL if there is no match.
 */
const char *
ws_ascii_strcasestr(const char *haystack, const char *needle)
{
    /* Do not use strcasestr() here, even if a system has it, as it is
     * locale-dependent (and has different results for e.g. Turkic languages.)
     * FreeBSD, NetBSD, macOS have a strcasestr_l() that could be used.
     */
    const size_t hay_len = strlen(haystack);
    const size_t needle_len = strlen(needle);
    const char *candidate;
    const char *last_start;

    if (hay_len < needle_len) {
        return NULL;
    }

    /* The needle can start no later than this and still fit. */
    last_start = haystack + (hay_len - needle_len);

    for (candidate = haystack; candidate <= last_start; candidate++) {
        if (g_ascii_strncasecmp(candidate, needle, needle_len) == 0) {
            return candidate;
        }
    }
    return NULL;
}
/* Return the last occurrence of ch in the n bytes of haystack.
 * If not found or n is 0, return NULL. */
const uint8_t *
ws_memrchr(const void *_haystack, int ch, size_t n)
{
#ifdef HAVE_MEMRCHR
    return memrchr(_haystack, ch, n);
#else
    /* A generic implementation. This could be optimized considerably,
     * e.g. by fetching a word at a time.
     */
    const uint8_t *haystack = _haystack;
    const uint8_t c = (uint8_t)ch;

    /*
     * Count down with an index rather than a pointer: a pointer loop of
     * the form "p >= haystack; --p" has to step to one *before* the
     * buffer to terminate, which is undefined behavior in C.  This form
     * also handles n == 0 without a separate check.
     */
    for (size_t i = n; i > 0; i--) {
        if (haystack[i - 1] == c) {
            return &haystack[i - 1];
        }
    }

    return NULL;
#endif /* HAVE_MEMRCHR */
}
374 #define FORMAT_SIZE_UNIT_MASK 0x00ff
375 #define FORMAT_SIZE_PFX_MASK 0xff00
377 static const char *thousands_grouping_fmt;
378 static const char *thousands_grouping_fmt_flt;
380 DIAG_OFF(format)
381 static void test_printf_thousands_grouping(void) {
382 /* test whether wmem_strbuf works with "'" flag character */
383 wmem_strbuf_t *buf = wmem_strbuf_new(NULL, NULL);
384 wmem_strbuf_append_printf(buf, "%'d", 22);
385 if (g_strcmp0(wmem_strbuf_get_str(buf), "22") == 0) {
386 thousands_grouping_fmt = "%'"PRId64;
387 thousands_grouping_fmt_flt = "%'.*f";
388 } else {
389 /* Don't use */
390 thousands_grouping_fmt = "%"PRId64;
391 thousands_grouping_fmt_flt = "%.*f";
393 wmem_strbuf_destroy(buf);
395 DIAG_ON(format)
397 static const char* decimal_point = NULL;
399 static void truncate_numeric_strbuf(wmem_strbuf_t *strbuf, int n) {
401 const char *s = wmem_strbuf_get_str(strbuf);
402 char *p;
403 int count;
405 if (decimal_point == NULL) {
406 decimal_point = localeconv()->decimal_point;
409 p = strchr(s, decimal_point[0]);
410 if (p != NULL) {
411 count = n;
412 while (count >= 0) {
413 count--;
414 if (*p == '\0')
415 break;
416 p++;
419 p--;
420 while (*p == '0') {
421 p--;
424 if (*p != decimal_point[0]) {
425 p++;
427 wmem_strbuf_truncate(strbuf, p - s);
431 /* Given a floating point value, return it in a human-readable format,
432 * using units with metric prefixes (falling back to scientific notation
433 * with the base units if outside the range.)
435 char *
436 format_units(wmem_allocator_t *allocator, double size,
437 format_size_units_e unit, uint16_t flags,
438 int precision)
440 wmem_strbuf_t *human_str = wmem_strbuf_new(allocator, NULL);
441 double power = 1000.0;
442 int pfx_off = 6;
443 bool is_small = false;
444 /* is_small is when to use the longer, spelled out unit.
445 * We use it for inf, NaN, 0, and unprefixed small values,
446 * but not for unprefixed values using scientific notation
447 * the value is outside the supported prefix range.
449 bool scientific = false;
450 double abs_size = fabs(size);
451 int exponent = 0;
452 static const char * const si_prefix[] = {" a", " f", " p", " n", " μ", " m", " ", " k", " M", " G", " T", " P", " E"};
453 static const char * const iec_prefix[] = {" ", " Ki", " Mi", " Gi", " Ti", " Pi", " Ei"};
454 const char * const *prefix = si_prefix;
455 int max_exp = (int)G_N_ELEMENTS(si_prefix) - 1;
457 char *ret_val;
459 if (thousands_grouping_fmt == NULL)
460 test_printf_thousands_grouping();
462 if (flags & FORMAT_SIZE_PREFIX_IEC) {
463 prefix = iec_prefix;
464 max_exp = (int)G_N_ELEMENTS(iec_prefix) - 1;
465 power = 1024.0;
468 if (isfinite(size) && size != 0.0) {
470 double comp = precision == 0 ? 10.0 : 1.0;
472 /* For precision 0, use the range [10, 10*power) because only
473 * one significant digit is not as useful. This is what format_size
474 * does for integers. ("ls -h" uses one digit after the decimal
475 * point only for the [1, 10) range, g_format_size() always displays
476 * tenths.) Prefer non-prefixed units for the range [1,10), though.
478 * We have a limited number of units to check, so this (which
479 * can be unrolled) is presumably faster than log + floor + pow/exp
481 if (abs_size < 1.0) {
482 while (abs_size < comp) {
483 abs_size *= power;
484 exponent--;
485 if ((exponent + pfx_off) < 0) {
486 scientific = true;
487 break;
490 } else {
491 while (abs_size >= comp*power) {
492 abs_size *= 1/power;
493 exponent++;
494 if ((exponent + pfx_off) > max_exp) {
495 scientific = true;
496 break;
502 if (scientific) {
503 wmem_strbuf_append_printf(human_str, "%.*g", precision + 1, size);
504 exponent = 0;
505 } else {
506 if (exponent == 0) {
507 is_small = true;
509 size = copysign(abs_size, size);
510 // Truncate trailing zeros, but do it this way because we know
511 // we don't want scientific notation, and we don't want %g to
512 // switch to that if precision is small. (We could always use
513 // %g when precision is large.)
514 wmem_strbuf_append_printf(human_str, thousands_grouping_fmt_flt, precision, size);
515 truncate_numeric_strbuf(human_str, precision);
516 // XXX - when rounding to a certain precision, printf might
517 // round up to "power" from something like 999.99999995, which
518 // looks a little odd on a graph when transitioning from 1,000 bytes
519 // (for values just under 1 kB) to 1 kB (for values 1 kB and larger.)
520 // Due to edge cases in binary fp representation and how printf might
521 // round things, the right way to handle it is taking the printf output
522 // and comparing it to "1000" and "1024" and adjusting the exponent
523 // if so - though we need to compare to the version with the thousands
524 // separator if we have that (which makes it harder to use strnatcmp
525 // as is.)
528 if ((size_t)(pfx_off + exponent) < G_N_ELEMENTS(si_prefix)) {
529 wmem_strbuf_append(human_str, prefix[pfx_off+exponent]);
532 switch (unit) {
533 case FORMAT_SIZE_UNIT_NONE:
534 break;
535 case FORMAT_SIZE_UNIT_BYTES:
536 wmem_strbuf_append(human_str, is_small ? "bytes" : "B");
537 break;
538 case FORMAT_SIZE_UNIT_BITS:
539 wmem_strbuf_append(human_str, is_small ? "bits" : "b");
540 break;
541 case FORMAT_SIZE_UNIT_BITS_S:
542 wmem_strbuf_append(human_str, is_small ? "bits/s" : "bps");
543 break;
544 case FORMAT_SIZE_UNIT_BYTES_S:
545 wmem_strbuf_append(human_str, is_small ? "bytes/s" : "Bps");
546 break;
547 case FORMAT_SIZE_UNIT_PACKETS:
548 wmem_strbuf_append(human_str, is_small ? "packets" : "pkts");
549 break;
550 case FORMAT_SIZE_UNIT_PACKETS_S:
551 wmem_strbuf_append(human_str, is_small ? "packets/s" : "pkts/s");
552 break;
553 case FORMAT_SIZE_UNIT_EVENTS:
554 wmem_strbuf_append(human_str, is_small ? "events" : "evts");
555 break;
556 case FORMAT_SIZE_UNIT_EVENTS_S:
557 wmem_strbuf_append(human_str, is_small ? "events/s" : "evts/s");
558 break;
559 case FORMAT_SIZE_UNIT_FIELDS:
560 wmem_strbuf_append(human_str, is_small ? "fields" : "flds");
561 break;
562 case FORMAT_SIZE_UNIT_SECONDS:
563 wmem_strbuf_append(human_str, is_small ? "seconds" : "s");
564 break;
565 case FORMAT_SIZE_UNIT_ERLANGS:
566 wmem_strbuf_append(human_str, is_small ? "erlangs" : "E");
567 break;
568 default:
569 ws_assert_not_reached();
572 ret_val = wmem_strbuf_finalize(human_str);
573 /* Convention is a space between the value and the units. If we have
574 * a prefix, the space is before the prefix. There are two possible
575 * uses of FORMAT_SIZE_UNIT_NONE:
576 * 1. Add a unit immediately after the string returned. In this case,
577 * we would want the string to end with a space if there's no prefix.
578 * 2. The unit appears somewhere else, e.g. in a legend, header, or
579 * different column. In this case, we don't want the string to end
580 * with a space if there's no prefix.
581 * chomping the string here, as we've traditionally done, optimizes for
582 * the latter case but makes the former case harder.
583 * Perhaps the right approach is to distinguish the cases with a new
584 * enum value.
586 return g_strchomp(ret_val);
589 /* Given a size, return its value in a human-readable format */
590 /* This doesn't handle fractional values. We might want to just
591 * call the version with the double and precision 0 (possibly
592 * slower due to the use of floating point math, but do we care?)
594 char *
595 format_size_wmem(wmem_allocator_t *allocator, int64_t size,
596 format_size_units_e unit, uint16_t flags)
598 wmem_strbuf_t *human_str = wmem_strbuf_new(allocator, NULL);
599 int power = 1000;
600 int pfx_off = 0;
601 bool is_small = false;
602 static const char *prefix[] = {" T", " G", " M", " k", " Ti", " Gi", " Mi", " Ki"};
603 char *ret_val;
605 if (thousands_grouping_fmt == NULL)
606 test_printf_thousands_grouping();
608 if (flags & FORMAT_SIZE_PREFIX_IEC) {
609 pfx_off = 4;
610 power = 1024;
613 if (size / power / power / power / power >= 10) {
614 wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / power / power / power / power);
615 wmem_strbuf_append(human_str, prefix[pfx_off]);
616 } else if (size / power / power / power >= 10) {
617 wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / power / power / power);
618 wmem_strbuf_append(human_str, prefix[pfx_off+1]);
619 } else if (size / power / power >= 10) {
620 wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / power / power);
621 wmem_strbuf_append(human_str, prefix[pfx_off+2]);
622 } else if (size / power >= 10) {
623 wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / power);
624 wmem_strbuf_append(human_str, prefix[pfx_off+3]);
625 } else {
626 wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size);
627 is_small = true;
630 switch (unit) {
631 case FORMAT_SIZE_UNIT_NONE:
632 break;
633 case FORMAT_SIZE_UNIT_BYTES:
634 wmem_strbuf_append(human_str, is_small ? " bytes" : "B");
635 break;
636 case FORMAT_SIZE_UNIT_BITS:
637 wmem_strbuf_append(human_str, is_small ? " bits" : "b");
638 break;
639 case FORMAT_SIZE_UNIT_BITS_S:
640 wmem_strbuf_append(human_str, is_small ? " bits/s" : "bps");
641 break;
642 case FORMAT_SIZE_UNIT_BYTES_S:
643 wmem_strbuf_append(human_str, is_small ? " bytes/s" : "Bps");
644 break;
645 case FORMAT_SIZE_UNIT_PACKETS:
646 wmem_strbuf_append(human_str, is_small ? " packets" : "packets");
647 break;
648 case FORMAT_SIZE_UNIT_PACKETS_S:
649 wmem_strbuf_append(human_str, is_small ? " packets/s" : "packets/s");
650 break;
651 case FORMAT_SIZE_UNIT_FIELDS:
652 wmem_strbuf_append(human_str, is_small ? " fields" : "fields");
653 break;
654 /* These aren't that practical to use with integers, but
655 * perhaps better than asserting.
657 case FORMAT_SIZE_UNIT_SECONDS:
658 wmem_strbuf_append(human_str, is_small ? " seconds" : "s");
659 break;
660 case FORMAT_SIZE_UNIT_ERLANGS:
661 wmem_strbuf_append(human_str, is_small ? " erlangs" : "E");
662 break;
663 default:
664 ws_assert_not_reached();
667 ret_val = wmem_strbuf_finalize(human_str);
668 return g_strchomp(ret_val);
/* Return 'c' if it is printable ASCII, otherwise a period. */
char
printable_char_or_period(char c)
{
    if (g_ascii_isprint(c)) {
        return c;
    }
    return '.';
}
/*
 * This is used by the display filter engine and must be compatible
 * with display filter syntax.
 *
 * If 'c' needs a backslash escape, store the character that follows
 * the backslash in *p and return true; otherwise return false and
 * leave *p alone.
 */
static inline bool
escape_char(char c, char *p)
{
    char letter;

    ws_assert(p);

    /*
     * backslashes and double-quotes must be escaped (double-quotes
     * are escaped by passing '"' as quote_char in escape_string_len)
     * whitespace is also escaped.
     */
    switch (c) {
        case '\a': letter = 'a';  break;
        case '\b': letter = 'b';  break;
        case '\f': letter = 'f';  break;
        case '\n': letter = 'n';  break;
        case '\r': letter = 'r';  break;
        case '\t': letter = 't';  break;
        case '\v': letter = 'v';  break;
        case '\\': letter = '\\'; break;
        case '\0': letter = '0';  break;
        default:
            return false;
    }

    *p = letter;
    return true;
}
/*
 * Escape callback that only escapes NUL: stores '0' in *p and returns
 * true for '\0', returns false for every other character.
 */
static inline bool
escape_null(char c, char *p)
{
    ws_assert(p);

    if (c != '\0') {
        return false;
    }
    *p = '0';
    return true;
}
722 static char *
723 escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len,
724 bool (*escape_func)(char c, char *p), bool add_quotes,
725 char quote_char, bool double_quote)
727 char c, r;
728 wmem_strbuf_t *buf;
729 size_t alloc_size;
730 ssize_t i;
732 if (len < 0)
733 len = strlen(string);
735 alloc_size = len;
736 if (add_quotes)
737 alloc_size += 2;
739 buf = wmem_strbuf_new_sized(alloc, alloc_size);
741 if (add_quotes && quote_char != '\0')
742 wmem_strbuf_append_c(buf, quote_char);
744 for (i = 0; i < len; i++) {
745 c = string[i];
746 if ((escape_func(c, &r))) {
747 wmem_strbuf_append_c(buf, '\\');
748 wmem_strbuf_append_c(buf, r);
750 else if (c == quote_char && quote_char != '\0') {
751 /* If quoting, we must escape the quote_char somehow. */
752 if (double_quote) {
753 wmem_strbuf_append_c(buf, c);
754 wmem_strbuf_append_c(buf, c);
755 } else {
756 wmem_strbuf_append_c(buf, '\\');
757 wmem_strbuf_append_c(buf, c);
760 else if (c == '\\' && quote_char != '\0' && !double_quote) {
761 /* If quoting, and escaping the quote_char with a backslash,
762 * then backslash must be escaped, even if escape_func doesn't. */
763 wmem_strbuf_append_c(buf, '\\');
764 wmem_strbuf_append_c(buf, '\\');
766 else {
767 /* Other UTF-8 bytes are passed through. */
768 wmem_strbuf_append_c(buf, c);
772 if (add_quotes && quote_char != '\0')
773 wmem_strbuf_append_c(buf, quote_char);
775 return wmem_strbuf_finalize(buf);
778 char *
779 ws_escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len, bool add_quotes)
781 return escape_string_len(alloc, string, len, escape_char, add_quotes, '"', false);
784 char *
785 ws_escape_string(wmem_allocator_t *alloc, const char *string, bool add_quotes)
787 return escape_string_len(alloc, string, -1, escape_char, add_quotes, '"', false);
790 char *ws_escape_null(wmem_allocator_t *alloc, const char *string, size_t len, bool add_quotes)
792 /* XXX: The existing behavior (maintained) here is not to escape
793 * backslashes even though NUL is escaped.
795 return escape_string_len(alloc, string, len, escape_null, add_quotes, add_quotes ? '"' : '\0', false);
798 char *ws_escape_csv(wmem_allocator_t *alloc, const char *string, bool add_quotes, char quote_char, bool double_quote, bool escape_whitespace)
800 if (escape_whitespace)
801 return escape_string_len(alloc, string, -1, escape_char, add_quotes, quote_char, double_quote);
802 else
803 return escape_string_len(alloc, string, -1, escape_null, add_quotes, quote_char, double_quote);
/*
 * Write a name for 'errnum' into 'buf' (at most 'buf_size' bytes) and
 * return 'buf'.  Where strerrorname_np() is available and knows the
 * value its answer is used; otherwise the text is "Errno(<number>)".
 */
const char *
ws_strerrorname_r(int errnum, char *buf, size_t buf_size)
{
#ifdef HAVE_STRERRORNAME_NP
    const char *name = strerrorname_np(errnum);

    if (name != NULL) {
        (void)g_strlcpy(buf, name, buf_size);
        return buf;
    }
#endif
    /* No symbolic name: fall back to the numeric form. */
    snprintf(buf, buf_size, "Errno(%d)", errnum);
    return buf;
}
820 char *
821 ws_strdup_underline(wmem_allocator_t *allocator, long offset, size_t len)
823 if (offset < 0)
824 return NULL;
826 wmem_strbuf_t *buf = wmem_strbuf_new_sized(allocator, offset + len);
828 for (int i = 0; i < offset; i++) {
829 wmem_strbuf_append_c(buf, ' ');
831 wmem_strbuf_append_c(buf, '^');
833 for (size_t l = len; l > 1; l--) {
834 wmem_strbuf_append_c(buf, '~');
837 return wmem_strbuf_finalize(buf);
#define INITIAL_FMTBUF_SIZE 128

/*
 * Declare, and initialize, the variables used for an output buffer.
 * Expects a wmem allocator named "allocator" to be in scope.
 */
#define FMTBUF_VARS \
    char *fmtbuf = (char*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE); \
    unsigned fmtbuf_len = INITIAL_FMTBUF_SIZE; \
    unsigned column = 0

/*
 * Expand the buffer to be large enough to add nbytes bytes, plus a
 * terminating '\0'.
 *
 * Like the other statement macros below, this is wrapped in
 * do { } while (0) so that it behaves as a single statement.
 */
#define FMTBUF_EXPAND(nbytes) \
    do { \
        /* \
         * Is there enough room for those bytes and also enough room for \
         * a terminating '\0'? \
         */ \
        if (column + ((nbytes) + 1) >= fmtbuf_len) { \
            /* \
             * Double the buffer's size if it's not big enough. \
             * The size of the buffer starts at 128, so doubling its size \
             * adds at least another 128 bytes, which is more than enough \
             * for one more character plus a terminating '\0'. \
             */ \
            fmtbuf_len *= 2; \
            fmtbuf = (char *)wmem_realloc(allocator, fmtbuf, fmtbuf_len); \
        } \
    } while (0)

/*
 * Put a byte into the buffer; space must have been ensured for it.
 */
#define FMTBUF_PUTCHAR(b) \
    do { \
        fmtbuf[column] = (b); \
        column++; \
    } while (0)

/*
 * Add the one-byte argument, as an octal escape sequence, to the end
 * of the buffer.  Only the three octal digits are written; the caller
 * supplies the leading backslash.
 */
#define FMTBUF_PUTBYTE_OCTAL(b) \
    do { \
        FMTBUF_PUTCHAR((((b)>>6)&03) + '0'); \
        FMTBUF_PUTCHAR((((b)>>3)&07) + '0'); \
        FMTBUF_PUTCHAR((((b)>>0)&07) + '0'); \
    } while (0)

/*
 * Add the one-byte argument, as a hex escape sequence, to the end
 * of the buffer.
 */
#define FMTBUF_PUTBYTE_HEX(b) \
    do { \
        FMTBUF_PUTCHAR('\\'); \
        FMTBUF_PUTCHAR('x'); \
        FMTBUF_PUTCHAR(hex[((b) >> 4) & 0xF]); \
        FMTBUF_PUTCHAR(hex[((b) >> 0) & 0xF]); \
    } while (0)

/*
 * Put the trailing '\0' at the end of the buffer.
 */
#define FMTBUF_ENDSTR \
    fmtbuf[column] = '\0'
902 static char *
903 format_text_internal(wmem_allocator_t *allocator,
904 const unsigned char *string, size_t len,
905 bool replace_space)
907 FMTBUF_VARS;
908 const unsigned char *stringend = string + len;
909 unsigned char c;
911 while (string < stringend) {
913 * Get the first byte of this character.
915 c = *string++;
916 if (g_ascii_isprint(c)) {
918 * Printable ASCII, so not part of a multi-byte UTF-8 sequence.
919 * Make sure there's enough room for one more byte, and add
920 * the character.
922 FMTBUF_EXPAND(1);
923 FMTBUF_PUTCHAR(c);
924 } else if (replace_space && g_ascii_isspace(c)) {
926 * ASCII, so not part of a multi-byte UTF-8 sequence, but
927 * not printable, but is a space character; show it as a
928 * blank.
930 * Make sure there's enough room for one more byte, and add
931 * the blank.
933 FMTBUF_EXPAND(1);
934 FMTBUF_PUTCHAR(' ');
935 } else if (c < 128) {
937 * ASCII, so not part of a multi-byte UTF-8 sequence, but not
938 * printable.
940 * That requires a minimum of 2 bytes, one for the backslash
941 * and one for a letter, so make sure we have enough room
942 * for that, plus a trailing '\0'.
944 FMTBUF_EXPAND(2);
945 FMTBUF_PUTCHAR('\\');
946 switch (c) {
948 case '\a':
949 FMTBUF_PUTCHAR('a');
950 break;
952 case '\b':
953 FMTBUF_PUTCHAR('b'); /* BS */
954 break;
956 case '\f':
957 FMTBUF_PUTCHAR('f'); /* FF */
958 break;
960 case '\n':
961 FMTBUF_PUTCHAR('n'); /* NL */
962 break;
964 case '\r':
965 FMTBUF_PUTCHAR('r'); /* CR */
966 break;
968 case '\t':
969 FMTBUF_PUTCHAR('t'); /* tab */
970 break;
972 case '\v':
973 FMTBUF_PUTCHAR('v');
974 break;
976 default:
978 * We've already put the backslash, but this
979 * will put 3 more characters for the octal
980 * number; make sure we have enough room for
981 * that, plus the trailing '\0'.
983 FMTBUF_EXPAND(3);
984 FMTBUF_PUTBYTE_OCTAL(c);
985 break;
987 } else {
989 * We've fetched the first byte of a multi-byte UTF-8
990 * sequence into c.
992 int utf8_len;
993 unsigned char mask;
994 gunichar uc;
995 unsigned char first;
997 if ((c & 0xe0) == 0xc0) {
998 /* Starts a 2-byte UTF-8 sequence; 1 byte left */
999 utf8_len = 1;
1000 mask = 0x1f;
1001 } else if ((c & 0xf0) == 0xe0) {
1002 /* Starts a 3-byte UTF-8 sequence; 2 bytes left */
1003 utf8_len = 2;
1004 mask = 0x0f;
1005 } else if ((c & 0xf8) == 0xf0) {
1006 /* Starts a 4-byte UTF-8 sequence; 3 bytes left */
1007 utf8_len = 3;
1008 mask = 0x07;
1009 } else if ((c & 0xfc) == 0xf8) {
1010 /* Starts an old-style 5-byte UTF-8 sequence; 4 bytes left */
1011 utf8_len = 4;
1012 mask = 0x03;
1013 } else if ((c & 0xfe) == 0xfc) {
1014 /* Starts an old-style 6-byte UTF-8 sequence; 5 bytes left */
1015 utf8_len = 5;
1016 mask = 0x01;
1017 } else {
1018 /* 0xfe or 0xff or a continuation byte - not valid */
1019 utf8_len = -1;
1021 if (utf8_len > 0) {
1022 /* Try to construct the Unicode character */
1023 uc = c & mask;
1024 for (int i = 0; i < utf8_len; i++) {
1025 if (string >= stringend) {
1027 * Ran out of octets, so the character is
1028 * incomplete. Put in a REPLACEMENT CHARACTER
1029 * instead, and then continue the loop, which
1030 * will terminate.
1032 uc = UNICODE_REPLACEMENT_CHARACTER;
1033 break;
1035 c = *string;
1036 if ((c & 0xc0) != 0x80) {
1038 * Not valid UTF-8 continuation character; put in
1039 * a replacement character, and then re-process
1040 * this octet as the beginning of a new character.
1042 uc = UNICODE_REPLACEMENT_CHARACTER;
1043 break;
1045 string++;
1046 uc = (uc << 6) | (c & 0x3f);
1050 * If this isn't a valid Unicode character, put in
1051 * a REPLACEMENT CHARACTER.
1053 if (!g_unichar_validate(uc))
1054 uc = UNICODE_REPLACEMENT_CHARACTER;
1055 } else {
1056 /* 0xfe or 0xff; put it a REPLACEMENT CHARACTER */
1057 uc = UNICODE_REPLACEMENT_CHARACTER;
1061 * OK, is it a printable Unicode character?
1063 if (g_unichar_isprint(uc)) {
1065 * Yes - put it into the string as UTF-8.
1066 * This means that if it was an overlong
1067 * encoding, this will put out the right
1068 * sized encoding.
1070 if (uc < 0x80) {
1071 first = 0;
1072 utf8_len = 1;
1073 } else if (uc < 0x800) {
1074 first = 0xc0;
1075 utf8_len = 2;
1076 } else if (uc < 0x10000) {
1077 first = 0xe0;
1078 utf8_len = 3;
1079 } else if (uc < 0x200000) {
1080 first = 0xf0;
1081 utf8_len = 4;
1082 } else if (uc < 0x4000000) {
1084 * This should never happen, as Unicode doesn't
1085 * go that high.
1087 first = 0xf8;
1088 utf8_len = 5;
1089 } else {
1091 * This should never happen, as Unicode doesn't
1092 * go that high.
1094 first = 0xfc;
1095 utf8_len = 6;
1097 FMTBUF_EXPAND(utf8_len);
1098 for (int i = utf8_len - 1; i > 0; i--) {
1099 fmtbuf[column + i] = (uc & 0x3f) | 0x80;
1100 uc >>= 6;
1102 fmtbuf[column] = uc | first;
1103 column += utf8_len;
1104 } else if (replace_space && g_unichar_isspace(uc)) {
1106 * Not printable, but is a space character; show it
1107 * as a blank.
1109 * Make sure there's enough room for one more byte,
1110 * and add the blank.
1112 FMTBUF_EXPAND(1);
1113 FMTBUF_PUTCHAR(' ');
1114 } else if (c < 128) {
1116 * ASCII, but not printable.
1117 * Yes, this could happen with an overlong encoding.
1119 * That requires a minimum of 2 bytes, one for the
1120 * backslash and one for a letter, so make sure we
1121 * have enough room for that, plus a trailing '\0'.
1123 FMTBUF_EXPAND(2);
1124 FMTBUF_PUTCHAR('\\');
1125 switch (c) {
1127 case '\a':
1128 FMTBUF_PUTCHAR('a');
1129 break;
1131 case '\b':
1132 FMTBUF_PUTCHAR('b'); /* BS */
1133 break;
1135 case '\f':
1136 FMTBUF_PUTCHAR('f'); /* FF */
1137 break;
1139 case '\n':
1140 FMTBUF_PUTCHAR('n'); /* NL */
1141 break;
1143 case '\r':
1144 FMTBUF_PUTCHAR('r'); /* CR */
1145 break;
1147 case '\t':
1148 FMTBUF_PUTCHAR('t'); /* tab */
1149 break;
1151 case '\v':
1152 FMTBUF_PUTCHAR('v');
1153 break;
1155 default:
1157 * We've already put the backslash, but this
1158 * will put 3 more characters for the octal
1159 * number; make sure we have enough room for
1160 * that, plus the trailing '\0'.
1162 FMTBUF_EXPAND(3);
1163 FMTBUF_PUTBYTE_OCTAL(c);
1164 break;
1166 } else {
1168 * Unicode, but not printable, and not ASCII;
1169 * put it out as \uxxxx or \Uxxxxxxxx.
1171 if (uc <= 0xFFFF) {
1172 FMTBUF_EXPAND(6);
1173 FMTBUF_PUTCHAR('\\');
1174 FMTBUF_PUTCHAR('u');
1175 FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
1176 FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
1177 FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
1178 FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
1179 } else {
1180 FMTBUF_EXPAND(10);
1181 FMTBUF_PUTCHAR('\\');
1182 FMTBUF_PUTCHAR('U');
1183 FMTBUF_PUTCHAR(hex[(uc >> 28) & 0xF]);
1184 FMTBUF_PUTCHAR(hex[(uc >> 24) & 0xF]);
1185 FMTBUF_PUTCHAR(hex[(uc >> 20) & 0xF]);
1186 FMTBUF_PUTCHAR(hex[(uc >> 16) & 0xF]);
1187 FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
1188 FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
1189 FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
1190 FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
1196 FMTBUF_ENDSTR;
1198 return fmtbuf;
1202 * Given a wmem scope, a not-necessarily-null-terminated string,
1203 * expected to be in UTF-8 but possibly containing invalid sequences
1204 * (as it may have come from packet data), and the length of the string,
1205 * generate a valid UTF-8 string from it, allocated in the specified
1206 * wmem scope, that:
1208 * shows printable Unicode characters as themselves;
1210 * shows non-printable ASCII characters as C-style escapes (octal
1211 * if not one of the standard ones such as LF -> '\n');
1213 * shows non-printable Unicode-but-not-ASCII characters as
1214 * their universal character names;
1216 * shows illegal UTF-8 sequences as a sequence of bytes represented
1217 * as C-style hex escapes (XXX: Does not actually do this. Some illegal
1218 * sequences, such as overlong encodings, the sequences reserved for
1219 * UTF-16 surrogate halves (paired or unpaired), and values outside
1220 * Unicode (i.e., the old sequences for code points above U+10FFFF)
1221 * will be decoded in a permissive way. Other illegal sequences,
1222 * such 0xFE and 0xFF and the presence of a continuation byte where
1223 * not expected (or vice versa its absence), are replaced with
1224 * REPLACEMENT CHARACTER.)
1226 * and return a pointer to it.
1228 char *
1229 format_text(wmem_allocator_t *allocator,
1230 const char *string, size_t len)
1232 return format_text_internal(allocator, string, len, false);
1235 /** Given a wmem scope and a null-terminated string, expected to be in
1236 * UTF-8 but possibly containing invalid sequences (as it may have come
1237 * from packet data), and the length of the string, generate a valid
1238 * UTF-8 string from it, allocated in the specified wmem scope, that:
1240 * shows printable Unicode characters as themselves;
1242 * shows non-printable ASCII characters as C-style escapes (octal
1243 * if not one of the standard ones such as LF -> '\n');
1245 * shows non-printable Unicode-but-not-ASCII characters as
1246 * their universal character names;
1248 * shows illegal UTF-8 sequences as a sequence of bytes represented
1249 * as C-style hex escapes;
1251 * and return a pointer to it.
1253 char *
1254 format_text_string(wmem_allocator_t* allocator, const char *string)
1256 return format_text_internal(allocator, string, strlen(string), false);
1260 * Given a string, generate a string from it that shows non-printable
1261 * characters as C-style escapes except a whitespace character
1262 * (space, tab, carriage return, new line, vertical tab, or formfeed)
1263 * which will be replaced by a space, and return a pointer to it.
1265 char *
1266 format_text_wsp(wmem_allocator_t* allocator, const char *string, size_t len)
1268 return format_text_internal(allocator, string, len, true);
1272 * Given a string, generate a string from it that shows non-printable
1273 * characters as the chr parameter passed, except a whitespace character
1274 * (space, tab, carriage return, new line, vertical tab, or formfeed)
1275 * which will be replaced by a space, and return a pointer to it.
1277 * This does *not* treat the input string as UTF-8.
1279 * This is useful for displaying binary data that frequently but not always
1280 * contains text; otherwise the number of C escape codes makes it unreadable.
1282 char *
1283 format_text_chr(wmem_allocator_t *allocator, const char *string, size_t len, char chr)
1285 wmem_strbuf_t *buf;
1287 buf = wmem_strbuf_new_sized(allocator, len + 1);
1288 for (const char *p = string; p < string + len; p++) {
1289 if (g_ascii_isprint(*p)) {
1290 wmem_strbuf_append_c(buf, *p);
1292 else if (g_ascii_isspace(*p)) {
1293 wmem_strbuf_append_c(buf, ' ');
1295 else {
1296 wmem_strbuf_append_c(buf, chr);
1299 return wmem_strbuf_finalize(buf);
1302 char *
1303 format_char(wmem_allocator_t *allocator, char c)
1305 char *buf;
1306 char r;
1308 if (g_ascii_isprint(c)) {
1309 buf = wmem_alloc_array(allocator, char, 2);
1310 buf[0] = c;
1311 buf[1] = '\0';
1312 return buf;
1314 if (escape_char(c, &r)) {
1315 buf = wmem_alloc_array(allocator, char, 3);
1316 buf[0] = '\\';
1317 buf[1] = r;
1318 buf[2] = '\0';
1319 return buf;
1321 buf = wmem_alloc_array(allocator, char, 5);
1322 buf[0] = '\\';
1323 buf[1] = 'x';
1324 buf[2] = hex[((uint8_t)c >> 4) & 0xF];
1325 buf[3] = hex[((uint8_t)c >> 0) & 0xF];
1326 buf[4] = '\0';
1327 return buf;
1330 char*
1331 ws_utf8_truncate(char *string, size_t len)
1333 char* last_char;
1335 /* Ensure that it is null terminated */
1336 string[len] = '\0';
1337 last_char = g_utf8_find_prev_char(string, string + len);
1338 if (last_char != NULL && g_utf8_get_char_validated(last_char, -1) == (gunichar)-2) {
1339 /* The last UTF-8 character was truncated into a partial sequence. */
1340 *last_char = '\0';
1342 return string;
/* ASCII/EBCDIC conversion tables from
 * https://web.archive.org/web/20060813174742/http://www.room42.com/store/computer_center/code_tables.shtml
 */
1348 #if 0
/*
 * ASCII -> EBCDIC translation table, indexed by the ASCII byte value.
 * Every index from 0x7F upward maps to 0x4B.
 * (This table sits inside the "#if 0" region and is currently compiled out.)
 */
static const uint8_t ASCII_translate_EBCDIC [ 256 ] = {
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    /* 0x08 */ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    /* 0x18 */ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    /* 0x20 */ 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
    /* 0x28 */ 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
    /* 0x30 */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    /* 0x38 */ 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
    /* 0x40 */ 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    /* 0x48 */ 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
    /* 0x50 */ 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
    /* 0x58 */ 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
    /* 0x60 */ 0x7D, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    /* 0x68 */ 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    /* 0x70 */ 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
    /* 0x78 */ 0xA7, 0xA8, 0xA9, 0xC0, 0x6A, 0xD0, 0xA1, 0x4B,
    /* 0x80 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0x88 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0x90 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0x98 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xA0 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xA8 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xB0 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xB8 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xC0 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xC8 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xD0 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xD8 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xE0 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xE8 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xF0 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    /* 0xF8 */ 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B
};
1384 void
1385 ASCII_to_EBCDIC(uint8_t *buf, unsigned bytes)
1387 unsigned i;
1388 uint8_t *bufptr;
1390 bufptr = buf;
1392 for (i = 0; i < bytes; i++, bufptr++) {
1393 *bufptr = ASCII_translate_EBCDIC[*bufptr];
1397 uint8_t
1398 ASCII_to_EBCDIC1(uint8_t c)
1400 return ASCII_translate_EBCDIC[c];
1402 #endif
/*
 * EBCDIC -> ASCII translation table, indexed by the EBCDIC byte value.
 * Many positions in the 0x40-0xFF range simply hold 0x2E ('.').
 */
static const uint8_t EBCDIC_translate_ASCII [ 256 ] = {
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    /* 0x08 */ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,

    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    /* 0x18 */ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,

    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,

    /* 0x30 */ 0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F,

    /* 0x40 */ 0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    /* 0x48 */ 0x2E, 0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,

    /* 0x50 */ 0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    /* 0x58 */ 0x2E, 0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,

    /* 0x60 */ 0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    /* 0x68 */ 0x2E, 0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,

    /* 0x70 */ 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    /* 0x78 */ 0x2E, 0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,

    /* 0x80 */ 0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x88 */ 0x68, 0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,

    /* 0x90 */ 0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
    /* 0x98 */ 0x71, 0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,

    /* 0xA0 */ 0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    /* 0xA8 */ 0x79, 0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E,

    /* 0xB0 */ 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    /* 0xB8 */ 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E,

    /* 0xC0 */ 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0xC8 */ 0x48, 0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,

    /* 0xD0 */ 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
    /* 0xD8 */ 0x51, 0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,

    /* 0xE0 */ 0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    /* 0xE8 */ 0x59, 0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,

    /* 0xF0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0xF8 */ 0x38, 0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E
};
1439 void
1440 EBCDIC_to_ASCII(uint8_t *buf, unsigned bytes)
1442 unsigned i;
1443 uint8_t *bufptr;
1445 bufptr = buf;
1447 for (i = 0; i < bytes; i++, bufptr++) {
1448 *bufptr = EBCDIC_translate_ASCII[*bufptr];
1452 uint8_t
1453 EBCDIC_to_ASCII1(uint8_t c)
1455 return EBCDIC_translate_ASCII[c];
1459 * This routine is based on a routine created by Dan Lasley
1460 * <DLASLEY@PROMUS.com>.
1462 * It was modified for Wireshark by Gilbert Ramirez and others.
1465 #define MAX_OFFSET_LEN 8 /* max length of hex offset of bytes */
1466 #define BYTES_PER_LINE 16 /* max byte values printed on a line */
1467 #define HEX_DUMP_LEN (BYTES_PER_LINE*3)
1468 /* max number of characters hex dump takes -
1469 2 digits plus trailing blank */
1470 #define DATA_DUMP_LEN (HEX_DUMP_LEN + 2 + 2 + BYTES_PER_LINE)
1471 /* number of characters those bytes take;
1472 3 characters per byte of hex dump,
1473 2 blanks separating hex from ASCII,
1474 2 optional ASCII dump delimiters,
1475 1 character per byte of ASCII dump */
1476 #define MAX_LINE_LEN (MAX_OFFSET_LEN + 2 + DATA_DUMP_LEN)
1477 /* number of characters per line;
1478 offset, 2 blanks separating offset
1479 from data dump, data dump */
1481 bool
1482 hex_dump_buffer(bool (*print_line)(void *, const char *), void *fp,
1483 const unsigned char *cp, unsigned length,
1484 hex_dump_enc encoding,
1485 unsigned ascii_option)
1487 register unsigned int ad, i, j, k, l;
1488 unsigned char c;
1489 char line[MAX_LINE_LEN + 1];
1490 unsigned int use_digits;
1492 static char binhex[16] = {
1493 '0', '1', '2', '3', '4', '5', '6', '7',
1494 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
1497 * How many of the leading digits of the offset will we supply?
1498 * We always supply at least 4 digits, but if the maximum offset
1499 * won't fit in 4 digits, we use as many digits as will be needed.
1501 if (((length - 1) & 0xF0000000) != 0)
1502 use_digits = 8; /* need all 8 digits */
1503 else if (((length - 1) & 0x0F000000) != 0)
1504 use_digits = 7; /* need 7 digits */
1505 else if (((length - 1) & 0x00F00000) != 0)
1506 use_digits = 6; /* need 6 digits */
1507 else if (((length - 1) & 0x000F0000) != 0)
1508 use_digits = 5; /* need 5 digits */
1509 else
1510 use_digits = 4; /* we'll supply 4 digits */
1512 ad = 0;
1513 i = 0;
1514 j = 0;
1515 k = 0;
1516 while (i < length) {
1517 if ((i & 15) == 0) {
1519 * Start of a new line.
1521 j = 0;
1522 l = use_digits;
1523 do {
1524 l--;
1525 c = (ad >> (l*4)) & 0xF;
1526 line[j++] = binhex[c];
1527 } while (l != 0);
1528 line[j++] = ' ';
1529 line[j++] = ' ';
1530 memset(line+j, ' ', DATA_DUMP_LEN);
1533 * Offset in line of ASCII dump.
1535 k = j + HEX_DUMP_LEN + 2;
1536 if (ascii_option == HEXDUMP_ASCII_DELIMIT)
1537 line[k++] = '|';
1539 c = *cp++;
1540 line[j++] = binhex[c>>4];
1541 line[j++] = binhex[c&0xf];
1542 j++;
1543 if (ascii_option != HEXDUMP_ASCII_EXCLUDE ) {
1544 if (encoding == HEXDUMP_ENC_EBCDIC) {
1545 c = EBCDIC_to_ASCII1(c);
1547 line[k++] = ((c >= ' ') && (c < 0x7f)) ? c : '.';
1549 i++;
1550 if (((i & 15) == 0) || (i == length)) {
1552 * We'll be starting a new line, or
1553 * we're finished printing this buffer;
1554 * dump out the line we've constructed,
1555 * and advance the offset.
1557 if (ascii_option == HEXDUMP_ASCII_DELIMIT)
1558 line[k++] = '|';
1559 line[k] = '\0';
1560 if (!print_line(fp, line))
1561 return false;
1562 ad += 16;
1565 return true;
1569 * Editor modelines - https://www.wireshark.org/tools/modelines.html
1571 * Local variables:
1572 * c-basic-offset: 4
1573 * tab-width: 8
1574 * indent-tabs-mode: nil
1575 * End:
1577 * vi: set shiftwidth=4 tabstop=8 expandtab:
1578 * :indentSize=4:tabSize=8:noTabs=true: