wsutil/str_util.c

   1 /* str_util.c
   2  * String utility routines
   3  *
   4  * Wireshark - Network traffic analyzer
   5  * By Gerald Combs <gerald@wireshark.org>
   6  * Copyright 1998 Gerald Combs
   7  *
   8  * SPDX-License-Identifier: GPL-2.0-or-later
   9  */
  10
  11 #define _GNU_SOURCE
  12 #include "config.h"
  13 #include "str_util.h"
  14
  15 #include <string.h>
  16 #include <locale.h>
  17 #include <math.h>
  18
  19 #include <ws_codepoints.h>
  20
  21 #include <wsutil/to_str.h>
  22
  23
  24 static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
  25                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
  26
  27 char *
  28 wmem_strconcat(wmem_allocator_t *allocator, const char *first, ...)
  29 {
  30     size_t  len;
  31     va_list args;
  32     char    *s;
  33     char    *concat;
  34     char    *ptr;
  35
  36     if (!first)
  37         return NULL;
  38
  39     len = 1 + strlen(first);
  40     va_start(args, first);
  41     while ((s = va_arg(args, char*))) {
  42         len += strlen(s);
  43     }
  44     va_end(args);
  45
  46     ptr = concat = (char *)wmem_alloc(allocator, len);
  47
  48     ptr = g_stpcpy(ptr, first);
  49     va_start(args, first);
  50     while ((s = va_arg(args, char*))) {
  51         ptr = g_stpcpy(ptr, s);
  52     }
  53     va_end(args);
  54
  55     return concat;
  56 }
  57
  58 char *
  59 wmem_strjoin(wmem_allocator_t *allocator,
  60              const char *separator, const char *first, ...)
  61 {
  62     size_t  len;
  63     va_list args;
  64     size_t separator_len;
  65     char    *s;
  66     char    *concat;
  67     char    *ptr;
  68
  69     if (!first)
  70         return NULL;
  71
  72     if (separator == NULL) {
  73         separator = "";
  74     }
  75
  76     separator_len = strlen (separator);
  77
  78     len = 1 + strlen(first); /* + 1 for null byte */
  79     va_start(args, first);
  80     while ((s = va_arg(args, char*))) {
  81         len += (separator_len + strlen(s));
  82     }
  83     va_end(args);
  84
  85     ptr = concat = (char *)wmem_alloc(allocator, len);
  86     ptr = g_stpcpy(ptr, first);
  87     va_start(args, first);
  88     while ((s = va_arg(args, char*))) {
  89         ptr = g_stpcpy(ptr, separator);
  90         ptr = g_stpcpy(ptr, s);
  91     }
  92     va_end(args);
  93
  94     return concat;
  95
  96 }
  97
  98 char *
  99 wmem_strjoinv(wmem_allocator_t *allocator,
 100               const char *separator, char **str_array)
 101 {
 102     char *string = NULL;
 103
 104     ws_return_val_if(!str_array, NULL);
 105
 106     if (separator == NULL) {
 107         separator = "";
 108     }
 109
 110     if (str_array[0]) {
 111         int i;
 112         char *ptr;
 113         size_t len, separator_len;
 114
 115         separator_len = strlen(separator);
 116
 117         /* Get first part of length. Plus one for null byte. */
 118         len = 1 + strlen(str_array[0]);
 119         /* Get the full length, including the separators. */
 120         for (i = 1; str_array[i] != NULL; i++) {
 121             len += separator_len;
 122             len += strlen(str_array[i]);
 123         }
 124
 125         /* Allocate and build the string. */
 126         string = (char *)wmem_alloc(allocator, len);
 127         ptr = g_stpcpy(string, str_array[0]);
 128         for (i = 1; str_array[i] != NULL; i++) {
 129             ptr = g_stpcpy(ptr, separator);
 130             ptr = g_stpcpy(ptr, str_array[i]);
 131         }
 132     } else {
 133         string = wmem_strdup(allocator, "");
 134     }
 135
 136     return string;
 137
 138 }
 139
 140 char **
 141 wmem_strsplit(wmem_allocator_t *allocator, const char *src,
 142         const char *delimiter, int max_tokens)
 143 {
 144     char *splitted;
 145     char *s;
 146     unsigned tokens;
 147     unsigned sep_len;
 148     unsigned i;
 149     char **vec;
 150
 151     if (!src || !delimiter || !delimiter[0])
 152         return NULL;
 153
 154     /* An empty string results in an empty vector. */
 155     if (!src[0]) {
 156         vec = wmem_new0(allocator, char *);
 157         return vec;
 158     }
 159
 160     splitted = wmem_strdup(allocator, src);
 161     sep_len = (unsigned)strlen(delimiter);
 162
 163     if (max_tokens < 1)
 164         max_tokens = INT_MAX;
 165
 166     /* Calculate the number of fields. */
 167     s = splitted;
 168     tokens = 1;
 169     while (tokens < (unsigned)max_tokens && (s = strstr(s, delimiter))) {
 170         s += sep_len;
 171         tokens++;
 172     }
 173
 174     vec = wmem_alloc_array(allocator, char *, tokens + 1);
 175
 176     /* Populate the array of string tokens. */
 177     s = splitted;
 178     vec[0] = s;
 179     tokens = 1;
 180     while (tokens < (unsigned)max_tokens && (s = strstr(s, delimiter))) {
 181         for (i = 0; i < sep_len; i++)
 182             s[i] = '\0';
 183         s += sep_len;
 184         vec[tokens] = s;
 185         tokens++;
 186
 187     }
 188
 189     vec[tokens] = NULL;
 190
 191     return vec;
 192 }
 193
 194 /*
 195  * wmem_ascii_strdown:
 196  * based on g_ascii_strdown.
 197  */
 198 char*
 199 wmem_ascii_strdown(wmem_allocator_t *allocator, const char *str, ssize_t len)
 200 {
 201     char *result, *s;
 202
 203     g_return_val_if_fail (str != NULL, NULL);
 204
 205     if (len < 0)
 206         len = strlen (str);
 207
 208     result = wmem_strndup(allocator, str, len);
 209     for (s = result; *s; s++)
 210         *s = g_ascii_tolower (*s);
 211
 212     return result;
 213 }
 214
 215 int
 216 ws_xton(char ch)
 217 {
 218     switch (ch) {
 219         case '0': return 0;
 220         case '1': return 1;
 221         case '2': return 2;
 222         case '3': return 3;
 223         case '4': return 4;
 224         case '5': return 5;
 225         case '6': return 6;
 226         case '7': return 7;
 227         case '8': return 8;
 228         case '9': return 9;
 229         case 'a':  case 'A': return 10;
 230         case 'b':  case 'B': return 11;
 231         case 'c':  case 'C': return 12;
 232         case 'd':  case 'D': return 13;
 233         case 'e':  case 'E': return 14;
 234         case 'f':  case 'F': return 15;
 235         default: return -1;
 236     }
 237 }
 238
 239 /* Convert all ASCII letters to lower case, in place. */
 240 char *
 241 ascii_strdown_inplace(char *str)
 242 {
 243     char *s;
 244
 245     for (s = str; *s; s++)
 246         /* What 'g_ascii_tolower (char c)' does, this should be slightly more efficient */
 247         *s = g_ascii_isupper (*s) ? *s - 'A' + 'a' : *s;
 248
 249     return (str);
 250 }
 251
 252 /* Convert all ASCII letters to upper case, in place. */
 253 char *
 254 ascii_strup_inplace(char *str)
 255 {
 256     char *s;
 257
 258     for (s = str; *s; s++)
 259         /* What 'g_ascii_toupper (char c)' does, this should be slightly more efficient */
 260         *s = g_ascii_islower (*s) ? *s - 'a' + 'A' : *s;
 261
 262     return (str);
 263 }
 264
 265 /* Check if an entire string is printable. */
 266 bool
 267 isprint_string(const char *str)
 268 {
 269     unsigned pos;
 270
 271     /* Loop until we reach the end of the string (a null) */
 272     for(pos = 0; str[pos] != '\0'; pos++){
 273         if(!g_ascii_isprint(str[pos])){
 274             /* The string contains a non-printable character */
 275             return false;
 276         }
 277     }
 278
 279     /* The string contains only printable characters */
 280     return true;
 281 }
 282
 283 /* Check if an entire UTF-8 string is printable. */
 284 bool
 285 isprint_utf8_string(const char *str, const unsigned length)
 286 {
 287     const char *strend = str + length;
 288
 289     if (!g_utf8_validate(str, length, NULL)) {
 290         return false;
 291     }
 292
 293     while (str < strend) {
 294         /* This returns false for G_UNICODE_CONTROL | G_UNICODE_FORMAT |
 295          * G_UNICODE_UNASSIGNED | G_UNICODE_SURROGATE
 296          * XXX: Could it be ok to have certain format characters, e.g.
 297          * U+00AD SOFT HYPHEN? If so, format_text() should be changed too.
 298          */
 299         if (!g_unichar_isprint(g_utf8_get_char(str))) {
 300             return false;
 301         }
 302         str = g_utf8_next_char(str);
 303     }
 304
 305     return true;
 306 }
 307
 308 /* Check if an entire string is digits. */
 309 bool
 310 isdigit_string(const unsigned char *str)
 311 {
 312     unsigned pos;
 313
 314     /* Loop until we reach the end of the string (a null) */
 315     for(pos = 0; str[pos] != '\0'; pos++){
 316         if(!g_ascii_isdigit(str[pos])){
 317             /* The string contains a non-digit character */
 318             return false;
 319         }
 320     }
 321
 322     /* The string contains only digits */
 323     return true;
 324 }
 325
 326 const char *
 327 ws_ascii_strcasestr(const char *haystack, const char *needle)
 328 {
 329     /* Do not use strcasestr() here, even if a system has it, as it is
 330      * locale-dependent (and has different results for e.g. Turkic languages.)
 331      * FreeBSD, NetBSD, macOS have a strcasestr_l() that could be used.
 332      */
 333     size_t hlen = strlen(haystack);
 334     size_t nlen = strlen(needle);
 335
 336     while (hlen-- >= nlen) {
 337         if (!g_ascii_strncasecmp(haystack, needle, nlen))
 338             return haystack;
 339         haystack++;
 340     }
 341     return NULL;
 342 }
 343
 344 /* Return the last occurrence of ch in the n bytes of haystack.
 345  * If not found or n is 0, return NULL. */
 346 const uint8_t *
 347 ws_memrchr(const void *_haystack, int ch, size_t n)
 348 {
 349 #ifdef HAVE_MEMRCHR
 350     return memrchr(_haystack, ch, n);
 351 #else
 352     /* A generic implementation. This could be optimized considerably,
 353      * e.g. by fetching a word at a time.
 354      */
 355     if (n == 0) {
 356         return NULL;
 357     }
 358     const uint8_t *haystack = _haystack;
 359     const uint8_t *p;
 360     uint8_t c = (uint8_t)ch;
 361
 362     const uint8_t *const end = haystack + n - 1;
 363
 364     for (p = end; p >= haystack; --p) {
 365         if (*p == c) {
 366             return p;
 367         }
 368     }
 369
 370     return NULL;
 371 #endif /* HAVE_MEMRCHR */
 372 }
 373
 374 #define FORMAT_SIZE_UNIT_MASK 0x00ff
 375 #define FORMAT_SIZE_PFX_MASK 0xff00
 376
 377 static const char *thousands_grouping_fmt;
 378 static const char *thousands_grouping_fmt_flt;
 379
 380 DIAG_OFF(format)
 381 static void test_printf_thousands_grouping(void) {
 382     /* test whether wmem_strbuf works with "'" flag character */
 383     wmem_strbuf_t *buf = wmem_strbuf_new(NULL, NULL);
 384     wmem_strbuf_append_printf(buf, "%'d", 22);
 385     if (g_strcmp0(wmem_strbuf_get_str(buf), "22") == 0) {
 386         thousands_grouping_fmt = "%'"PRId64;
 387         thousands_grouping_fmt_flt = "%'.*f";
 388     } else {
 389         /* Don't use */
 390         thousands_grouping_fmt = "%"PRId64;
 391         thousands_grouping_fmt_flt = "%.*f";
 392     }
 393     wmem_strbuf_destroy(buf);
 394 }
 395 DIAG_ON(format)
 396
 397 static const char* decimal_point = NULL;
 398
 399 static void truncate_numeric_strbuf(wmem_strbuf_t *strbuf, int n) {
 400
 401     const char *s = wmem_strbuf_get_str(strbuf);
 402     char *p;
 403     int count;
 404
 405     if (decimal_point == NULL) {
 406         decimal_point = localeconv()->decimal_point;
 407     }
 408
 409     p = strchr(s, decimal_point[0]);
 410     if (p != NULL) {
 411         count = n;
 412         while (count >= 0) {
 413             count--;
 414             if (*p == '\0')
 415                 break;
 416             p++;
 417         }
 418
 419         p--;
 420         while (*p == '0') {
 421             p--;
 422         }
 423
 424         if (*p != decimal_point[0]) {
 425             p++;
 426         }
 427         wmem_strbuf_truncate(strbuf, p - s);
 428     }
 429 }
 430
 431 /* Given a floating point value, return it in a human-readable format,
 432  * using units with metric prefixes (falling back to scientific notation
 433  * with the base units if outside the range.)
 434  */
 435 char *
 436 format_units(wmem_allocator_t *allocator, double size,
 437              format_size_units_e unit, uint16_t flags,
 438              int precision)
 439 {
 440     wmem_strbuf_t *human_str = wmem_strbuf_new(allocator, NULL);
 441     double power = 1000.0;
 442     int pfx_off = 6;
 443     bool is_small = false;
 444     /* is_small is when to use the longer, spelled out unit.
 445      * We use it for inf, NaN, 0, and unprefixed small values,
 446      * but not for unprefixed values using scientific notation
 447      * the value is outside the supported prefix range.
 448      */
 449     bool scientific = false;
 450     double abs_size = fabs(size);
 451     int exponent = 0;
 452     static const char * const si_prefix[] = {" a", " f", " p", " n", " μ", " m", " ", " k", " M", " G", " T", " P", " E"};
 453     static const char * const iec_prefix[] = {" ", " Ki", " Mi", " Gi", " Ti", " Pi", " Ei"};
 454     const char * const *prefix = si_prefix;
 455     int max_exp = (int)G_N_ELEMENTS(si_prefix) - 1;
 456
 457     char *ret_val;
 458
 459     if (thousands_grouping_fmt == NULL)
 460         test_printf_thousands_grouping();
 461
 462     if (flags & FORMAT_SIZE_PREFIX_IEC) {
 463         prefix = iec_prefix;
 464         max_exp = (int)G_N_ELEMENTS(iec_prefix) - 1;
 465         power = 1024.0;
 466     }
 467
 468     if (isfinite(size) && size != 0.0) {
 469
 470         double comp = precision == 0 ? 10.0 : 1.0;
 471
 472         /* For precision 0, use the range [10, 10*power) because only
 473          * one significant digit is not as useful. This is what format_size
 474          * does for integers. ("ls -h" uses one digit after the decimal
 475          * point only for the [1, 10) range, g_format_size() always displays
 476          * tenths.) Prefer non-prefixed units for the range [1,10), though.
 477          *
 478          * We have a limited number of units to check, so this (which
 479          * can be unrolled) is presumably faster than log + floor + pow/exp
 480          */
 481         if (abs_size < 1.0) {
 482             while (abs_size < comp) {
 483                 abs_size *= power;
 484                 exponent--;
 485                 if ((exponent + pfx_off) < 0) {
 486                     scientific = true;
 487                     break;
 488                 }
 489             }
 490         } else {
 491             while (abs_size >= comp*power) {
 492                 abs_size *= 1/power;
 493                 exponent++;
 494                 if ((exponent + pfx_off) > max_exp) {
 495                     scientific = true;
 496                     break;
 497                 }
 498             }
 499         }
 500     }
 501
 502     if (scientific) {
 503         wmem_strbuf_append_printf(human_str, "%.*g", precision + 1, size);
 504         exponent = 0;
 505     } else {
 506         if (exponent == 0) {
 507             is_small = true;
 508         }
 509         size = copysign(abs_size, size);
 510         // Truncate trailing zeros, but do it this way because we know
 511         // we don't want scientific notation, and we don't want %g to
 512         // switch to that if precision is small. (We could always use
 513         // %g when precision is large.)
 514         wmem_strbuf_append_printf(human_str, thousands_grouping_fmt_flt, precision, size);
 515         truncate_numeric_strbuf(human_str, precision);
 516         // XXX - when rounding to a certain precision, printf might
 517         // round up to "power" from something like 999.99999995, which
 518         // looks a little odd on a graph when transitioning from 1,000 bytes
 519         // (for values just under 1 kB) to 1 kB (for values 1 kB and larger.)
 520         // Due to edge cases in binary fp representation and how printf might
 521         // round things, the right way to handle it is taking the printf output
 522         // and comparing it to "1000" and "1024" and adjusting the exponent
 523         // if so - though we need to compare to the version with the thousands
 524         // separator if we have that (which makes it harder to use strnatcmp
 525         // as is.)
 526     }
 527
 528     if ((size_t)(pfx_off + exponent) < G_N_ELEMENTS(si_prefix)) {
 529         wmem_strbuf_append(human_str, prefix[pfx_off+exponent]);
 530     }
 531
 532     switch (unit) {
 533         case FORMAT_SIZE_UNIT_NONE:
 534             break;
 535         case FORMAT_SIZE_UNIT_BYTES:
 536             wmem_strbuf_append(human_str, is_small ? "bytes" : "B");
 537             break;
 538         case FORMAT_SIZE_UNIT_BITS:
 539             wmem_strbuf_append(human_str, is_small ? "bits" : "b");
 540             break;
 541         case FORMAT_SIZE_UNIT_BITS_S:
 542             wmem_strbuf_append(human_str, is_small ? "bits/s" : "bps");
 543             break;
 544         case FORMAT_SIZE_UNIT_BYTES_S:
 545             wmem_strbuf_append(human_str, is_small ? "bytes/s" : "Bps");
 546             break;
 547         case FORMAT_SIZE_UNIT_PACKETS:
 548             wmem_strbuf_append(human_str, is_small ? "packets" : "pkts");
 549             break;
 550         case FORMAT_SIZE_UNIT_PACKETS_S:
 551             wmem_strbuf_append(human_str, is_small ? "packets/s" : "pkts/s");
 552             break;
 553         case FORMAT_SIZE_UNIT_EVENTS:
 554             wmem_strbuf_append(human_str, is_small ? "events" : "evts");
 555             break;
 556         case FORMAT_SIZE_UNIT_EVENTS_S:
 557             wmem_strbuf_append(human_str, is_small ? "events/s" : "evts/s");
 558             break;
 559         case FORMAT_SIZE_UNIT_FIELDS:
 560             wmem_strbuf_append(human_str, is_small ? "fields" : "flds");
 561             break;
 562         case FORMAT_SIZE_UNIT_SECONDS:
 563             wmem_strbuf_append(human_str, is_small ? "seconds" : "s");
 564             break;
 565         case FORMAT_SIZE_UNIT_ERLANGS:
 566             wmem_strbuf_append(human_str, is_small ? "erlangs" : "E");
 567             break;
 568         default:
 569             ws_assert_not_reached();
 570     }
 571
 572     ret_val = wmem_strbuf_finalize(human_str);
 573     /* Convention is a space between the value and the units. If we have
 574      * a prefix, the space is before the prefix. There are two possible
 575      * uses of FORMAT_SIZE_UNIT_NONE:
 576      * 1. Add a unit immediately after the string returned. In this case,
 577      *    we would want the string to end with a space if there's no prefix.
 578      * 2. The unit appears somewhere else, e.g. in a legend, header, or
 579      *    different column. In this case, we don't want the string to end
 580      *    with a space if there's no prefix.
 581      * chomping the string here, as we've traditionally done, optimizes for
 582      * the latter case but makes the former case harder.
 583      * Perhaps the right approach is to distinguish the cases with a new
 584      * enum value.
 585      */
 586     return g_strchomp(ret_val);
 587 }
 588
 589 /* Given a size, return its value in a human-readable format */
 590 /* This doesn't handle fractional values. We might want to just
 591  * call the version with the double and precision 0 (possibly
 592  * slower due to the use of floating point math, but do we care?)
 593  */
 594 char *
 595 format_size_wmem(wmem_allocator_t *allocator, int64_t size,
 596                         format_size_units_e unit, uint16_t flags)
 597 {
 598     wmem_strbuf_t *human_str = wmem_strbuf_new(allocator, NULL);
 599     int power = 1000;
 600     int pfx_off = 0;
 601     bool is_small = false;
 602     static const char *prefix[] = {" T", " G", " M", " k", " Ti", " Gi", " Mi", " Ki"};
 603     char *ret_val;
 604
 605     if (thousands_grouping_fmt == NULL)
 606         test_printf_thousands_grouping();
 607
 608     if (flags & FORMAT_SIZE_PREFIX_IEC) {
 609         pfx_off = 4;
 610         power = 1024;
 611     }
 612
 613     if (size / power / power / power / power >= 10) {
 614         wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / power / power / power / power);
 615         wmem_strbuf_append(human_str, prefix[pfx_off]);
 616     } else if (size / power / power / power >= 10) {
 617         wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / power / power / power);
 618         wmem_strbuf_append(human_str, prefix[pfx_off+1]);
 619     } else if (size / power / power >= 10) {
 620         wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / power / power);
 621         wmem_strbuf_append(human_str, prefix[pfx_off+2]);
 622     } else if (size / power >= 10) {
 623         wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / power);
 624         wmem_strbuf_append(human_str, prefix[pfx_off+3]);
 625     } else {
 626         wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size);
 627         is_small = true;
 628     }
 629
 630     switch (unit) {
 631         case FORMAT_SIZE_UNIT_NONE:
 632             break;
 633         case FORMAT_SIZE_UNIT_BYTES:
 634             wmem_strbuf_append(human_str, is_small ? " bytes" : "B");
 635             break;
 636         case FORMAT_SIZE_UNIT_BITS:
 637             wmem_strbuf_append(human_str, is_small ? " bits" : "b");
 638             break;
 639         case FORMAT_SIZE_UNIT_BITS_S:
 640             wmem_strbuf_append(human_str, is_small ? " bits/s" : "bps");
 641             break;
 642         case FORMAT_SIZE_UNIT_BYTES_S:
 643             wmem_strbuf_append(human_str, is_small ? " bytes/s" : "Bps");
 644             break;
 645         case FORMAT_SIZE_UNIT_PACKETS:
 646             wmem_strbuf_append(human_str, is_small ? " packets" : "packets");
 647             break;
 648         case FORMAT_SIZE_UNIT_PACKETS_S:
 649             wmem_strbuf_append(human_str, is_small ? " packets/s" : "packets/s");
 650             break;
 651         case FORMAT_SIZE_UNIT_FIELDS:
 652             wmem_strbuf_append(human_str, is_small ? " fields" : "fields");
 653             break;
 654         /* These aren't that practical to use with integers, but
 655          * perhaps better than asserting.
 656          */
 657         case FORMAT_SIZE_UNIT_SECONDS:
 658             wmem_strbuf_append(human_str, is_small ? " seconds" : "s");
 659             break;
 660         case FORMAT_SIZE_UNIT_ERLANGS:
 661             wmem_strbuf_append(human_str, is_small ? " erlangs" : "E");
 662             break;
 663         default:
 664             ws_assert_not_reached();
 665     }
 666
 667     ret_val = wmem_strbuf_finalize(human_str);
 668     return g_strchomp(ret_val);
 669 }
 670
 671 char
 672 printable_char_or_period(char c)
 673 {
 674     return g_ascii_isprint(c) ? c : '.';
 675 }
 676
 677 /*
 678  * This is used by the display filter engine and must be compatible
 679  * with display filter syntax.
 680  */
 681 static inline bool
 682 escape_char(char c, char *p)
 683 {
 684     int r = -1;
 685     ws_assert(p);
 686
 687     /*
 688      * backslashes and double-quotes must be escaped (double-quotes
 689      * are escaped by passing '"' as quote_char in escape_string_len)
 690      * whitespace is also escaped.
 691      */
 692     switch (c) {
 693         case '\a': r = 'a'; break;
 694         case '\b': r = 'b'; break;
 695         case '\f': r = 'f'; break;
 696         case '\n': r = 'n'; break;
 697         case '\r': r = 'r'; break;
 698         case '\t': r = 't'; break;
 699         case '\v': r = 'v'; break;
 700         case '\\': r = '\\'; break;
 701         case '\0': r = '0'; break;
 702     }
 703
 704     if (r != -1) {
 705         *p = r;
 706         return true;
 707     }
 708     return false;
 709 }
 710
 711 static inline bool
 712 escape_null(char c, char *p)
 713 {
 714     ws_assert(p);
 715     if (c == '\0') {
 716         *p = '0';
 717         return true;
 718     }
 719     return false;
 720 }
 721
 722 static char *
 723 escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len,
 724                     bool (*escape_func)(char c, char *p), bool add_quotes,
 725                     char quote_char, bool double_quote)
 726 {
 727     char c, r;
 728     wmem_strbuf_t *buf;
 729     size_t alloc_size;
 730     ssize_t i;
 731
 732     if (len < 0)
 733         len = strlen(string);
 734
 735     alloc_size = len;
 736     if (add_quotes)
 737         alloc_size += 2;
 738
 739     buf = wmem_strbuf_new_sized(alloc, alloc_size);
 740
 741     if (add_quotes && quote_char != '\0')
 742         wmem_strbuf_append_c(buf, quote_char);
 743
 744     for (i = 0; i < len; i++) {
 745         c = string[i];
 746         if ((escape_func(c, &r))) {
 747             wmem_strbuf_append_c(buf, '\\');
 748             wmem_strbuf_append_c(buf, r);
 749         }
 750         else if (c == quote_char && quote_char != '\0') {
 751             /* If quoting, we must escape the quote_char somehow. */
 752             if (double_quote) {
 753                 wmem_strbuf_append_c(buf, c);
 754                 wmem_strbuf_append_c(buf, c);
 755             } else {
 756                 wmem_strbuf_append_c(buf, '\\');
 757                 wmem_strbuf_append_c(buf, c);
 758             }
 759         }
 760         else if (c == '\\' && quote_char != '\0' && !double_quote) {
 761             /* If quoting, and escaping the quote_char with a backslash,
 762              * then backslash must be escaped, even if escape_func doesn't. */
 763             wmem_strbuf_append_c(buf, '\\');
 764             wmem_strbuf_append_c(buf, '\\');
 765         }
 766         else {
 767             /* Other UTF-8 bytes are passed through. */
 768             wmem_strbuf_append_c(buf, c);
 769         }
 770     }
 771
 772     if (add_quotes && quote_char != '\0')
 773         wmem_strbuf_append_c(buf, quote_char);
 774
 775     return wmem_strbuf_finalize(buf);
 776 }
 777
 778 char *
 779 ws_escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len, bool add_quotes)
 780 {
 781     return escape_string_len(alloc, string, len, escape_char, add_quotes, '"', false);
 782 }
 783
 784 char *
 785 ws_escape_string(wmem_allocator_t *alloc, const char *string, bool add_quotes)
 786 {
 787     return escape_string_len(alloc, string, -1, escape_char, add_quotes, '"', false);
 788 }
 789
 790 char *ws_escape_null(wmem_allocator_t *alloc, const char *string, size_t len, bool add_quotes)
 791 {
 792     /* XXX: The existing behavior (maintained) here is not to escape
 793      * backslashes even though NUL is escaped.
 794      */
 795     return escape_string_len(alloc, string, len, escape_null, add_quotes, add_quotes ? '"' : '\0', false);
 796 }
 797
 798 char *ws_escape_csv(wmem_allocator_t *alloc, const char *string, bool add_quotes, char quote_char, bool double_quote, bool escape_whitespace)
 799 {
 800     if (escape_whitespace)
 801         return escape_string_len(alloc, string, -1, escape_char, add_quotes, quote_char, double_quote);
 802     else
 803         return escape_string_len(alloc, string, -1, escape_null, add_quotes, quote_char, double_quote);
 804 }
 805
 806 const char *
 807 ws_strerrorname_r(int errnum, char *buf, size_t buf_size)
 808 {
 809 #ifdef HAVE_STRERRORNAME_NP
 810     const char *errstr = strerrorname_np(errnum);
 811     if (errstr != NULL) {
 812         (void)g_strlcpy(buf, errstr, buf_size);
 813         return buf;
 814     }
 815 #endif
 816     snprintf(buf, buf_size, "Errno(%d)", errnum);
 817     return buf;
 818 }
 819
 820 char *
 821 ws_strdup_underline(wmem_allocator_t *allocator, long offset, size_t len)
 822 {
 823     if (offset < 0)
 824         return NULL;
 825
 826     wmem_strbuf_t *buf = wmem_strbuf_new_sized(allocator, offset + len);
 827
 828     for (int i = 0; i < offset; i++) {
 829         wmem_strbuf_append_c(buf, ' ');
 830     }
 831     wmem_strbuf_append_c(buf, '^');
 832
 833     for (size_t l = len; l > 1; l--) {
 834         wmem_strbuf_append_c(buf, '~');
 835     }
 836
 837     return wmem_strbuf_finalize(buf);
 838 }
 839
 840 #define    INITIAL_FMTBUF_SIZE    128
 841
 842 /*
 843  * Declare, and initialize, the variables used for an output buffer.
 844  */
 845 #define FMTBUF_VARS \
 846     char *fmtbuf = (char*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE); \
 847     unsigned fmtbuf_len = INITIAL_FMTBUF_SIZE; \
 848     unsigned column = 0
 849
 850 /*
 851  * Expand the buffer to be large enough to add nbytes bytes, plus a
 852  * terminating '\0'.
 853  */
 854 #define FMTBUF_EXPAND(nbytes) \
 855     /* \
 856      * Is there enough room for those bytes and also enough room for \
 857      * a terminating '\0'? \
 858      */ \
 859     if (column+(nbytes+1) >= fmtbuf_len) { \
 860         /* \
 861          * Double the buffer's size if it's not big enough. \
 862          * The size of the buffer starts at 128, so doubling its size \
 863          * adds at least another 128 bytes, which is more than enough \
 864          * for one more character plus a terminating '\0'. \
 865          */ \
 866         fmtbuf_len *= 2; \
 867         fmtbuf = (char *)wmem_realloc(allocator, fmtbuf, fmtbuf_len); \
 868     }
 869
 870 /*
 871  * Put a byte into the buffer; space must have been ensured for it.
 872  */
 873 #define FMTBUF_PUTCHAR(b) \
 874     fmtbuf[column] = (b); \
 875     column++
 876
 877 /*
 878  * Add the one-byte argument, as an octal escape sequence, to the end
 879  * of the buffer.
 880  */
 881 #define FMTBUF_PUTBYTE_OCTAL(b) \
 882     FMTBUF_PUTCHAR((((b)>>6)&03) + '0'); \
 883     FMTBUF_PUTCHAR((((b)>>3)&07) + '0'); \
 884     FMTBUF_PUTCHAR((((b)>>0)&07) + '0')
 885
 886 /*
 887  * Add the one-byte argument, as a hex escape sequence, to the end
 888  * of the buffer.
 889  */
 890 #define FMTBUF_PUTBYTE_HEX(b) \
 891     FMTBUF_PUTCHAR('\\'); \
 892     FMTBUF_PUTCHAR('x'); \
 893     FMTBUF_PUTCHAR(hex[((b) >> 4) & 0xF]); \
 894     FMTBUF_PUTCHAR(hex[((b) >> 0) & 0xF])
 895
 896 /*
 897  * Put the trailing '\0' at the end of the buffer.
 898  */
 899 #define FMTBUF_ENDSTR \
 900     fmtbuf[column] = '\0'
 901
 902 static char *
 903 format_text_internal(wmem_allocator_t *allocator,
 904                         const unsigned char *string, size_t len,
 905                         bool replace_space)
 906 {
 907     FMTBUF_VARS;
 908     const unsigned char *stringend = string + len;
 909     unsigned char c;
 910
 911     while (string < stringend) {
 912         /*
 913          * Get the first byte of this character.
 914          */
 915         c = *string++;
 916         if (g_ascii_isprint(c)) {
 917             /*
 918              * Printable ASCII, so not part of a multi-byte UTF-8 sequence.
 919              * Make sure there's enough room for one more byte, and add
 920              * the character.
 921              */
 922             FMTBUF_EXPAND(1);
 923             FMTBUF_PUTCHAR(c);
 924         } else if (replace_space && g_ascii_isspace(c)) {
 925             /*
 926              * ASCII, so not part of a multi-byte UTF-8 sequence, but
 927              * not printable, but is a space character; show it as a
 928              * blank.
 929              *
 930              * Make sure there's enough room for one more byte, and add
 931              * the blank.
 932              */
 933             FMTBUF_EXPAND(1);
 934             FMTBUF_PUTCHAR(' ');
 935         } else if (c < 128) {
 936             /*
 937              * ASCII, so not part of a multi-byte UTF-8 sequence, but not
 938              * printable.
 939              *
 940              * That requires a minimum of 2 bytes, one for the backslash
 941              * and one for a letter, so make sure we have enough room
 942              * for that, plus a trailing '\0'.
 943              */
 944             FMTBUF_EXPAND(2);
 945             FMTBUF_PUTCHAR('\\');
 946             switch (c) {
 947
 948                 case '\a':
 949                     FMTBUF_PUTCHAR('a');
 950                     break;
 951
 952                 case '\b':
 953                     FMTBUF_PUTCHAR('b'); /* BS */
 954                     break;
 955
 956                 case '\f':
 957                     FMTBUF_PUTCHAR('f'); /* FF */
 958                     break;
 959
 960                 case '\n':
 961                     FMTBUF_PUTCHAR('n'); /* NL */
 962                     break;
 963
 964                 case '\r':
 965                     FMTBUF_PUTCHAR('r'); /* CR */
 966                     break;
 967
 968                 case '\t':
 969                     FMTBUF_PUTCHAR('t'); /* tab */
 970                     break;
 971
 972                 case '\v':
 973                     FMTBUF_PUTCHAR('v');
 974                     break;
 975
 976                 default:
 977                     /*
 978                      * We've already put the backslash, but this
 979                      * will put 3 more characters for the octal
 980                      * number; make sure we have enough room for
 981                      * that, plus the trailing '\0'.
 982                      */
 983                     FMTBUF_EXPAND(3);
 984                     FMTBUF_PUTBYTE_OCTAL(c);
 985                     break;
 986             }
 987         } else {
 988             /*
 989              * We've fetched the first byte of a multi-byte UTF-8
 990              * sequence into c.
 991              */
 992             int utf8_len;
 993             unsigned char mask;
 994             gunichar uc;
 995             unsigned char first;
 996
 997             if ((c & 0xe0) == 0xc0) {
 998                 /* Starts a 2-byte UTF-8 sequence; 1 byte left */
 999                 utf8_len = 1;
1000                 mask = 0x1f;
1001             } else if ((c & 0xf0) == 0xe0) {
1002                 /* Starts a 3-byte UTF-8 sequence; 2 bytes left */
1003                 utf8_len = 2;
1004                 mask = 0x0f;
1005             } else if ((c & 0xf8) == 0xf0) {
1006                 /* Starts a 4-byte UTF-8 sequence; 3 bytes left */
1007                 utf8_len = 3;
1008                 mask = 0x07;
1009             } else if ((c & 0xfc) == 0xf8) {
1010                 /* Starts an old-style 5-byte UTF-8 sequence; 4 bytes left */
1011                 utf8_len = 4;
1012                 mask = 0x03;
1013             } else if ((c & 0xfe) == 0xfc) {
1014                 /* Starts an old-style 6-byte UTF-8 sequence; 5 bytes left */
1015                 utf8_len = 5;
1016                 mask = 0x01;
1017             } else {
1018                 /* 0xfe or 0xff or a continuation byte - not valid */
1019                 utf8_len = -1;
1020             }
1021             if (utf8_len > 0) {
1022                 /* Try to construct the Unicode character */
1023                 uc = c & mask;
1024                 for (int i = 0; i < utf8_len; i++) {
1025                     if (string >= stringend) {
1026                         /*
1027                          * Ran out of octets, so the character is
1028                          * incomplete.  Put in a REPLACEMENT CHARACTER
1029                          * instead, and then continue the loop, which
1030                          * will terminate.
1031                          */
1032                         uc = UNICODE_REPLACEMENT_CHARACTER;
1033                         break;
1034                     }
1035                     c = *string;
1036                     if ((c & 0xc0) != 0x80) {
1037                         /*
1038                          * Not valid UTF-8 continuation character; put in
1039                          * a replacement character, and then re-process
1040                          * this octet as the beginning of a new character.
1041                          */
1042                         uc = UNICODE_REPLACEMENT_CHARACTER;
1043                         break;
1044                     }
1045                     string++;
1046                     uc = (uc << 6) | (c & 0x3f);
1047                 }
1048
1049                 /*
1050                  * If this isn't a valid Unicode character, put in
1051                  * a REPLACEMENT CHARACTER.
1052                  */
1053                 if (!g_unichar_validate(uc))
1054                     uc = UNICODE_REPLACEMENT_CHARACTER;
1055             } else {
1056                 /* 0xfe or 0xff; put it a REPLACEMENT CHARACTER */
1057                 uc = UNICODE_REPLACEMENT_CHARACTER;
1058             }
1059
1060             /*
1061              * OK, is it a printable Unicode character?
1062              */
1063             if (g_unichar_isprint(uc)) {
1064                 /*
1065                  * Yes - put it into the string as UTF-8.
1066                  * This means that if it was an overlong
1067                  * encoding, this will put out the right
1068                  * sized encoding.
1069                  */
1070                 if (uc < 0x80) {
1071                     first = 0;
1072                     utf8_len = 1;
1073                 } else if (uc < 0x800) {
1074                     first = 0xc0;
1075                     utf8_len = 2;
1076                 } else if (uc < 0x10000) {
1077                     first = 0xe0;
1078                     utf8_len = 3;
1079                 } else if (uc < 0x200000) {
1080                     first = 0xf0;
1081                     utf8_len = 4;
1082                 } else if (uc < 0x4000000) {
1083                     /*
1084                      * This should never happen, as Unicode doesn't
1085                      * go that high.
1086                      */
1087                     first = 0xf8;
1088                     utf8_len = 5;
1089                 } else {
1090                     /*
1091                      * This should never happen, as Unicode doesn't
1092                      * go that high.
1093                      */
1094                     first = 0xfc;
1095                     utf8_len = 6;
1096                 }
1097                 FMTBUF_EXPAND(utf8_len);
1098                 for (int i = utf8_len - 1; i > 0; i--) {
1099                     fmtbuf[column + i] = (uc & 0x3f) | 0x80;
1100                     uc >>= 6;
1101                 }
1102                 fmtbuf[column] = uc | first;
1103                 column += utf8_len;
1104             } else if (replace_space && g_unichar_isspace(uc)) {
1105                 /*
1106                  * Not printable, but is a space character; show it
1107                  * as a blank.
1108                  *
1109                  * Make sure there's enough room for one more byte,
1110                  * and add the blank.
1111                  */
1112                 FMTBUF_EXPAND(1);
1113                 FMTBUF_PUTCHAR(' ');
1114             } else if (c < 128) {
1115                 /*
1116                  * ASCII, but not printable.
1117                  * Yes, this could happen with an overlong encoding.
1118                  *
1119                  * That requires a minimum of 2 bytes, one for the
1120                  * backslash and one for a letter, so make sure we
1121                  * have enough room for that, plus a trailing '\0'.
1122                  */
1123                 FMTBUF_EXPAND(2);
1124                 FMTBUF_PUTCHAR('\\');
1125                 switch (c) {
1126
1127                     case '\a':
1128                         FMTBUF_PUTCHAR('a');
1129                         break;
1130
1131                     case '\b':
1132                         FMTBUF_PUTCHAR('b'); /* BS */
1133                         break;
1134
1135                     case '\f':
1136                         FMTBUF_PUTCHAR('f'); /* FF */
1137                         break;
1138
1139                     case '\n':
1140                         FMTBUF_PUTCHAR('n'); /* NL */
1141                         break;
1142
1143                     case '\r':
1144                         FMTBUF_PUTCHAR('r'); /* CR */
1145                         break;
1146
1147                     case '\t':
1148                         FMTBUF_PUTCHAR('t'); /* tab */
1149                         break;
1150
1151                     case '\v':
1152                         FMTBUF_PUTCHAR('v');
1153                         break;
1154
1155                     default:
1156                         /*
1157                          * We've already put the backslash, but this
1158                          * will put 3 more characters for the octal
1159                          * number; make sure we have enough room for
1160                          * that, plus the trailing '\0'.
1161                          */
1162                         FMTBUF_EXPAND(3);
1163                         FMTBUF_PUTBYTE_OCTAL(c);
1164                         break;
1165                 }
1166             } else {
1167                 /*
1168                  * Unicode, but not printable, and not ASCII;
1169                  * put it out as \uxxxx or \Uxxxxxxxx.
1170                  */
1171                 if (uc <= 0xFFFF) {
1172                     FMTBUF_EXPAND(6);
1173                     FMTBUF_PUTCHAR('\\');
1174                     FMTBUF_PUTCHAR('u');
1175                     FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
1176                     FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
1177                     FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
1178                     FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
1179                 } else {
1180                     FMTBUF_EXPAND(10);
1181                     FMTBUF_PUTCHAR('\\');
1182                     FMTBUF_PUTCHAR('U');
1183                     FMTBUF_PUTCHAR(hex[(uc >> 28) & 0xF]);
1184                     FMTBUF_PUTCHAR(hex[(uc >> 24) & 0xF]);
1185                     FMTBUF_PUTCHAR(hex[(uc >> 20) & 0xF]);
1186                     FMTBUF_PUTCHAR(hex[(uc >> 16) & 0xF]);
1187                     FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
1188                     FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
1189                     FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
1190                     FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
1191                 }
1192             }
1193         }
1194     }
1195
1196     FMTBUF_ENDSTR;
1197
1198     return fmtbuf;
1199 }
1200
1201 /*
1202  * Given a wmem scope, a not-necessarily-null-terminated string,
1203  * expected to be in UTF-8 but possibly containing invalid sequences
1204  * (as it may have come from packet data), and the length of the string,
1205  * generate a valid UTF-8 string from it, allocated in the specified
1206  * wmem scope, that:
1207  *
1208  *   shows printable Unicode characters as themselves;
1209  *
1210  *   shows non-printable ASCII characters as C-style escapes (octal
1211  *   if not one of the standard ones such as LF -> '\n');
1212  *
1213  *   shows non-printable Unicode-but-not-ASCII characters as
1214  *   their universal character names;
1215  *
1216  *   shows illegal UTF-8 sequences as a sequence of bytes represented
1217  *   as C-style hex escapes (XXX: Does not actually do this. Some illegal
1218  *   sequences, such as overlong encodings, the sequences reserved for
1219  *   UTF-16 surrogate halves (paired or unpaired), and values outside
1220  *   Unicode (i.e., the old sequences for code points above U+10FFFF)
1221  *   will be decoded in a permissive way. Other illegal sequences,
1222  *   such 0xFE and 0xFF and the presence of a continuation byte where
1223  *   not expected (or vice versa its absence), are replaced with
1224  *   REPLACEMENT CHARACTER.)
1225  *
1226  * and return a pointer to it.
1227  */
1228 char *
1229 format_text(wmem_allocator_t *allocator,
1230                         const char *string, size_t len)
1231 {
1232     return format_text_internal(allocator, string, len, false);
1233 }
1234
1235 /** Given a wmem scope and a null-terminated string, expected to be in
1236  *  UTF-8 but possibly containing invalid sequences (as it may have come
1237  *  from packet data), and the length of the string, generate a valid
1238  *  UTF-8 string from it, allocated in the specified wmem scope, that:
1239  *
1240  *   shows printable Unicode characters as themselves;
1241  *
1242  *   shows non-printable ASCII characters as C-style escapes (octal
1243  *   if not one of the standard ones such as LF -> '\n');
1244  *
1245  *   shows non-printable Unicode-but-not-ASCII characters as
1246  *   their universal character names;
1247  *
1248  *   shows illegal UTF-8 sequences as a sequence of bytes represented
1249  *   as C-style hex escapes;
1250  *
1251  *  and return a pointer to it.
1252  */
1253 char *
1254 format_text_string(wmem_allocator_t* allocator, const char *string)
1255 {
1256     return format_text_internal(allocator, string, strlen(string), false);
1257 }
1258
1259 /*
1260  * Given a string, generate a string from it that shows non-printable
1261  * characters as C-style escapes except a whitespace character
1262  * (space, tab, carriage return, new line, vertical tab, or formfeed)
1263  * which will be replaced by a space, and return a pointer to it.
1264  */
1265 char *
1266 format_text_wsp(wmem_allocator_t* allocator, const char *string, size_t len)
1267 {
1268     return format_text_internal(allocator, string, len, true);
1269 }
1270
1271 /*
1272  * Given a string, generate a string from it that shows non-printable
1273  * characters as the chr parameter passed, except a whitespace character
1274  * (space, tab, carriage return, new line, vertical tab, or formfeed)
1275  * which will be replaced by a space, and return a pointer to it.
1276  *
1277  * This does *not* treat the input string as UTF-8.
1278  *
1279  * This is useful for displaying binary data that frequently but not always
1280  * contains text; otherwise the number of C escape codes makes it unreadable.
1281  */
1282 char *
1283 format_text_chr(wmem_allocator_t *allocator, const char *string, size_t len, char chr)
1284 {
1285     wmem_strbuf_t *buf;
1286
1287     buf = wmem_strbuf_new_sized(allocator, len + 1);
1288     for (const char *p = string; p < string + len; p++) {
1289         if (g_ascii_isprint(*p)) {
1290             wmem_strbuf_append_c(buf, *p);
1291         }
1292         else if (g_ascii_isspace(*p)) {
1293             wmem_strbuf_append_c(buf, ' ');
1294         }
1295         else {
1296             wmem_strbuf_append_c(buf, chr);
1297         }
1298     }
1299     return wmem_strbuf_finalize(buf);
1300 }
1301
1302 char *
1303 format_char(wmem_allocator_t *allocator, char c)
1304 {
1305     char *buf;
1306     char r;
1307
1308     if (g_ascii_isprint(c)) {
1309         buf = wmem_alloc_array(allocator, char, 2);
1310         buf[0] = c;
1311         buf[1] = '\0';
1312         return buf;
1313     }
1314     if (escape_char(c, &r)) {
1315         buf = wmem_alloc_array(allocator, char, 3);
1316         buf[0] = '\\';
1317         buf[1] = r;
1318         buf[2] = '\0';
1319         return buf;
1320     }
1321     buf = wmem_alloc_array(allocator, char, 5);
1322     buf[0] = '\\';
1323     buf[1] = 'x';
1324     buf[2] = hex[((uint8_t)c >> 4) & 0xF];
1325     buf[3] = hex[((uint8_t)c >> 0) & 0xF];
1326     buf[4] = '\0';
1327     return buf;
1328 }
1329
1330 char*
1331 ws_utf8_truncate(char *string, size_t len)
1332 {
1333     char* last_char;
1334
1335     /* Ensure that it is null terminated */
1336     string[len] = '\0';
1337     last_char = g_utf8_find_prev_char(string, string + len);
1338     if (last_char != NULL && g_utf8_get_char_validated(last_char, -1) == (gunichar)-2) {
1339         /* The last UTF-8 character was truncated into a partial sequence. */
1340         *last_char = '\0';
1341     }
1342     return string;
1343 }
1344
1345 /* ASCII/EBCDIC conversion tables from
1346  * https://web.archive.org/web/20060813174742/http://www.room42.com/store/computer_center/code_tables.shtml
1347  */
1348 #if 0
/*
 * ASCII -> EBCDIC translation table, indexed by the ASCII byte value.
 * Each pair of source lines below holds the 16 entries for one high
 * nibble.  This table sits inside an "#if 0" region, so it is
 * currently compiled out.
 *
 * Indices 0x7F-0xFF all map to 0x4B, the same value that index 0x2E
 * ('.') maps to.
 *
 * NOTE(review): index 0x60 ('`') maps to 0x7D, the same value as
 * index 0x27 ('\''); that looks like a copy of the apostrophe entry
 * rather than a distinct code - confirm against the reference table
 * before enabling this code.
 */
static const uint8_t ASCII_translate_EBCDIC [ 256 ] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, /* 0x00 - 0x0F */
    0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, /* 0x10 - 0x1F */
    0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, /* 0x20 - 0x2F */
    0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, /* 0x30 - 0x3F */
    0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, /* 0x40 - 0x4F */
    0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, /* 0x50 - 0x5F */
    0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
    0x7D, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, /* 0x60 - 0x6F */
    0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, /* 0x70 - 0x7F */
    0xA8, 0xA9, 0xC0, 0x6A, 0xD0, 0xA1, 0x4B,
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, /* 0x80 - 0x8F */
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, /* 0x90 - 0x9F */
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, /* 0xA0 - 0xAF */
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, /* 0xB0 - 0xBF */
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, /* 0xC0 - 0xCF */
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, /* 0xD0 - 0xDF */
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, /* 0xE0 - 0xEF */
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, /* 0xF0 - 0xFF */
    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B
};
1383
1384 void
1385 ASCII_to_EBCDIC(uint8_t *buf, unsigned bytes)
1386 {
1387     unsigned i;
1388     uint8_t   *bufptr;
1389
1390     bufptr = buf;
1391
1392     for (i = 0; i < bytes; i++, bufptr++) {
1393         *bufptr = ASCII_translate_EBCDIC[*bufptr];
1394     }
1395 }
1396
1397 uint8_t
1398 ASCII_to_EBCDIC1(uint8_t c)
1399 {
1400     return ASCII_translate_EBCDIC[c];
1401 }
1402 #endif
1403
/*
 * EBCDIC -> ASCII translation table, indexed by the EBCDIC byte value;
 * each pair of source lines holds the 16 entries for one high nibble.
 *
 * Many entries are 0x2E (ASCII '.'), apparently used as the stand-in
 * for EBCDIC codes that have no ASCII counterpart in this table.
 *
 * NOTE(review): indices 0x00-0x3F mostly map to their own value rather
 * than to a translated control code - confirm against the reference
 * table if exact control-character translation matters to a caller.
 */
static const uint8_t EBCDIC_translate_ASCII [ 256 ] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00 - 0x0F */
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10 - 0x1F */
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20 - 0x2F */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30 - 0x3F */
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F,
    0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x40 - 0x4F */
    0x2E, 0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
    0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x50 - 0x5F */
    0x2E, 0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
    0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x60 - 0x6F */
    0x2E, 0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x70 - 0x7F */
    0x2E, 0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
    0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x80 - 0x8F */
    0x68, 0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, /* 0x90 - 0x9F */
    0x71, 0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* 0xA0 - 0xAF */
    0x79, 0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E,
    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0xB0 - 0xBF */
    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E,
    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0xC0 - 0xCF */
    0x48, 0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, /* 0xD0 - 0xDF */
    0x51, 0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, /* 0xE0 - 0xEF */
    0x59, 0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0xF0 - 0xFF */
    0x38, 0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E
};
1438
1439 void
1440 EBCDIC_to_ASCII(uint8_t *buf, unsigned bytes)
1441 {
1442     unsigned   i;
1443     uint8_t *bufptr;
1444
1445     bufptr = buf;
1446
1447     for (i = 0; i < bytes; i++, bufptr++) {
1448         *bufptr = EBCDIC_translate_ASCII[*bufptr];
1449     }
1450 }
1451
1452 uint8_t
1453 EBCDIC_to_ASCII1(uint8_t c)
1454 {
1455     return EBCDIC_translate_ASCII[c];
1456 }
1457
1458 /*
1459  * This routine is based on a routine created by Dan Lasley
1460  * <DLASLEY@PROMUS.com>.
1461  *
1462  * It was modified for Wireshark by Gilbert Ramirez and others.
1463  */
1464
/*
 * Layout of one line produced by hex_dump_buffer():
 *
 *   <offset><2 blanks><hex dump><2 blanks>[|]<text dump>[|]
 *
 * The constants below size the line buffer for the widest possible line.
 */
#define MAX_OFFSET_LEN   8       /* max length of hex offset of bytes */
#define BYTES_PER_LINE  16      /* max byte values printed on a line */
#define HEX_DUMP_LEN    (BYTES_PER_LINE*3)
                                /* max number of characters hex dump takes -
                                   2 digits plus trailing blank */
#define DATA_DUMP_LEN   (HEX_DUMP_LEN + 2 + 2 + BYTES_PER_LINE)
                                /* number of characters those bytes take;
                                   3 characters per byte of hex dump,
                                   2 blanks separating hex from ASCII,
                                   2 optional ASCII dump delimiters,
                                   1 character per byte of ASCII dump */
#define MAX_LINE_LEN    (MAX_OFFSET_LEN + 2 + DATA_DUMP_LEN)
                                /* number of characters per line;
                                   offset, 2 blanks separating offset
                                   from data dump, data dump; this does
                                   not count the terminating '\0', so the
                                   line buffer is MAX_LINE_LEN + 1 bytes */
1480
1481 bool
1482 hex_dump_buffer(bool (*print_line)(void *, const char *), void *fp,
1483                                     const unsigned char *cp, unsigned length,
1484                                     hex_dump_enc encoding,
1485                                     unsigned ascii_option)
1486 {
1487     register unsigned int ad, i, j, k, l;
1488     unsigned char         c;
1489     char                  line[MAX_LINE_LEN + 1];
1490     unsigned int          use_digits;
1491
1492     static char binhex[16] = {
1493         '0', '1', '2', '3', '4', '5', '6', '7',
1494         '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
1495
1496     /*
1497      * How many of the leading digits of the offset will we supply?
1498      * We always supply at least 4 digits, but if the maximum offset
1499      * won't fit in 4 digits, we use as many digits as will be needed.
1500      */
1501     if (((length - 1) & 0xF0000000) != 0)
1502         use_digits = 8; /* need all 8 digits */
1503     else if (((length - 1) & 0x0F000000) != 0)
1504         use_digits = 7; /* need 7 digits */
1505     else if (((length - 1) & 0x00F00000) != 0)
1506         use_digits = 6; /* need 6 digits */
1507     else if (((length - 1) & 0x000F0000) != 0)
1508         use_digits = 5; /* need 5 digits */
1509     else
1510         use_digits = 4; /* we'll supply 4 digits */
1511
1512     ad = 0;
1513     i = 0;
1514     j = 0;
1515     k = 0;
1516     while (i < length) {
1517         if ((i & 15) == 0) {
1518             /*
1519              * Start of a new line.
1520              */
1521             j = 0;
1522             l = use_digits;
1523             do {
1524                 l--;
1525                 c = (ad >> (l*4)) & 0xF;
1526                 line[j++] = binhex[c];
1527             } while (l != 0);
1528             line[j++] = ' ';
1529             line[j++] = ' ';
1530             memset(line+j, ' ', DATA_DUMP_LEN);
1531
1532             /*
1533              * Offset in line of ASCII dump.
1534              */
1535             k = j + HEX_DUMP_LEN + 2;
1536             if (ascii_option == HEXDUMP_ASCII_DELIMIT)
1537                 line[k++] = '|';
1538         }
1539         c = *cp++;
1540         line[j++] = binhex[c>>4];
1541         line[j++] = binhex[c&0xf];
1542         j++;
1543         if (ascii_option != HEXDUMP_ASCII_EXCLUDE ) {
1544             if (encoding == HEXDUMP_ENC_EBCDIC) {
1545                 c = EBCDIC_to_ASCII1(c);
1546             }
1547             line[k++] = ((c >= ' ') && (c < 0x7f)) ? c : '.';
1548         }
1549         i++;
1550         if (((i & 15) == 0) || (i == length)) {
1551             /*
1552              * We'll be starting a new line, or
1553              * we're finished printing this buffer;
1554              * dump out the line we've constructed,
1555              * and advance the offset.
1556              */
1557             if (ascii_option == HEXDUMP_ASCII_DELIMIT)
1558                 line[k++] = '|';
1559             line[k] = '\0';
1560             if (!print_line(fp, line))
1561                 return false;
1562             ad += 16;
1563         }
1564     }
1565     return true;
1566 }
1567
1568 /*
1569  * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
1570  *
1571  * Local variables:
1572  * c-basic-offset: 4
1573  * tab-width: 8
1574  * indent-tabs-mode: nil
1575  * End:
1576  *
1577  * vi: set shiftwidth=4 tabstop=8 expandtab:
1578  * :indentSize=4:tabSize=8:noTabs=true:
1579  */