regen pidl all: rm epan/dissectors/pidl/*-stamp; pushd epan/dissectors/pidl/ && make...
[wireshark-sm.git] / epan / strutil.c
blob5980ebb0b5bc6e46684620fdd2906b7f02817aeb
1 /* strutil.c
2 * String utility routines
4 * Wireshark - Network traffic analyzer
5 * By Gerald Combs <gerald@wireshark.org>
6 * Copyright 1998 Gerald Combs
8 * SPDX-License-Identifier: GPL-2.0-or-later
9 */
11 #include "config.h"
13 #include <stdlib.h>
14 #include <string.h>
15 #include <glib.h>
16 #include "strutil.h"
18 #include <wsutil/str_util.h>
19 #include <wsutil/unicode-utils.h>
20 #include <epan/proto.h>
22 #ifdef _WIN32
23 #include <windows.h>
24 #include <tchar.h>
25 #include <wchar.h>
26 #endif
/*
 * Locate the end of the (putative) line that begins at "data" in a buffer
 * whose first byte past the end is "dataend".
 *
 * "*eol" is set to the first end-of-line character: the CR of a CR-LF
 * pair, a bare LF, or "dataend" when the buffer holds no LF at all.
 *
 * The return value points just past the line terminator (including the
 * CR of an LF-CR sequence), or is "dataend" when no LF was found.
 */
const unsigned char *
find_line_end(const unsigned char *data, const unsigned char *dataend, const unsigned char **eol)
{
    const unsigned char *lf = (const unsigned char *)memchr(data, '\n', dataend - data);

    if (lf == NULL) {
        /* No LF - the line is probably continued in the next TCP segment. */
        *eol = dataend;
        return dataend;
    }

    if (lf == data) {
        /* The LF is the very first character, so it is the whole EOL. */
        *eol = lf;
        return lf + 1;
    }

    if (lf[-1] == '\r') {
        /* CR-LF: the EOL starts with the CR. */
        *eol = lf - 1;
        return lf + 1;
    }

    /* Bare LF (it's supposed to follow a CR, but that's not guaranteed). */
    *eol = lf;

    /*
     * Lines ending with LF-CR have been seen in HTTP traffic, so if the
     * LF is *followed* by a CR, treat that CR as part of the terminator.
     */
    if (lf < (dataend - 1) && lf[1] == '\r') {
        lf++;
    }

    /* Point to the character after the last terminator character. */
    return lf + 1;
}
/*
 * Measure the token that starts at "linep" and find where the following
 * token (if any) begins.
 *
 * A token ends at a blank, a CR, an LF, or "lineend", whichever comes
 * first.  "*next_token" is set to the first non-blank position after the
 * token (blanks only; a CR or LF is not skipped).
 *
 * Returns the token length, which is 0 if there is no token.
 */
int
get_token_len(const unsigned char *linep, const unsigned char *lineend,
              const unsigned char **next_token)
{
    const unsigned char *cursor = linep;

    /* Advance to the first blank, CR, LF, or the end of the buffer. */
    for (; cursor < lineend; cursor++) {
        if (*cursor == ' ' || *cursor == '\r' || *cursor == '\n')
            break;
    }
    const int length = (int) (cursor - linep);

    /* Step over the blanks trailing the token. */
    while (cursor < lineend && *cursor == ' ')
        cursor++;

    *next_token = cursor;
    return length;
}
/* Tell whether "c" is one of the byte separators '-', ':' or '.'. */
static bool
is_byte_sep(uint8_t c)
{
    switch (c) {
    case '-':
    case ':':
    case '.':
        return true;
    default:
        return false;
    }
}
132 /* Turn a string of hex digits with optional separators (defined by
133 * is_byte_sep() into a byte array.
135 * XXX - This function is perhaps too generous in what it accepts.
136 * It allows the separator to change from one character to another,
137 * or to and from no separator if force_separators is false.
139 bool
140 hex_str_to_bytes(const char *hex_str, GByteArray *bytes, bool force_separators)
142 uint8_t val;
143 const char *p, *q, *r, *s, *punct;
144 char four_digits_first_half[3];
145 char four_digits_second_half[3];
146 char two_digits[3];
147 char one_digit[2];
149 if (! hex_str || ! bytes) {
150 return false;
152 g_byte_array_set_size(bytes, 0);
153 p = hex_str;
154 while (*p) {
155 q = p+1;
156 r = p+2;
157 s = p+3;
159 if (*q && *r
160 && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q) &&
161 g_ascii_isxdigit(*r)) {
164 * Three hex bytes in a row, followed by a non hex byte
165 * (possibly the end of the string). We don't accept an
166 * odd number of hex digits except for single digits
167 * by themselves or after a separator.
169 if (!g_ascii_isxdigit(*s)) {
170 return false;
172 four_digits_first_half[0] = *p;
173 four_digits_first_half[1] = *q;
174 four_digits_first_half[2] = '\0';
175 four_digits_second_half[0] = *r;
176 four_digits_second_half[1] = *s;
177 four_digits_second_half[2] = '\0';
180 * Four or more hex digits in a row.
182 val = (uint8_t) strtoul(four_digits_first_half, NULL, 16);
183 g_byte_array_append(bytes, &val, 1);
184 val = (uint8_t) strtoul(four_digits_second_half, NULL, 16);
185 g_byte_array_append(bytes, &val, 1);
187 punct = s + 1;
188 if (*punct) {
190 * Make sure the character after
191 * the fourth hex digit is a byte
192 * separator, i.e. that we don't have
193 * more than four hex digits, or a
194 * bogus character.
196 if (is_byte_sep(*punct)) {
197 p = punct + 1;
198 continue;
200 else if (force_separators) {
201 return false;
204 p = punct;
205 continue;
207 else if (*q && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q)) {
208 two_digits[0] = *p;
209 two_digits[1] = *q;
210 two_digits[2] = '\0';
213 * Two hex digits in a row.
215 val = (uint8_t) strtoul(two_digits, NULL, 16);
216 g_byte_array_append(bytes, &val, 1);
217 punct = q + 1;
218 if (*punct) {
220 * Make sure the character after
221 * the second hex digit is a byte
222 * separator, i.e. that we don't have
223 * more than two hex digits, or a
224 * bogus character.
226 if (is_byte_sep(*punct)) {
227 p = punct + 1;
228 continue;
230 else if (force_separators) {
231 return false;
234 p = punct;
235 continue;
237 else if (*q && g_ascii_isxdigit(*p) && is_byte_sep(*q)) {
238 one_digit[0] = *p;
239 one_digit[1] = '\0';
242 * Only one hex digit (not at the end of the string)
244 val = (uint8_t) strtoul(one_digit, NULL, 16);
245 g_byte_array_append(bytes, &val, 1);
246 p = q + 1;
247 continue;
249 else if (!*q && g_ascii_isxdigit(*p)) {
250 one_digit[0] = *p;
251 one_digit[1] = '\0';
254 * Only one hex digit (at the end of the string)
256 val = (uint8_t) strtoul(one_digit, NULL, 16);
257 g_byte_array_append(bytes, &val, 1);
258 p = q;
259 continue;
261 else {
262 return false;
265 return true;
268 static inline char
269 get_valid_byte_sep(char c, const unsigned encoding)
271 char retval = -1; /* -1 means failure */
273 switch (c) {
274 case ':':
275 if (encoding & ENC_SEP_COLON)
276 retval = c;
277 break;
278 case '-':
279 if (encoding & ENC_SEP_DASH)
280 retval = c;
281 break;
282 case '.':
283 if (encoding & ENC_SEP_DOT)
284 retval = c;
285 break;
286 case ' ':
287 if (encoding & ENC_SEP_SPACE)
288 retval = c;
289 break;
290 case '\0':
291 /* we were given the end of the string, so it's fine */
292 retval = 0;
293 break;
294 default:
295 if (g_ascii_isxdigit(c) && (encoding & ENC_SEP_NONE))
296 retval = 0;
297 /* anything else means we've got a failure */
298 break;
301 return retval;
304 /* Turn a string of hex digits with optional separators (defined by is_byte_sep())
305 * into a byte array. Unlike hex_str_to_bytes(), this will read as many hex-char
306 * pairs as possible and not error if it hits a non-hex-char; instead it just ends
307 * there. (i.e., like strtol()/atoi()/etc.) Unless fail_if_partial is true.
309 * The **endptr, if not NULL, is set to the char after the last hex character.
311 bool
312 hex_str_to_bytes_encoding(const char *hex_str, GByteArray *bytes, const char **endptr,
313 const unsigned encoding, const bool fail_if_partial)
315 int8_t c, d;
316 uint8_t val;
317 const char *end = hex_str;
318 bool retval = false;
319 char sep = -1;
321 /* a map from ASCII hex chars to their value */
322 static const int8_t str_to_nibble[256] = {
323 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
324 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
325 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
326 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
327 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
328 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
329 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
330 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
331 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
332 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
333 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
334 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
335 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
336 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
337 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
338 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
341 /* we must see two hex chars at the beginning, or fail */
342 if (bytes && *end && g_ascii_isxdigit(*end) && g_ascii_isxdigit(*(end+1))) {
343 retval = true;
345 /* set the separator character we'll allow; if this returns a -1, it means something's
346 * invalid after the hex, but we'll let the while-loop grab the first hex-pair anyway
348 sep = get_valid_byte_sep(*(end+2), encoding);
350 while (*end) {
351 c = str_to_nibble[(unsigned char)*end];
352 if (c < 0) {
353 if (fail_if_partial) retval = false;
354 break;
357 d = str_to_nibble[(unsigned char)*(end+1)];
358 if (d < 0) {
359 if (fail_if_partial) retval = false;
360 break;
362 val = ((uint8_t)c * 16) + d;
363 g_byte_array_append(bytes, &val, 1);
364 end += 2;
366 /* check for separator and peek at next char to make sure we should keep going */
367 if (sep > 0 && *end == sep && str_to_nibble[(unsigned char)*(end+1)] > -1) {
368 /* yes, it's the right sep and followed by more hex, so skip the sep */
369 ++end;
370 } else if (sep != 0 && *end) {
371 /* we either need a separator, but we don't see one; or the get_valid_byte_sep()
372 earlier didn't find a valid one to begin with */
373 if (fail_if_partial) retval = false;
374 break;
376 /* otherwise, either no separator allowed, or *end is null, or *end is an invalid
377 * sep, or *end is a valid sep but after it is not a hex char - in all those
378 * cases, just loop back up and let it fail later naturally.
383 if (!retval) {
384 if (bytes) g_byte_array_set_size(bytes, 0);
385 end = hex_str;
388 if (endptr) *endptr = end;
390 return retval;
394 * Turn an RFC 3986 percent-encoded array of characters, not
395 * necessarily null-terminated, into a byte array.
396 * XXX - We don't check for reserved characters.
397 * XXX - g_uri_unescape_bytes is superior, but limited to
398 * glib >= 2.66
400 #define HEX_DIGIT_BUF_LEN 3
401 bool
402 uri_to_bytes(const char *uri_str, GByteArray *bytes, size_t len)
404 uint8_t val;
405 const char *p;
406 const char *uri_end = uri_str + len;
407 char hex_digit[HEX_DIGIT_BUF_LEN];
409 g_byte_array_set_size(bytes, 0);
410 if (! uri_str) {
411 return false;
414 p = uri_str;
416 while (p < uri_end) {
417 if (!g_ascii_isprint(*p))
418 return false;
419 if (*p == '%') {
420 p++;
421 if (*p == '\0') return false;
422 hex_digit[0] = *p;
423 p++;
424 if (*p == '\0') return false;
425 hex_digit[1] = *p;
426 hex_digit[2] = '\0';
427 if (! g_ascii_isxdigit(hex_digit[0]) || ! g_ascii_isxdigit(hex_digit[1]))
428 return false;
429 val = (uint8_t) strtoul(hex_digit, NULL, 16);
430 g_byte_array_append(bytes, &val, 1);
431 } else {
432 g_byte_array_append(bytes, (const uint8_t *) p, 1);
434 p++;
437 return true;
441 * Turn an RFC 3986 percent-encoded string into a byte array.
442 * XXX - We don't check for reserved characters.
443 * XXX - Just use g_uri_unescape_string instead?
445 bool
446 uri_str_to_bytes(const char *uri_str, GByteArray *bytes)
448 return uri_to_bytes(uri_str, bytes, strlen(uri_str));
452 * Create a copy of a GByteArray
454 * @param ba The byte array to be copied.
455 * @return If ba exists, a freshly allocated copy. NULL otherwise.
458 GByteArray *
459 byte_array_dup(const GByteArray *ba)
461 GByteArray *new_ba;
463 if (!ba)
464 return NULL;
466 new_ba = g_byte_array_new();
467 g_byte_array_append(new_ba, ba->data, ba->len);
468 return new_ba;
471 #define SUBID_BUF_LEN 5
472 bool
473 oid_str_to_bytes(const char *oid_str, GByteArray *bytes)
475 return rel_oid_str_to_bytes(oid_str, bytes, true);
477 bool
478 rel_oid_str_to_bytes(const char *oid_str, GByteArray *bytes, bool is_absolute)
480 uint32_t subid0, subid, sicnt, i;
481 const char *p, *dot;
482 uint8_t buf[SUBID_BUF_LEN];
484 g_byte_array_set_size(bytes, 0);
486 /* check syntax */
487 p = oid_str;
488 dot = NULL;
489 while (*p) {
490 if (!g_ascii_isdigit(*p) && (*p != '.')) return false;
491 if (*p == '.') {
492 if (p == oid_str && is_absolute) return false;
493 if (!*(p+1)) return false;
494 if ((p-1) == dot) return false;
495 dot = p;
497 p++;
499 if (!dot) return false;
501 p = oid_str;
502 sicnt = is_absolute ? 0 : 2;
503 if (!is_absolute) p++;
504 subid0 = 0; /* squelch GCC complaints */
505 while (*p) {
506 subid = 0;
507 while (g_ascii_isdigit(*p)) {
508 subid *= 10;
509 subid += *p - '0';
510 p++;
512 if (sicnt == 0) {
513 subid0 = subid;
514 if (subid0 > 2) return false;
515 } else if (sicnt == 1) {
516 if ((subid0 < 2) && (subid > 39)) return false;
517 subid += 40 * subid0;
519 if (sicnt) {
520 i = SUBID_BUF_LEN;
521 do {
522 i--;
523 buf[i] = 0x80 | (subid % 0x80);
524 subid >>= 7;
525 } while (subid && i);
526 buf[SUBID_BUF_LEN-1] &= 0x7F;
527 g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
529 sicnt++;
530 if (*p) p++;
533 return true;
537 * Compare the contents of two GByteArrays
539 * @param ba1 A byte array
540 * @param ba2 A byte array
541 * @return If both arrays are non-NULL and their lengths are equal and
542 * their contents are equal, returns true. Otherwise, returns
543 * false.
545 * XXX - Should this be in strutil.c?
547 bool
548 byte_array_equal(GByteArray *ba1, GByteArray *ba2)
550 if (!ba1 || !ba2)
551 return false;
553 if (ba1->len != ba2->len)
554 return false;
556 if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
557 return false;
559 return true;
563 /* Return a XML escaped representation of the unescaped string.
564 * The returned string must be freed when no longer in use. */
565 char *
566 xml_escape(const char *unescaped)
568 GString *buffer = g_string_sized_new(128);
569 const char *p;
570 char c;
572 p = unescaped;
573 while ( (c = *p++) ) {
574 switch (c) {
575 case '<':
576 g_string_append(buffer, "&lt;");
577 break;
578 case '>':
579 g_string_append(buffer, "&gt;");
580 break;
581 case '&':
582 g_string_append(buffer, "&amp;");
583 break;
584 case '\'':
585 g_string_append(buffer, "&#x27;");
586 break;
587 case '"':
588 g_string_append(buffer, "&quot;");
589 break;
590 case '\t':
591 case '\n':
592 case '\r':
593 g_string_append_c(buffer, c);
594 break;
595 default:
596 /* XML 1.0 doesn't allow ASCII control characters, except
597 * for the three whitespace ones above (which do *not*
598 * include '\v' and '\f', so not the same group as isspace),
599 * even as character references.
600 * There's no official way to escape them, so we'll do this. */
601 if (g_ascii_iscntrl(c)) {
602 g_string_append_printf(buffer, "\\x%x", c);
603 } else {
604 g_string_append_c(buffer, c);
606 break;
609 /* Return the string value contained within the GString
610 * after getting rid of the GString structure.
611 * This is the way to do this, see the GLib reference. */
612 return g_string_free(buffer, FALSE);
/*
 * Walk a hex search string, validating it and counting the bytes it
 * describes.  White space and the characters ':', '.' and '-' are
 * skipped; everything else must come as pairs of adjacent hex digits,
 * since we can only match bytes, not nibbles.
 *
 * If "out" is non-NULL each decoded byte is also stored there.
 * Returns false on the first invalid character; otherwise stores the
 * byte count in "*count" and returns true.
 */
static bool
hex_search_string_scan(const char *string, uint8_t *out, size_t *count)
{
    size_t found = 0;
    const char *cur = string;

    while (*cur != '\0') {
        const char hi = *cur++;

        if (g_ascii_isspace(hi))
            continue;	/* allow white space */
        if (hi == ':' || hi == '.' || hi == '-')
            continue;	/* skip any ':', '.', or '-' between bytes */
        if (!g_ascii_isxdigit(hi))
            return false;	/* not a valid hex digit */

        /* A valid hex digit must follow immediately. */
        const char lo = *cur++;
        if (!g_ascii_isxdigit(lo))
            return false;

        if (out != NULL) {
            uint8_t value = ws_xton(hi);
            value <<= 4;
            value |= ws_xton(lo);
            out[found] = value;
        }

        /* 2 hex digits = 1 byte */
        found++;
    }

    *count = found;
    return true;
}

/*
 * Convert a hex search string into a newly allocated array of raw bytes.
 *
 * Returns NULL, leaving "*nbytes" untouched, if the string is not valid
 * hex or contains no hex digits at all.  Otherwise returns the byte
 * array and stores its length in "*nbytes".
 */
uint8_t *
convert_string_to_hex(const char *string, size_t *nbytes)
{
    size_t n_bytes = 0;

    /* First pass: validate and count.  Were we given any hex digits? */
    if (!hex_search_string_scan(string, NULL, &n_bytes) || n_bytes == 0) {
        return NULL;
    }

    /* Second pass: it's valid and yields n_bytes bytes; generate them. */
    uint8_t *bytes = (uint8_t *)g_malloc(n_bytes);
    hex_search_string_scan(string, bytes, &n_bytes);

    *nbytes = n_bytes;
    return bytes;
}
/*
 * Return a newly allocated version of "string" for searching: an
 * uppercased copy (via g_utf8_strup) for a case-insensitive search, a
 * plain copy for a case-sensitive one.
 */
char *
convert_string_case(const char *string, bool case_insensitive)
{
    return case_insensitive ? g_utf8_strup(string, -1) : g_strdup(string);
}
706 #define GN_CHAR_ALPHABET_SIZE 128
708 static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
710 /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
712 '?', '?', '?', '?', '?', '?', '?', '?',
713 '?', '?', '?', '?', '?', '?', '?', '?',
714 '?', '?', '?', '?', '?', '?', '?', '?',
715 '?', '?', '?', '?', '?', '?', '?', '?',
716 ' ', '!', '\"','#', '$', '%', '&', '\'',
717 '(', ')', '*', '+', ',', '-', '.', '/',
718 '0', '1', '2', '3', '4', '5', '6', '7',
719 '8', '9', ':', ';', '<', '=', '>', '?',
720 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
721 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
722 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
723 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
724 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
725 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
726 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
727 'x', 'y', 'z', '{', '|', '}', '~', '?'
730 static gunichar
731 char_def_ia5_alphabet_decode(unsigned char value)
733 if (value < GN_CHAR_ALPHABET_SIZE) {
734 return IA5_default_alphabet[value];
736 else {
737 return '?';
741 void
742 IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
744 int i, j;
745 gunichar buf;
747 for (i = 0, j = 0; j < len; j++) {
748 buf = char_def_ia5_alphabet_decode(src[j]);
749 i += g_unichar_to_utf8(buf,&(dest[i]));
751 dest[i]=0;
/* chars allowed: lower case letters, digits, '-', "_", and ".". */
static
const uint8_t module_valid_chars_lower_case[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2F '-', '.'      */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3F '0'-'9'       */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x4F               */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50-0x5F '_'           */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F 'a'-'o'       */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7F 'p'-'z'       */
    /* upper 128 empty-initialized to 0 */
};

/* chars allowed: alphanumerics, '-', "_", and ".". */
static
const uint8_t module_valid_chars[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 0x20-0x2F '-', '.'      */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30-0x3F '0'-'9'       */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F 'A'-'O'       */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5F 'P'-'Z', '_'  */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F 'a'-'o'       */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7F 'p'-'z'       */
    /* upper 128 empty-initialized to 0 */
};

/*
 * Check that "name" consists only of permitted characters (lower-case
 * only if "lower_only" is set), does not start with '-', and has no
 * leading, trailing or doubled '.'.
 *
 * Returns 0 if the name is valid.  Otherwise returns '-' for a leading
 * '-', '.' whenever the scan stopped right after a '.' or at the very
 * start of the name (which includes an empty name), and in all other
 * cases the first character that is not permitted.
 */
unsigned char
module_check_valid_name(const char *name, bool lower_only)
{
    const uint8_t *allowed = lower_only ? module_valid_chars_lower_case
                                        : module_valid_chars;
    const char *next = name;
    unsigned char prev;
    /* Pretend a '.' precedes the name so that a leading '.' is seen as "..". */
    unsigned char cur = '.';

    /* First character cannot be '-'. */
    if (*name == '-')
        return '-';

    for (;;) {
        prev = cur;
        cur = (unsigned char)*next++;
        /* Leading '.' or substring ".." are disallowed. */
        if (cur == '.' && prev == '.')
            break;
        /* Stops on any forbidden character, including the final NUL. */
        if (!allowed[cur])
            break;
    }

    /* Trailing '.' is disallowed. */
    return (prev == '.') ? '.' : cur;
}
814 static const char _hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
815 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
818 * Copy byte by byte without UTF-8 truncation (assume valid UTF-8 input).
819 * Return byte size written, or that would have been
820 * written with enough space.
822 size_t
823 ws_label_strcpy(char *label_str, size_t buf_size, size_t pos,
824 const uint8_t *str, int flags)
826 if (pos >= buf_size)
827 return pos;
829 uint8_t r = 0;
830 ssize_t chlen;
831 ssize_t idx, src_len;
832 ssize_t free_len;
834 label_str[pos] = '\0';
836 ws_return_val_if(str == NULL, pos);
837 idx = 0;
838 src_len = strlen(str);
839 free_len = buf_size - pos - 1;
841 while (idx < src_len) {
842 chlen = ws_utf8_char_len(str[idx]);
843 if (chlen <= 0) {
844 /* We were passed invalid UTF-8. This is an error. Complain and do... something. */
845 ws_log_utf8(str, -1, NULL);
847 * XXX If we are going to return here instead of trying to recover maybe the log level should
848 * be higher than DEBUG.
850 return pos;
853 /* ASCII */
854 if (chlen == 1) {
855 if (flags & FORMAT_LABEL_REPLACE_SPACE && g_ascii_isspace(str[idx])) {
856 if (free_len >= 1) {
857 label_str[pos] = ' ';
858 label_str[pos+1] = '\0';
860 pos++;
861 idx++;
862 free_len--;
863 continue;
866 r = 0;
867 switch (str[idx]) {
868 case '\a': r = 'a'; break;
869 case '\b': r = 'b'; break;
870 case '\f': r = 'f'; break;
871 case '\n': r = 'n'; break;
872 case '\r': r = 'r'; break;
873 case '\t': r = 't'; break;
874 case '\v': r = 'v'; break;
876 if (r != 0) {
877 if (free_len >= 2) {
878 label_str[pos] = '\\';
879 label_str[pos+1] = r;
880 label_str[pos+2] = '\0';
882 pos += 2;
883 idx += 1;
884 free_len -= 2;
885 continue;
888 if (g_ascii_isprint(str[idx])) {
889 if (free_len >= 1) {
890 label_str[pos] = str[idx];
891 label_str[pos+1] = '\0';
893 pos++;
894 idx++;
895 free_len--;
896 continue;
899 if (free_len >= 4) {
900 label_str[pos+0] = '\\';
901 label_str[pos+1] = 'x';
903 uint8_t ch = str[idx];
904 label_str[pos+2] = _hex[ch >> 4];
905 label_str[pos+3] = _hex[ch & 0x0F];
906 label_str[pos+4] = '\0';
908 pos += 4;
909 idx += chlen;
910 free_len -= 4;
911 continue;
914 /* UTF-8 multibyte */
915 if (chlen == 2 && str[idx] == 0xC2 &&
916 str[idx+1] >= 0x80 && str[idx+1] <= 0x9F) {
918 * Escape the C1 control codes. C0 (covered above) and C1 are
919 * inband signalling and transparent to Unicode.
920 * Anything else probably has text semantics should not be removed.
923 * Special case: The second UTF-8 byte is the same as the Unicode
924 * code point for range U+0080 - U+009F.
926 if (free_len >= 6) {
927 label_str[pos+0] = '\\';
928 label_str[pos+1] = 'u';
929 label_str[pos+2] = '0';
930 label_str[pos+3] = '0';
932 uint8_t ch = str[idx+1];
933 label_str[pos+4] = _hex[ch >> 4];
934 label_str[pos+5] = _hex[ch & 0x0F];
935 label_str[pos+6] = '\0';
937 pos += 6;
938 idx += chlen;
939 free_len -= 6;
940 continue;
943 /* Just copy */
944 if (free_len >= chlen) {
945 for (ssize_t j = 0; j < chlen; j++) {
946 label_str[pos+j] = str[idx+j];
948 label_str[pos+chlen] = '\0';
950 pos += chlen;
951 idx += chlen;
952 free_len -= chlen;
955 return pos;
/* Append "str" to the null-terminated string already in "label_str" by
 * calling ws_label_strcpy() with the current string length as the
 * starting position; returns whatever that call returns. */
size_t
ws_label_strcat(char *label_str, size_t bufsize, const uint8_t *str, int flags)
{
    const size_t append_at = strlen(label_str);

    return ws_label_strcpy(label_str, bufsize, append_at, str, flags);
}
965 * Editor modelines - https://www.wireshark.org/tools/modelines.html
967 * Local variables:
968 * c-basic-offset: 4
969 * tab-width: 8
970 * indent-tabs-mode: nil
971 * End:
973 * vi: set shiftwidth=4 tabstop=8 expandtab:
974 * :indentSize=4:tabSize=8:noTabs=true: