2 * String utility routines
6 * Wireshark - Network traffic analyzer
7 * By Gerald Combs <gerald@wireshark.org>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
33 #include <../isprint.h>
/*
 * Upper-case hexadecimal digits, indexed by nibble value (0 through 15).
 * The array holds exactly 16 characters and is not NUL-terminated.
 */
static const char hex[16] = {
    '0', '1', '2', '3',
    '4', '5', '6', '7',
    '8', '9', 'A', 'B',
    'C', 'D', 'E', 'F'
};
46 * Given a pointer into a data buffer, and to the end of the buffer,
47 * find the end of the (putative) line at that position in the data
49 * Return a pointer to the EOL character(s) in "*eol".
52 find_line_end(const guchar
*data
, const guchar
*dataend
, const guchar
**eol
)
54 const guchar
*lineend
;
56 lineend
= (guchar
*)memchr(data
, '\n', dataend
- data
);
57 if (lineend
== NULL
) {
59 * No LF - line is probably continued in next TCP segment.
65 * Is the LF at the beginning of the line?
69 * No - is it preceded by a carriage return?
70 * (Perhaps it's supposed to be, but that's not guaranteed....)
72 if (*(lineend
- 1) == '\r') {
74 * Yes. The EOL starts with the CR.
79 * No. The EOL starts with the LF.
84 * I seem to remember that we once saw lines ending with LF-CR
85 * in an HTTP request or response, so check if it's *followed*
86 * by a carriage return.
88 if (lineend
< (dataend
- 1) && *(lineend
+ 1) == '\r') {
90 * It's <non-LF><LF><CR>; say it ends with the CR.
97 * Yes - the EOL starts with the LF.
103 * Point to the character after the last character.
111 * Get the length of the next token in a line, and the beginning of the
112 * next token after that (if any).
113 * Return 0 if there is no next token.
116 get_token_len(const guchar
*linep
, const guchar
*lineend
,
117 const guchar
**next_token
)
119 const guchar
*tokenp
;
125 * Search for a blank, a CR or an LF, or the end of the buffer.
127 while (linep
< lineend
&& *linep
!= ' ' && *linep
!= '\r' && *linep
!= '\n')
129 token_len
= (int) (linep
- tokenp
);
132 * Skip trailing blanks.
134 while (linep
< lineend
&& *linep
== ' ')
143 #define INITIAL_FMTBUF_SIZE 128
146 * Given a string, generate a string from it that shows non-printable
147 * characters as C-style escapes, and return a pointer to it.
150 format_text(const guchar
*string
, size_t len
)
152 static gchar
*fmtbuf
[3];
153 static int fmtbuf_len
[3];
156 const guchar
*stringend
= string
+ len
;
163 * Allocate the buffer if it's not already allocated.
165 if (fmtbuf
[idx
] == NULL
) {
166 fmtbuf
[idx
] = (gchar
*)g_malloc(INITIAL_FMTBUF_SIZE
);
167 fmtbuf_len
[idx
] = INITIAL_FMTBUF_SIZE
;
170 while (string
< stringend
) {
172 * Is there enough room for this character, if it expands to
173 * a backslash plus 3 octal digits (which is the most it can
174 * expand to), and also enough room for a terminating '\0'?
176 if (column
+3+1 >= fmtbuf_len
[idx
]) {
178 * Double the buffer's size if it's not big enough.
179 * The size of the buffer starts at 128, so doubling its size
180 * adds at least another 128 bytes, which is more than enough
181 * for one more character plus a terminating '\0'.
183 fmtbuf_len
[idx
] = fmtbuf_len
[idx
] * 2;
184 fmtbuf
[idx
] = (gchar
*)g_realloc(fmtbuf
[idx
], fmtbuf_len
[idx
]);
189 fmtbuf
[idx
][column
] = c
;
192 fmtbuf
[idx
][column
] = '\\';
197 fmtbuf
[idx
][column
] = 'a';
202 fmtbuf
[idx
][column
] = 'b'; /* BS */
207 fmtbuf
[idx
][column
] = 'f'; /* FF */
212 fmtbuf
[idx
][column
] = 'n'; /* NL */
217 fmtbuf
[idx
][column
] = 'r'; /* CR */
222 fmtbuf
[idx
][column
] = 't'; /* tab */
227 fmtbuf
[idx
][column
] = 'v';
233 fmtbuf
[idx
][column
] = i
+ '0';
236 fmtbuf
[idx
][column
] = i
+ '0';
239 fmtbuf
[idx
][column
] = i
+ '0';
245 fmtbuf
[idx
][column
] = '\0';
250 * Given a string, generate a string from it that shows non-printable
251 * characters as C-style escapes except a whitespace character
252 * (space, tab, carriage return, new line, vertical tab, or formfeed)
253 * which will be replaced by a space, and return a pointer to it.
256 format_text_wsp(const guchar
*string
, size_t len
)
258 static gchar
*fmtbuf
[3];
259 static int fmtbuf_len
[3];
262 const guchar
*stringend
= string
+ len
;
269 * Allocate the buffer if it's not already allocated.
271 if (fmtbuf
[idx
] == NULL
) {
272 fmtbuf
[idx
] = (gchar
*)g_malloc(INITIAL_FMTBUF_SIZE
);
273 fmtbuf_len
[idx
] = INITIAL_FMTBUF_SIZE
;
276 while (string
< stringend
) {
278 * Is there enough room for this character, if it expands to
279 * a backslash plus 3 octal digits (which is the most it can
280 * expand to), and also enough room for a terminating '\0'?
282 if (column
+3+1 >= fmtbuf_len
[idx
]) {
284 * Double the buffer's size if it's not big enough.
285 * The size of the buffer starts at 128, so doubling its size
286 * adds at least another 128 bytes, which is more than enough
287 * for one more character plus a terminating '\0'.
289 fmtbuf_len
[idx
] = fmtbuf_len
[idx
] * 2;
290 fmtbuf
[idx
] = (gchar
*)g_realloc(fmtbuf
[idx
], fmtbuf_len
[idx
]);
295 fmtbuf
[idx
][column
] = c
;
297 } else if (isspace(c
)) {
298 fmtbuf
[idx
][column
] = ' ';
301 fmtbuf
[idx
][column
] = '\\';
306 fmtbuf
[idx
][column
] = 'a';
311 fmtbuf
[idx
][column
] = 'b'; /* BS */
316 fmtbuf
[idx
][column
] = 'f'; /* FF */
321 fmtbuf
[idx
][column
] = 'n'; /* NL */
326 fmtbuf
[idx
][column
] = 'r'; /* CR */
331 fmtbuf
[idx
][column
] = 't'; /* tab */
336 fmtbuf
[idx
][column
] = 'v';
342 fmtbuf
[idx
][column
] = i
+ '0';
345 fmtbuf
[idx
][column
] = i
+ '0';
348 fmtbuf
[idx
][column
] = i
+ '0';
354 fmtbuf
[idx
][column
] = '\0';
359 * Given a string, generate a string from it that shows non-printable
360 * characters as the chr parameter passed, except a whitespace character
361 * (space, tab, carriage return, new line, vertical tab, or formfeed)
362 * which will be replaced by a space, and return a pointer to it.
365 format_text_chr(const guchar
*string
, const size_t len
, const guchar chr
)
367 static gchar
*fmtbuf
[3];
368 static int fmtbuf_len
[3];
371 const guchar
*stringend
= string
+ len
;
377 * Allocate the buffer if it's not already allocated.
379 if (fmtbuf
[idx
] == NULL
) {
380 fmtbuf
[idx
] = (gchar
*)g_malloc(INITIAL_FMTBUF_SIZE
);
381 fmtbuf_len
[idx
] = INITIAL_FMTBUF_SIZE
;
384 while (string
< stringend
)
387 * Is there enough room for this character,
388 * and also enough room for a terminating '\0'?
390 if (column
+1 >= fmtbuf_len
[idx
])
393 * Double the buffer's size if it's not big enough.
394 * The size of the buffer starts at 128, so doubling its size
395 * adds at least another 128 bytes, which is more than enough
396 * for one more character plus a terminating '\0'.
398 fmtbuf_len
[idx
] = fmtbuf_len
[idx
] * 2;
399 fmtbuf
[idx
] = (gchar
*)g_realloc(fmtbuf
[idx
], fmtbuf_len
[idx
]);
405 fmtbuf
[idx
][column
] = c
;
410 fmtbuf
[idx
][column
] = ' ';
415 fmtbuf
[idx
][column
] = chr
;
419 fmtbuf
[idx
][column
] = '\0';
424 is_byte_sep(guint8 c
)
426 return (c
== '-' || c
== ':' || c
== '.');
429 /* Turn a string of hex digits with optional separators (defined by
430 * is_byte_sep()) into a byte array.
433 hex_str_to_bytes(const char *hex_str
, GByteArray
*bytes
, gboolean force_separators
) {
435 const guchar
*p
, *q
, *r
, *s
, *punct
;
436 char four_digits_first_half
[3];
437 char four_digits_second_half
[3];
441 if (! hex_str
|| ! bytes
) {
444 g_byte_array_set_size(bytes
, 0);
445 p
= (const guchar
*)hex_str
;
452 && isxdigit(*p
) && isxdigit(*q
) &&
453 isxdigit(*r
) && isxdigit(*s
)) {
454 four_digits_first_half
[0] = *p
;
455 four_digits_first_half
[1] = *q
;
456 four_digits_first_half
[2] = '\0';
457 four_digits_second_half
[0] = *r
;
458 four_digits_second_half
[1] = *s
;
459 four_digits_second_half
[2] = '\0';
462 * Four or more hex digits in a row.
464 val
= (guint8
) strtoul(four_digits_first_half
, NULL
, 16);
465 g_byte_array_append(bytes
, &val
, 1);
466 val
= (guint8
) strtoul(four_digits_second_half
, NULL
, 16);
467 g_byte_array_append(bytes
, &val
, 1);
472 * Make sure the character after
473 * the fourth hex digit is a byte
474 * separator, i.e. that we don't have
475 * more than four hex digits, or a
478 if (is_byte_sep(*punct
)) {
482 else if (force_separators
) {
489 else if (*q
&& isxdigit(*p
) && isxdigit(*q
)) {
492 two_digits
[2] = '\0';
495 * Two hex digits in a row.
497 val
= (guint8
) strtoul(two_digits
, NULL
, 16);
498 g_byte_array_append(bytes
, &val
, 1);
502 * Make sure the character after
503 * the second hex digit is a byte
504 * separator, i.e. that we don't have
505 * more than two hex digits, or a
508 if (is_byte_sep(*punct
)) {
512 else if (force_separators
) {
519 else if (*q
&& isxdigit(*p
) && is_byte_sep(*q
)) {
524 * Only one hex digit (not at the end of the string)
526 val
= (guint8
) strtoul(one_digit
, NULL
, 16);
527 g_byte_array_append(bytes
, &val
, 1);
531 else if (!*q
&& isxdigit(*p
)) {
536 * Only one hex digit (at the end of the string)
538 val
= (guint8
) strtoul(one_digit
, NULL
, 16);
539 g_byte_array_append(bytes
, &val
, 1);
551 * Turn an RFC 3986 percent-encoded string into a byte array.
552 * XXX - We don't check for reserved characters.
554 #define HEX_DIGIT_BUF_LEN 3
556 uri_str_to_bytes(const char *uri_str
, GByteArray
*bytes
) {
559 guchar hex_digit
[HEX_DIGIT_BUF_LEN
];
561 g_byte_array_set_size(bytes
, 0);
566 p
= (const guchar
*)uri_str
;
569 if (! isascii(*p
) || ! isprint(*p
))
573 if (*p
== '\0') return FALSE
;
576 if (*p
== '\0') return FALSE
;
579 if (! isxdigit(hex_digit
[0]) || ! isxdigit(hex_digit
[1]))
581 val
= (guint8
) strtoul((char *)hex_digit
, NULL
, 16);
582 g_byte_array_append(bytes
, &val
, 1);
584 g_byte_array_append(bytes
, (const guint8
*) p
, 1);
593 * Given a GByteArray, generate a string from it that shows non-printable
594 * characters as percent-style escapes, and return a pointer to it.
597 format_uri(const GByteArray
*bytes
, const gchar
*reserved_chars
)
599 static gchar
*fmtbuf
[3];
600 static guint fmtbuf_len
[3];
602 static const guchar
*reserved_def
= ":/?#[]@!$&'()*+,;= ";
603 const guchar
*reserved
= reserved_def
;
606 gboolean is_reserved
= FALSE
;
613 reserved
= reserved_chars
;
616 * Allocate the buffer if it's not already allocated.
618 if (fmtbuf
[idx
] == NULL
) {
619 fmtbuf
[idx
] = (gchar
*)g_malloc(INITIAL_FMTBUF_SIZE
);
620 fmtbuf_len
[idx
] = INITIAL_FMTBUF_SIZE
;
622 for (column
= 0; column
< bytes
->len
; column
++) {
624 * Is there enough room for this character, if it expands to
625 * a percent plus 2 hex digits (which is the most it can
626 * expand to), and also enough room for a terminating '\0'?
628 if (column
+2+1 >= fmtbuf_len
[idx
]) {
630 * Double the buffer's size if it's not big enough.
631 * The size of the buffer starts at 128, so doubling its size
632 * adds at least another 128 bytes, which is more than enough
633 * for one more character plus a terminating '\0'.
635 fmtbuf_len
[idx
] = fmtbuf_len
[idx
] * 2;
636 fmtbuf
[idx
] = (gchar
*)g_realloc(fmtbuf
[idx
], fmtbuf_len
[idx
]);
638 c
= bytes
->data
[column
];
640 if (!isascii(c
) || !isprint(c
) || c
== '%') {
644 for (i
= 0; reserved
[i
]; i
++) {
645 if (c
== reserved
[i
])
650 fmtbuf
[idx
][column
] = c
;
652 fmtbuf
[idx
][column
] = '%';
654 fmtbuf
[idx
][column
] = hex
[c
>> 4];
656 fmtbuf
[idx
][column
] = hex
[c
& 0xF];
659 fmtbuf
[idx
][column
] = '\0';
664 * Create a copy of a GByteArray
666 * @param ba The byte array to be copied.
667 * @return If ba exists, a freshly allocated copy. NULL otherwise.
671 byte_array_dup(GByteArray
*ba
) {
677 new_ba
= g_byte_array_new();
678 g_byte_array_append(new_ba
, ba
->data
, ba
->len
);
682 #define SUBID_BUF_LEN 5
684 oid_str_to_bytes(const char *oid_str
, GByteArray
*bytes
) {
685 return rel_oid_str_to_bytes(oid_str
, bytes
, TRUE
);
688 rel_oid_str_to_bytes(const char *oid_str
, GByteArray
*bytes
, gboolean is_absolute
) {
689 guint32 subid0
, subid
, sicnt
, i
;
691 guint8 buf
[SUBID_BUF_LEN
];
693 g_byte_array_set_size(bytes
, 0);
699 if (!isdigit((guchar
)*p
) && (*p
!= '.')) return FALSE
;
701 if (p
== oid_str
&& is_absolute
) return FALSE
;
702 if (!*(p
+1)) return FALSE
;
703 if ((p
-1) == dot
) return FALSE
;
708 if (!dot
) return FALSE
;
711 sicnt
= is_absolute
? 0 : 2;
712 if (!is_absolute
) p
++;
713 subid0
= 0; /* squelch GCC complaints */
716 while (isdigit((guchar
)*p
)) {
723 if (subid0
> 2) return FALSE
;
724 } else if (sicnt
== 1) {
725 if ((subid0
< 2) && (subid
> 39)) return FALSE
;
726 subid
+= 40 * subid0
;
732 buf
[i
] = 0x80 | (subid
% 0x80);
734 } while (subid
&& i
);
735 buf
[SUBID_BUF_LEN
-1] &= 0x7F;
736 g_byte_array_append(bytes
, buf
+ i
, SUBID_BUF_LEN
- i
);
746 * Compare the contents of two GByteArrays
748 * @param ba1 A byte array
749 * @param ba2 A byte array
750 * @return If both arrays are non-NULL and their lengths are equal and
751 * their contents are equal, returns TRUE. Otherwise, returns
754 * XXX - Should this be in strutil.c?
757 byte_array_equal(GByteArray
*ba1
, GByteArray
*ba2
) {
761 if (ba1
->len
!= ba2
->len
)
764 if (memcmp(ba1
->data
, ba2
->data
, ba1
->len
) != 0)
771 /* Return a XML escaped representation of the unescaped string.
772 * The returned string must be freed when no longer in use. */
774 xml_escape(const gchar
*unescaped
)
776 GString
*buffer
= g_string_sized_new(128);
781 while ( (c
= *p
++) ) {
784 g_string_append(buffer
, "<");
787 g_string_append(buffer
, ">");
790 g_string_append(buffer
, "&");
793 g_string_append(buffer
, "'");
796 g_string_append(buffer
, """);
799 g_string_append_c(buffer
, c
);
803 /* Return the string value contained within the GString
804 * after getting rid of the GString structure.
805 * This is the way to do this, see the GLib reference. */
806 return g_string_free(buffer
, FALSE
);
810 /* Return the first occurrence of needle in haystack.
811 * If not found, return NULL.
812 * If either haystack or needle has 0 length, return NULL.
813 * Algorithm copied from GNU's glibc 2.3.2 memcmp() */
815 epan_memmem(const guint8
*haystack
, guint haystack_len
,
816 const guint8
*needle
, guint needle_len
)
819 const guint8
*const last_possible
= haystack
+ haystack_len
- needle_len
;
821 if (needle_len
== 0) {
825 if (needle_len
> haystack_len
) {
829 for (begin
= haystack
; begin
<= last_possible
; ++begin
) {
830 if (begin
[0] == needle
[0] &&
831 !memcmp(&begin
[1], needle
+ 1,
841 * Scan the search string to make sure it's valid hex. Return the
842 * number of bytes in nbytes.
845 convert_string_to_hex(const char *string
, size_t *nbytes
)
850 guint8
*bytes
, *q
, byte_val
;
859 continue; /* allow white space */
860 if (c
==':' || c
=='.' || c
=='-')
861 continue; /* skip any ':', '.', or '-' between bytes */
863 /* Not a valid hex digit - fail */
868 * We can only match bytes, not nibbles; we must have a valid
869 * hex digit immediately after that hex digit.
875 /* 2 hex digits = 1 byte */
880 * Were we given any hex digits?
888 * OK, it's valid, and it generates "n_bytes" bytes; generate the
891 bytes
= (guint8
*)g_malloc(n_bytes
);
899 continue; /* allow white space */
900 if (c
==':' || c
=='.' || c
=='-')
901 continue; /* skip any ':', '.', or '-' between bytes */
902 /* From the loop above, we know this is a hex digit */
906 byte_val
= (c
- 'a') + 10;
908 byte_val
= (c
- 'A') + 10;
911 /* We also know this is a hex digit */
916 byte_val
|= (c
- 'a') + 10;
918 byte_val
|= (c
- 'A') + 10;
927 * Copy if if it's a case-sensitive search; uppercase it if it's
928 * a case-insensitive search.
931 convert_string_case(const char *string
, gboolean case_insensitive
)
934 if (case_insensitive
) {
935 return g_utf8_strup(string
, -1);
937 return g_strdup(string
);
942 epan_strcasestr(const char *haystack
, const char *needle
)
944 gsize hlen
= strlen(haystack
);
945 gsize nlen
= strlen(needle
);
947 while (hlen
-- >= nlen
) {
948 if (!g_ascii_strncasecmp(haystack
, needle
, nlen
))
949 return (char*) haystack
;
956 string_or_null(const char *string
)
964 escape_string_len(const char *string
)
971 for (p
= string
; (c
= *p
) != '\0'; p
++) {
972 /* Backslashes and double-quotes must
974 if (c
== '\\' || c
== '"') {
977 /* Values that can't nicely be represented
978 * in ASCII need to be escaped. */
979 else if (!isprint((unsigned char)c
)) {
983 /* Other characters are just passed through. */
988 return repr_len
+ 2; /* string plus leading and trailing quotes */
992 escape_string(char *buf
, const char *string
)
1001 for (p
= string
; (c
= *p
) != '\0'; p
++) {
1002 /* Backslashes and double-quotes must
1004 if (c
== '\\' || c
== '"') {
1008 /* Values that can't nicely be represented
1009 * in ASCII need to be escaped. */
1010 else if (!isprint((unsigned char)c
)) {
1012 g_snprintf(hexbuf
,sizeof(hexbuf
), "%02x", (unsigned char) c
);
1015 *bufp
++ = hexbuf
[0];
1016 *bufp
++ = hexbuf
[1];
1018 /* Other characters are just passed through. */
1028 #define GN_CHAR_ALPHABET_SIZE 128
1030 static gunichar IA5_default_alphabet
[GN_CHAR_ALPHABET_SIZE
] = {
1032 /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
1034 '?', '?', '?', '?', '?', '?', '?', '?',
1035 '?', '?', '?', '?', '?', '?', '?', '?',
1036 '?', '?', '?', '?', '?', '?', '?', '?',
1037 '?', '?', '?', '?', '?', '?', '?', '?',
1038 ' ', '!', '\"','#', '$', '%', '&', '\'',
1039 '(', ')', '*', '+', ',', '-', '.', '/',
1040 '0', '1', '2', '3', '4', '5', '6', '7',
1041 '8', '9', ':', ';', '<', '=', '>', '?',
1042 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
1043 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1044 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1045 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
1046 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
1047 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
1048 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
1049 'x', 'y', 'z', '{', '|', '}', '~', '?'
1053 char_def_ia5_alphabet_decode(unsigned char value
)
1055 if (value
< GN_CHAR_ALPHABET_SIZE
) {
1056 return IA5_default_alphabet
[value
];
1064 IA5_7BIT_decode(unsigned char * dest
, const unsigned char* src
, int len
)
1069 for (i
= 0, j
= 0; j
< len
; j
++) {
1070 buf
= char_def_ia5_alphabet_decode(src
[j
]);
1071 i
+= g_unichar_to_utf8(buf
,&(dest
[i
]));
1078 * This function takes a string and copies it, inserting a 'chr' before
1079 * every 'chr' in it.
1082 ws_strdup_escape_char (const gchar
*str
, const gchar chr
)
1091 /* Worst case: A string that is full of 'chr' */
1092 q
= new_str
= (gchar
*)g_malloc (strlen(str
) * 2 + 1);
1106 * This function takes a string and copies it, replacing any occurrences of double
1107 * 'chr' with a single 'chr'.
1110 ws_strdup_unescape_char (const gchar
*str
, const char chr
)
1119 /* Worst case: A string that contains no 'chr' */
1120 q
= new_str
= (gchar
*)g_malloc (strlen(str
) + 1);
1124 if ((*p
== chr
) && (*(p
+1) == chr
))
1134 /* Create a newly-allocated string with replacement values. */
1135 gchar
*string_replace(const gchar
* str
, const gchar
*old_val
, const gchar
*new_val
) {
1139 if (!str
|| !old_val
) {
1143 str_parts
= g_strsplit(str
, old_val
, 0);
1144 new_str
= g_strjoinv(new_val
, str_parts
);
1145 g_strfreev(str_parts
);
1151 * Editor modelines - http://www.wireshark.org/tools/modelines.html
1156 * indent-tabs-mode: nil
1159 * vi: set shiftwidth=4 tabstop=8 expandtab:
1160 * :indentSize=4:tabSize=8:noTabs=true: