2 * Wireshark - Network traffic analyzer
3 * By Gerald Combs <gerald@wireshark.org>
4 * Copyright 1998 Gerald Combs
6 * SPDX-License-Identifier: GPL-2.0-or-later
13 #include <wsutil/str_util.h>
22 #define ERROR_MAXLEN_IN_CODE_UNITS 128
25 get_error_msg(int errorcode
)
30 * We have to provide a buffer and we don't know how long the
31 * error message is or even the maximum size. From pcre2api(3):
32 * "None of the messages are very long; a
33 * buffer size of 120 code units is ample."
35 /* Code unit = one byte */
36 buffer
= g_malloc(ERROR_MAXLEN_IN_CODE_UNITS
);
37 /* Message is returned with a trailing zero. */
38 pcre2_get_error_message(errorcode
, buffer
, ERROR_MAXLEN_IN_CODE_UNITS
);
39 /* One more at the end for good luck. */
40 buffer
[ERROR_MAXLEN_IN_CODE_UNITS
-1] = '\0';
46 compile_pcre2(const char *patt
, ssize_t size
, char **errmsg
, unsigned flags
)
51 PCRE2_SIZE erroroffset
;
55 length
= PCRE2_ZERO_TERMINATED
;
57 length
= (PCRE2_SIZE
)size
;
59 if (flags
& WS_REGEX_NEVER_UTF
)
60 options
|= PCRE2_NEVER_UTF
;
61 if (flags
& WS_REGEX_CASELESS
)
62 options
|= PCRE2_CASELESS
;
63 if (flags
& WS_REGEX_ANCHORED
)
64 options
|= PCRE2_ANCHORED
;
66 /* By default UTF-8 is off. */
67 code
= pcre2_compile_8((PCRE2_SPTR
)patt
,
75 *errmsg
= get_error_msg(errorcode
);
84 ws_regex_compile_ex(const char *patt
, ssize_t size
, char **errmsg
, unsigned flags
)
86 ws_return_val_if(!patt
, NULL
);
88 pcre2_code
*code
= compile_pcre2(patt
, size
, errmsg
, flags
);
92 ws_regex_t
*re
= g_new(ws_regex_t
, 1);
94 re
->pattern
= ws_escape_string_len(NULL
, patt
, size
, false);
100 ws_regex_compile(const char *patt
, char **errmsg
)
102 return ws_regex_compile_ex(patt
, -1, errmsg
, 0);
107 match_pcre2(pcre2_code
*code
, const char *subject
, ssize_t subj_length
,
108 size_t subj_offset
, pcre2_match_data
*match_data
)
114 length
= PCRE2_ZERO_TERMINATED
;
116 length
= (PCRE2_SIZE
)subj_length
;
118 rc
= pcre2_match(code
,
121 (PCRE2_SIZE
)subj_offset
,
122 0, /* default options */
128 if (rc
!= PCRE2_ERROR_NOMATCH
) {
129 /* Error. Should not happen with UTF-8 disabled. Some huge
130 * subject strings could hit some internal limit. */
131 char *msg
= get_error_msg(rc
);
132 ws_debug("Unexpected pcre2_match() error: %s.", msg
);
144 ws_regex_matches(const ws_regex_t
*re
, const char *subj
)
146 return ws_regex_matches_length(re
, subj
, -1);
151 ws_regex_matches_length(const ws_regex_t
*re
,
152 const char *subj
, ssize_t subj_length
)
155 pcre2_match_data
*match_data
;
157 ws_return_val_if(!re
, false);
158 ws_return_val_if(!subj
, false);
160 /* We don't use the matched substring but pcre2_match requires
161 * at least one pair of offsets. */
162 match_data
= pcre2_match_data_create(1, NULL
);
163 matched
= match_pcre2(re
->code
, subj
, subj_length
, 0, match_data
);
164 pcre2_match_data_free(match_data
);
170 ws_regex_matches_pos(const ws_regex_t
*re
,
171 const char *subj
, ssize_t subj_length
,
172 size_t subj_offset
, size_t pos_vect
[2])
175 pcre2_match_data
*match_data
;
177 ws_return_val_if(!re
, false);
178 ws_return_val_if(!subj
, false);
180 match_data
= pcre2_match_data_create(1, NULL
);
181 matched
= match_pcre2(re
->code
, subj
, subj_length
, subj_offset
, match_data
);
182 if (matched
&& pos_vect
) {
183 PCRE2_SIZE
*ovect
= pcre2_get_ovector_pointer(match_data
);
184 pos_vect
[0] = ovect
[0];
185 pos_vect
[1] = ovect
[1];
187 pcre2_match_data_free(match_data
);
193 ws_regex_free(ws_regex_t
*re
)
195 pcre2_code_free(re
->code
);
202 ws_regex_pattern(const ws_regex_t
*re
)