TODO epan/dissectors/asn1/kerberos/packet-kerberos-template.c new GSS flags
[wireshark-sm.git] / wsutil / regex.c
blob464a42239fe8a6fc421172e0a57c8074cee94d49
/*
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
9 #include "config.h"
11 #include "regex.h"
13 #include <wsutil/str_util.h>
14 #include <pcre2.h>
17 struct _ws_regex {
18 pcre2_code *code;
19 char *pattern;
/* Size, in code units, of the buffer handed to pcre2_get_error_message(). */
#define ERROR_MAXLEN_IN_CODE_UNITS   128

/*
 * Return a newly allocated, NUL-terminated string describing a PCRE2
 * error code. The caller owns the returned buffer and must release it
 * with g_free().
 */
static char *
get_error_msg(int errorcode)
{
    char *buffer;

    /*
     * We have to provide a buffer and we don't know how long the
     * error message is or even the maximum size. From pcre2api(3):
     *     "None of the messages are very long; a
     *     buffer size of 120 code units is ample."
     */
    /* Code unit = one byte */
    buffer = g_malloc(ERROR_MAXLEN_IN_CODE_UNITS);
    /*
     * Start from an empty string. When the error code is not known to
     * PCRE2, pcre2_get_error_message() reports failure and is not
     * guaranteed to have written anything, which would otherwise hand
     * uninitialised heap bytes back to the caller.
     */
    buffer[0] = '\0';
    /* Message is returned with a trailing zero. */
    pcre2_get_error_message(errorcode, buffer, ERROR_MAXLEN_IN_CODE_UNITS);
    /* One more at the end for good luck. */
    buffer[ERROR_MAXLEN_IN_CODE_UNITS-1] = '\0';
    return buffer;
}
45 static pcre2_code *
46 compile_pcre2(const char *patt, ssize_t size, char **errmsg, unsigned flags)
48 pcre2_code *code;
49 int errorcode;
50 PCRE2_SIZE length;
51 PCRE2_SIZE erroroffset;
52 uint32_t options = 0;
54 if (size < 0)
55 length = PCRE2_ZERO_TERMINATED;
56 else
57 length = (PCRE2_SIZE)size;
59 if (flags & WS_REGEX_NEVER_UTF)
60 options |= PCRE2_NEVER_UTF;
61 if (flags & WS_REGEX_CASELESS)
62 options |= PCRE2_CASELESS;
63 if (flags & WS_REGEX_ANCHORED)
64 options |= PCRE2_ANCHORED;
66 /* By default UTF-8 is off. */
67 code = pcre2_compile_8((PCRE2_SPTR)patt,
68 length,
69 options,
70 &errorcode,
71 &erroroffset,
72 NULL);
74 if (code == NULL) {
75 *errmsg = get_error_msg(errorcode);
76 return NULL;
79 return code;
83 ws_regex_t *
84 ws_regex_compile_ex(const char *patt, ssize_t size, char **errmsg, unsigned flags)
86 ws_return_val_if(!patt, NULL);
88 pcre2_code *code = compile_pcre2(patt, size, errmsg, flags);
89 if (code == NULL)
90 return NULL;
92 ws_regex_t *re = g_new(ws_regex_t, 1);
93 re->code = code;
94 re->pattern = ws_escape_string_len(NULL, patt, size, false);
95 return re;
99 ws_regex_t *
100 ws_regex_compile(const char *patt, char **errmsg)
102 return ws_regex_compile_ex(patt, -1, errmsg, 0);
106 static bool
107 match_pcre2(pcre2_code *code, const char *subject, ssize_t subj_length,
108 size_t subj_offset, pcre2_match_data *match_data)
110 PCRE2_SIZE length;
111 int rc;
113 if (subj_length < 0)
114 length = PCRE2_ZERO_TERMINATED;
115 else
116 length = (PCRE2_SIZE)subj_length;
118 rc = pcre2_match(code,
119 subject,
120 length,
121 (PCRE2_SIZE)subj_offset,
122 0, /* default options */
123 match_data,
124 NULL);
126 if (rc < 0) {
127 /* No match */
128 if (rc != PCRE2_ERROR_NOMATCH) {
129 /* Error. Should not happen with UTF-8 disabled. Some huge
130 * subject strings could hit some internal limit. */
131 char *msg = get_error_msg(rc);
132 ws_debug("Unexpected pcre2_match() error: %s.", msg);
133 g_free(msg);
135 return false;
138 /* Matched */
139 return true;
143 bool
144 ws_regex_matches(const ws_regex_t *re, const char *subj)
146 return ws_regex_matches_length(re, subj, -1);
150 bool
151 ws_regex_matches_length(const ws_regex_t *re,
152 const char *subj, ssize_t subj_length)
154 bool matched;
155 pcre2_match_data *match_data;
157 ws_return_val_if(!re, false);
158 ws_return_val_if(!subj, false);
160 /* We don't use the matched substring but pcre2_match requires
161 * at least one pair of offsets. */
162 match_data = pcre2_match_data_create(1, NULL);
163 matched = match_pcre2(re->code, subj, subj_length, 0, match_data);
164 pcre2_match_data_free(match_data);
165 return matched;
169 bool
170 ws_regex_matches_pos(const ws_regex_t *re,
171 const char *subj, ssize_t subj_length,
172 size_t subj_offset, size_t pos_vect[2])
174 bool matched;
175 pcre2_match_data *match_data;
177 ws_return_val_if(!re, false);
178 ws_return_val_if(!subj, false);
180 match_data = pcre2_match_data_create(1, NULL);
181 matched = match_pcre2(re->code, subj, subj_length, subj_offset, match_data);
182 if (matched && pos_vect) {
183 PCRE2_SIZE *ovect = pcre2_get_ovector_pointer(match_data);
184 pos_vect[0] = ovect[0];
185 pos_vect[1] = ovect[1];
187 pcre2_match_data_free(match_data);
188 return matched;
192 void
193 ws_regex_free(ws_regex_t *re)
195 pcre2_code_free(re->code);
196 g_free(re->pattern);
197 g_free(re);
201 const char *
202 ws_regex_pattern(const ws_regex_t *re)
204 return re->pattern;