packet-ldap: fix regression for SASL handling
[wireshark-sm.git] / epan / tvbparse.h
blobb206ee2b6256f0ebcfc85aeaf028be6004240c96
1 /* tvbparse.h
3 * an API for text tvb parsers
5 * Copyright 2005, Luis E. Garcia Ontanon <luis@ontanon.org>
7 * Wireshark - Network traffic analyzer
8 * By Gerald Combs <gerald@wireshark.org>
9 * Copyright 1998 Gerald Combs
11 * SPDX-License-Identifier: GPL-2.0-or-later
15 The intention behind this is to ease the writing of dissectors that have to
16 parse text without the need of writing into buffers.
18 It was originally written to avoid using lex and yacc for the xml dissector.
20 The parser is able to look for wanted elements; these can be:
22 simple tokens:
23 - a char out of a string of needles
24 - a char not belonging to a string of needles
25 - a sequence of chars that belong to a set of chars
26 - a sequence of chars that do not belong to a set of chars
27 - a string
28 - a caseless string
29 - all the characters up to a certain wanted element (included or excluded)
31 composed elements:
32 - one of a given group of wanted elements
33 - a sequence of wanted elements
34 - some (at least one) instances of a wanted element
36 Once a wanted element is successfully extracted, by either tvbparse_get or
37 tvbparse_find, the parser will invoke one given callback before, and
38 another one after, the callbacks of each of its component subelements
39 are called.
41 If tvbparse_get or tvbparse_find fail to extract the wanted element the
42 subelements callbacks are not going to be invoked.
44 The wanted elements are instantiated once by the proto_register_xxx function.
46 The parser is instantiated for every packet and it maintains its state.
48 The element's data is destroyed before the next packet is dissected.
51 #ifndef _TVB_PARSE_H_
52 #define _TVB_PARSE_H_
54 #include <epan/tvbuff.h>
55 #include <glib.h>
56 #include "ws_symbol_export.h"
58 typedef struct _tvbparse_elem_t tvbparse_elem_t;
59 typedef struct _tvbparse_wanted_t tvbparse_wanted_t;
60 typedef struct _tvbparse_t tvbparse_t;
64 * a callback function to be called before or after an element has been
65 * successfully extracted.
67 * Note that if the token belongs to a composed token the callbacks of the
68 * components won't be called unless the composed token is successfully
69 * extracted.
71 * tvbparse_data: the private data of the parser
72 * wanted_data: the private data of the wanted element
73 * elem: the extracted element
75 typedef void (*tvbparse_action_t)(void* tvbparse_data, const void* wanted_data, struct _tvbparse_elem_t* elem);
77 typedef int (*tvbparse_condition_t) /* type of the function stored in tvbparse_wanted_t.condition */
78 (tvbparse_t*, const int, /* NOTE(review): presumably the parser and the offset to try at -- confirm in tvbparse.c */
79 const tvbparse_wanted_t*, /* NOTE(review): presumably the wanted element being matched -- confirm */
80 tvbparse_elem_t**); /* NOTE(review): presumably receives the matched element; meaning of the int result not visible here -- confirm */
83 typedef enum {
84 TP_UNTIL_INCLUDE, /* last elem is included, its span is spent by the parser */
85 TP_UNTIL_SPEND, /* last elem is not included, but its span is spent by the parser */
86 TP_UNTIL_LEAVE /* last elem is not included, neither its span is spent by the parser */
87 } until_mode_t;
90 struct _tvbparse_wanted_t { /* description of one wanted element, as built by the tvbparse_xxx() constructors below */
91 int id; /* arbitrary id that is copied to the eventual token */
92 tvbparse_condition_t condition; /* NOTE(review): presumably the function that tries to match this kind of element -- confirm */
94 union { /* NOTE(review): presumably only the member matching the element kind is meaningful -- confirm in tvbparse.c */
95 const gchar* str; /* NOTE(review): presumably the needles or the string to match -- confirm */
96 struct _tvbparse_wanted_t** handle; /* pointer to a pointer to a wanted element (see tvbparse_handle) */
97 struct {
98 union {
99 gint64 i;
100 guint64 u;
101 gdouble f;
102 } value;
103 gboolean (*comp)(void*,const void*);
104 void* (*extract)(tvbuff_t*,guint);
105 } number; /* NOTE(review): looks related to the disabled tvbparse_ft_numcmp API -- confirm */
106 enum ftenum ftenum;
107 struct {
108 until_mode_t mode; /* how the terminating element and the parser offset are handled */
109 const tvbparse_wanted_t* subelem; /* NOTE(review): presumably the ending element given to tvbparse_until -- confirm */
110 } until;
111 struct {
112 wmem_map_t* table;
113 struct _tvbparse_wanted_t* key;
114 struct _tvbparse_wanted_t* other;
115 } hash; /* NOTE(review): presumably filled in by tvbparse_hashed / tvbparse_hashed_add -- confirm */
116 GPtrArray* elems; /* NOTE(review): presumably the candidate list of one_of / sequence elements -- confirm */
117 const tvbparse_wanted_t* subelem;
118 void* p;
119 } control;
121 int len;
123 guint min; /* NOTE(review): presumably the min_len / min argument of the constructors -- confirm */
124 guint max; /* NOTE(review): presumably the max_len / max argument of the constructors -- confirm */
126 const void* data; /* private data of the wanted element, passed to the callbacks as wanted_data */
128 tvbparse_action_t before; /* callback to be called before those of the subelements */
129 tvbparse_action_t after; /* callback to be called after those of the subelements */
132 /* an instance of a per packet parser */
133 struct _tvbparse_t { /* per-packet parser state */
134 tvbuff_t* tvb; /* NOTE(review): presumably the tvb handed to tvbparse_init -- confirm */
135 int offset; /* NOTE(review): presumably the current parse position -- confirm */
136 int end_offset; /* NOTE(review): presumably where the parsed range ends -- confirm */
137 void* data; /* private data of the parser, passed to the callbacks as tvbparse_data */
138 const tvbparse_wanted_t* ignore; /* NOTE(review): presumably the token type to be ignored, as given to tvbparse_init -- confirm */
139 int recursion_depth; /* NOTE(review): purpose not visible in this header -- confirm in tvbparse.c */
143 /* a matching token returned by either tvbparse_get or tvbparse_find */
144 struct _tvbparse_elem_t { /* one extracted element (token) */
145 int id; /* id copied from the wanted element that produced this token */
147 tvbuff_t* tvb; /* NOTE(review): presumably the tvb the token was found in -- confirm */
148 int offset; /* NOTE(review): presumably where the token starts in tvb -- confirm */
149 int len; /* NOTE(review): presumably the token's length -- confirm */
151 void* data;
153 struct _tvbparse_elem_t* sub; /* NOTE(review): presumably the first subelement of a composed element -- confirm */
155 struct _tvbparse_elem_t* next; /* NOTE(review): presumably the next sibling element -- confirm */
156 struct _tvbparse_elem_t* last; /* NOTE(review): presumably the last sibling element -- confirm */
158 const tvbparse_wanted_t* wanted; /* NOTE(review): presumably the wanted element that matched -- confirm */
163 * definition of wanted token types
165 * the following functions define the tokens we will be able to look for in a tvb
166 * common parameters are:
168 * id: an arbitrary id that will be copied to the eventual token (don't use 0)
169 * private_data: persistent data to be passed to the callback action (wanted_data)
170 * before_cb: a callback function to be called before those of the subelements
171 * after_cb: a callback function to be called after those of the subelements
176 * a char element.
178 * When looked for it returns a simple element one character long if the char
179 * at the current offset matches one of the needles.
181 WS_DLL_PUBLIC
182 tvbparse_wanted_t* tvbparse_char(const int id,
183 const gchar* needles,
184 const void* private_data,
185 tvbparse_action_t before_cb,
186 tvbparse_action_t after_cb);
189 * a not_char element.
191 * When looked for it returns a simple element one character long if the char
192 * at the current offset does not match any of the needles.
194 WS_DLL_PUBLIC
195 tvbparse_wanted_t* tvbparse_not_char(const int id,
196 const gchar* needle,
197 const void* private_data,
198 tvbparse_action_t before_cb,
199 tvbparse_action_t after_cb);
202 * a chars element
204 * When looked for it returns a simple element one or more characters long if
205 * one or more char(s) starting from the current offset match one of the needles.
206 * An element will be returned if at least min_len chars are given (1 if it's 0)
207 * It will get at most max_len chars or as much as it can if max_len is 0.
209 WS_DLL_PUBLIC
210 tvbparse_wanted_t* tvbparse_chars(const int id,
211 const guint min_len,
212 const guint max_len,
213 const gchar* needles,
214 const void* private_data,
215 tvbparse_action_t before_cb,
216 tvbparse_action_t after_cb);
219 * a not_chars element
221 * When looked for it returns a simple element one or more characters long if
222 * one or more char(s) starting from the current offset do not match one of the
223 * needles.
224 * An element will be returned if at least min_len chars are given (1 if it's 0)
225 * It will get at most max_len chars or as much as it can if max_len is 0.
227 WS_DLL_PUBLIC
228 tvbparse_wanted_t* tvbparse_not_chars(const int id,
229 const guint min_len,
230 const guint max_len,
231 const gchar* needles,
232 const void* private_data,
233 tvbparse_action_t before_cb,
234 tvbparse_action_t after_cb);
237 * a string element
239 * When looked for it returns a simple element if we have the given string at
240 * the current offset
242 WS_DLL_PUBLIC
243 tvbparse_wanted_t* tvbparse_string(const int id,
244 const gchar* string,
245 const void* private_data,
246 tvbparse_action_t before_cb,
247 tvbparse_action_t after_cb);
250 * casestring
252 * When looked for it returns a simple element if we have a matching string at
253 * the current offset
255 WS_DLL_PUBLIC
256 tvbparse_wanted_t* tvbparse_casestring(const int id,
257 const gchar* str,
258 const void* data,
259 tvbparse_action_t before_cb,
260 tvbparse_action_t after_cb);
263 * until
265 * When looked for it returns a simple element containing all the characters
266 * found until the first match of the ending element if the ending element is
267 * found.
269 * When looking for until elements it calls tvbparse_find so it can be very slow.
271 * It won't have a subelement, the ending's callbacks won't get called.
275 * until_mode values determine how the terminating element and the current offset
276 * of the parser are handled
278 WS_DLL_PUBLIC
279 tvbparse_wanted_t* tvbparse_until(const int id,
280 const void* private_data,
281 tvbparse_action_t before_cb,
282 tvbparse_action_t after_cb,
283 const tvbparse_wanted_t* ending,
284 until_mode_t until_mode);
287 * one_of
289 * When looked for it will try to match to the given candidates and return a
290 * composed element whose subelement is the first match.
292 * The list of candidates is terminated with a NULL
295 WS_DLL_PUBLIC
296 tvbparse_wanted_t* tvbparse_set_oneof(const int id,
297 const void* private_data,
298 tvbparse_action_t before_cb,
299 tvbparse_action_t after_cb,
300 ...);
303 * hashed
305 WS_DLL_PUBLIC
306 tvbparse_wanted_t* tvbparse_hashed(const int id, /* id that will be copied to the eventual token */
307 const void* data, /* private data passed to the callbacks as wanted_data */
308 tvbparse_action_t before_cb,
309 tvbparse_action_t after_cb,
310 tvbparse_wanted_t* key, /* NOTE(review): presumably stored in control.hash.key -- confirm in tvbparse.c */
311 tvbparse_wanted_t* other, /* NOTE(review): presumably stored in control.hash.other -- confirm in tvbparse.c */
312 ...); /* NOTE(review): layout and termination of the variadic arguments are not documented here -- confirm in tvbparse.c */
314 WS_DLL_PUBLIC
315 void tvbparse_hashed_add(tvbparse_wanted_t* w, ...); /* NOTE(review): presumably adds entries to a wanted element made by tvbparse_hashed; variadic layout not documented here -- confirm */
318 * sequence
320 * When looked for it will try to match in order all the given candidates. If
321 * every candidate is found in the given order it will return a composed
322 * element whose subelements are the matched elements.
324 * The list of candidates is terminated with a NULL.
327 WS_DLL_PUBLIC
328 tvbparse_wanted_t* tvbparse_set_seq(const int id,
329 const void* private_data,
330 tvbparse_action_t before_cb,
331 tvbparse_action_t after_cb,
332 ...);
335 * some
337 * When looked for it will try to match the given candidate at least min times
338 * and at most max times. If the given candidate is matched at least min times
339 * a composed element is returned.
342 WS_DLL_PUBLIC
343 tvbparse_wanted_t* tvbparse_some(const int id,
344 const guint min,
345 const guint max,
346 const void* private_data,
347 tvbparse_action_t before_cb,
348 tvbparse_action_t after_cb,
349 const tvbparse_wanted_t* wanted);
351 #define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted)\
352 tvbparse_some(id, 1, G_MAXINT, private_data, before_cb, after_cb, wanted) /* shorthand for tvbparse_some with min 1 and max G_MAXINT */
356 * handle
358 * this is a pointer to a pointer to a wanted element (that might not have
359 * been initialized yet) so that recursive structures can be defined
361 WS_DLL_PUBLIC
362 tvbparse_wanted_t* tvbparse_handle(tvbparse_wanted_t** handle);
364 #if 0
366 enum ft_cmp_op {
367 TVBPARSE_CMP_GT,
368 TVBPARSE_CMP_GE,
369 TVBPARSE_CMP_EQ,
370 TVBPARSE_CMP_NE,
371 TVBPARSE_CMP_LE,
372 TVBPARSE_CMP_LT
375 /* not yet tested */
376 tvbparse_wanted_t* tvbparse_ft(int id,
377 const void* data,
378 tvbparse_action_t before_cb,
379 tvbparse_action_t after_cb,
380 enum ftenum ftenum);
382 /* not yet tested */
383 tvbparse_wanted_t* tvbparse_end_of_buffer(int id,
384 const void* data,
385 tvbparse_action_t before_cb,
386 tvbparse_action_t after_cb);
387 /* not yet tested */
388 tvbparse_wanted_t* tvbparse_ft_numcmp(int id,
389 const void* data,
390 tvbparse_action_t before_cb,
391 tvbparse_action_t after_cb,
392 enum ftenum ftenum,
393 int little_endian,
394 enum ft_cmp_op ft_cmp_op,
395 ... );
397 #endif
399 /* quoted
400 * this is a composed candidate, that will try to match a quoted string
401 * (including the quotes) together with every escaped quote inside it.
403 * C strings are matched with tvbparse_quoted(-1,NULL,NULL,NULL,'"','\\')
405 WS_DLL_PUBLIC
406 tvbparse_wanted_t* tvbparse_quoted(const int id,
407 const void* data,
408 tvbparse_action_t before_cb,
409 tvbparse_action_t after_cb,
410 const char quote,
411 const char escape);
414 * a helper callback for quoted strings that will shrink the token to contain
415 * only the string and not the quotes
417 WS_DLL_PUBLIC
418 void tvbparse_shrink_token_cb(void* tvbparse_data,
419 const void* wanted_data,
420 tvbparse_elem_t* tok);
425 /* initialize the parser (at every packet)
426 * tvb: what are we parsing?
427 * offset: from where
428 * len: for how many bytes
429 * private_data: will be passed to the action callbacks
430 * ignore: a wanted token type to be ignored (the associated cb WILL be called when it matches)
432 WS_DLL_PUBLIC
433 tvbparse_t* tvbparse_init(tvbuff_t* tvb,
434 const int offset,
435 int len,
436 void* private_data,
437 const tvbparse_wanted_t* ignore);
439 /* reset the parser */
440 WS_DLL_PUBLIC
441 gboolean tvbparse_reset(tvbparse_t* tt, const int offset, int len); /* NOTE(review): offset/len presumably mean the same as in tvbparse_init; meaning of the gboolean result not visible here -- confirm */
443 WS_DLL_PUBLIC
444 guint tvbparse_curr_offset(tvbparse_t* tt); /* NOTE(review): presumably returns the parser's current offset -- confirm in tvbparse.c */
445 guint tvbparse_len_left(tvbparse_t* tt); /* NOTE(review): unlike tvbparse_curr_offset this declaration carries no WS_DLL_PUBLIC; presumably returns the bytes left to parse -- confirm */
450 * This will look for the wanted token at the current offset or after any given
451 * number of ignored tokens returning FALSE if there's no match or TRUE if there
452 * is a match.
453 * The parser will be left in its original state and no callbacks will be called.
455 WS_DLL_PUBLIC
456 gboolean tvbparse_peek(tvbparse_t* tt,
457 const tvbparse_wanted_t* wanted);
460 * This will look for the wanted token at the current offset or after any given
461 * number of ignored tokens returning NULL if there's no match.
462 * if there is a match it will set the offset of the current parser after
463 * the end of the token
465 WS_DLL_PUBLIC
466 tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
467 const tvbparse_wanted_t* wanted);
470 * Like tvbparse_get but this will look for a wanted token even beyond the
471 * current offset.
472 * This function is slow.
474 WS_DLL_PUBLIC
475 tvbparse_elem_t* tvbparse_find(tvbparse_t* tt,
476 const tvbparse_wanted_t* wanted);
479 WS_DLL_PUBLIC
480 void tvbparse_tree_add_elem(proto_tree* tree, tvbparse_elem_t* curr); /* NOTE(review): presumably adds curr to the protocol tree; proto_tree is not declared by the includes visible in this header -- confirm */
482 #endif