3 * an API for text tvb parsers
5 * Copyright 2005, Luis E. Garcia Ontanon <luis@ontanon.org>
7 * Wireshark - Network traffic analyzer
8 * By Gerald Combs <gerald@wireshark.org>
9 * Copyright 1998 Gerald Combs
11 * SPDX-License-Identifier: GPL-2.0-or-later
15 The intention behind this is to ease the writing of dissectors that have to
16 parse text without the need of writing into buffers.
18 It was originally written to avoid using lex and yacc for the xml dissector.
20 The parser is able to look for wanted elements; these can be:
23 - a char out of a string of needles
24 - a char not belonging to a string of needles
25 - a sequence of chars that belong to a set of chars
26 - a sequence of chars that do not belong to a set of chars
29 - all the characters up to a certain wanted element (included or excluded)
32 - one of a given group of wanted elements
33 - a sequence of wanted elements
34 - some (at least one) instances of a wanted element
36 Once a wanted element is successfully extracted, by either tvbparse_get or
37 tvbparse_find, the parser will invoke a given callback
38 before, and another one after, each of its components' subelements' callbacks
41 If tvbparse_get or tvbparse_find fail to extract the wanted element the
42 subelements callbacks are not going to be invoked.
44 The wanted elements are instantiated once by the proto_register_xxx function.
46 The parser is instantiated for every packet and it maintains its state.
48 The element's data is destroyed before the next packet is dissected.
54 #include <epan/tvbuff.h>
55 #include "ws_symbol_export.h"
/* Short type names for the three structs this API revolves around, declared
 * up front so they can be used before the struct definitions:
 *   tvbparse_elem_t   - a matched/extracted element,
 *   tvbparse_wanted_t - the description of an element to look for,
 *   tvbparse_t        - the per-packet parser instance. */
57 typedef struct _tvbparse_elem_t tvbparse_elem_t
;
58 typedef struct _tvbparse_wanted_t tvbparse_wanted_t
;
59 typedef struct _tvbparse_t tvbparse_t
;
63 * a callback function to be called before or after an element has been
64 * successfully extracted.
66 * Note that if the token belongs to a composed token the callbacks of the
67 * components won't be called unless the composed token is successfully
70 * tvbparse_data: the private data of the parser
71 * wanted_data: the private data of the wanted element
72 * elem: the extracted element
/* Action callback type. It returns nothing and receives, in order:
 * the parser's private data, the wanted element's private data (read-only),
 * and the element that was extracted. */
74 typedef void (*tvbparse_action_t
)(void* tvbparse_data
, const void* wanted_data
, struct _tvbparse_elem_t
* elem
);
76 typedef int (*tvbparse_condition_t
)
77 (tvbparse_t
*, const int,
78 const tvbparse_wanted_t
*,
83 TP_UNTIL_INCLUDE
, /* last elem is included, its span is spent by the parser */
84 TP_UNTIL_SPEND
, /* last elem is not included, but its span is spent by the parser */
85 TP_UNTIL_LEAVE
/* last elem is not included, neither its span is spent by the parser */
89 struct _tvbparse_wanted_t
{
91 tvbparse_condition_t condition
;
95 struct _tvbparse_wanted_t
** handle
;
106 const tvbparse_wanted_t
* subelem
;
110 struct _tvbparse_wanted_t
* key
;
111 struct _tvbparse_wanted_t
* other
;
114 const tvbparse_wanted_t
* subelem
;
125 tvbparse_action_t before
;
126 tvbparse_action_t after
;
129 /* an instance of a per packet parser */
131 wmem_allocator_t
* scope
;
136 const tvbparse_wanted_t
* ignore
;
141 /* a matching token returned by either tvbparse_get or tvbparse_find */
142 struct _tvbparse_elem_t
{
152 struct _tvbparse_elem_t
* sub
;
154 struct _tvbparse_elem_t
* next
;
155 struct _tvbparse_elem_t
* last
;
157 const tvbparse_wanted_t
* wanted
;
162 * definition of wanted token types
164 * the following functions define the tokens we will be able to look for in a tvb
165 * common parameters are:
167 * id: an arbitrary id that will be copied to the eventual token (don't use 0)
168 * private_data: persistent data to be passed to the callback action (wanted_data)
169 * before_cb: a callback function to be called before those of the subelements
170 * after_cb: a callback function to be called after those of the subelements
177 * When looked for it returns a simple element one character long if the char
178 * at the current offset matches one of the needles.
181 tvbparse_wanted_t
* tvbparse_char(const int id
,
183 const void* private_data
,
184 tvbparse_action_t before_cb
,
185 tvbparse_action_t after_cb
);
188 * a not_char element.
190 * When looked for it returns a simple element one character long if the char
191 * at the current offset does not match one of the needles.
194 tvbparse_wanted_t
* tvbparse_not_char(const int id
,
196 const void* private_data
,
197 tvbparse_action_t before_cb
,
198 tvbparse_action_t after_cb
);
203 * When looked for it returns a simple element one or more characters long if
204 * one or more char(s) starting from the current offset match one of the needles.
205 * An element will be returned if at least min_len chars are given (1 if it's 0)
206 * It will get at most max_len chars or as much as it can if max_len is 0.
209 tvbparse_wanted_t
* tvbparse_chars(const int id
,
210 const unsigned min_len
,
211 const unsigned max_len
,
213 const void* private_data
,
214 tvbparse_action_t before_cb
,
215 tvbparse_action_t after_cb
);
218 * a not_chars element
220 * When looked for it returns a simple element one or more characters long if
221 * one or more char(s) starting from the current offset do not match one of the
223 * An element will be returned if at least min_len chars are given (1 if it's 0)
224 * It will get at most max_len chars or as much as it can if max_len is 0.
227 tvbparse_wanted_t
* tvbparse_not_chars(const int id
,
228 const unsigned min_len
,
229 const unsigned max_len
,
231 const void* private_data
,
232 tvbparse_action_t before_cb
,
233 tvbparse_action_t after_cb
);
238 * When looked for it returns a simple element if we have the given string at
242 tvbparse_wanted_t
* tvbparse_string(const int id
,
244 const void* private_data
,
245 tvbparse_action_t before_cb
,
246 tvbparse_action_t after_cb
);
251 * When looked for it returns a simple element if we have a matching string at
255 tvbparse_wanted_t
* tvbparse_casestring(const int id
,
258 tvbparse_action_t before_cb
,
259 tvbparse_action_t after_cb
);
264 * When looked for it returns a simple element containing all the characters
265 * found until the first match of the ending element if the ending element is
268 * When looking for until elements it calls tvbparse_find so it can be very slow.
270 * It won't have a subelement, the ending's callbacks won't get called.
274 * until_mode values determine how the terminating element and the current offset
275 * of the parser are handled
/* Builds an "until" wanted element.
 *   id, private_data, before_cb, after_cb: the common parameters shared by
 *       all wanted-element constructors in this header.
 *   ending: the wanted element whose first match terminates the span.
 *   until_mode: selects how the terminating element and the parser offset
 *       are treated (NOTE(review): presumably one of the TP_UNTIL_* values
 *       declared earlier in this header - confirm).
 * Returns a pointer to the new wanted element. */
278 tvbparse_wanted_t
* tvbparse_until(const int id
,
279 const void* private_data
,
280 tvbparse_action_t before_cb
,
281 tvbparse_action_t after_cb
,
282 const tvbparse_wanted_t
* ending
,
283 until_mode_t until_mode
);
288 * When looked for it will try to match to the given candidates and return a
289 * composed element whose subelement is the first match.
291 * The list of candidates is terminated with a NULL
295 tvbparse_wanted_t
* tvbparse_set_oneof(const int id
,
296 const void* private_data
,
297 tvbparse_action_t before_cb
,
298 tvbparse_action_t after_cb
,
305 tvbparse_wanted_t
* tvbparse_hashed(const int id
,
307 tvbparse_action_t before_cb
,
308 tvbparse_action_t after_cb
,
309 tvbparse_wanted_t
* key
,
310 tvbparse_wanted_t
* other
,
/* Variadic helper taking a wanted element `w` followed by further arguments.
 * NOTE(review): the expected types of the variadic arguments and how the
 * list is terminated are not documented here - confirm against the
 * implementation before calling. */
314 void tvbparse_hashed_add(tvbparse_wanted_t
* w
, ...);
319 * When looked for it will try to match in order all the given candidates. If
320 * every candidate is found in the given order it will return a composed
321 * element whose subelements are the matched elements.
323 * The list of candidates is terminated with a NULL.
327 tvbparse_wanted_t
* tvbparse_set_seq(const int id
,
328 const void* private_data
,
329 tvbparse_action_t before_cb
,
330 tvbparse_action_t after_cb
,
336 * When looked for it will try to match the given candidate at least min times
337 * and at most max times. If the given candidate is matched at least min times
338 * a composed element is returned.
342 tvbparse_wanted_t
* tvbparse_some(const int id
,
345 const void* private_data
,
346 tvbparse_action_t before_cb
,
347 tvbparse_action_t after_cb
,
348 const tvbparse_wanted_t
* wanted
);
/* Convenience wrapper: expands to a tvbparse_some() call with 1 and INT_MAX
 * passed as its second and third arguments; all other arguments are forwarded
 * unchanged.
 * NOTE(review): INT_MAX comes from <limits.h>, which is not among the
 * includes visible in this header - confirm it is available to users. */
350 #define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted)\
351 tvbparse_some(id, 1, INT_MAX, private_data, before_cb, after_cb, wanted)
357 * this is a pointer to a pointer to a wanted element (that might have not
358 * been initialized yet) so that recursive structures
/* Wraps the address of a wanted-element pointer in a wanted element.
 *   handle: pointer to a pointer to a wanted element; the pointed-to
 *           pointer may not have been initialized yet when this is called.
 * Returns a pointer to the resulting wanted element. */
361 tvbparse_wanted_t
* tvbparse_handle(tvbparse_wanted_t
** handle
);
364 * this is a composed candidate that will try to match a quoted string
365 * (including the quotes), including every escaped quote inside it.
367 * C strings are matched with tvbparse_quoted(-1,NULL,NULL,NULL,"\"","\\")
370 tvbparse_wanted_t
* tvbparse_quoted(const int id
,
372 tvbparse_action_t before_cb
,
373 tvbparse_action_t after_cb
,
378 * a helper callback for quoted strings that will shrink the token to contain
379 * only the string and not the quotes
/* Callback with the same parameter types as tvbparse_action_t, so it can be
 * supplied wherever a before/after callback is expected.
 *   tvbparse_data: the parser's private data
 *   wanted_data:   the wanted element's private data
 *   tok:           the extracted token this callback operates on */
382 void tvbparse_shrink_token_cb(void* tvbparse_data
,
383 const void* wanted_data
,
384 tvbparse_elem_t
* tok
);
389 /* initialize the parser (at every packet)
390 * scope: memory scope/pool
391 * tvb: what are we parsing?
393 * len: for how many bytes
394 * private_data: will be passed to the action callbacks
395 * ignore: a wanted token type to be ignored (the associated cb WILL be called when it matches)
398 tvbparse_t
* tvbparse_init(wmem_allocator_t
*scope
,
403 const tvbparse_wanted_t
* ignore
);
405 /* reset the parser */
/* Resets parser `tt` using the given `offset` and `len`.
 * NOTE(review): presumably offset/len are interpreted the same way as the
 * corresponding arguments at parser initialization, and the bool result
 * reports whether the reset succeeded - neither is stated here; confirm
 * against the implementation. */
407 bool tvbparse_reset(tvbparse_t
* tt
, const int offset
, int len
);
/* Accessor on parser `tt` returning an unsigned value.
 * NOTE(review): by its name this is the parser's current offset; what the
 * offset is relative to is not stated here - confirm. */
410 unsigned tvbparse_curr_offset(tvbparse_t
* tt
);
/* Accessor on parser `tt` returning an unsigned value.
 * NOTE(review): by its name this is the amount of input still left to
 * parse; units and exact definition are not stated here - confirm. */
411 unsigned tvbparse_len_left(tvbparse_t
* tt
);
416 * This will look for the wanted token at the current offset or after any given
417 * number of ignored tokens returning false if there's no match or true if there
419 * The parser will be left in its original state and no callbacks will be called.
/* Tests whether `wanted` matches at the current offset of parser `tt`
 * (possibly after ignored tokens) without consuming anything.
 * Returns true on a match and false otherwise; the parser state is left
 * as it was and no callbacks are invoked. */
422 bool tvbparse_peek(tvbparse_t
* tt
,
423 const tvbparse_wanted_t
* wanted
);
426 * This will look for the wanted token at the current offset or after any given
427 * number of ignored tokens returning NULL if there's no match.
428 * if there is a match it will set the offset of the current parser after
429 * the end of the token
/* Tries to extract `wanted` at the current offset of parser `tt` (possibly
 * after ignored tokens).
 * Returns the matched element, or NULL if there is no match. On a match the
 * parser's offset is moved to just past the end of the token. */
432 tvbparse_elem_t
* tvbparse_get(tvbparse_t
* tt
,
433 const tvbparse_wanted_t
* wanted
);
436 * Like tvbparse_get but this will look for a wanted token even beyond the
438 * This function is slow.
/* Searching variant of tvbparse_get: looks for `wanted` in parser `tt`
 * without requiring the match to start at the current position. Slow.
 * NOTE(review): presumably returns NULL when nothing is found and advances
 * the parser past the match, as tvbparse_get does - confirm. */
441 tvbparse_elem_t
* tvbparse_find(tvbparse_t
* tt
,
442 const tvbparse_wanted_t
* wanted
);
/* Takes a protocol tree and an extracted element.
 * NOTE(review): by its name this adds `curr` to `tree`; whether it also
 * descends into sub-elements or follows sibling links is not stated here -
 * confirm against the implementation. */
446 void tvbparse_tree_add_elem(proto_tree
* tree
, tvbparse_elem_t
* curr
);