3 * an API for text tvb parsers
5 * Copyright 2005, Luis E. Garcia Ontanon <luis@ontanon.org>
9 * Wireshark - Network traffic analyzer
10 * By Gerald Combs <gerald@wireshark.org>
11 * Copyright 1998 Gerald Combs
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29 The intention behind this is to ease the writing of dissectors that have to
30 parse text without the need of writing into buffers.
32 It was originally written to avoid using lex and yacc for the xml dissector.
34 The parser is able to look for wanted elements; these can be:
37 - a char out of a string of needles
38 - a char not belonging to a string of needles
39 - a sequence of chars that belong to a set of chars
40 - a sequence of chars that do not belong to a set of chars
43 - all the characters up to a certain wanted element (included or excluded)
46 - one of a given group of wanted elements
47 - a sequence of wanted elements
48 - some (at least one) instances of a wanted element
50 Once a wanted element is successfully extracted, by either tvbparse_get or
51 tvbparse_find, the parser will invoke one given callback before, and
52 another one after, the callbacks of each of its component subelements.
55 If tvbparse_get or tvbparse_find fails to extract the wanted element, the
56 subelements' callbacks are not going to be invoked.
58 The wanted elements are instantiated once by the proto_register_xxx function.
60 The parser is instantiated for every packet and it maintains its state.
62 The element's data is destroyed before the next packet is dissected.
68 #include <epan/tvbuff.h>
70 #include "ws_symbol_export.h"
/*
 * Forward declarations of the three core types, so that the callback
 * typedefs and struct bodies that follow can refer to each other before
 * the struct definitions appear.
 */
typedef struct _tvbparse_elem_t tvbparse_elem_t;     /* a matching token */
typedef struct _tvbparse_wanted_t tvbparse_wanted_t; /* a wanted element definition */
typedef struct _tvbparse_t tvbparse_t;               /* a per packet parser instance */
/**
 * A callback function to be called before or after an element has been
 * successfully extracted.
 *
 * Note that if the token belongs to a composed token the callbacks of the
 * components won't be called unless the composed token is successfully
 * extracted.
 *
 * tvbparse_data: the private data of the parser
 * wanted_data: the private data of the wanted element
 * elem: the extracted element
 */
typedef void (*tvbparse_action_t)(void* tvbparse_data,
                                  const void* wanted_data,
                                  struct _tvbparse_elem_t* elem);
91 typedef int (*tvbparse_condition_t
)
92 (tvbparse_t
*, const int,
93 const tvbparse_wanted_t
*,
98 TP_UNTIL_INCLUDE
, /* last elem is included, its span is spent by the parser */
99 TP_UNTIL_SPEND
, /* last elem is not included, but its span is spent by the parser */
100 TP_UNTIL_LEAVE
/* last elem is not included, neither its span is spent by the parser */
104 struct _tvbparse_wanted_t
{
106 tvbparse_condition_t condition
;
110 struct _tvbparse_wanted_t
** handle
;
117 gboolean (*comp
)(void*,const void*);
118 void* (*extract
)(tvbuff_t
*,guint
);
123 const tvbparse_wanted_t
* subelem
;
127 struct _tvbparse_wanted_t
* key
;
128 struct _tvbparse_wanted_t
* other
;
131 const tvbparse_wanted_t
* subelem
;
142 tvbparse_action_t before
;
143 tvbparse_action_t after
;
146 /* an instance of a per packet parser */
152 const tvbparse_wanted_t
* ignore
;
156 /* a matching token returned by either tvbparse_get or tvbparse_find */
157 struct _tvbparse_elem_t
{
166 struct _tvbparse_elem_t
* sub
;
168 struct _tvbparse_elem_t
* next
;
169 struct _tvbparse_elem_t
* last
;
171 const tvbparse_wanted_t
* wanted
;
176 * definition of wanted token types
178 * the following functions define the tokens we will be able to look for in a tvb
179 * common parameters are:
181 * id: an arbitrary id that will be copied to the eventual token (don't use 0)
182 * private_data: persistent data to be passed to the callback action (wanted_data)
183 * before_cb: a callback function to be called before those of the subelements
184 * after_cb: a callback function to be called after those of the subelements
191 * When looked for it returns a simple element one character long if the char
192 * at the current offset matches one of the the needles.
195 tvbparse_wanted_t
* tvbparse_char(const int id
,
196 const gchar
* needles
,
197 const void* private_data
,
198 tvbparse_action_t before_cb
,
199 tvbparse_action_t after_cb
);
202 * a not_char element.
204 * When looked for it returns a simple element one character long if the char
205 * at the current offset does not match one of the needles.
208 tvbparse_wanted_t
* tvbparse_not_char(const int id
,
210 const void* private_data
,
211 tvbparse_action_t before_cb
,
212 tvbparse_action_t after_cb
);
217 * When looked for it returns a simple element one or more characters long if
218 * one or more char(s) starting from the current offset match one of the needles.
219 * An element will be returned if at least min_len chars are given (1 if it's 0)
220 * It will get at most max_len chars or as much as it can if max_len is 0.
223 tvbparse_wanted_t
* tvbparse_chars(const int id
,
226 const gchar
* needles
,
227 const void* private_data
,
228 tvbparse_action_t before_cb
,
229 tvbparse_action_t after_cb
);
232 * a not_chars element
234 * When looked for it returns a simple element one or more characters long if
235 * one or more char(s) starting from the current offset do not match one of the
237 * An element will be returned if at least min_len chars are given (1 if it's 0)
238 * It will get at most max_len chars or as much as it can if max_len is 0.
241 tvbparse_wanted_t
* tvbparse_not_chars(const int id
,
244 const gchar
* needles
,
245 const void* private_data
,
246 tvbparse_action_t before_cb
,
247 tvbparse_action_t after_cb
);
252 * When looked for it returns a simple element if we have the given string at
256 tvbparse_wanted_t
* tvbparse_string(const int id
,
258 const void* private_data
,
259 tvbparse_action_t before_cb
,
260 tvbparse_action_t after_cb
);
265 * When looked for it returns a simple element if we have a matching string at
269 tvbparse_wanted_t
* tvbparse_casestring(const int id
,
272 tvbparse_action_t before_cb
,
273 tvbparse_action_t after_cb
);
278 * When looked for it returns a simple element containing all the characters
279 * found until the first match of the ending element if the ending element is
282 * When looking for until elements it calls tvbparse_find so it can be very slow.
284 * It won't have a subelement, the ending's callbacks won't get called.
288 * op_mode values determine how the terminating element and the current offset
289 * of the parser are handled
292 tvbparse_wanted_t
* tvbparse_until(const int id
,
293 const void* private_data
,
294 tvbparse_action_t before_cb
,
295 tvbparse_action_t after_cb
,
296 const tvbparse_wanted_t
* ending
,
297 until_mode_t until_mode
);
302 * When looked for it will try to match to the given candidates and return a
303 * composed element whose subelement is the first match.
305 * The list of candidates is terminated with a NULL
309 tvbparse_wanted_t
* tvbparse_set_oneof(const int id
,
310 const void* private_data
,
311 tvbparse_action_t before_cb
,
312 tvbparse_action_t after_cb
,
319 tvbparse_wanted_t
* tvbparse_hashed(const int id
,
321 tvbparse_action_t before_cb
,
322 tvbparse_action_t after_cb
,
323 tvbparse_wanted_t
* key
,
324 tvbparse_wanted_t
* other
,
328 void tvbparse_hashed_add(tvbparse_wanted_t
* w
, ...);
333 * When looked for it will try to match in order all the given candidates. If
334 * every candidate is found in the given order it will return a composed
335 * element whose subelements are the matched elements.
337 * The list of candidates is terminated with a NULL.
341 tvbparse_wanted_t
* tvbparse_set_seq(const int id
,
342 const void* private_data
,
343 tvbparse_action_t before_cb
,
344 tvbparse_action_t after_cb
,
350 * When looked for it will try to match the given candidate at least min times
351 * and at most max times. If the given candidate is matched at least min times
352 * a composed element is returned.
356 tvbparse_wanted_t
* tvbparse_some(const int id
,
359 const void* private_data
,
360 tvbparse_action_t before_cb
,
361 tvbparse_action_t after_cb
,
362 const tvbparse_wanted_t
* wanted
);
/*
 * Convenience wrapper around tvbparse_some(): forwards its arguments and
 * passes 1 and G_MAXINT as the two repetition bounds, i.e. "some (at least
 * one) instances" of the wanted element.
 */
#define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted) \
    tvbparse_some((id), 1, G_MAXINT, (private_data), (before_cb), (after_cb), (wanted))
371 * this is a pointer to a pointer to a wanted element (that might have not
372 * been initialized yet) so that recursive structures
375 tvbparse_wanted_t
* tvbparse_handle(tvbparse_wanted_t
** handle
);
389 tvbparse_wanted_t
* tvbparse_ft(int id
,
391 tvbparse_action_t before_cb
,
392 tvbparse_action_t after_cb
,
396 tvbparse_wanted_t
* tvbparse_end_of_buffer(int id
,
398 tvbparse_action_t before_cb
,
399 tvbparse_action_t after_cb
);
401 tvbparse_wanted_t
* tvbparse_ft_numcmp(int id
,
403 tvbparse_action_t before_cb
,
404 tvbparse_action_t after_cb
,
407 enum ft_cmp_op ft_cmp_op
,
413 * this is a composed candidate, that will try to match a quoted string
414 * (quotes included), including every escaped quote within it.
416 * C strings are matched with tvbparse_quoted(-1,NULL,NULL,NULL,"\"","\\")
419 tvbparse_wanted_t
* tvbparse_quoted(const int id
,
421 tvbparse_action_t before_cb
,
422 tvbparse_action_t after_cb
,
427 * a helper callback for quoted strings that will shrink the token to contain
428 * only the string andnot the quotes
431 void tvbparse_shrink_token_cb(void* tvbparse_data
,
432 const void* wanted_data
,
433 tvbparse_elem_t
* tok
);
438 /* initialize the parser (at every packet)
439 * tvb: what are we parsing?
441 * len: for how many bytes
442 * private_data: will be passed to the action callbacks
443 * ignore: a wanted token type to be ignored (the associated cb WILL be called when it matches)
446 tvbparse_t
* tvbparse_init(tvbuff_t
* tvb
,
450 const tvbparse_wanted_t
* ignore
);
452 /* reset the parser */
454 gboolean
tvbparse_reset(tvbparse_t
* tt
, const int offset
, int len
);
457 guint
tvbparse_curr_offset(tvbparse_t
* tt
);
458 guint
tvbparse_len_left(tvbparse_t
* tt
);
463 * This will look for the wanted token at the current offset or after any given
464 * number of ignored tokens returning FALSE if there's no match or TRUE if there
466 * The parser will be left in its original state and no callbacks will be called.
469 gboolean
tvbparse_peek(tvbparse_t
* tt
,
470 const tvbparse_wanted_t
* wanted
);
473 * This will look for the wanted token at the current offset or after any given
474 * number of ignored tokens returning NULL if there's no match.
475 * if there is a match it will set the offset of the current parser after
476 * the end of the token
479 tvbparse_elem_t
* tvbparse_get(tvbparse_t
* tt
,
480 const tvbparse_wanted_t
* wanted
);
483 * Like tvbparse_get but this will look for a wanted token even beyond the
485 * This function is slow.
488 tvbparse_elem_t
* tvbparse_find(tvbparse_t
* tt
,
489 const tvbparse_wanted_t
* wanted
);
493 void tvbparse_tree_add_elem(proto_tree
* tree
, tvbparse_elem_t
* curr
);