HACK: pinfo->private_data points to smb_info again
[wireshark-wip.git] / epan / dissectors / packet-json.c
blob246440f9f239755b3516bec2e438b1f353c21568
1 /* packet-json.c
2 * Routines for JSON dissection
3 * References:
4 * RFC 4627: http://tools.ietf.org/html/rfc4627
5 * Website: http://json.org/
7 * Copyright 2010, Jakub Zawadzki <darkjames-ws@darkjames.pl>
9 * $Id$
11 * Wireshark - Network traffic analyzer
12 * By Gerald Combs <gerald@wireshark.org>
13 * Copyright 1998 Gerald Combs
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 #define NEW_PROTO_TREE_API
32 #include "config.h"
34 #include <glib.h>
36 #include <epan/wmem/wmem.h>
37 #include <epan/packet.h>
38 #include <epan/tvbparse.h>
/* Subtree indices, one per kind of JSON tree node.  They start out as -1
 * and their addresses are handed to proto_register_subtree_array() in
 * proto_register_json(). */
static gint ett_json = -1;
static gint ett_json_array = -1;
static gint ett_json_object = -1;
static gint ett_json_member = -1;
/* Field info of the protocol itself; assigned in proto_register_json()
 * from proto_registrar_get_nth(). */
static header_field_info *hfi_json = NULL;

/* Decoration placed on every JSON field definition below. */
#define JSON_HFI_INIT HFI_INIT(proto_json)

/* Container nodes: added by before_array(), before_object() and
 * before_member() respectively. */
static header_field_info hfi_json_array JSON_HFI_INIT =
    { "Array", "json.array", FT_NONE, BASE_NONE, NULL, 0x00, "JSON array", HFILL };

static header_field_info hfi_json_object JSON_HFI_INIT =
    { "Object", "json.object", FT_NONE, BASE_NONE, NULL, 0x00, "JSON object", HFILL };

static header_field_info hfi_json_member JSON_HFI_INIT =
    { "Member", "json.member", FT_NONE, BASE_NONE, NULL, 0x00, "JSON object member", HFILL };

/* Disabled: the member key is currently only appended as text to the
 * member item (see after_member()). */
#if 0
/* XXX */
static header_field_info hfi_json_member_key JSON_HFI_INIT =
    { "Key", "json.member.key", FT_NONE, BASE_NONE, NULL, 0x00, NULL, HFILL };
#endif

/* Leaf values: added by after_value(), one field per JSON value kind. */
static header_field_info hfi_json_value_string JSON_HFI_INIT = /* FT_STRINGZ? */
    { "String value", "json.value.string", FT_STRING, BASE_NONE, NULL, 0x00, "JSON string value", HFILL };

/* Numbers are kept as their textual form (FT_STRING), not converted. */
static header_field_info hfi_json_value_number JSON_HFI_INIT = /* FT_DOUBLE/ FT_INT64? */
    { "Number value", "json.value.number", FT_STRING, BASE_NONE, NULL, 0x00, "JSON number value", HFILL };

static header_field_info hfi_json_value_false JSON_HFI_INIT =
    { "False value", "json.value.false", FT_NONE, BASE_NONE, NULL, 0x00, "JSON false value", HFILL };

static header_field_info hfi_json_value_null JSON_HFI_INIT =
    { "Null value", "json.value.null", FT_NONE, BASE_NONE, NULL, 0x00, "JSON null value", HFILL };

static header_field_info hfi_json_value_true JSON_HFI_INIT =
    { "True value", "json.value.true", FT_NONE, BASE_NONE, NULL, 0x00, "JSON true value", HFILL };
/* Root of the JSON grammar (object or array); built in init_json_parser()
 * and matched repeatedly by dissect_json(). */
static tvbparse_wanted_t* want;
/* Pattern for the characters " \t\r\n"; dissect_json() passes it as the
 * last argument of tvbparse_init(). */
static tvbparse_wanted_t* want_ignore;

/* Handle of the "data-text-lines" dissector, looked up in
 * proto_reg_handoff_json(); dissect_json() calls it on whatever the JSON
 * grammar did not consume. */
static dissector_handle_t text_lines_handle;
/* Ids given to grammar nodes in init_json_parser(); after_value() and
 * after_member() read them back from the matched sub-element's "id". */
typedef enum {
    JSON_TOKEN_INVALID = -1, /* after_value() starts from this when the value has no sub-element */
    JSON_TOKEN_NUMBER = 0,
    JSON_TOKEN_STRING,
    JSON_TOKEN_FALSE,
    JSON_TOKEN_NULL,
    JSON_TOKEN_TRUE,

    /* not really tokens ... */
    JSON_OBJECT,
    JSON_ARRAY

} json_token_type_t;
/* State handed to the tvbparse callbacks through tvbparse_init(). */
typedef struct {
    wmem_stack_t *stack; /* proto_tree nodes; callbacks add items under the top entry */

} json_parser_data_t;
104 static int
105 dissect_json(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data)
107 proto_tree *json_tree = NULL;
108 proto_item *ti = NULL;
110 json_parser_data_t parser_data;
111 tvbparse_t *tt;
113 const char *data_name;
114 int offset;
116 data_name = pinfo->match_string;
117 if (! (data_name && data_name[0])) {
119 * No information from "match_string"
121 data_name = (char *)data;
122 if (! (data_name && data_name[0])) {
124 * No information from dissector data
126 data_name = (char *)(pinfo->private_data);
127 if (! (data_name && data_name[0])) {
129 * No information from "private_data"
131 data_name = NULL;
136 if (tree) {
137 ti = proto_tree_add_item(tree, hfi_json, tvb, 0, -1, ENC_NA);
138 json_tree = proto_item_add_subtree(ti, ett_json);
140 if (data_name)
141 proto_item_append_text(ti, ": %s", data_name);
144 offset = 0;
146 parser_data.stack = wmem_stack_new(wmem_packet_scope());
147 wmem_stack_push(parser_data.stack, json_tree);
149 tt = tvbparse_init(tvb, offset, -1, &parser_data, want_ignore);
151 /* XXX, only one json in packet? */
152 while ((tvbparse_get(tt, want)))
155 offset = tvbparse_curr_offset(tt);
157 proto_item_set_len(ti, offset);
159 /* if we have some unparsed data, pass to data-text-lines dissector (?) */
160 if (tvb_length_remaining(tvb, offset) > 0) {
161 int datalen, reported_datalen;
162 tvbuff_t *next_tvb;
164 datalen = tvb_length_remaining(tvb, offset);
165 reported_datalen = tvb_reported_length_remaining(tvb, offset);
167 next_tvb = tvb_new_subset(tvb, offset, datalen, reported_datalen);
169 call_dissector(text_lines_handle, next_tvb, pinfo, tree);
170 } else if (data_name) {
171 col_append_sep_fstr(pinfo->cinfo, COL_INFO, " ", "(%s)", data_name);
174 return tvb_length(tvb);
177 static void before_object(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
178 json_parser_data_t *data = (json_parser_data_t *) tvbparse_data;
180 proto_tree *tree = (proto_tree *)wmem_stack_peek(data->stack);
181 proto_tree *subtree;
182 proto_item *ti;
184 ti = proto_tree_add_item(tree, &hfi_json_object, tok->tvb, tok->offset, tok->len, ENC_NA);
186 subtree = proto_item_add_subtree(ti, ett_json_object);
187 wmem_stack_push(data->stack, subtree);
190 static void after_object(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *elem _U_) {
191 json_parser_data_t *data = (json_parser_data_t *) tvbparse_data;
193 wmem_stack_pop(data->stack);
196 static void before_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
197 json_parser_data_t *data = (json_parser_data_t *) tvbparse_data;
199 proto_tree *tree = (proto_tree *)wmem_stack_peek(data->stack);
200 proto_tree *subtree;
201 proto_item *ti;
203 ti = proto_tree_add_item(tree, &hfi_json_member, tok->tvb, tok->offset, tok->len, ENC_NA);
205 subtree = proto_item_add_subtree(ti, ett_json_member);
206 wmem_stack_push(data->stack, subtree);
209 static void after_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
210 json_parser_data_t *data = (json_parser_data_t *) tvbparse_data;
212 proto_tree *tree = (proto_tree *)wmem_stack_pop(data->stack);
214 if (tree) {
215 tvbparse_elem_t *key_tok = tok->sub;
217 if (key_tok && key_tok->id == JSON_TOKEN_STRING) {
218 char *key = tvb_get_string(wmem_packet_scope(), key_tok->tvb, key_tok->offset, key_tok->len);
220 proto_item_append_text(tree, " Key: %s", key);
222 /* XXX, &hfi_json_member_key */
226 static void before_array(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
227 json_parser_data_t *data = (json_parser_data_t *) tvbparse_data;
229 proto_tree *tree = (proto_tree *)wmem_stack_peek(data->stack);
230 proto_tree *subtree;
231 proto_item *ti;
233 ti = proto_tree_add_item(tree, &hfi_json_array, tok->tvb, tok->offset, tok->len, ENC_NA);
235 subtree = proto_item_add_subtree(ti, ett_json_array);
236 wmem_stack_push(data->stack, subtree);
239 static void after_array(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *elem _U_) {
240 json_parser_data_t *data = (json_parser_data_t *) tvbparse_data;
242 wmem_stack_pop(data->stack);
/*
 * defines for helping with UTF-16 surrogate pairs
 *
 * Every macro argument is parenthesized in the expansion so that an
 * expression argument (not just a plain variable) keeps its intended
 * precedence.  The IS_* macros evaluate their argument twice, so do not
 * pass expressions with side effects.
 */

#define LEAD_SURROGATE_START  0xd800
#define LEAD_SURROGATE_END    0xdbff
#define TRAIL_SURROGATE_START 0xdc00
#define TRAIL_SURROGATE_END   0xdfff

/* Nonzero when the 16-bit unit lies in the lead / trail surrogate range. */
#define IS_LEAD_SURROGATE(l)  (((l)>=LEAD_SURROGATE_START)&&((l)<=LEAD_SURROGATE_END))
#define IS_TRAIL_SURROGATE(t) (((t)>=TRAIL_SURROGATE_START)&&((t)<=TRAIL_SURROGATE_END))

/* Code point (0x10000 and up) encoded by lead surrogate l and trail surrogate t. */
#define GET_UNICHAR_FROM_SURROGATES(l,t) (0x10000+((((l)-LEAD_SURROGATE_START)<<10)|((t)-TRAIL_SURROGATE_START)))
259 static char *json_string_unescape(tvbparse_elem_t *tok)
261 char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1);
262 int i, j;
264 j = 0;
265 for (i = 1; i < tok->len - 1; i++) {
266 guint8 ch = tvb_get_guint8(tok->tvb, tok->offset + i);
268 if (ch == '\\') {
269 i++;
271 ch = tvb_get_guint8(tok->tvb, tok->offset + i);
272 switch (ch) {
273 case '\"':
274 case '\\':
275 case '/':
276 default:
277 str[j++] = ch;
278 break;
280 case 'b':
281 str[j++] = '\b';
282 break;
283 case 'f':
284 str[j++] = '\f';
285 break;
286 case 'n':
287 str[j++] = '\n';
288 break;
289 case 'r':
290 str[j++] = '\r';
291 break;
292 case 't':
293 str[j++] = '\t';
294 break;
296 case 'u':
298 guint32 unicode_hex = 0;
299 gboolean valid = TRUE;
300 int k;
302 for (k = 0; k < 4; k++) {
303 i++;
304 unicode_hex <<= 4;
306 ch = tvb_get_guint8(tok->tvb, tok->offset + i);
307 if (ch >= '0' && ch <= '9')
308 unicode_hex |= (ch - '0');
309 else if (ch >= 'a' && ch <= 'f')
310 unicode_hex |= (10 + (ch - 'a'));
311 else if (ch >= 'A' && ch <= 'F')
312 unicode_hex |= (10 + (ch - 'A'));
313 else {
314 valid = FALSE;
315 break;
319 if ((IS_LEAD_SURROGATE(unicode_hex))) {
320 ch = tvb_get_guint8(tok->tvb, tok->offset + i + 1);
322 if (ch == '\\') {
323 i++;
324 ch = tvb_get_guint8(tok->tvb, tok->offset + i + 1);
325 if (ch == 'u') {
326 guint16 lead_surrogate = unicode_hex;
327 guint16 trail_surrogate = 0;
328 i++;
330 for (k = 0; k < 4; k++) {
331 i++;
332 trail_surrogate <<= 4;
334 ch = tvb_get_guint8(tok->tvb, tok->offset + i);
335 if (ch >= '0' && ch <= '9')
336 trail_surrogate |= (ch - '0');
337 else if (ch >= 'a' && ch <= 'f')
338 trail_surrogate |= (10 + (ch - 'a'));
339 else if (ch >= 'A' && ch <= 'F')
340 trail_surrogate |= (10 + (ch - 'A'));
341 else {
342 valid = FALSE;
343 break;
347 if ((IS_TRAIL_SURROGATE(trail_surrogate))) {
348 unicode_hex = GET_UNICHAR_FROM_SURROGATES(lead_surrogate,trail_surrogate);
349 } else {
350 valid = FALSE;
352 } else {
353 valid = FALSE;
355 } else {
356 valid = FALSE;
358 } else if ((IS_TRAIL_SURROGATE(unicode_hex))) {
359 i++;
360 valid = FALSE;
363 if (valid && g_unichar_validate(unicode_hex) && g_unichar_isprint(unicode_hex)) {
364 /* \uXXXX => 6 bytes */
365 int charlen = g_unichar_to_utf8(unicode_hex, &str[j]);
366 j += charlen;
367 } else
368 str[j++] = '?';
369 break;
373 } else
374 str[j++] = ch;
377 str[j] = '\0';
379 return str;
382 static void after_value(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) {
383 json_parser_data_t *data = (json_parser_data_t *) tvbparse_data;
385 proto_tree *tree = (proto_tree *)wmem_stack_peek(data->stack);
386 json_token_type_t value_id = JSON_TOKEN_INVALID;
388 if (tok->sub)
389 value_id = (json_token_type_t)tok->sub->id;
391 switch (value_id) {
392 case JSON_TOKEN_STRING:
393 if (tok->len >= 2)
394 proto_tree_add_unicode_string(tree, hfi_json_value_string.id, tok->tvb, tok->offset, tok->len, json_string_unescape(tok));
395 else
396 proto_tree_add_item(tree, &hfi_json_value_string, tok->tvb, tok->offset, tok->len, ENC_ASCII|ENC_NA);
397 break;
399 case JSON_TOKEN_NUMBER:
400 /* XXX, convert to number */
401 proto_tree_add_item(tree, &hfi_json_value_number, tok->tvb, tok->offset, tok->len, ENC_ASCII|ENC_NA);
402 break;
404 case JSON_TOKEN_FALSE:
405 proto_tree_add_item(tree, &hfi_json_value_false, tok->tvb, tok->offset, tok->len, ENC_NA);
406 break;
408 case JSON_TOKEN_NULL:
409 proto_tree_add_item(tree, &hfi_json_value_null, tok->tvb, tok->offset, tok->len, ENC_NA);
410 break;
412 case JSON_TOKEN_TRUE:
413 proto_tree_add_item(tree, &hfi_json_value_true, tok->tvb, tok->offset, tok->len, ENC_NA);
414 break;
416 case JSON_OBJECT:
417 case JSON_ARRAY:
418 /* already added */
419 break;
421 default:
422 proto_tree_add_text(tree, tok->tvb, tok->offset, tok->len, "%s", tvb_format_text(tok->tvb, tok->offset, tok->len));
423 break;
427 static void init_json_parser(void) {
428 static tvbparse_wanted_t _want_object;
429 static tvbparse_wanted_t _want_array;
431 tvbparse_wanted_t *want_object, *want_array;
432 tvbparse_wanted_t *want_member;
433 tvbparse_wanted_t *want_string;
434 tvbparse_wanted_t *want_number, *want_int;
435 tvbparse_wanted_t *want_value;
436 tvbparse_wanted_t *want_value_separator;
438 #define tvbparse_optional(id, private_data, before_cb, after_cb, wanted) \
439 tvbparse_some(id, 0, 1, private_data, before_cb, after_cb, wanted)
441 tvbparse_wanted_t *want_quot = tvbparse_char(-1,"\"",NULL,NULL,NULL);
443 want_string = tvbparse_set_seq(JSON_TOKEN_STRING, NULL, NULL, NULL,
444 want_quot,
445 tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL,
446 tvbparse_set_oneof(-1, NULL, NULL, NULL,
447 tvbparse_not_chars(-1, 0, 0, "\"" "\\", NULL, NULL, NULL), /* XXX, without invalid unicode characters */
448 tvbparse_set_seq(-1, NULL, NULL, NULL,
449 tvbparse_char(-1, "\\", NULL, NULL, NULL),
450 tvbparse_set_oneof(-1, NULL, NULL, NULL,
451 tvbparse_chars(-1, 0, 1, "\"" "\\" "/bfnrt", NULL, NULL, NULL),
452 tvbparse_set_seq(-1, NULL, NULL, NULL,
453 tvbparse_char(-1, "u", NULL, NULL, NULL),
454 tvbparse_chars(-1, 4, 4, "0123456789abcdefABCDEF", NULL, NULL, NULL),
455 NULL),
456 NULL),
457 NULL),
458 NULL)
460 want_quot,
461 NULL);
463 want_value_separator = tvbparse_char(-1, ",", NULL, NULL, NULL);
465 /* int = zero / ( digit1-9 *DIGIT ) */
466 want_int = tvbparse_set_oneof(-1, NULL, NULL, NULL,
467 tvbparse_char(-1, "0", NULL, NULL, NULL),
468 tvbparse_set_seq(-1, NULL, NULL, NULL,
469 tvbparse_chars(-1, 1, 1, "123456789", NULL, NULL, NULL),
470 tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() don't respect 0 as min_len ;/ */
471 tvbparse_chars(-1, 0, 0, "0123456789", NULL, NULL, NULL)),
472 NULL),
473 NULL);
475 /* number = [ minus ] int [ frac ] [ exp ] */
476 want_number = tvbparse_set_seq(JSON_TOKEN_NUMBER, NULL, NULL, NULL,
477 tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() don't respect 0 as min_len ;/ */
478 tvbparse_chars(-1, 0, 1, "-", NULL, NULL, NULL)),
479 want_int,
480 /* frac = decimal-point 1*DIGIT */
481 tvbparse_optional(-1, NULL, NULL, NULL,
482 tvbparse_set_seq(-1, NULL, NULL, NULL,
483 tvbparse_char(-1, ".", NULL, NULL, NULL),
484 tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL),
485 NULL)),
486 /* exp = e [ minus / plus ] 1*DIGIT */
487 tvbparse_optional(-1, NULL, NULL, NULL,
488 tvbparse_set_seq(-1, NULL, NULL, NULL,
489 tvbparse_char(-1, "eE", NULL, NULL, NULL),
490 tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() don't respect 0 as min_len ;/ */
491 tvbparse_chars(-1, 0, 1, "-+", NULL, NULL, NULL)),
492 tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL),
493 NULL)),
494 NULL);
496 /* value = false / null / true / object / array / number / string */
497 want_value = tvbparse_set_oneof(-1, NULL, NULL, after_value,
498 tvbparse_string(JSON_TOKEN_FALSE, "false", NULL, NULL, NULL),
499 tvbparse_string(JSON_TOKEN_NULL, "null", NULL, NULL, NULL),
500 tvbparse_string(JSON_TOKEN_TRUE, "true", NULL, NULL, NULL),
501 &_want_object,
502 &_want_array,
503 want_number,
504 want_string,
505 NULL);
507 /* array = begin-array [ value *( value-separator value ) ] end-array */
508 want_array = tvbparse_set_seq(JSON_ARRAY, NULL, before_array, after_array,
509 tvbparse_char(-1, "[", NULL, NULL, NULL),
510 tvbparse_optional(-1, NULL, NULL, NULL,
511 tvbparse_set_seq(-1, NULL, NULL, NULL,
512 want_value,
513 tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL,
514 tvbparse_set_seq(-1, NULL, NULL, NULL,
515 want_value_separator,
516 want_value,
517 NULL)),
518 NULL)
520 tvbparse_char(-1, "]", NULL, NULL, NULL),
521 NULL);
522 _want_array = *want_array;
524 /* member = string name-separator value */
525 want_member = tvbparse_set_seq(-1, NULL, before_member, after_member,
526 want_string,
527 tvbparse_char(-1, ":", NULL, NULL, NULL),
528 want_value,
529 NULL);
531 /* object = begin-object [ member *( value-separator member ) ] end-object */
532 want_object = tvbparse_set_seq(JSON_OBJECT, NULL, before_object, after_object,
533 tvbparse_char(-1, "{", NULL, NULL, NULL),
534 tvbparse_optional(-1, NULL, NULL, NULL,
535 tvbparse_set_seq(-1, NULL, NULL, NULL,
536 want_member,
537 tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL,
538 tvbparse_set_seq(-1, NULL, NULL, NULL,
539 want_value_separator,
540 want_member,
541 NULL)),
542 NULL)
544 tvbparse_char(-1, "}", NULL, NULL, NULL),
545 NULL);
546 _want_object = *want_object;
548 want_ignore = tvbparse_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL);
550 /* JSON-text = object / array */
551 want = tvbparse_set_oneof(-1, NULL, NULL, NULL,
552 want_object,
553 want_array,
554 /* tvbparse_not_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL), */
555 NULL);
557 /* XXX, heur? */
560 void
561 proto_register_json(void) {
562 static gint *ett[] = {
563 &ett_json,
564 &ett_json_array,
565 &ett_json_object,
566 &ett_json_member
569 #ifndef HAVE_HFI_SECTION_INIT
570 static header_field_info *hfi[] = {
571 &hfi_json_array,
572 &hfi_json_object,
573 &hfi_json_member,
574 /* &hfi_json_member_key, */
575 &hfi_json_value_string,
576 &hfi_json_value_number,
577 &hfi_json_value_false,
578 &hfi_json_value_null,
579 &hfi_json_value_true,
581 #endif
583 int proto_json;
585 proto_json = proto_register_protocol("JavaScript Object Notation", "JSON", "json");
586 hfi_json = proto_registrar_get_nth(proto_json);
588 proto_register_fields(proto_json, hfi, array_length(hfi));
589 proto_register_subtree_array(ett, array_length(ett));
591 new_register_dissector("json", dissect_json, proto_json);
593 init_json_parser();
596 void
597 proto_reg_handoff_json(void)
599 dissector_handle_t json_handle;
601 json_handle = find_dissector("json");
603 dissector_add_string("media_type", "application/json", json_handle); /* RFC 4627 */
604 dissector_add_string("media_type", "application/json-rpc", json_handle); /* JSON-RPC over HTTP */
605 dissector_add_string("media_type", "application/jsonrequest", json_handle); /* JSON-RPC over HTTP */
607 text_lines_handle = find_dissector("data-text-lines");