2 * Routines for JSON dissection
4 * RFC 4627: http://tools.ietf.org/html/rfc4627
5 * Website: http://json.org/
7 * Copyright 2010, Jakub Zawadzki <darkjames-ws@darkjames.pl>
11 * Wireshark - Network traffic analyzer
12 * By Gerald Combs <gerald@wireshark.org>
13 * Copyright 1998 Gerald Combs
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 #define NEW_PROTO_TREE_API
36 #include <epan/wmem/wmem.h>
37 #include <epan/packet.h>
38 #include <epan/tvbparse.h>
/* Subtree (ett) indices used by this dissector. Each starts at -1 and is the
 * value handed to proto_item_add_subtree() by dissect_json() and by the
 * before_array / before_object / before_member callbacks. */
40 static gint ett_json
= -1;
41 static gint ett_json_array
= -1;
42 static gint ett_json_object
= -1;
43 static gint ett_json_member
= -1;
/* Field info of the protocol item itself. Starts as NULL and is assigned from
 * proto_registrar_get_nth(proto_json) in proto_register_json(); dissect_json()
 * uses it for the top-level item. */
45 static header_field_info
*hfi_json
= NULL
;
/* Header field definitions for the JSON dissector. Every definition below is
 * declared with JSON_HFI_INIT, which expands to HFI_INIT(proto_json). */
47 #define JSON_HFI_INIT HFI_INIT(proto_json)
/* "json.array": FT_NONE item added by before_array(). */
49 static header_field_info hfi_json_array JSON_HFI_INIT
=
50 { "Array", "json.array", FT_NONE
, BASE_NONE
, NULL
, 0x00, "JSON array", HFILL
};
/* "json.object": FT_NONE item added by before_object(). */
52 static header_field_info hfi_json_object JSON_HFI_INIT
=
53 { "Object", "json.object", FT_NONE
, BASE_NONE
, NULL
, 0x00, "JSON object", HFILL
};
/* "json.member": FT_NONE item added by before_member(). */
55 static header_field_info hfi_json_member JSON_HFI_INIT
=
56 { "Member", "json.member", FT_NONE
, BASE_NONE
, NULL
, 0x00, "JSON object member", HFILL
};
/* "json.member.key": no visible code adds this field to a tree; its entry in
 * the registration array is commented out and after_member() only carries an
 * "XXX" note for it. */
60 static header_field_info hfi_json_member_key JSON_HFI_INIT
=
61 { "Key", "json.member.key", FT_NONE
, BASE_NONE
, NULL
, 0x00, NULL
, HFILL
};
/* "json.value.string": FT_STRING field used by after_value() for string tokens. */
64 static header_field_info hfi_json_value_string JSON_HFI_INIT
= /* FT_STRINGZ? */
65 { "String value", "json.value.string", FT_STRING
, BASE_NONE
, NULL
, 0x00, "JSON string value", HFILL
};
/* "json.value.number": declared as FT_STRING; after_value() adds the number
 * token as text (see its "XXX, convert to number" note). */
67 static header_field_info hfi_json_value_number JSON_HFI_INIT
= /* FT_DOUBLE/ FT_INT64? */
68 { "Number value", "json.value.number", FT_STRING
, BASE_NONE
, NULL
, 0x00, "JSON number value", HFILL
};
/* "json.value.false": FT_NONE marker for the literal false. */
70 static header_field_info hfi_json_value_false JSON_HFI_INIT
=
71 { "False value", "json.value.false", FT_NONE
, BASE_NONE
, NULL
, 0x00, "JSON false value", HFILL
};
/* "json.value.null": FT_NONE marker for the literal null. */
73 static header_field_info hfi_json_value_null JSON_HFI_INIT
=
74 { "Null value", "json.value.null", FT_NONE
, BASE_NONE
, NULL
, 0x00, "JSON null value", HFILL
};
/* "json.value.true": FT_NONE marker for the literal true. */
76 static header_field_info hfi_json_value_true JSON_HFI_INIT
=
77 { "True value", "json.value.true", FT_NONE
, BASE_NONE
, NULL
, 0x00, "JSON true value", HFILL
};
/* Top-level grammar nodes, both assigned in init_json_parser():
 * `want` is what dissect_json() asks tvbparse_get() for, and `want_ignore`
 * is the node dissect_json() hands to tvbparse_init(). */
80 static tvbparse_wanted_t
* want
;
81 static tvbparse_wanted_t
* want_ignore
;
/* Handle of the "data-text-lines" dissector; looked up in
 * proto_reg_handoff_json() and called by dissect_json() on bytes left over
 * after parsing. */
83 static dissector_handle_t text_lines_handle
;
86 JSON_TOKEN_INVALID
= -1,
87 JSON_TOKEN_NUMBER
= 0,
93 /* not really tokens ... */
102 } json_parser_data_t
;
/*
 * Dissect a JSON payload.
 *
 * A display name for the data is looked up first in pinfo->match_string, then
 * in the `data` argument, then in pinfo->private_data; each candidate is
 * rejected when it is NULL or an empty string.
 *
 * The function adds a top-level hfi_json item over the tvb, creates its
 * ett_json subtree, pushes that subtree on a packet-scoped wmem stack stored
 * in parser_data, and calls tvbparse_get(tt, want) in a while loop. The
 * item length is then set to the offset reported by tvbparse_curr_offset().
 *
 * If bytes remain after that offset they are wrapped in a subset tvb and
 * passed to text_lines_handle; otherwise, when a data name is set, it is
 * appended to the Info column as "(name)".
 *
 * Returns tvb_length(tvb).
 *
 * NOTE(review): several lines of this function (return type, braces, some
 * local declarations) are not visible here; the description covers only the
 * visible statements.
 */
105 dissect_json(tvbuff_t
*tvb
, packet_info
*pinfo
, proto_tree
*tree
, void* data
)
107 proto_tree
*json_tree
= NULL
;
108 proto_item
*ti
= NULL
;
110 json_parser_data_t parser_data
;
113 const char *data_name
;
/* Candidate 1: the string that matched in the dissector table. */
116 data_name
= pinfo
->match_string
;
117 if (! (data_name
&& data_name
[0])) {
119 * No information from "match_string"
/* Candidate 2: the opaque dissector data argument, read as a C string. */
121 data_name
= (char *)data
;
122 if (! (data_name
&& data_name
[0])) {
124 * No information from dissector data
/* Candidate 3: pinfo->private_data, read as a C string. */
126 data_name
= (char *)(pinfo
->private_data
);
127 if (! (data_name
&& data_name
[0])) {
129 * No information from "private_data"
137 ti
= proto_tree_add_item(tree
, hfi_json
, tvb
, 0, -1, ENC_NA
);
138 json_tree
= proto_item_add_subtree(ti
, ett_json
);
141 proto_item_append_text(ti
, ": %s", data_name
);
/* The stack holds the tree that parser callbacks should attach items to. */
146 parser_data
.stack
= wmem_stack_new(wmem_packet_scope());
147 wmem_stack_push(parser_data
.stack
, json_tree
);
149 tt
= tvbparse_init(tvb
, offset
, -1, &parser_data
, want_ignore
);
151 /* XXX, only one json in packet? */
152 while ((tvbparse_get(tt
, want
)))
155 offset
= tvbparse_curr_offset(tt
);
157 proto_item_set_len(ti
, offset
);
159 /* if we have some unparsed data, pass to data-text-lines dissector (?) */
160 if (tvb_length_remaining(tvb
, offset
) > 0) {
161 int datalen
, reported_datalen
;
164 datalen
= tvb_length_remaining(tvb
, offset
);
165 reported_datalen
= tvb_reported_length_remaining(tvb
, offset
);
167 next_tvb
= tvb_new_subset(tvb
, offset
, datalen
, reported_datalen
);
169 call_dissector(text_lines_handle
, next_tvb
, pinfo
, tree
);
170 } else if (data_name
) {
171 col_append_sep_fstr(pinfo
->cinfo
, COL_INFO
, " ", "(%s)", data_name
);
174 return tvb_length(tvb
);
/*
 * tvbparse "before" callback installed on the object grammar node in
 * init_json_parser().
 *
 * tvbparse_data is the json_parser_data_t set up by dissect_json(). The
 * callback peeks the tree on top of the stack, adds an hfi_json_object item
 * covering the token (tok->offset, tok->len), creates its ett_json_object
 * subtree and pushes that subtree so it becomes the new stack top.
 * after_object() pops it again.
 */
177 static void before_object(void *tvbparse_data
, const void *wanted_data _U_
, tvbparse_elem_t
*tok
) {
178 json_parser_data_t
*data
= (json_parser_data_t
*) tvbparse_data
;
180 proto_tree
*tree
= (proto_tree
*)wmem_stack_peek(data
->stack
);
184 ti
= proto_tree_add_item(tree
, &hfi_json_object
, tok
->tvb
, tok
->offset
, tok
->len
, ENC_NA
);
186 subtree
= proto_item_add_subtree(ti
, ett_json_object
);
187 wmem_stack_push(data
->stack
, subtree
);
/*
 * tvbparse "after" callback installed on the object grammar node: pops one
 * entry from the parser's tree stack, undoing the push done by
 * before_object(). The popped value is not used.
 */
190 static void after_object(void *tvbparse_data
, const void *wanted_data _U_
, tvbparse_elem_t
*elem _U_
) {
191 json_parser_data_t
*data
= (json_parser_data_t
*) tvbparse_data
;
193 wmem_stack_pop(data
->stack
);
/*
 * tvbparse "before" callback installed on the member grammar node in
 * init_json_parser().
 *
 * Peeks the tree on top of the parser stack, adds an hfi_json_member item
 * covering the token, creates its ett_json_member subtree and pushes that
 * subtree onto the stack. after_member() pops it again.
 */
196 static void before_member(void *tvbparse_data
, const void *wanted_data _U_
, tvbparse_elem_t
*tok
) {
197 json_parser_data_t
*data
= (json_parser_data_t
*) tvbparse_data
;
199 proto_tree
*tree
= (proto_tree
*)wmem_stack_peek(data
->stack
);
203 ti
= proto_tree_add_item(tree
, &hfi_json_member
, tok
->tvb
, tok
->offset
, tok
->len
, ENC_NA
);
205 subtree
= proto_item_add_subtree(ti
, ett_json_member
);
206 wmem_stack_push(data
->stack
, subtree
);
/*
 * tvbparse "after" callback installed on the member grammar node.
 *
 * Pops the member subtree pushed by before_member(). If the member token's
 * first sub-element (tok->sub) has id JSON_TOKEN_STRING, its bytes are
 * fetched with tvb_get_string() into packet-scoped memory and appended to the
 * popped tree's text as " Key: <text>".
 *
 * The hfi_json_member_key field is not added here; only an "XXX" note
 * mentions it.
 */
209 static void after_member(void *tvbparse_data
, const void *wanted_data _U_
, tvbparse_elem_t
*tok
) {
210 json_parser_data_t
*data
= (json_parser_data_t
*) tvbparse_data
;
212 proto_tree
*tree
= (proto_tree
*)wmem_stack_pop(data
->stack
);
215 tvbparse_elem_t
*key_tok
= tok
->sub
;
217 if (key_tok
&& key_tok
->id
== JSON_TOKEN_STRING
) {
218 char *key
= tvb_get_string(wmem_packet_scope(), key_tok
->tvb
, key_tok
->offset
, key_tok
->len
);
220 proto_item_append_text(tree
, " Key: %s", key
);
222 /* XXX, &hfi_json_member_key */
/*
 * tvbparse "before" callback installed on the array grammar node in
 * init_json_parser().
 *
 * Peeks the tree on top of the parser stack, adds an hfi_json_array item
 * covering the token, creates its ett_json_array subtree and pushes that
 * subtree onto the stack. after_array() pops it again.
 */
226 static void before_array(void *tvbparse_data
, const void *wanted_data _U_
, tvbparse_elem_t
*tok
) {
227 json_parser_data_t
*data
= (json_parser_data_t
*) tvbparse_data
;
229 proto_tree
*tree
= (proto_tree
*)wmem_stack_peek(data
->stack
);
233 ti
= proto_tree_add_item(tree
, &hfi_json_array
, tok
->tvb
, tok
->offset
, tok
->len
, ENC_NA
);
235 subtree
= proto_item_add_subtree(ti
, ett_json_array
);
236 wmem_stack_push(data
->stack
, subtree
);
/*
 * tvbparse "after" callback installed on the array grammar node: pops one
 * entry from the parser's tree stack, undoing the push done by
 * before_array(). The popped value is not used.
 */
239 static void after_array(void *tvbparse_data
, const void *wanted_data _U_
, tvbparse_elem_t
*elem _U_
) {
240 json_parser_data_t
*data
= (json_parser_data_t
*) tvbparse_data
;
242 wmem_stack_pop(data
->stack
);
246 * defines for helping with UTF-16 surrogate pairs
/*
 * UTF-16 surrogate helpers used when decoding \uXXXX escapes.
 *
 * Lead (high) surrogates occupy 0xd800..0xdbff and trail (low) surrogates
 * occupy 0xdc00..0xdfff; both ranges are inclusive.
 */
#define LEAD_SURROGATE_START   0xd800
#define LEAD_SURROGATE_END     0xdbff
#define TRAIL_SURROGATE_START  0xdc00
#define TRAIL_SURROGATE_END    0xdfff

/*
 * Range tests for a 16-bit code unit. Each macro evaluates its argument
 * twice, so do not pass expressions with side effects.
 */
#define IS_LEAD_SURROGATE(l)   (((l) >= LEAD_SURROGATE_START) && ((l) <= LEAD_SURROGATE_END))
#define IS_TRAIL_SURROGATE(t)  (((t) >= TRAIL_SURROGATE_START) && ((t) <= TRAIL_SURROGATE_END))

/*
 * Combine a lead/trail surrogate pair into the code point it encodes:
 * 0x10000 + ((lead - 0xd800) << 10 | (trail - 0xdc00)).
 *
 * Both parameters are parenthesised so that expression arguments
 * (for example `hi | lo`) expand with the intended precedence.
 */
#define GET_UNICHAR_FROM_SURROGATES(l, t) \
    (0x10000 + ((((l) - LEAD_SURROGATE_START) << 10) | ((t) - TRAIL_SURROGATE_START)))
/*
 * Build an unescaped copy of a JSON string token.
 *
 * The result buffer comes from packet-scoped wmem and is tok->len - 1 bytes
 * long. The main loop visits token bytes 1 .. tok->len - 2, i.e. it skips the
 * first and last byte (presumably the surrounding quotes - confirm against
 * the string grammar in init_json_parser()).
 *
 * Visible handling of \u escapes:
 *  - four hex digits (0-9, a-f, A-F) are accumulated into unicode_hex;
 *  - if the value is a lead surrogate, the following bytes are examined and
 *    four more hex digits are accumulated into trail_surrogate; when that is
 *    a valid trail surrogate the pair is combined with
 *    GET_UNICHAR_FROM_SURROGATES();
 *  - a separate branch exists for a value that is itself a trail surrogate;
 *  - the code point is written with g_unichar_to_utf8() only when `valid` is
 *    still TRUE and it passes g_unichar_validate() and g_unichar_isprint().
 *
 * NOTE(review): most of the escape switch, the handling of invalid input and
 * the final termination/return are on lines not visible here.
 */
259 static char *json_string_unescape(tvbparse_elem_t
*tok
)
261 char *str
= (char *)wmem_alloc(wmem_packet_scope(), tok
->len
- 1);
265 for (i
= 1; i
< tok
->len
- 1; i
++) {
266 guint8 ch
= tvb_get_guint8(tok
->tvb
, tok
->offset
+ i
);
271 ch
= tvb_get_guint8(tok
->tvb
, tok
->offset
+ i
);
298 guint32 unicode_hex
= 0;
299 gboolean valid
= TRUE
;
302 for (k
= 0; k
< 4; k
++) {
306 ch
= tvb_get_guint8(tok
->tvb
, tok
->offset
+ i
);
307 if (ch
>= '0' && ch
<= '9')
308 unicode_hex
|= (ch
- '0');
309 else if (ch
>= 'a' && ch
<= 'f')
310 unicode_hex
|= (10 + (ch
- 'a'));
311 else if (ch
>= 'A' && ch
<= 'F')
312 unicode_hex
|= (10 + (ch
- 'A'));
319 if ((IS_LEAD_SURROGATE(unicode_hex
))) {
320 ch
= tvb_get_guint8(tok
->tvb
, tok
->offset
+ i
+ 1);
324 ch
= tvb_get_guint8(tok
->tvb
, tok
->offset
+ i
+ 1);
326 guint16 lead_surrogate
= unicode_hex
;
327 guint16 trail_surrogate
= 0;
330 for (k
= 0; k
< 4; k
++) {
332 trail_surrogate
<<= 4;
334 ch
= tvb_get_guint8(tok
->tvb
, tok
->offset
+ i
);
335 if (ch
>= '0' && ch
<= '9')
336 trail_surrogate
|= (ch
- '0');
337 else if (ch
>= 'a' && ch
<= 'f')
338 trail_surrogate
|= (10 + (ch
- 'a'));
339 else if (ch
>= 'A' && ch
<= 'F')
340 trail_surrogate
|= (10 + (ch
- 'A'));
347 if ((IS_TRAIL_SURROGATE(trail_surrogate
))) {
348 unicode_hex
= GET_UNICHAR_FROM_SURROGATES(lead_surrogate
,trail_surrogate
);
358 } else if ((IS_TRAIL_SURROGATE(unicode_hex
))) {
363 if (valid
&& g_unichar_validate(unicode_hex
) && g_unichar_isprint(unicode_hex
)) {
364 /* \uXXXX => 6 bytes */
365 int charlen
= g_unichar_to_utf8(unicode_hex
, &str
[j
]);
/*
 * tvbparse "after" callback installed on the value grammar node in
 * init_json_parser().
 *
 * Peeks the tree on top of the parser stack (it does not pop it), reads the
 * matched alternative's id from tok->sub->id and adds one item for the value:
 *  - JSON_TOKEN_STRING: hfi_json_value_string. Two calls are visible, one via
 *    proto_tree_add_unicode_string() with json_string_unescape(tok) and one
 *    via proto_tree_add_item(); which one is compiled or executed depends on
 *    lines not visible here.
 *  - JSON_TOKEN_NUMBER: hfi_json_value_number, added as text (ENC_ASCII).
 *  - JSON_TOKEN_FALSE / JSON_TOKEN_NULL / JSON_TOKEN_TRUE: the matching
 *    FT_NONE marker field.
 * A proto_tree_add_text() call that shows tvb_format_text() of the token is
 * also present; NOTE(review): it is presumably the default branch - confirm.
 */
382 static void after_value(void *tvbparse_data
, const void *wanted_data _U_
, tvbparse_elem_t
*tok
) {
383 json_parser_data_t
*data
= (json_parser_data_t
*) tvbparse_data
;
385 proto_tree
*tree
= (proto_tree
*)wmem_stack_peek(data
->stack
);
386 json_token_type_t value_id
= JSON_TOKEN_INVALID
;
389 value_id
= (json_token_type_t
)tok
->sub
->id
;
392 case JSON_TOKEN_STRING
:
394 proto_tree_add_unicode_string(tree
, hfi_json_value_string
.id
, tok
->tvb
, tok
->offset
, tok
->len
, json_string_unescape(tok
));
396 proto_tree_add_item(tree
, &hfi_json_value_string
, tok
->tvb
, tok
->offset
, tok
->len
, ENC_ASCII
|ENC_NA
);
399 case JSON_TOKEN_NUMBER
:
400 /* XXX, convert to number */
401 proto_tree_add_item(tree
, &hfi_json_value_number
, tok
->tvb
, tok
->offset
, tok
->len
, ENC_ASCII
|ENC_NA
);
404 case JSON_TOKEN_FALSE
:
405 proto_tree_add_item(tree
, &hfi_json_value_false
, tok
->tvb
, tok
->offset
, tok
->len
, ENC_NA
);
408 case JSON_TOKEN_NULL
:
409 proto_tree_add_item(tree
, &hfi_json_value_null
, tok
->tvb
, tok
->offset
, tok
->len
, ENC_NA
);
412 case JSON_TOKEN_TRUE
:
413 proto_tree_add_item(tree
, &hfi_json_value_true
, tok
->tvb
, tok
->offset
, tok
->len
, ENC_NA
);
422 proto_tree_add_text(tree
, tok
->tvb
, tok
->offset
, tok
->len
, "%s", tvb_format_text(tok
->tvb
, tok
->offset
, tok
->len
));
/*
 * Build the tvbparse grammar used by dissect_json() and store its entry
 * points in the file-level `want` and `want_ignore` pointers.
 *
 * Nodes built here, in order: want_quot, want_string (id JSON_TOKEN_STRING),
 * want_value_separator (","), want_int, want_number (id JSON_TOKEN_NUMBER),
 * want_value (after_value callback), want_array (id JSON_ARRAY, with
 * before_array / after_array), want_member (before_member / after_member),
 * want_object (id JSON_OBJECT, with before_object / after_object),
 * want_ignore (runs of space, tab, CR, LF) and finally want.
 *
 * The finished array and object nodes are copied by value into the
 * function-static _want_array and _want_object.
 * NOTE(review): presumably want_value refers to those statics so that the
 * grammar can be recursive; the lines that would show this are not visible.
 *
 * tvbparse_optional() is a local helper macro for "zero or one occurrence",
 * defined as tvbparse_some() with min 0 and max 1.
 */
427 static void init_json_parser(void) {
428 static tvbparse_wanted_t _want_object
;
429 static tvbparse_wanted_t _want_array
;
431 tvbparse_wanted_t
*want_object
, *want_array
;
432 tvbparse_wanted_t
*want_member
;
433 tvbparse_wanted_t
*want_string
;
434 tvbparse_wanted_t
*want_number
, *want_int
;
435 tvbparse_wanted_t
*want_value
;
436 tvbparse_wanted_t
*want_value_separator
;
438 #define tvbparse_optional(id, private_data, before_cb, after_cb, wanted) \
439 tvbparse_some(id, 0, 1, private_data, before_cb, after_cb, wanted)
441 tvbparse_wanted_t
*want_quot
= tvbparse_char(-1,"\"",NULL
,NULL
,NULL
);
/* String body: any run of non-quote, non-backslash characters, or a
 * backslash followed by one of "\/bfnrt or by 'u' plus four hex digits. */
443 want_string
= tvbparse_set_seq(JSON_TOKEN_STRING
, NULL
, NULL
, NULL
,
445 tvbparse_some(-1, 0, G_MAXINT
, NULL
, NULL
, NULL
,
446 tvbparse_set_oneof(-1, NULL
, NULL
, NULL
,
447 tvbparse_not_chars(-1, 0, 0, "\"" "\\", NULL
, NULL
, NULL
), /* XXX, without invalid unicode characters */
448 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
449 tvbparse_char(-1, "\\", NULL
, NULL
, NULL
),
450 tvbparse_set_oneof(-1, NULL
, NULL
, NULL
,
451 tvbparse_chars(-1, 0, 1, "\"" "\\" "/bfnrt", NULL
, NULL
, NULL
),
452 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
453 tvbparse_char(-1, "u", NULL
, NULL
, NULL
),
454 tvbparse_chars(-1, 4, 4, "0123456789abcdefABCDEF", NULL
, NULL
, NULL
),
463 want_value_separator
= tvbparse_char(-1, ",", NULL
, NULL
, NULL
);
465 /* int = zero / ( digit1-9 *DIGIT ) */
466 want_int
= tvbparse_set_oneof(-1, NULL
, NULL
, NULL
,
467 tvbparse_char(-1, "0", NULL
, NULL
, NULL
),
468 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
469 tvbparse_chars(-1, 1, 1, "123456789", NULL
, NULL
, NULL
),
470 tvbparse_optional(-1, NULL
, NULL
, NULL
, /* tvbparse_chars() don't respect 0 as min_len ;/ */
471 tvbparse_chars(-1, 0, 0, "0123456789", NULL
, NULL
, NULL
)),
475 /* number = [ minus ] int [ frac ] [ exp ] */
476 want_number
= tvbparse_set_seq(JSON_TOKEN_NUMBER
, NULL
, NULL
, NULL
,
477 tvbparse_optional(-1, NULL
, NULL
, NULL
, /* tvbparse_chars() don't respect 0 as min_len ;/ */
478 tvbparse_chars(-1, 0, 1, "-", NULL
, NULL
, NULL
)),
480 /* frac = decimal-point 1*DIGIT */
481 tvbparse_optional(-1, NULL
, NULL
, NULL
,
482 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
483 tvbparse_char(-1, ".", NULL
, NULL
, NULL
),
484 tvbparse_chars(-1, 1, 0, "0123456789", NULL
, NULL
, NULL
),
486 /* exp = e [ minus / plus ] 1*DIGIT */
487 tvbparse_optional(-1, NULL
, NULL
, NULL
,
488 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
489 tvbparse_char(-1, "eE", NULL
, NULL
, NULL
),
490 tvbparse_optional(-1, NULL
, NULL
, NULL
, /* tvbparse_chars() don't respect 0 as min_len ;/ */
491 tvbparse_chars(-1, 0, 1, "-+", NULL
, NULL
, NULL
)),
492 tvbparse_chars(-1, 1, 0, "0123456789", NULL
, NULL
, NULL
),
496 /* value = false / null / true / object / array / number / string */
497 want_value
= tvbparse_set_oneof(-1, NULL
, NULL
, after_value
,
498 tvbparse_string(JSON_TOKEN_FALSE
, "false", NULL
, NULL
, NULL
),
499 tvbparse_string(JSON_TOKEN_NULL
, "null", NULL
, NULL
, NULL
),
500 tvbparse_string(JSON_TOKEN_TRUE
, "true", NULL
, NULL
, NULL
),
507 /* array = begin-array [ value *( value-separator value ) ] end-array */
508 want_array
= tvbparse_set_seq(JSON_ARRAY
, NULL
, before_array
, after_array
,
509 tvbparse_char(-1, "[", NULL
, NULL
, NULL
),
510 tvbparse_optional(-1, NULL
, NULL
, NULL
,
511 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
513 tvbparse_some(-1, 0, G_MAXINT
, NULL
, NULL
, NULL
,
514 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
515 want_value_separator
,
520 tvbparse_char(-1, "]", NULL
, NULL
, NULL
),
/* Copy the finished array node into its function-static slot. */
522 _want_array
= *want_array
;
524 /* member = string name-separator value */
525 want_member
= tvbparse_set_seq(-1, NULL
, before_member
, after_member
,
527 tvbparse_char(-1, ":", NULL
, NULL
, NULL
),
531 /* object = begin-object [ member *( value-separator member ) ] end-object */
532 want_object
= tvbparse_set_seq(JSON_OBJECT
, NULL
, before_object
, after_object
,
533 tvbparse_char(-1, "{", NULL
, NULL
, NULL
),
534 tvbparse_optional(-1, NULL
, NULL
, NULL
,
535 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
537 tvbparse_some(-1, 0, G_MAXINT
, NULL
, NULL
, NULL
,
538 tvbparse_set_seq(-1, NULL
, NULL
, NULL
,
539 want_value_separator
,
544 tvbparse_char(-1, "}", NULL
, NULL
, NULL
),
/* Copy the finished object node into its function-static slot. */
546 _want_object
= *want_object
;
/* Characters handed to tvbparse_init() as the "ignore" node: space, tab, CR, LF. */
548 want_ignore
= tvbparse_chars(-1, 1, 0, " \t\r\n", NULL
, NULL
, NULL
);
550 /* JSON-text = object / array */
551 want
= tvbparse_set_oneof(-1, NULL
, NULL
, NULL
,
554 /* tvbparse_not_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL), */
/*
 * Register the JSON protocol with the dissection engine.
 *
 * Visible steps: register the protocol as "JavaScript Object Notation" /
 * "JSON" / "json", fetch its header_field_info into hfi_json, register the
 * field array and the subtree (ett) array, and register dissect_json() under
 * the name "json".
 *
 * The hfi[] array is declared under #ifndef HAVE_HFI_SECTION_INIT; the entry
 * for hfi_json_member_key is commented out, so that field is not registered.
 */
561 proto_register_json(void) {
562 static gint
*ett
[] = {
569 #ifndef HAVE_HFI_SECTION_INIT
570 static header_field_info
*hfi
[] = {
574 /* &hfi_json_member_key, */
575 &hfi_json_value_string
,
576 &hfi_json_value_number
,
577 &hfi_json_value_false
,
578 &hfi_json_value_null
,
579 &hfi_json_value_true
,
585 proto_json
= proto_register_protocol("JavaScript Object Notation", "JSON", "json");
586 hfi_json
= proto_registrar_get_nth(proto_json
);
588 proto_register_fields(proto_json
, hfi
, array_length(hfi
));
589 proto_register_subtree_array(ett
, array_length(ett
));
591 new_register_dissector("json", dissect_json
, proto_json
);
/*
 * Hand-off registration for the JSON dissector.
 *
 * Looks up the "json" dissector handle and attaches it to the "media_type"
 * table for application/json, application/json-rpc and
 * application/jsonrequest. Also looks up the "data-text-lines" dissector and
 * stores its handle in text_lines_handle for use by dissect_json().
 */
597 proto_reg_handoff_json(void)
599 dissector_handle_t json_handle
;
601 json_handle
= find_dissector("json");
603 dissector_add_string("media_type", "application/json", json_handle
); /* RFC 4627 */
604 dissector_add_string("media_type", "application/json-rpc", json_handle
); /* JSON-RPC over HTTP */
605 dissector_add_string("media_type", "application/jsonrequest", json_handle
); /* JSON-RPC over HTTP */
607 text_lines_handle
= find_dissector("data-text-lines");