1 % This is the MIT license.
3 % Copyright (c) 2007 Mochi Media, Inc.
5 % Permission is hereby granted, free of charge, to any person obtaining a
6 % copy of this software and associated documentation files (the "Software"),
7 % to deal in the Software without restriction, including without limitation
8 % the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 % and/or sell copies of the Software, and to permit persons to whom the
10 % Software is furnished to do so, subject to the following conditions:
12 % The above copyright notice and this permission notice shall be included
13 % in all copies or substantial portions of the Software.
15 % THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 % OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 % FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 % THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 % OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 % ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
21 % OR OTHER DEALINGS IN THE SOFTWARE.
24 %% @author Bob Ippolito <bob@mochimedia.com>
25 %% @copyright 2007 Mochi Media, Inc.
27 %% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
28 %% with binaries as strings, arrays as lists (without an {array, _})
29 %% wrapper and it only knows how to decode UTF-8 (and ASCII).
31 %% JSON terms are decoded as follows (javascript -> erlang):
33 %% <li>{"key": "value"} ->
34 %% {struct, [{<<"key">>, <<"value">>}]}</li>
35 %% <li>["array", 123, 12.34, true, false, null] ->
36 %% [<<"array">>, 123, 12.34, true, false, null]
40 %% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li>
41 %% <li>Objects decode to {struct, PropList}</li>
42 %% <li>Numbers decode to integer or float</li>
43 %% <li>true, false, null decode to their respective terms.</li>
45 %% The encoder will accept the same format that the decoder will produce,
46 %% but will also allow additional cases for leniency:
48 %% <li>atoms other than true, false, null will be considered UTF-8
49 %% strings (even as a proplist key)
51 %% <li>{json, IoList} will insert IoList directly into the output
54 %% <li>{array, Array} will be encoded as Array
55 %% (legacy mochijson style)
57 %% <li>A non-empty raw proplist will be encoded as an object as long
58 %% as the first pair does not have an atom key of json, struct,
63 -module(lwes_mochijson2
).
64 -author('bob@mochimedia.com').
65 -export([encoder
/1, encode
/1]).
66 -export([decoder
/1, decode
/1, decode
/2]).
68 %% This is a macro to placate syntax highlighters..
%% Move the decoder position forward on the current line: yields a copy of
%% the #decoder{} record Dec whose `offset' and `column' fields are both
%% increased by Count. Note that Dec is expanded more than once.
-define(ADV_COL(Dec, Count),
        Dec#decoder{column = Dec#decoder.column + Count,
                    offset = Dec#decoder.offset + Count}).
%% Single-step variant of the column advance: yields a copy of the
%% #decoder{} record Dec with both `offset' and `column' increased by one.
-define(INC_COL(Dec),
        Dec#decoder{column = Dec#decoder.column + 1,
                    offset = Dec#decoder.offset + 1}).
74 -define(INC_LINE(S
), S#decoder
{offset
=1+S#decoder
.offset
,
76 line
=1+S#decoder
.line
}).
77 -define(INC_CHAR(S
, C
),
81 line
=1+S#decoder
.line
,
82 offset
=1+S#decoder
.offset
};
84 S#decoder
{column
=1+S#decoder
.column
,
85 offset
=1+S#decoder
.offset
}
%% True when Char is a space, tab, carriage return or newline character.
%% Built only from exact comparisons and `orelse', so it can be used in a
%% guard (the tokenizer does exactly that).
-define(IS_WHITESPACE(Char),
        (Char =:= $\n orelse
         Char =:= $\r orelse
         Char =:= $\t orelse
         Char =:= $\s)).
90 %% @type json_string() = atom | binary()
91 %% @type json_number() = integer() | float()
92 %% @type json_array() = [json_term()]
93 %% @type json_object() = {struct, [{json_string(), json_term()}]}
94 %% @type json_eep18_object() = {[{json_string(), json_term()}]}
95 %% @type json_iolist() = {json, iolist()}
96 %% @type json_term() = json_string() | json_number() | json_array() |
97 %% json_object() | json_eep18_object() | json_iolist()
99 -record(encoder
, {handler
=null
,
102 -record(decoder
, {object_hook
=null
,
108 %% @spec encoder([encoder_option()]) -> function()
109 %% @doc Create an encoder/1 with the given options.
110 %% @type encoder_option() = handler_option() | utf8_option()
111 %% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
113 State
= parse_encoder_options(Options
, #encoder
{}),
114 fun (O
) -> json_encode(O
, State
) end.
116 %% @spec encode(json_term()) -> iolist()
%% @doc Encode the given term as JSON to an iolist.
119 json_encode(Any
, #encoder
{}).
121 %% @spec decoder([decoder_option()]) -> function()
122 %% @doc Create a decoder/1 with the given options.
124 State
= parse_decoder_options(Options
, #decoder
{}),
125 fun (O
) -> json_decode(O
, State
) end.
%% @spec decode(iolist(), [{format, proplist | eep18 | struct}]) -> json_term()
%% @doc Decode the given iolist to Erlang terms, using the requested object
%% format for decoded JSON objects:
%% <ul>
%% <li>proplist returns objects as [{binary(), json_term()}] proplists</li>
%% <li>eep18 returns objects as {[{binary(), json_term()}]}</li>
%% <li>struct returns them as-is</li>
%% </ul>
%% The option list is folded over a default #decoder{} record by
%% parse_decoder_options/2 before decoding starts.
decode(S, Options) ->
    State = parse_decoder_options(Options, #decoder{}),
    json_decode(S, State).
135 %% @spec decode(iolist()) -> json_term()
136 %% @doc Decode the given iolist to Erlang terms.
138 json_decode(S
, #decoder
{}).
142 parse_encoder_options([], State
) ->
144 parse_encoder_options([{handler
, Handler
} | Rest
], State
) ->
145 parse_encoder_options(Rest
, State#encoder
{handler
=Handler
});
146 parse_encoder_options([{utf8
, Switch
} | Rest
], State
) ->
147 parse_encoder_options(Rest
, State#encoder
{utf8
=Switch
}).
149 parse_decoder_options([], State
) ->
151 parse_decoder_options([{object_hook
, Hook
} | Rest
], State
) ->
152 parse_decoder_options(Rest
, State#decoder
{object_hook
=Hook
});
153 parse_decoder_options([{format
, Format
} | Rest
], State
)
154 when Format
=:= struct orelse Format
=:= eep18 orelse Format
=:= proplist
->
155 parse_decoder_options(Rest
, State#decoder
{object_hook
=Format
}).
157 json_encode(true
, _State
) ->
159 json_encode(false
, _State
) ->
161 json_encode(null
, _State
) ->
163 json_encode(I
, _State
) when is_integer(I
) ->
165 json_encode(F
, _State
) when is_float(F
) ->
166 lwes_mochinum:digits(F
);
167 json_encode(S
, State
) when is_binary(S
); is_atom(S
) ->
168 json_encode_string(S
, State
);
169 json_encode([{K
, _
}|_
] = Props
, State
) when (K
=/= struct andalso
172 json_encode_proplist(Props
, State
);
173 json_encode({struct
, Props
}, State
) when is_list(Props
) ->
174 json_encode_proplist(Props
, State
);
175 json_encode({Props
}, State
) when is_list(Props
) ->
176 json_encode_proplist(Props
, State
);
177 json_encode({}, State
) ->
178 json_encode_proplist([], State
);
179 json_encode(Array
, State
) when is_list(Array
) ->
180 json_encode_array(Array
, State
);
181 json_encode({array
, Array
}, State
) when is_list(Array
) ->
182 json_encode_array(Array
, State
);
183 json_encode({json
, IoList
}, _State
) ->
185 json_encode(Bad
, #encoder
{handler
=null
}) ->
186 exit({json_encode
, {bad_term
, Bad
}});
187 json_encode(Bad
, State
=#encoder
{handler
=Handler
}) ->
188 json_encode(Handler(Bad
), State
).
190 json_encode_array([], _State
) ->
192 json_encode_array(L
, State
) ->
194 [$
,, json_encode(O
, State
) | Acc
]
196 [$
, | Acc1
] = lists:foldl(F
, "[", L
),
197 lists:reverse([$\
] | Acc1
]).
199 json_encode_proplist([], _State
) ->
201 json_encode_proplist(Props
, State
) ->
202 F
= fun ({K
, V
}, Acc
) ->
203 KS
= json_encode_string(K
, State
),
204 VS
= json_encode(V
, State
),
205 [$
,, VS
, $
:, KS
| Acc
]
207 [$
, | Acc1
] = lists:foldl(F
, "{", Props
),
208 lists:reverse([$\
} | Acc1
]).
210 json_encode_string(A
, State
) when is_atom(A
) ->
212 case json_string_is_safe(L
) of
216 json_encode_string_unicode(xmerl_ucs:from_utf8(L
), State
, [?Q
])
218 json_encode_string(B
, State
) when is_binary(B
) ->
219 case json_bin_is_safe(B
) of
223 json_encode_string_unicode(xmerl_ucs:from_utf8(B
), State
, [?Q
])
225 json_encode_string(I
, _State
) when is_integer(I
) ->
226 [?Q
, integer_to_list(I
), ?Q
];
227 json_encode_string(L
, State
) when is_list(L
) ->
228 case json_string_is_safe(L
) of
232 json_encode_string_unicode(L
, State
, [?Q
])
235 json_string_is_safe([]) ->
237 json_string_is_safe([C
| Rest
]) ->
253 C
when C
>= 0, C
< $\s
; C
>= 16#
7f
, C
=< 16#
10FFFF
->
256 json_string_is_safe(Rest
);
261 json_bin_is_safe(<<>>) ->
263 json_bin_is_safe(<<C
, Rest
/binary>>) ->
279 C
when C
>= 0, C
< $\s
; C
>= 16#
7f
->
282 json_bin_is_safe(Rest
)
285 json_encode_string_unicode([], _State
, Acc
) ->
286 lists:reverse([$
\" | Acc
]);
287 json_encode_string_unicode([C
| Cs
], State
, Acc
) ->
291 %% Escaping solidus is only useful when trying to protect
292 %% against "</script>" injection attacks which are only
293 %% possible when JSON is inserted into a HTML document
294 %% in-line. mochijson2 does not protect you from this, so
295 %% if you do insert directly into HTML then you need to
296 %% uncomment the following case or escape the output of encode.
313 C
when C
>= 0, C
< $\s
->
315 C
when C
>= 16#
7f
, C
=< 16#
10FFFF
, State#encoder
.utf8
->
316 [xmerl_ucs:to_utf8(C
) | Acc
];
317 C
when C
>= 16#
7f
, C
=< 16#
10FFFF
, not State#encoder
.utf8
->
322 exit({json_encode
, {bad_char
, C
}})
324 json_encode_string_unicode(Cs
, State
, Acc1
).
326 hexdigit(C
) when C
>= 0, C
=< 9 ->
328 hexdigit(C
) when C
=< 15 ->
331 unihex(C
) when C
< 16#
10000 ->
332 <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
333 Digits
= [hexdigit(D
) || D
<- [D3
, D2
, D1
, D0
]],
335 unihex(C
) when C
=< 16#
10FFFF
->
337 S1
= 16#d800
bor ((N bsr
10) band
16#
3ff
),
338 S2
= 16#dc00
bor (N band
16#
3ff
),
339 [unihex(S1
), unihex(S2
)].
341 json_decode(L
, S
) when is_list(L
) ->
342 json_decode(iolist_to_binary(L
), S
);
344 {Res
, S1
} = decode1(B
, S
),
345 {eof
, _
} = tokenize(B
, S1#decoder
{state
=trim
}),
348 decode1(B
, S
=#decoder
{state
=null
}) ->
349 case tokenize(B
, S#decoder
{state
=any
}) of
354 {start_object
, S1
} ->
358 make_object(V
, #decoder
{object_hook
=N
}) when N
=:= null orelse N
=:= struct
->
360 make_object({struct
, P
}, #decoder
{object_hook
=eep18
}) ->
362 make_object({struct
, P
}, #decoder
{object_hook
=proplist
}) ->
364 make_object(V
, #decoder
{object_hook
=Hook
}) ->
%% Entry point for decoding a JSON object: switches the decoder into the
%% `key' state and starts the three-argument worker with an empty
%% accumulator of key/value pairs.
decode_object(Bin, Decoder) ->
    KeyState = Decoder#decoder{state = key},
    decode_object(Bin, KeyState, []).
370 decode_object(B
, S
=#decoder
{state
=key
}, Acc
) ->
371 case tokenize(B
, S
) of
373 V
= make_object({struct
, lists:reverse(Acc
)}, S1
),
374 {V
, S1#decoder
{state
=null
}};
376 {colon
, S2
} = tokenize(B
, S1
),
377 {V
, S3
} = decode1(B
, S2#decoder
{state
=null
}),
378 decode_object(B
, S3#decoder
{state
=comma
}, [{K
, V
} | Acc
])
380 decode_object(B
, S
=#decoder
{state
=comma
}, Acc
) ->
381 case tokenize(B
, S
) of
383 V
= make_object({struct
, lists:reverse(Acc
)}, S1
),
384 {V
, S1#decoder
{state
=null
}};
386 decode_object(B
, S1#decoder
{state
=key
}, Acc
)
%% Entry point for decoding a JSON array: switches the decoder into the
%% `any' state and starts the three-argument worker with an empty
%% accumulator of elements.
decode_array(Bin, Decoder) ->
    AnyState = Decoder#decoder{state = any},
    decode_array(Bin, AnyState, []).
392 decode_array(B
, S
=#decoder
{state
=any
}, Acc
) ->
393 case tokenize(B
, S
) of
395 {lists:reverse(Acc
), S1#decoder
{state
=null
}};
397 {Array
, S2
} = decode_array(B
, S1
),
398 decode_array(B
, S2#decoder
{state
=comma
}, [Array
| Acc
]);
399 {start_object
, S1
} ->
400 {Array
, S2
} = decode_object(B
, S1
),
401 decode_array(B
, S2#decoder
{state
=comma
}, [Array
| Acc
]);
402 {{const
, Const
}, S1
} ->
403 decode_array(B
, S1#decoder
{state
=comma
}, [Const
| Acc
])
405 decode_array(B
, S
=#decoder
{state
=comma
}, Acc
) ->
406 case tokenize(B
, S
) of
408 {lists:reverse(Acc
), S1#decoder
{state
=null
}};
410 decode_array(B
, S1#decoder
{state
=any
}, Acc
)
413 tokenize_string(B
, S
=#decoder
{offset
=O
}) ->
414 case tokenize_string_fast(B
, O
) of
417 S1
= ?
ADV_COL(S
, Length
),
418 <<_:O
/binary, Head:Length
/binary, _
/binary>> = B
,
419 tokenize_string(B
, S1
, lists:reverse(binary_to_list(Head
)));
422 <<_:O
/binary, String:Length
/binary, ?Q
, _
/binary>> = B
,
423 {{const
, String
}, ?
ADV_COL(S
, Length
+ 1)}
426 tokenize_string_fast(B
, O
) ->
428 <<_:O
/binary, ?Q
, _
/binary>> ->
430 <<_:O
/binary, $
\\, _
/binary>> ->
432 <<_:O
/binary, C1
, _
/binary>> when C1
< 128 ->
433 tokenize_string_fast(B
, 1 + O
);
434 <<_:O
/binary, C1
, C2
, _
/binary>> when C1
>= 194, C1
=< 223,
435 C2
>= 128, C2
=< 191 ->
436 tokenize_string_fast(B
, 2 + O
);
437 <<_:O
/binary, C1
, C2
, C3
, _
/binary>> when C1
>= 224, C1
=< 239,
438 C2
>= 128, C2
=< 191,
439 C3
>= 128, C3
=< 191 ->
440 tokenize_string_fast(B
, 3 + O
);
441 <<_:O
/binary, C1
, C2
, C3
, C4
, _
/binary>> when C1
>= 240, C1
=< 244,
442 C2
>= 128, C2
=< 191,
443 C3
>= 128, C3
=< 191,
444 C4
>= 128, C4
=< 191 ->
445 tokenize_string_fast(B
, 4 + O
);
450 tokenize_string(B
, S
=#decoder
{offset
=O
}, Acc
) ->
452 <<_:O
/binary, ?Q
, _
/binary>> ->
453 {{const
, iolist_to_binary(lists:reverse(Acc
))}, ?
INC_COL(S
)};
454 <<_:O
/binary, "\\\"", _
/binary>> ->
455 tokenize_string(B
, ?
ADV_COL(S
, 2), [$
\" | Acc
]);
456 <<_:O
/binary, "\\\\", _
/binary>> ->
457 tokenize_string(B
, ?
ADV_COL(S
, 2), [$
\\ | Acc
]);
458 <<_:O
/binary, "\\/", _
/binary>> ->
459 tokenize_string(B
, ?
ADV_COL(S
, 2), [$
/ | Acc
]);
460 <<_:O
/binary, "\\b", _
/binary>> ->
461 tokenize_string(B
, ?
ADV_COL(S
, 2), [$
\b | Acc
]);
462 <<_:O
/binary, "\\f", _
/binary>> ->
463 tokenize_string(B
, ?
ADV_COL(S
, 2), [$
\f | Acc
]);
464 <<_:O
/binary, "\\n", _
/binary>> ->
465 tokenize_string(B
, ?
ADV_COL(S
, 2), [$
\n | Acc
]);
466 <<_:O
/binary, "\\r", _
/binary>> ->
467 tokenize_string(B
, ?
ADV_COL(S
, 2), [$
\r | Acc
]);
468 <<_:O
/binary, "\\t", _
/binary>> ->
469 tokenize_string(B
, ?
ADV_COL(S
, 2), [$
\t | Acc
]);
470 <<_:O
/binary, "\\u", C3
, C2
, C1
, C0
, Rest
/binary>> ->
471 C
= erlang:list_to_integer([C3
, C2
, C1
, C0
], 16),
472 if C
> 16#D7FF
, C
< 16#DC00
->
473 %% coalesce UTF-16 surrogate pair
474 <<"\\u", D3
, D2
, D1
, D0
, _
/binary>> = Rest
,
475 D
= erlang:list_to_integer([D3
,D2
,D1
,D0
], 16),
476 [CodePoint
] = xmerl_ucs:from_utf16be(<<C:16/big
-unsigned
-integer,
477 D:16/big
-unsigned
-integer>>),
478 Acc1
= lists:reverse(xmerl_ucs:to_utf8(CodePoint
), Acc
),
479 tokenize_string(B
, ?
ADV_COL(S
, 12), Acc1
);
481 Acc1
= lists:reverse(xmerl_ucs:to_utf8(C
), Acc
),
482 tokenize_string(B
, ?
ADV_COL(S
, 6), Acc1
)
484 <<_:O
/binary, C1
, _
/binary>> when C1
< 128 ->
485 tokenize_string(B
, ?
INC_CHAR(S
, C1
), [C1
| Acc
]);
486 <<_:O
/binary, C1
, C2
, _
/binary>> when C1
>= 194, C1
=< 223,
487 C2
>= 128, C2
=< 191 ->
488 tokenize_string(B
, ?
ADV_COL(S
, 2), [C2
, C1
| Acc
]);
489 <<_:O
/binary, C1
, C2
, C3
, _
/binary>> when C1
>= 224, C1
=< 239,
490 C2
>= 128, C2
=< 191,
491 C3
>= 128, C3
=< 191 ->
492 tokenize_string(B
, ?
ADV_COL(S
, 3), [C3
, C2
, C1
| Acc
]);
493 <<_:O
/binary, C1
, C2
, C3
, C4
, _
/binary>> when C1
>= 240, C1
=< 244,
494 C2
>= 128, C2
=< 191,
495 C3
>= 128, C3
=< 191,
496 C4
>= 128, C4
=< 191 ->
497 tokenize_string(B
, ?
ADV_COL(S
, 4), [C4
, C3
, C2
, C1
| Acc
]);
502 tokenize_number(B
, S
) ->
503 case tokenize_number(B
, sign
, S
, []) of
505 {{const
, list_to_integer(Int
)}, S1
};
506 {{float, Float
}, S1
} ->
507 {{const
, list_to_float(Float
)}, S1
}
510 tokenize_number(B
, sign
, S
=#decoder
{offset
=O
}, []) ->
512 <<_:O
/binary, $
-, _
/binary>> ->
513 tokenize_number(B
, int
, ?
INC_COL(S
), [$
-]);
515 tokenize_number(B
, int
, S
, [])
517 tokenize_number(B
, int
, S
=#decoder
{offset
=O
}, Acc
) ->
519 <<_:O
/binary, $
0, _
/binary>> ->
520 tokenize_number(B
, frac
, ?
INC_COL(S
), [$
0 | Acc
]);
521 <<_:O
/binary, C
, _
/binary>> when C
>= $
1 andalso C
=< $
9 ->
522 tokenize_number(B
, int1
, ?
INC_COL(S
), [C
| Acc
])
524 tokenize_number(B
, int1
, S
=#decoder
{offset
=O
}, Acc
) ->
526 <<_:O
/binary, C
, _
/binary>> when C
>= $
0 andalso C
=< $
9 ->
527 tokenize_number(B
, int1
, ?
INC_COL(S
), [C
| Acc
]);
529 tokenize_number(B
, frac
, S
, Acc
)
531 tokenize_number(B
, frac
, S
=#decoder
{offset
=O
}, Acc
) ->
533 <<_:O
/binary, $
., C
, _
/binary>> when C
>= $
0, C
=< $
9 ->
534 tokenize_number(B
, frac1
, ?
ADV_COL(S
, 2), [C
, $
. | Acc
]);
535 <<_:O
/binary, E
, _
/binary>> when E
=:= $e orelse E
=:= $E
->
536 tokenize_number(B
, esign
, ?
INC_COL(S
), [$e
, $
0, $
. | Acc
]);
538 {{int
, lists:reverse(Acc
)}, S
}
540 tokenize_number(B
, frac1
, S
=#decoder
{offset
=O
}, Acc
) ->
542 <<_:O
/binary, C
, _
/binary>> when C
>= $
0 andalso C
=< $
9 ->
543 tokenize_number(B
, frac1
, ?
INC_COL(S
), [C
| Acc
]);
544 <<_:O
/binary, E
, _
/binary>> when E
=:= $e orelse E
=:= $E
->
545 tokenize_number(B
, esign
, ?
INC_COL(S
), [$e
| Acc
]);
547 {{float, lists:reverse(Acc
)}, S
}
549 tokenize_number(B
, esign
, S
=#decoder
{offset
=O
}, Acc
) ->
551 <<_:O
/binary, C
, _
/binary>> when C
=:= $
- orelse C
=:= $
+ ->
552 tokenize_number(B
, eint
, ?
INC_COL(S
), [C
| Acc
]);
554 tokenize_number(B
, eint
, S
, Acc
)
556 tokenize_number(B
, eint
, S
=#decoder
{offset
=O
}, Acc
) ->
558 <<_:O
/binary, C
, _
/binary>> when C
>= $
0 andalso C
=< $
9 ->
559 tokenize_number(B
, eint1
, ?
INC_COL(S
), [C
| Acc
])
561 tokenize_number(B
, eint1
, S
=#decoder
{offset
=O
}, Acc
) ->
563 <<_:O
/binary, C
, _
/binary>> when C
>= $
0 andalso C
=< $
9 ->
564 tokenize_number(B
, eint1
, ?
INC_COL(S
), [C
| Acc
]);
566 {{float, lists:reverse(Acc
)}, S
}
569 tokenize(B
, S
=#decoder
{offset
=O
}) ->
571 <<_:O
/binary, C
, _
/binary>> when ?
IS_WHITESPACE(C
) ->
572 tokenize(B
, ?
INC_CHAR(S
, C
));
573 <<_:O
/binary, "{", _
/binary>> ->
574 {start_object
, ?
INC_COL(S
)};
575 <<_:O
/binary, "}", _
/binary>> ->
576 {end_object
, ?
INC_COL(S
)};
577 <<_:O
/binary, "[", _
/binary>> ->
578 {start_array
, ?
INC_COL(S
)};
579 <<_:O
/binary, "]", _
/binary>> ->
580 {end_array
, ?
INC_COL(S
)};
581 <<_:O
/binary, ",", _
/binary>> ->
582 {comma
, ?
INC_COL(S
)};
583 <<_:O
/binary, ":", _
/binary>> ->
584 {colon
, ?
INC_COL(S
)};
585 <<_:O
/binary, "null", _
/binary>> ->
586 {{const
, null
}, ?
ADV_COL(S
, 4)};
587 <<_:O
/binary, "true", _
/binary>> ->
588 {{const
, true
}, ?
ADV_COL(S
, 4)};
589 <<_:O
/binary, "false", _
/binary>> ->
590 {{const
, false
}, ?
ADV_COL(S
, 5)};
591 <<_:O
/binary, "\"", _
/binary>> ->
592 tokenize_string(B
, ?
INC_COL(S
));
593 <<_:O
/binary, C
, _
/binary>> when (C
>= $
0 andalso C
=< $
9)
595 tokenize_number(B
, S
);
597 trim
= S#decoder
.state
,
604 -include_lib("eunit/include/eunit.hrl").
607 %% testing constructs borrowed from the Yaws JSON implementation.
609 %% Create an object from a list of Key/Value pairs.
614 is_obj({struct
, Props
}) ->
615 F
= fun ({K
, _
}) when is_binary(K
) -> true
end,
618 obj_from_list(Props
) ->
619 Obj
= {struct
, Props
},
620 ?
assert(is_obj(Obj
)),
623 %% Test for equivalence of Erlang terms.
624 %% Due to arbitrary order of construction, equivalent objects might
625 %% compare unequal as erlang terms, so we need to carefully recurse
626 %% through aggregates (tuples and objects).
628 equiv({struct
, Props1
}, {struct
, Props2
}) ->
629 equiv_object(Props1
, Props2
);
630 equiv(L1
, L2
) when is_list(L1
), is_list(L2
) ->
632 equiv(N1
, N2
) when is_number(N1
), is_number(N2
) -> N1
== N2
;
633 equiv(B1
, B2
) when is_binary(B1
), is_binary(B2
) -> B1
== B2
;
634 equiv(A
, A
) when A
=:= true orelse A
=:= false orelse A
=:= null
-> true
.
636 %% Object representation and traversal order is unknown.
637 %% Use the sledgehammer and sort property lists.
639 equiv_object(Props1
, Props2
) ->
640 L1
= lists:keysort(1, Props1
),
641 L2
= lists:keysort(1, Props2
),
642 Pairs
= lists:zip(L1
, L2
),
643 true
= lists:all(fun({{K1
, V1
}, {K2
, V2
}}) ->
644 equiv(K1
, K2
) and
equiv(V1
, V2
)
%% Recursively compare list elements for equivalence.
649 equiv_list([], []) ->
651 equiv_list([V1
| L1
], [V2
| L2
]) ->
652 equiv(V1
, V2
) andalso
equiv_list(L1
, L2
).
655 [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
656 <<16#F0
,16#
9D
,16#
9C
,16#
95>> = decode([34,"\\ud835","\\udf15",34]).
659 test_one(e2j_test_vec(utf8
), 1).
662 %% io:format("~p tests passed~n", [N-1]),
664 test_one([{E
, J
} | Rest
], N
) ->
665 %% io:format("[~p] ~p ~p~n", [N, E, J]),
666 true
= equiv(E
, decode(J
)),
667 true
= equiv(E
, decode(encode(E
))),
670 e2j_test_vec(utf8
) ->
673 {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
675 {-3.1416, "-3.14160"},
676 {12.0e10
, "1.20000e+11"},
677 {1.234E+10, "1.23400e+10"},
678 {-1.234E-10, "-1.23400e-10"},
680 {123.456, "1.23456E+2"},
682 {<<"foo">>, "\"foo\""},
683 {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
685 {<<"\n\n\n">>, "\"\\n\\n\\n\""},
686 {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
688 {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
689 {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
690 "{\"foo\":\"bar\",\"baz\":123}"},
693 {[1, <<"foo">>], "[1,\"foo\"]"},
695 %% json array in a json object
696 {obj_from_list([{<<"foo">>, [123]}]),
699 %% json object in a json object
700 {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true
}])}]),
701 "{\"foo\":{\"bar\":true}}"},
703 %% fold evaluation order
704 {obj_from_list([{<<"foo">>, []},
705 {<<"bar">>, obj_from_list([{<<"baz">>, true
}])},
706 {<<"alice">>, <<"bob">>}]),
707 "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
709 %% json object in a json array
710 {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null
],
711 "[-123,\"foo\",{\"bar\":[]},null]"}
%% Exercise both string output modes of the encoder on the same input:
%% a control byte followed by four two-byte UTF-8 sequences.
encoder_utf8_test() ->
    Input = <<1,"\321\202\320\265\321\201\321\202">>,

    %% Default encoder: every non-ASCII code point becomes a \uXXXX escape.
    Escaped = encode(Input),
    [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = Escaped,

    %% Encoder built with {utf8, true}: the control byte is still escaped,
    %% the remaining code points are emitted as raw UTF-8 byte lists.
    Utf8Encoder = encoder([{utf8, true}]),
    Raw = Utf8Encoder(Input),
    [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = Raw.
725 input_validation_test() ->
727 {16#
00A3
, <<?Q
, 16#C2
, 16#A3
, ?Q
>>}, %% pound
728 {16#
20AC
, <<?Q
, 16#E2
, 16#
82, 16#AC
, ?Q
>>}, %% euro
729 {16#
10196, <<?Q
, 16#F0
, 16#
90, 16#
86, 16#
96, ?Q
>>} %% denarius
731 lists:foreach(fun({CodePoint
, UTF8
}) ->
732 Expect
= list_to_binary(xmerl_ucs:to_utf8(CodePoint
)),
733 Expect
= decode(UTF8
)
737 %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
739 %% missing continuations, last byte in each should be 80-BF
740 <<?Q
, 16#C2
, 16#
7F
, ?Q
>>,
741 <<?Q
, 16#E0
, 16#
80,16#
7F
, ?Q
>>,
742 <<?Q
, 16#F0
, 16#
80, 16#
80, 16#
7F
, ?Q
>>,
743 %% we don't support code points > 10FFFF per RFC 3629
744 <<?Q
, 16#F5
, 16#
80, 16#
80, 16#
80, ?Q
>>,
745 %% escape characters trigger a different code path
746 <<?Q
, $
\\, $
\n, 16#
80, ?Q
>>
750 ok
= try
decode(X
) catch invalid_utf8
-> ok
end,
751 %% could be {ucs,{bad_utf8_character_code}} or
752 %% {json_encode,{bad_char,_}}
753 {'EXIT', _
} = (catch encode(X
))
756 inline_json_test() ->
757 ?
assertEqual(<<"\"iodata iodata\"">>,
759 encode({json
, [<<"\"iodata">>, " iodata\""]}))),
760 ?
assertEqual({struct
, [{<<"key">>, <<"iodata iodata">>}]},
763 [{key
, {json
, [<<"\"iodata">>, " iodata\""]}}]}))),
766 big_unicode_test() ->
767 UTF8Seq
= list_to_binary(xmerl_ucs:to_utf8(16#
0001d120
)),
769 <<"\"\\ud834\\udd20\"">>,
770 iolist_to_binary(encode(UTF8Seq
))),
773 decode(iolist_to_binary(encode(UTF8Seq
)))),
776 custom_decoder_test() ->
778 {struct
, [{<<"key">>, <<"value">>}]},
779 (decoder([]))("{\"key\": \"value\"}")),
780 F
= fun ({struct
, [{<<"key">>, <<"value">>}]}) -> win
end,
783 (decoder([{object_hook
, F
}]))("{\"key\": \"value\"}")),
789 ?
assertEqual(A
, decode(atom_to_list(A
))),
790 ?
assertEqual(iolist_to_binary(atom_to_list(A
)),
791 iolist_to_binary(encode(A
)))
792 end || A
<- [true
, false
, null
]],
796 iolist_to_binary(encode(foo
))),
798 <<"\"\\ud834\\udd20\"">>,
799 iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#
0001d120
))))),
803 %% Some forms are accepted as keys that would not be strings in other
807 iolist_to_binary(encode({struct
, [{foo
, 1}]}))),
810 iolist_to_binary(encode({struct
, [{<<"foo">>, 1}]}))),
813 iolist_to_binary(encode({struct
, [{"foo", 1}]}))),
816 iolist_to_binary(encode([{foo
, 1}]))),
819 iolist_to_binary(encode([{<<"foo">>, 1}]))),
822 iolist_to_binary(encode([{"foo", 1}]))),
824 <<"{\"\\ud834\\udd20\":1}">>,
826 encode({struct
, [{[16#
0001d120
], 1}]}))),
829 iolist_to_binary(encode({struct
, [{1, 1}]}))),
832 unsafe_chars_test() ->
833 Chars
= "\"\\\b\f\n\r\t",
835 ?
assertEqual(false
, json_string_is_safe([C
])),
836 ?
assertEqual(false
, json_bin_is_safe(<<C
>>)),
837 ?
assertEqual(<<C
>>, decode(encode(<<C
>>)))
841 json_string_is_safe([16#
0001d120
])),
844 json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#
0001d120
)))),
849 decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#
0001d120
))))))),
852 json_string_is_safe([16#
110000])),
855 json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#
110000])))),
856 %% solidus can be escaped but isn't unsafe by default
859 decode(<<"\"\\/\"">>)),
863 ?
assertEqual(0, decode("0")),
864 ?
assertEqual(1, decode("1")),
865 ?
assertEqual(11, decode("11")),
869 ?
assertEqual(<<"-2147483649214748364921474836492147483649">>,
870 iolist_to_binary(encode(-2147483649214748364921474836492147483649))),
871 ?
assertEqual(<<"2147483649214748364921474836492147483649">>,
872 iolist_to_binary(encode(2147483649214748364921474836492147483649))),
876 ?
assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))),
877 ?
assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))),
882 {'EXIT',{json_encode
,{bad_term
,{x
,y
}}}},
883 catch encode({x
,y
})),
884 F
= fun ({x
,y
}) -> [] end,
887 iolist_to_binary((encoder([{handler
, F
}]))({x
, y
}))),
%% Test generator: each accepted spelling of an empty object must encode
%% to the JSON text "{}". Yields one titled lazy assertion per spelling.
encode_empty_test_() ->
    EmptyObjects = [{"eep18 {}", {}},
                    {"eep18 {[]}", {[]}},
                    {"{struct, []}", {struct, []}}],
    [{Title, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(Term)))}
     || {Title, Term} <- EmptyObjects].
897 P
= [{<<"k">>, <<"v">>}],
898 JSON
= iolist_to_binary(encode({struct
, P
})),
900 ?
_assertEqual(JSON
, iolist_to_binary(encode(decode(JSON
, [{format
, F
}]))))}
901 || F
<- [struct
, eep18
, proplist
]].
904 P
= [{<<"k">>, <<"v">>}],
905 JSON
= iolist_to_binary(encode({struct
, P
})),
907 ?
_assertEqual(A
, decode(JSON
, [{format
, F
}]))}
908 || {F
, A
} <- [{struct
, {struct
, P
}},