restructure to be a little more flexible
[lwes-erlang/github-mirror.git] / src / lwes_mochijson2.erl
blobe8569a85b2729cc09434c500234903aa0c27bc08
1 % This is the MIT license.
3 % Copyright (c) 2007 Mochi Media, Inc.
5 % Permission is hereby granted, free of charge, to any person obtaining a
6 % copy of this software and associated documentation files (the "Software"),
7 % to deal in the Software without restriction, including without limitation
8 % the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 % and/or sell copies of the Software, and to permit persons to whom the
10 % Software is furnished to do so, subject to the following conditions:
12 % The above copyright notice and this permission notice shall be included
13 % in all copies or substantial portions of the Software.
15 % THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 % OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 % FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 % THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 % OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 % ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
21 % OR OTHER DEALINGS IN THE SOFTWARE.
24 %% @author Bob Ippolito <bob@mochimedia.com>
25 %% @copyright 2007 Mochi Media, Inc.
27 %% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
28 %% with binaries as strings, arrays as lists (without an {array, _})
29 %% wrapper and it only knows how to decode UTF-8 (and ASCII).
31 %% JSON terms are decoded as follows (javascript -> erlang):
32 %% <ul>
33 %% <li>{"key": "value"} ->
34 %% {struct, [{&lt;&lt;"key">>, &lt;&lt;"value">>}]}</li>
35 %% <li>["array", 123, 12.34, true, false, null] ->
36 %% [&lt;&lt;"array">>, 123, 12.34, true, false, null]
37 %% </li>
38 %% </ul>
39 %% <ul>
40 %% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li>
41 %% <li>Objects decode to {struct, PropList}</li>
42 %% <li>Numbers decode to integer or float</li>
43 %% <li>true, false, null decode to their respective terms.</li>
44 %% </ul>
45 %% The encoder will accept the same format that the decoder will produce,
46 %% but will also allow additional cases for leniency:
47 %% <ul>
48 %% <li>atoms other than true, false, null will be considered UTF-8
49 %% strings (even as a proplist key)
50 %% </li>
51 %% <li>{json, IoList} will insert IoList directly into the output
52 %% with no validation
53 %% </li>
54 %% <li>{array, Array} will be encoded as Array
55 %% (legacy mochijson style)
56 %% </li>
57 %% <li>A non-empty raw proplist will be encoded as an object as long
58 %% as the first pair does not have an atom key of json, struct,
59 %% or array
60 %% </li>
61 %% </ul>
63 -module(lwes_mochijson2).
64 -author('bob@mochimedia.com').
65 -export([encoder/1, encode/1]).
66 -export([decoder/1, decode/1, decode/2]).
%% The double-quote character, spelled as a macro to placate syntax
%% highlighters.
-define(Q, $\").

%% Move the decoder N bytes forward on the current line.
-define(ADV_COL(S, N), S#decoder{column=S#decoder.column + (N),
                                 offset=S#decoder.offset + (N)}).

%% Move the decoder one byte forward on the current line.
-define(INC_COL(S), ?ADV_COL(S, 1)).

%% Move the decoder one byte forward onto the start of the next line.
-define(INC_LINE(S), S#decoder{line=S#decoder.line + 1,
                               column=1,
                               offset=S#decoder.offset + 1}).

%% Consume the single byte C: a newline starts a new line, anything else
%% just advances the column.
-define(INC_CHAR(S, C),
        case C of
            $\n -> ?INC_LINE(S);
            _ -> ?INC_COL(S)
        end).

%% Guard-safe test for the four whitespace bytes skipped between tokens.
-define(IS_WHITESPACE(C),
        (C =:= $\s orelse C =:= $\n orelse C =:= $\t orelse C =:= $\r)).
90 %% @type json_string() = atom | binary()
91 %% @type json_number() = integer() | float()
92 %% @type json_array() = [json_term()]
93 %% @type json_object() = {struct, [{json_string(), json_term()}]}
94 %% @type json_eep18_object() = {[{json_string(), json_term()}]}
95 %% @type json_iolist() = {json, iolist()}
96 %% @type json_term() = json_string() | json_number() | json_array() |
97 %% json_object() | json_eep18_object() | json_iolist()
%% Encoder settings.
%%   handler - fun applied to terms the encoder does not recognise
%%             (null means such terms make the encoder exit)
%%   utf8    - true emits non-ASCII characters as raw UTF-8, false
%%             escapes them as \uXXXX
-record(encoder, {
          handler = null,
          utf8 = false
         }).

%% Decoder state threaded through the tokenizer.
%%   object_hook - null | struct | eep18 | proplist | fun/1, applied to
%%                 every decoded object
%%   offset      - byte offset of the next unread byte of the input
%%   line/column - position bookkeeping updated alongside offset
%%   state       - what the parser expects next (null, any, key, comma,
%%                 trim)
-record(decoder, {
          object_hook = null,
          offset = 0,
          line = 1,
          column = 1,
          state = null
         }).
%% @spec encoder([encoder_option()]) -> function()
%% @type encoder_option() = {handler, function()} | {utf8, boolean()}
%% @doc Build a one-argument encoding fun configured by Options.
%% `{handler, Fun}' installs a fallback applied to terms that cannot be
%% encoded directly; `{utf8, true}' emits non-ASCII characters as raw UTF-8
%% instead of \uXXXX escapes (default false).
encoder(Options) ->
    Settings = parse_encoder_options(Options, #encoder{}),
    fun (Term) -> json_encode(Term, Settings) end.
%% @spec encode(json_term()) -> iolist()
%% @doc Encode the given term as JSON using the default encoder settings
%% (no handler, non-ASCII characters escaped). The result is iodata; pass
%% it through iolist_to_binary/1 when a flat binary is needed.
encode(Term) ->
    DefaultSettings = #encoder{},
    json_encode(Term, DefaultSettings).
%% @spec decoder([decoder_option()]) -> function()
%% @doc Build a one-argument decoding fun configured by Options.
%% Accepted options are `{object_hook, Fun}' and
%% `{format, struct | eep18 | proplist}'.
decoder(Options) ->
    Initial = parse_decoder_options(Options, #decoder{}),
    fun (Json) -> json_decode(Json, Initial) end.
%% @spec decode(iolist(), [{format, proplist | eep18 | struct}]) -> json_term()
%% @doc Decode the given iolist to Erlang terms, shaping JSON objects
%% according to the `format' option: `proplist' yields
%% [{binary(), json_term()}], `eep18' yields {[{binary(), json_term()}]},
%% and `struct' leaves them as {struct, PropList}.
decode(S, Options) ->
    Initial = parse_decoder_options(Options, #decoder{}),
    json_decode(S, Initial).
%% @spec decode(iolist()) -> json_term()
%% @doc Decode the given iolist (or binary) to Erlang terms with the
%% default decoder state, i.e. objects come back as {struct, PropList}.
decode(S) ->
    Initial = #decoder{},
    json_decode(S, Initial).
140 %% Internal API
%% Apply a list of encoder options, left to right, to an #encoder{} record.
%% Only {handler, Fun} and {utf8, Flag} are recognised; any other element
%% fails with function_clause.
parse_encoder_options([{handler, Fun} | More], Enc) ->
    parse_encoder_options(More, Enc#encoder{handler=Fun});
parse_encoder_options([{utf8, Flag} | More], Enc) ->
    parse_encoder_options(More, Enc#encoder{utf8=Flag});
parse_encoder_options([], Enc) ->
    Enc.
%% Apply a list of decoder options, left to right, to a #decoder{} record.
%% Both {object_hook, Fun} and {format, struct | eep18 | proplist} are
%% stored in the object_hook field, so the last one given wins. Any other
%% element fails with function_clause.
parse_decoder_options([{format, Shape} | More], Dec)
  when Shape =:= struct; Shape =:= eep18; Shape =:= proplist ->
    parse_decoder_options(More, Dec#decoder{object_hook=Shape});
parse_decoder_options([{object_hook, Fun} | More], Dec) ->
    parse_decoder_options(More, Dec#decoder{object_hook=Fun});
parse_decoder_options([], Dec) ->
    Dec.
%% Encode one Erlang term as JSON iodata.
%%
%% Clause order matters: the JSON literals true/false/null are matched
%% before the generic atom clause, and a list whose first element is a
%% pair (with a key other than struct, array or json) is treated as an
%% object before the generic list-as-array clause. Terms matching none of
%% the known shapes are passed to the configured handler and the result is
%% encoded in their place; with no handler the process exits with
%% {json_encode, {bad_term, Term}}.
json_encode(true, _Enc) ->
    <<"true">>;
json_encode(false, _Enc) ->
    <<"false">>;
json_encode(null, _Enc) ->
    <<"null">>;
json_encode(Int, _Enc) when is_integer(Int) ->
    integer_to_list(Int);
json_encode(Float, _Enc) when is_float(Float) ->
    lwes_mochinum:digits(Float);
json_encode(Str, Enc) when is_binary(Str) orelse is_atom(Str) ->
    json_encode_string(Str, Enc);
json_encode([{Key, _} | _] = Pairs, Enc)
  when Key =/= struct, Key =/= array, Key =/= json ->
    json_encode_proplist(Pairs, Enc);
json_encode({struct, Pairs}, Enc) when is_list(Pairs) ->
    json_encode_proplist(Pairs, Enc);
json_encode({Pairs}, Enc) when is_list(Pairs) ->
    json_encode_proplist(Pairs, Enc);
json_encode({}, Enc) ->
    json_encode_proplist([], Enc);
json_encode(Items, Enc) when is_list(Items) ->
    json_encode_array(Items, Enc);
json_encode({array, Items}, Enc) when is_list(Items) ->
    json_encode_array(Items, Enc);
json_encode({json, Raw}, _Enc) ->
    %% Pre-rendered JSON is inserted verbatim, with no validation.
    Raw;
json_encode(Other, #encoder{handler=null}) ->
    exit({json_encode, {bad_term, Other}});
json_encode(Other, #encoder{handler=Convert} = Enc) ->
    json_encode(Convert(Other), Enc).
%% Encode a list as a JSON array. Elements are encoded left to right and
%% the result is the flat iolist [$[, E1, $,, E2, ..., En, $]].
json_encode_array([], _Enc) ->
    <<"[]">>;
json_encode_array(Items, Enc) ->
    AddItem = fun (Item, RevOut) ->
                      [$,, json_encode(Item, Enc) | RevOut]
              end,
    %% The accumulator is built backwards and always starts with a
    %% separator, which is dropped here before the closing bracket goes on.
    [$, | RevBody] = lists:foldl(AddItem, [$[], Items),
    lists:reverse(RevBody, "]").
%% Encode a list of {Key, Value} pairs as a JSON object. Each key goes
%% through json_encode_string/2 (so atoms, binaries, integers and
%% character lists are all accepted) and is encoded before its value.
json_encode_proplist([], _Enc) ->
    <<"{}">>;
json_encode_proplist(Pairs, Enc) ->
    AddPair = fun ({Key, Value}, RevOut) ->
                      EncKey = json_encode_string(Key, Enc),
                      EncValue = json_encode(Value, Enc),
                      [$,, EncValue, $:, EncKey | RevOut]
              end,
    %% Built backwards with a leading separator that is dropped before the
    %% closing brace is appended.
    [$, | RevBody] = lists:foldl(AddPair, [${], Pairs),
    lists:reverse(RevBody, "}").
%% Encode an atom, binary, integer or list of code points as a quoted JSON
%% string. Input that needs no escaping is wrapped in quotes as-is;
%% otherwise it is converted to code points (atoms and binaries are read as
%% UTF-8) and escaped character by character.
json_encode_string(Atom, Enc) when is_atom(Atom) ->
    Chars = atom_to_list(Atom),
    case json_string_is_safe(Chars) of
        false ->
            json_encode_string_unicode(xmerl_ucs:from_utf8(Chars), Enc, [?Q]);
        true ->
            [?Q, Chars, ?Q]
    end;
json_encode_string(Bin, Enc) when is_binary(Bin) ->
    case json_bin_is_safe(Bin) of
        false ->
            json_encode_string_unicode(xmerl_ucs:from_utf8(Bin), Enc, [?Q]);
        true ->
            [?Q, Bin, ?Q]
    end;
json_encode_string(Int, _Enc) when is_integer(Int) ->
    %% Integers are only reached as object keys and become their decimal text.
    [?Q, integer_to_list(Int), ?Q];
json_encode_string(CodePoints, Enc) when is_list(CodePoints) ->
    case json_string_is_safe(CodePoints) of
        false ->
            json_encode_string_unicode(CodePoints, Enc, [?Q]);
        true ->
            [?Q, CodePoints, ?Q]
    end.
%% True when every element of the list can be copied into a JSON string
%% without escaping. The double quote, the backslash, anything in 0..31
%% and anything from 16#7f upwards are all unsafe.
json_string_is_safe([]) ->
    true;
json_string_is_safe([?Q | _]) ->
    false;
json_string_is_safe([$\\ | _]) ->
    false;
json_string_is_safe([C | _]) when C >= 0, C < $\s ->
    %% Control characters, including \b \f \n \r \t.
    false;
json_string_is_safe([C | _]) when C >= 16#7f ->
    false;
json_string_is_safe([_ | Rest]) ->
    json_string_is_safe(Rest).
%% True when every byte of the binary can be copied into a JSON string
%% without escaping: printable ASCII (16#20..16#7e) other than the double
%% quote and the backslash.
json_bin_is_safe(<<>>) ->
    true;
json_bin_is_safe(<<C, Rest/binary>>)
  when C >= $\s, C < 16#7f, C =/= ?Q, C =/= $\\ ->
    json_bin_is_safe(Rest);
json_bin_is_safe(<<_, _/binary>>) ->
    false.
%% Escape a list of code points into a quoted JSON string.
%%
%% Acc is the output so far in reverse order; callers seed it with the
%% opening quote. Quote, backslash and the common control characters get
%% their two-character escapes, other control characters become \u00XX, and
%% code points from 16#7f to 16#10FFFF are emitted either as raw UTF-8 or
%% as \uXXXX escapes depending on the encoder's utf8 flag. Anything else
%% exits with {json_encode, {bad_char, C}}.
%%
%% Escaping solidus is only useful when trying to protect against
%% "</script>" injection attacks, which are only possible when JSON is
%% inserted in-line into an HTML document. This encoder does not escape it,
%% so output inserted directly into HTML must be escaped by the caller.
json_encode_string_unicode([], _Enc, Acc) ->
    lists:reverse(Acc, [$\"]);
json_encode_string_unicode([?Q | Cs], Enc, Acc) ->
    json_encode_string_unicode(Cs, Enc, [?Q, $\\ | Acc]);
json_encode_string_unicode([$\\ | Cs], Enc, Acc) ->
    json_encode_string_unicode(Cs, Enc, [$\\, $\\ | Acc]);
json_encode_string_unicode([$\b | Cs], Enc, Acc) ->
    json_encode_string_unicode(Cs, Enc, [$b, $\\ | Acc]);
json_encode_string_unicode([$\f | Cs], Enc, Acc) ->
    json_encode_string_unicode(Cs, Enc, [$f, $\\ | Acc]);
json_encode_string_unicode([$\n | Cs], Enc, Acc) ->
    json_encode_string_unicode(Cs, Enc, [$n, $\\ | Acc]);
json_encode_string_unicode([$\r | Cs], Enc, Acc) ->
    json_encode_string_unicode(Cs, Enc, [$r, $\\ | Acc]);
json_encode_string_unicode([$\t | Cs], Enc, Acc) ->
    json_encode_string_unicode(Cs, Enc, [$t, $\\ | Acc]);
json_encode_string_unicode([C | Cs], Enc, Acc) when C >= 0, C < $\s ->
    json_encode_string_unicode(Cs, Enc, [unihex(C) | Acc]);
json_encode_string_unicode([C | Cs], Enc, Acc)
  when C >= 16#7f, C =< 16#10FFFF, Enc#encoder.utf8 ->
    json_encode_string_unicode(Cs, Enc, [xmerl_ucs:to_utf8(C) | Acc]);
json_encode_string_unicode([C | Cs], Enc, Acc)
  when C >= 16#7f, C =< 16#10FFFF, not Enc#encoder.utf8 ->
    json_encode_string_unicode(Cs, Enc, [unihex(C) | Acc]);
json_encode_string_unicode([C | Cs], Enc, Acc) when C < 16#7f ->
    json_encode_string_unicode(Cs, Enc, [C | Acc]);
json_encode_string_unicode([C | _], _Enc, _Acc) ->
    exit({json_encode, {bad_char, C}}).
%% Map a nibble (0..15) to its lowercase hexadecimal character.
hexdigit(N) when N >= 0, N =< 9 ->
    $0 + N;
hexdigit(N) when N =< 15 ->
    N + ($a - 10).
%% Render a code point as JSON \uXXXX escape text. Code points below
%% 16#10000 give a single six-character escape; those up to 16#10FFFF are
%% split into a UTF-16 surrogate pair and give a list of two escapes.
unihex(Code) when Code < 16#10000 ->
    <<N3:4, N2:4, N1:4, N0:4>> = <<Code:16>>,
    [$\\, $u, hexdigit(N3), hexdigit(N2), hexdigit(N1), hexdigit(N0)];
unihex(Code) when Code =< 16#10FFFF ->
    Offset = Code - 16#10000,
    High = 16#d800 bor ((Offset bsr 10) band 16#3ff),
    Low = 16#dc00 bor (Offset band 16#3ff),
    [unihex(High), unihex(Low)].
%% Decode a complete JSON document. List input is flattened to a binary
%% first. After the value has been read, the rest of the input must
%% tokenize straight to eof (only whitespace may follow), otherwise the
%% match below fails.
json_decode(IoList, Dec) when is_list(IoList) ->
    json_decode(iolist_to_binary(IoList), Dec);
json_decode(Bin, Dec) ->
    {Value, AfterValue} = decode1(Bin, Dec),
    {eof, _} = tokenize(Bin, AfterValue#decoder{state=trim}),
    Value.
%% Decode exactly one JSON value starting at the decoder's current offset.
%% Returns {Value, NewDecoder}. Scalars come straight from the tokenizer;
%% an opening bracket or brace hands over to the array or object decoder.
decode1(Bin, Dec=#decoder{state=null}) ->
    case tokenize(Bin, Dec#decoder{state=any}) of
        {{const, Scalar}, Next} ->
            {Scalar, Next};
        {start_array, Next} ->
            decode_array(Bin, Next);
        {start_object, Next} ->
            decode_object(Bin, Next)
    end.
%% Shape a freshly decoded object according to the decoder's object_hook.
%%
%% The decoder always builds {struct, PropList}. With no hook (null) or
%% the struct format that term is returned unchanged; eep18 unwraps it to
%% {PropList}; proplist returns the bare PropList; any other hook is
%% treated as a fun and applied to the {struct, PropList} term.
make_object(V, #decoder{object_hook=N}) when N =:= null orelse N =:= struct ->
    V;
make_object({struct, P}, #decoder{object_hook=eep18}) ->
    {P};
make_object({struct, P}, #decoder{object_hook=proplist}) ->
    P;
make_object(V, #decoder{object_hook=Hook}) ->
    Hook(V).
%% Decode the members of an object whose opening brace has already been
%% consumed, starting in the "expect a key" state with no pairs collected.
decode_object(Bin, Dec) ->
    ExpectKey = Dec#decoder{state=key},
    decode_object(Bin, ExpectKey, []).
%% Object decoding loop. Pairs accumulates {Key, Value} tuples in reverse.
%% In state `key' the next token is either the closing brace or a key,
%% which must be followed by a colon and a value; in state `comma' it is
%% either the closing brace or a comma leading to the next key. On the
%% closing brace the pairs are put back in order, wrapped as
%% {struct, Pairs} and passed through make_object/2.
decode_object(Bin, Dec=#decoder{state=key}, Pairs) ->
    case tokenize(Bin, Dec) of
        {end_object, AfterBrace} ->
            Object = make_object({struct, lists:reverse(Pairs)}, AfterBrace),
            {Object, AfterBrace#decoder{state=null}};
        {{const, Key}, AfterKey} ->
            {colon, AfterColon} = tokenize(Bin, AfterKey),
            {Value, AfterValue} = decode1(Bin, AfterColon#decoder{state=null}),
            decode_object(Bin, AfterValue#decoder{state=comma},
                          [{Key, Value} | Pairs])
    end;
decode_object(Bin, Dec=#decoder{state=comma}, Pairs) ->
    case tokenize(Bin, Dec) of
        {end_object, AfterBrace} ->
            Object = make_object({struct, lists:reverse(Pairs)}, AfterBrace),
            {Object, AfterBrace#decoder{state=null}};
        {comma, AfterComma} ->
            decode_object(Bin, AfterComma#decoder{state=key}, Pairs)
    end.
%% Decode the elements of an array whose opening bracket has already been
%% consumed, starting in the "expect any value" state with nothing collected.
decode_array(Bin, Dec) ->
    ExpectValue = Dec#decoder{state=any},
    decode_array(Bin, ExpectValue, []).
%% Array decoding loop. Items accumulates decoded elements in reverse.
%% In state `any' the next token is the closing bracket or the start of a
%% value (nested array, nested object or scalar); in state `comma' it is
%% the closing bracket or a comma leading to the next value.
decode_array(Bin, Dec=#decoder{state=any}, Items) ->
    case tokenize(Bin, Dec) of
        {end_array, AfterBracket} ->
            {lists:reverse(Items), AfterBracket#decoder{state=null}};
        {start_array, Inner} ->
            {Nested, AfterNested} = decode_array(Bin, Inner),
            decode_array(Bin, AfterNested#decoder{state=comma},
                         [Nested | Items]);
        {start_object, Inner} ->
            {Object, AfterObject} = decode_object(Bin, Inner),
            decode_array(Bin, AfterObject#decoder{state=comma},
                         [Object | Items]);
        {{const, Scalar}, AfterScalar} ->
            decode_array(Bin, AfterScalar#decoder{state=comma},
                         [Scalar | Items])
    end;
decode_array(Bin, Dec=#decoder{state=comma}, Items) ->
    case tokenize(Bin, Dec) of
        {end_array, AfterBracket} ->
            {lists:reverse(Items), AfterBracket#decoder{state=null}};
        {comma, AfterComma} ->
            decode_array(Bin, AfterComma#decoder{state=any}, Items)
    end.
%% Read a string token whose opening quote has already been consumed.
%%
%% The fast scanner is tried first. If it reaches the closing quote with
%% no backslash on the way, the string is sliced straight out of the
%% input. If it stops at a backslash, the bytes scanned so far seed the
%% accumulator of the slower escape-aware tokenize_string/3.
tokenize_string(Bin, Dec=#decoder{offset=Start}) ->
    case tokenize_string_fast(Bin, Start) of
        {escape, EscapePos} ->
            PrefixLen = EscapePos - Start,
            <<_:Start/binary, Prefix:PrefixLen/binary, _/binary>> = Bin,
            AtEscape = ?ADV_COL(Dec, PrefixLen),
            tokenize_string(Bin, AtEscape,
                            lists:reverse(binary_to_list(Prefix)));
        QuotePos ->
            StrLen = QuotePos - Start,
            <<_:Start/binary, Str:StrLen/binary, ?Q, _/binary>> = Bin,
            %% Skip the string body plus its closing quote.
            {{const, Str}, ?ADV_COL(Dec, StrLen + 1)}
    end.
%% Scan forward from byte offset O looking for the end of a string that
%% contains no escapes.
%%
%% Returns the offset of the closing quote when it is reached, or
%% {escape, Offset} at the first backslash so the caller can switch to the
%% escape-aware tokenizer. Bytes in between must form well-formed UTF-8
%% sequences of one to four bytes; anything else (including running off
%% the end of the input) throws invalid_utf8.
tokenize_string_fast(B, O) ->
    case B of
        <<_:O/binary, ?Q, _/binary>> ->
            %% Closing quote: report where it is.
            O;
        <<_:O/binary, $\\, _/binary>> ->
            {escape, O};
        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
            tokenize_string_fast(B, 1 + O);
        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
                C2 >= 128, C2 =< 191 ->
            tokenize_string_fast(B, 2 + O);
        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
                C2 >= 128, C2 =< 191,
                C3 >= 128, C3 =< 191 ->
            tokenize_string_fast(B, 3 + O);
        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
                C2 >= 128, C2 =< 191,
                C3 >= 128, C3 =< 191,
                C4 >= 128, C4 =< 191 ->
            tokenize_string_fast(B, 4 + O);
        _ ->
            throw(invalid_utf8)
    end.
%% Escape-aware string tokenizer. RevChars holds the bytes decoded so far
%% in reverse order.
%%
%% Handles the closing quote, the two-character escapes, \uXXXX escapes
%% (a high surrogate must be followed directly by a second \uXXXX escape;
%% the two are combined into a single code point) and raw UTF-8 sequences
%% of one to four bytes. Anything else throws invalid_utf8.
tokenize_string(Bin, Dec=#decoder{offset=Pos}, RevChars) ->
    case Bin of
        <<_:Pos/binary, ?Q, _/binary>> ->
            Str = iolist_to_binary(lists:reverse(RevChars)),
            {{const, Str}, ?INC_COL(Dec)};
        <<_:Pos/binary, $\\, $\", _/binary>> ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [$\" | RevChars]);
        <<_:Pos/binary, $\\, $\\, _/binary>> ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [$\\ | RevChars]);
        <<_:Pos/binary, $\\, $/, _/binary>> ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [$/ | RevChars]);
        <<_:Pos/binary, $\\, $b, _/binary>> ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [$\b | RevChars]);
        <<_:Pos/binary, $\\, $f, _/binary>> ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [$\f | RevChars]);
        <<_:Pos/binary, $\\, $n, _/binary>> ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [$\n | RevChars]);
        <<_:Pos/binary, $\\, $r, _/binary>> ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [$\r | RevChars]);
        <<_:Pos/binary, $\\, $t, _/binary>> ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [$\t | RevChars]);
        <<_:Pos/binary, $\\, $u, H3, H2, H1, H0, Rest/binary>> ->
            Unit = erlang:list_to_integer([H3, H2, H1, H0], 16),
            case (Unit > 16#D7FF) andalso (Unit < 16#DC00) of
                true ->
                    %% High surrogate: coalesce with the escape that follows.
                    <<$\\, $u, L3, L2, L1, L0, _/binary>> = Rest,
                    Unit2 = erlang:list_to_integer([L3, L2, L1, L0], 16),
                    Pair = <<Unit:16/big-unsigned-integer,
                             Unit2:16/big-unsigned-integer>>,
                    [CodePoint] = xmerl_ucs:from_utf16be(Pair),
                    RevChars1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint),
                                              RevChars),
                    tokenize_string(Bin, ?ADV_COL(Dec, 12), RevChars1);
                false ->
                    RevChars1 = lists:reverse(xmerl_ucs:to_utf8(Unit),
                                              RevChars),
                    tokenize_string(Bin, ?ADV_COL(Dec, 6), RevChars1)
            end;
        <<_:Pos/binary, B1, _/binary>> when B1 < 128 ->
            tokenize_string(Bin, ?INC_CHAR(Dec, B1), [B1 | RevChars]);
        <<_:Pos/binary, B1, B2, _/binary>>
          when B1 >= 194, B1 =< 223,
               B2 >= 128, B2 =< 191 ->
            tokenize_string(Bin, ?ADV_COL(Dec, 2), [B2, B1 | RevChars]);
        <<_:Pos/binary, B1, B2, B3, _/binary>>
          when B1 >= 224, B1 =< 239,
               B2 >= 128, B2 =< 191,
               B3 >= 128, B3 =< 191 ->
            tokenize_string(Bin, ?ADV_COL(Dec, 3), [B3, B2, B1 | RevChars]);
        <<_:Pos/binary, B1, B2, B3, B4, _/binary>>
          when B1 >= 240, B1 =< 244,
               B2 >= 128, B2 =< 191,
               B3 >= 128, B3 =< 191,
               B4 >= 128, B4 =< 191 ->
            tokenize_string(Bin, ?ADV_COL(Dec, 4),
                            [B4, B3, B2, B1 | RevChars]);
        _ ->
            throw(invalid_utf8)
    end.
%% Read a number token at the decoder's current offset and convert its
%% text to an integer or a float, depending on what the scanner recognised.
tokenize_number(Bin, Dec) ->
    case tokenize_number(Bin, sign, Dec, []) of
        {{int, Digits}, Next} ->
            {{const, list_to_integer(Digits)}, Next};
        {{float, Text}, Next} ->
            {{const, list_to_float(Text)}, Next}
    end.
%% Number scanner, written as a state machine over the input bytes.
%% Rev collects the characters of the number in reverse order.
%%
%% States: sign (optional leading minus), int (first digit), int1 (further
%% integer digits), frac (optional fraction or exponent), frac1 (further
%% fraction digits), esign (optional exponent sign), eint (first exponent
%% digit), eint1 (further exponent digits). Returns {{int, Text}, Decoder}
%% or {{float, Text}, Decoder}. When an exponent follows the integer part
%% directly, ".0" is inserted so that list_to_float/1 accepts the text.
tokenize_number(Bin, sign, Dec=#decoder{offset=Pos}, []) ->
    case Bin of
        <<_:Pos/binary, $-, _/binary>> ->
            tokenize_number(Bin, int, ?INC_COL(Dec), [$-]);
        _ ->
            tokenize_number(Bin, int, Dec, [])
    end;
tokenize_number(Bin, int, Dec=#decoder{offset=Pos}, Rev) ->
    case Bin of
        <<_:Pos/binary, $0, _/binary>> ->
            %% A leading zero cannot be followed by more integer digits.
            tokenize_number(Bin, frac, ?INC_COL(Dec), [$0 | Rev]);
        <<_:Pos/binary, D, _/binary>> when D >= $1, D =< $9 ->
            tokenize_number(Bin, int1, ?INC_COL(Dec), [D | Rev])
    end;
tokenize_number(Bin, int1, Dec=#decoder{offset=Pos}, Rev) ->
    case Bin of
        <<_:Pos/binary, D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, int1, ?INC_COL(Dec), [D | Rev]);
        _ ->
            tokenize_number(Bin, frac, Dec, Rev)
    end;
tokenize_number(Bin, frac, Dec=#decoder{offset=Pos}, Rev) ->
    case Bin of
        <<_:Pos/binary, $., D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, frac1, ?ADV_COL(Dec, 2), [D, $. | Rev]);
        <<_:Pos/binary, Exp, _/binary>> when Exp =:= $e; Exp =:= $E ->
            tokenize_number(Bin, esign, ?INC_COL(Dec), [$e, $0, $. | Rev]);
        _ ->
            {{int, lists:reverse(Rev)}, Dec}
    end;
tokenize_number(Bin, frac1, Dec=#decoder{offset=Pos}, Rev) ->
    case Bin of
        <<_:Pos/binary, D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, frac1, ?INC_COL(Dec), [D | Rev]);
        <<_:Pos/binary, Exp, _/binary>> when Exp =:= $e; Exp =:= $E ->
            tokenize_number(Bin, esign, ?INC_COL(Dec), [$e | Rev]);
        _ ->
            {{float, lists:reverse(Rev)}, Dec}
    end;
tokenize_number(Bin, esign, Dec=#decoder{offset=Pos}, Rev) ->
    case Bin of
        <<_:Pos/binary, Sign, _/binary>> when Sign =:= $-; Sign =:= $+ ->
            tokenize_number(Bin, eint, ?INC_COL(Dec), [Sign | Rev]);
        _ ->
            tokenize_number(Bin, eint, Dec, Rev)
    end;
tokenize_number(Bin, eint, Dec=#decoder{offset=Pos}, Rev) ->
    case Bin of
        <<_:Pos/binary, D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, eint1, ?INC_COL(Dec), [D | Rev])
    end;
tokenize_number(Bin, eint1, Dec=#decoder{offset=Pos}, Rev) ->
    case Bin of
        <<_:Pos/binary, D, _/binary>> when D >= $0, D =< $9 ->
            tokenize_number(Bin, eint1, ?INC_COL(Dec), [D | Rev]);
        _ ->
            {{float, lists:reverse(Rev)}, Dec}
    end.
%% Return the next token at the decoder's current offset together with the
%% advanced decoder. Whitespace is skipped. Tokens are start_object,
%% end_object, start_array, end_array, comma, colon, {const, Value} for
%% literals, strings and numbers, and eof. End of input is only accepted
%% while the decoder is in the trim state.
tokenize(Bin, Dec=#decoder{offset=Pos}) ->
    case Bin of
        <<_:Pos/binary, Ws, _/binary>> when ?IS_WHITESPACE(Ws) ->
            tokenize(Bin, ?INC_CHAR(Dec, Ws));
        <<_:Pos/binary, ${, _/binary>> ->
            {start_object, ?INC_COL(Dec)};
        <<_:Pos/binary, $}, _/binary>> ->
            {end_object, ?INC_COL(Dec)};
        <<_:Pos/binary, $[, _/binary>> ->
            {start_array, ?INC_COL(Dec)};
        <<_:Pos/binary, $], _/binary>> ->
            {end_array, ?INC_COL(Dec)};
        <<_:Pos/binary, $,, _/binary>> ->
            {comma, ?INC_COL(Dec)};
        <<_:Pos/binary, $:, _/binary>> ->
            {colon, ?INC_COL(Dec)};
        <<_:Pos/binary, "null", _/binary>> ->
            {{const, null}, ?ADV_COL(Dec, 4)};
        <<_:Pos/binary, "true", _/binary>> ->
            {{const, true}, ?ADV_COL(Dec, 4)};
        <<_:Pos/binary, "false", _/binary>> ->
            {{const, false}, ?ADV_COL(Dec, 5)};
        <<_:Pos/binary, ?Q, _/binary>> ->
            %% Step over the opening quote before reading the string body.
            tokenize_string(Bin, ?INC_COL(Dec));
        <<_:Pos/binary, C, _/binary>> when (C >= $0 andalso C =< $9)
                                           orelse C =:= $- ->
            tokenize_number(Bin, Dec);
        <<_:Pos/binary>> ->
            %% Nothing left: only legal when trailing input is being trimmed.
            trim = Dec#decoder.state,
            {eof, Dec}
    end.
601 %% Tests
603 -ifdef(TEST).
604 -include_lib("eunit/include/eunit.hrl").
607 %% testing constructs borrowed from the Yaws JSON implementation.
609 %% Create an object from a list of Key/Value pairs.
%% An empty JSON object in struct form.
obj_new() ->
    NoProps = [],
    {struct, NoProps}.
%% True when every key of the struct's property list is a binary. The
%% predicate has no clause for other keys, so a non-binary key crashes
%% with function_clause instead of returning false.
is_obj({struct, Props}) ->
    lists:all(fun ({Key, _Value}) when is_binary(Key) -> true end, Props).
%% Wrap a list of {BinaryKey, Value} pairs as a struct-form object,
%% asserting that it is well formed.
obj_from_list(Props) ->
    ?assert(is_obj({struct, Props})),
    {struct, Props}.
623 %% Test for equivalence of Erlang terms.
624 %% Due to arbitrary order of construction, equivalent objects might
625 %% compare unequal as erlang terms, so we need to carefully recurse
626 %% through aggregates (tuples and objects).
%% Structural equivalence of two decoded JSON terms. Objects are compared
%% without regard to key order, lists element by element, numbers with
%% numeric equality (so 1 and 1.0 are equivalent), binaries by value and
%% the atoms true/false/null only with themselves. Any other combination
%% has no clause and crashes.
equiv({struct, PropsA}, {struct, PropsB}) ->
    equiv_object(PropsA, PropsB);
equiv(ListA, ListB) when is_list(ListA), is_list(ListB) ->
    equiv_list(ListA, ListB);
equiv(NumA, NumB) when is_number(NumA), is_number(NumB) ->
    NumA == NumB;
equiv(BinA, BinB) when is_binary(BinA), is_binary(BinB) ->
    BinA == BinB;
equiv(true, true) ->
    true;
equiv(false, false) ->
    true;
equiv(null, null) ->
    true.
636 %% Object representation and traversal order is unknown.
637 %% Use the sledgehammer and sort property lists.
%% Compare two property lists regardless of order: both are sorted by key
%% and compared pairwise. Returns true on success and crashes (badmatch)
%% when some pair differs; lists of different length crash in lists:zip/2.
equiv_object(PropsA, PropsB) ->
    SortedA = lists:keysort(1, PropsA),
    SortedB = lists:keysort(1, PropsB),
    SamePair = fun ({{KeyA, ValA}, {KeyB, ValB}}) ->
                       equiv(KeyA, KeyB) and equiv(ValA, ValB)
               end,
    true = lists:all(SamePair, lists:zip(SortedA, SortedB)).
647 %% Recursively compare tuple elements for equivalence.
%% Compare two lists element by element, stopping at the first pair that
%% is not equivalent. Lists of different length have no matching clause.
equiv_list([HeadA | TailA], [HeadB | TailB]) ->
    equiv(HeadA, HeadB) andalso equiv_list(TailA, TailB);
equiv_list([], []) ->
    true.
%% Smoke test: a float/integer array, and a surrogate-pair escape that
%% must decode to the four-byte UTF-8 form of the code point.
decode_test() ->
    Numbers = decode(<<"[1199344435545.0,1]">>),
    [1199344435545.0, 1] = Numbers,
    SurrogatePair = [34, "\\ud835", "\\udf15", 34],
    <<16#F0,16#9D,16#9C,16#95>> = decode(SurrogatePair).
%% Run every Erlang/JSON vector through test_one/2, numbering from 1.
e2j_vec_test() ->
    Vectors = e2j_test_vec(utf8),
    test_one(Vectors, 1).
%% Check each {ErlangTerm, JsonText} vector in turn: the JSON text must
%% decode to something equivalent to the term, and the term must survive
%% an encode/decode round trip. N is the 1-based index of the current
%% vector, kept only as a debugging aid. Returns ok once every vector has
%% passed; a failing vector crashes with badmatch.
test_one([], _N) ->
    ok;
test_one([{E, J} | Rest], N) ->
    true = equiv(E, decode(J)),
    true = equiv(E, decode(encode(E))),
    test_one(Rest, 1+N).
%% Test vectors as {ErlangTerm, JsonText} pairs. The JSON text must decode
%% to a term equivalent to the Erlang side.
e2j_test_vec(utf8) ->
    [
     {1, "1"},
     {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
     {-1, "-1"},
     {-3.1416, "-3.14160"},
     {12.0e10, "1.20000e+11"},
     {1.234E+10, "1.23400e+10"},
     {-1.234E-10, "-1.23400e-10"},
     {10.0, "1.0e+01"},
     {123.456, "1.23456E+2"},
     {10.0, "1e1"},
     {<<"foo">>, "\"foo\""},
     {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
     {<<"">>, "\"\""},
     {<<"\n\n\n">>, "\"\\n\\n\\n\""},
     {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
     {obj_new(), "{}"},
     {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
     {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
      "{\"foo\":\"bar\",\"baz\":123}"},
     {[], "[]"},
     {[[]], "[[]]"},
     {[1, <<"foo">>], "[1,\"foo\"]"},

     %% json array in a json object
     {obj_from_list([{<<"foo">>, [123]}]),
      "{\"foo\":[123]}"},

     %% json object in a json object
     {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
      "{\"foo\":{\"bar\":true}}"},

     %% fold evaluation order
     {obj_from_list([{<<"foo">>, []},
                     {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
                     {<<"alice">>, <<"bob">>}]),
      "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},

     %% json object in a json array
     {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
      "[-123,\"foo\",{\"bar\":[]},null]"}
    ].
%% The utf8 encoder option: by default non-ASCII characters are escaped as
%% \uXXXX, with {utf8, true} they are emitted as raw UTF-8 byte lists.
%% Control characters are escaped either way.
encoder_utf8_test() ->
    Input = <<1,"\321\202\320\265\321\201\321\202">>,
    %% safe conversion case (default)
    Escaped = encode(Input),
    [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = Escaped,
    %% raw utf8 output (optional)
    RawEncode = encoder([{utf8, true}]),
    [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
        RawEncode(Input).
%% UTF-8 validation in the decoder: well-formed multi-byte sequences must
%% decode to the expected code point, and malformed ones must throw
%% invalid_utf8 from decode and make encode fail.
input_validation_test() ->
    Good = [
            {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound
            {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro
            {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius
           ],
    lists:foreach(fun({CodePoint, UTF8}) ->
                          Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
                          Expect = decode(UTF8)
                  end, Good),

    Bad = [
           %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
           <<?Q, 16#80, ?Q>>,
           %% missing continuations, last byte in each should be 80-BF
           <<?Q, 16#C2, 16#7F, ?Q>>,
           <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
           <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
           %% we don't support code points > 10FFFF per RFC 3629
           <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>,
           %% escape characters trigger a different code path
           <<?Q, $\\, $\n, 16#80, ?Q>>
          ],
    lists:foreach(
      fun(X) ->
              ok = try decode(X) catch invalid_utf8 -> ok end,
              %% could be {ucs,{bad_utf8_character_code}} or
              %% {json_encode,{bad_char,_}}
              {'EXIT', _} = (catch encode(X))
      end, Bad).
%% {json, IoData} is inserted into the output verbatim, both at the top
%% level and as an object value.
inline_json_test() ->
    ?assertEqual(<<"\"iodata iodata\"">>,
                 iolist_to_binary(
                   encode({json, [<<"\"iodata">>, " iodata\""]}))),
    ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]},
                 decode(
                   encode({struct,
                           [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))),
    ok.
%% A code point above 16#FFFF is encoded as a surrogate-pair escape and
%% survives an encode/decode round trip.
big_unicode_test() ->
    UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)),
    ?assertEqual(
       <<"\"\\ud834\\udd20\"">>,
       iolist_to_binary(encode(UTF8Seq))),
    ?assertEqual(
       UTF8Seq,
       decode(iolist_to_binary(encode(UTF8Seq)))),
    ok.
%% decoder/1 with no options yields struct objects; with an object_hook
%% the hook's result replaces each decoded object.
custom_decoder_test() ->
    ?assertEqual(
       {struct, [{<<"key">>, <<"value">>}]},
       (decoder([]))("{\"key\": \"value\"}")),
    F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end,
    ?assertEqual(
       win,
       (decoder([{object_hook, F}]))("{\"key\": \"value\"}")),
    ok.
%% true/false/null map to the JSON literals in both directions; any other
%% atom is encoded as a string, with non-ASCII content escaped.
atom_test() ->
    %% JSON native atoms
    [begin
         ?assertEqual(A, decode(atom_to_list(A))),
         ?assertEqual(iolist_to_binary(atom_to_list(A)),
                      iolist_to_binary(encode(A)))
     end || A <- [true, false, null]],
    %% Atom to string
    ?assertEqual(
       <<"\"foo\"">>,
       iolist_to_binary(encode(foo))),
    ?assertEqual(
       <<"\"\\ud834\\udd20\"">>,
       iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))),
    ok.
%% Object keys may be atoms, binaries, character lists, code point lists
%% or integers, in struct form and (where unambiguous) as a raw proplist.
key_encode_test() ->
    %% Some forms are accepted as keys that would not be strings in other
    %% cases
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode({struct, [{foo, 1}]}))),
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))),
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode({struct, [{"foo", 1}]}))),
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode([{foo, 1}]))),
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode([{<<"foo">>, 1}]))),
    ?assertEqual(
       <<"{\"foo\":1}">>,
       iolist_to_binary(encode([{"foo", 1}]))),
    ?assertEqual(
       <<"{\"\\ud834\\udd20\":1}">>,
       iolist_to_binary(
         encode({struct, [{[16#0001d120], 1}]}))),
    ?assertEqual(
       <<"{\"1\":1}">>,
       iolist_to_binary(encode({struct, [{1, 1}]}))),
    ok.
%% Characters that need escaping are reported unsafe by both safety
%% checks and still round-trip through encode/decode.
unsafe_chars_test() ->
    Chars = "\"\\\b\f\n\r\t",
    [begin
         ?assertEqual(false, json_string_is_safe([C])),
         ?assertEqual(false, json_bin_is_safe(<<C>>)),
         ?assertEqual(<<C>>, decode(encode(<<C>>)))
     end || C <- Chars],
    ?assertEqual(
       false,
       json_string_is_safe([16#0001d120])),
    ?assertEqual(
       false,
       json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))),
    ?assertEqual(
       [16#0001d120],
       xmerl_ucs:from_utf8(
         binary_to_list(
           decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))),
    ?assertEqual(
       false,
       json_string_is_safe([16#110000])),
    ?assertEqual(
       false,
       json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))),
    %% solidus can be escaped but isn't unsafe by default
    ?assertEqual(
       <<"/">>,
       decode(<<"\"\\/\"">>)),
    ok.
%% Bare integers decode to Erlang integers, including a lone zero.
int_test() ->
    ?assertEqual(0, decode("0")),
    ?assertEqual(1, decode("1")),
    ?assertEqual(11, decode("11")),
    ok.
%% Integers far beyond 64 bits are encoded as their exact decimal text.
large_int_test() ->
    ?assertEqual(<<"-2147483649214748364921474836492147483649">>,
        iolist_to_binary(encode(-2147483649214748364921474836492147483649))),
    ?assertEqual(<<"2147483649214748364921474836492147483649">>,
        iolist_to_binary(encode(2147483649214748364921474836492147483649))),
    ok.
%% Floats just outside the 32-bit integer range keep their ".0" suffix
%% when encoded.
float_test() ->
    ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))),
    ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))),
    ok.
%% Without a handler an unencodable term makes the encoder exit with
%% {json_encode, {bad_term, Term}}; with a handler the handler's result is
%% encoded in its place.
handler_test() ->
    ?assertExit(
       {json_encode, {bad_term, {x, y}}},
       encode({x, y})),
    F = fun ({x,y}) -> [] end,
    ?assertEqual(
       <<"[]">>,
       iolist_to_binary((encoder([{handler, F}]))({x, y}))),
    ok.
%% Generator: every spelling of an empty object encodes to "{}".
encode_empty_test_() ->
    Cases = [{"eep18 {}", {}},
             {"eep18 {[]}", {[]}},
             {"{struct, []}", {struct, []}}],
    [{Title, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(Term)))}
     || {Title, Term} <- Cases].
%% Generator: whatever object format the decoder is asked to produce, the
%% encoder must turn it back into the same JSON text.
encode_test_() ->
    Pairs = [{<<"k">>, <<"v">>}],
    Expected = iolist_to_binary(encode({struct, Pairs})),
    Formats = [struct, eep18, proplist],
    [{atom_to_list(Format),
      ?_assertEqual(Expected,
                    iolist_to_binary(
                      encode(decode(Expected, [{format, Format}]))))}
     || Format <- Formats].
%% Generator: each decode format option yields the documented object shape.
format_test_() ->
    Pairs = [{<<"k">>, <<"v">>}],
    Json = iolist_to_binary(encode({struct, Pairs})),
    Shapes = [{struct, {struct, Pairs}},
              {eep18, {Pairs}},
              {proplist, Pairs}],
    [{atom_to_list(Format),
      ?_assertEqual(Shape, decode(Json, [{format, Format}]))}
     || {Format, Shape} <- Shapes].
912 -endif.