util.encodings: Spell out all IDNA 2008 options ICU has
[prosody.git] / util / json.lua
bloba750da2e6ea834060e4443beb8f52bf35ee40c0c
1 -- Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain
4 --
5 -- This project is MIT/X11 licensed. Please see the
6 -- COPYING file in the source package for more information.
7 --
9 local type = type;
10 local t_insert, t_concat, t_remove = table.insert, table.concat, table.remove;
11 local s_char = string.char;
12 local tostring, tonumber = tostring, tonumber;
13 local pairs, ipairs, spairs = pairs, ipairs, require "util.iterators".sorted_pairs;
14 local next = next;
15 local getmetatable, setmetatable = getmetatable, setmetatable;
16 local print = print;
18 local has_array, array = pcall(require, "util.array");
19 local array_mt = has_array and getmetatable(array()) or {};
--module("json")
-- Table holding the public functions of this file.
local module = {};

-- Sentinel that stands in for JSON null, since nil cannot be stored as a
-- table value. tostring(null) yields "null".
local null_mt = { __tostring = function() return "null"; end; };
local null = setmetatable({}, null_mt);
module.null = null;
-- Maps a raw character to the text that represents it inside a JSON string.
local escapes = {
	["\""] = "\\\"";
	["\\"] = "\\\\";
	["\b"] = "\\b";
	["\f"] = "\\f";
	["\n"] = "\\n";
	["\r"] = "\\r";
	["\t"] = "\\t";
};
-- Control characters 0x00-0x1F that have no short form get a \uXXXX escape.
for code = 0, 31 do
	local ch = s_char(code);
	if escapes[ch] == nil then
		escapes[ch] = ("\\u%.4X"):format(code);
	end
end
-- Encodes the code point `code` as a UTF-8 byte string of one, two or three
-- bytes. Every value from 0x800 upwards takes the three-byte form, so code
-- points above 0xFFFF are not encoded correctly by this function.
local function codepoint_to_utf8(code)
	if code < 0x80 then
		return s_char(code);
	end
	local low6 = code % 64; -- payload of the last continuation byte
	if code >= 0x800 then
		local low12 = code % 4096;
		local mid6 = (low12 - low6) / 64;
		local top4 = (code - low12) / 4096;
		return s_char(0xE0 + top4, 0x80 + mid6, 0x80 + low6);
	end
	local top5 = (code - low6) / 64;
	return s_char(0xC0 + top5, 0x80 + low6);
end
-- Set of Lua type names that the encoder accepts for keys and values.
local valid_types = {};
for _, type_name in ipairs({ "number", "string", "table", "boolean" }) do
	valid_types[type_name] = true;
end
-- Key names reserved by the encoder for non-string keys ("__hash") and the
-- sequence part ("__array") of a mixed table.
local special_keys = { ["__array"] = true, ["__hash"] = true };
-- Forward declarations: the save functions below call one another.
local simplesave, tablesave, arraysave, stringsave;

-- Appends the string `o` to `buffer` as a double-quoted JSON string literal.
-- Each character with an entry in `escapes` is replaced by that entry; all
-- other characters are copied unchanged.
function stringsave(o, buffer)
	-- FIXME do proper utf-8 and binary data detection
	local escaped = o:gsub(".", escapes); -- single target drops gsub's match count
	t_insert(buffer, "\"" .. escaped .. "\"");
end
-- Appends the sequence part of `o` (the elements visited by ipairs) to
-- `buffer` as a JSON array. Keys outside the sequence are ignored.
--
-- The separator is written *before* every element except the first, so
-- nothing ever has to be removed again. The previous approach of removing a
-- trailing "," deleted the opening "[" whenever `o` had keys but no
-- sequence part, e.g. { a = 1 }.
function arraysave(o, buffer)
	t_insert(buffer, "[");
	local first = true;
	for _, v in ipairs(o) do
		if first then
			first = false;
		else
			t_insert(buffer, ",");
		end
		simplesave(v, buffer);
	end
	t_insert(buffer, "]");
end
-- Serialises the table `o` into `buffer`.
--
-- A table that holds nothing but a sequence is written as a JSON array.
-- Anything else (including an empty table) is written as a JSON object:
--   * string keys not listed in special_keys become ordinary members;
--   * every other encodable key is written as alternating key, value
--     entries in a "__hash" array member;
--   * the sequence part, if any, is written as an "__array" member.
-- Values whose type is not in valid_types are skipped, except inside the
-- sequence part, which is copied as-is.
-- When buffer.ordered is set, the string keys are iterated with spairs.
function tablesave(o, buffer)
	-- Sequence part of `o`, copied in ipairs order.
	local seq = {};
	for idx, item in ipairs(o) do
		seq[idx] = item;
	end

	local named, extra = {}, {};
	for key, value in pairs(o) do
		if valid_types[type(value)] or value == null then
			local key_type = type(key);
			if key_type == "string" and not special_keys[key] then
				named[key] = value;
			elseif (valid_types[key_type] or key == null) and seq[key] == nil then
				extra[key] = value;
			end
		end
	end

	local has_seq = next(seq) ~= nil;
	local has_extra = next(extra) ~= nil;

	-- Only a sequence and nothing else: a plain JSON array is enough.
	if has_seq and not has_extra and next(named) == nil then
		arraysave(seq, buffer);
		return;
	end

	t_insert(buffer, "{");
	local mark = #buffer; -- buffer length immediately after "{"
	local iterate = buffer.ordered and spairs or pairs;
	for key, value in iterate(named) do
		stringsave(key, buffer);
		t_insert(buffer, ":");
		simplesave(value, buffer);
		t_insert(buffer, ",");
	end
	if has_extra then
		t_insert(buffer, "\"__hash\":[");
		for key, value in pairs(extra) do
			simplesave(key, buffer);
			t_insert(buffer, ",");
			simplesave(value, buffer);
			t_insert(buffer, ",");
		end
		t_remove(buffer); -- trailing "," inside the __hash array
		t_insert(buffer, "]");
		t_insert(buffer, ",");
	end
	if has_seq then
		t_insert(buffer, "\"__array\":");
		arraysave(seq, buffer);
		t_insert(buffer, ",");
	end
	-- Every member above ends with a separate "," entry; drop the last one
	-- unless nothing at all was written after "{".
	if #buffer ~= mark then t_remove(buffer); end
	t_insert(buffer, "}");
end
-- Appends the JSON representation of any Lua value `o` to `buffer`.
-- The null sentinel and values of unsupported types are written as "null".
-- Tables whose metatable is array_mt go through arraysave, all other tables
-- through tablesave.
function simplesave(o, buffer)
	if o == null then
		t_insert(buffer, "null");
		return;
	end
	local kind = type(o);
	if kind == "table" then
		if getmetatable(o) == array_mt then
			arraysave(o, buffer);
		else
			tablesave(o, buffer);
		end
	elseif kind == "string" then
		stringsave(o, buffer);
	elseif kind == "number" then
		t_insert(buffer, tostring(o));
	elseif kind == "boolean" then
		t_insert(buffer, o and "true" or "false");
	else
		t_insert(buffer, "null");
	end
end
-- Returns `obj` serialised as a JSON string.
function module.encode(obj)
	local buffer = {};
	simplesave(obj, buffer);
	return t_concat(buffer);
end
-- Like module.encode, but marks the buffer as ordered so that tablesave
-- iterates string keys with sorted_pairs instead of pairs.
function module.encode_ordered(obj)
	local buffer = { ordered = true };
	simplesave(obj, buffer);
	return t_concat(buffer);
end
-- Returns the sequence part of `obj` serialised as a JSON array, regardless
-- of the table's metatable.
function module.encode_array(obj)
	local buffer = {};
	arraysave(obj, buffer);
	return t_concat(buffer);
end
167 -----------------------------------
-- Returns the position of the first character at or after `index` that is
-- not a space, tab, CR or LF. When no such character exists, `index` itself
-- is returned unchanged.
local function _skip_whitespace(json, index)
	local pos = json:find("[^ \t\r\n]", index);
	if pos then
		return pos;
	end
	return index;
end
-- Undoes the "__array" / "__hash" packing that the encoder applies to
-- tables with a sequence part or with non-string keys.
--
--   * obj.__array, when it is a table, has its elements appended to obj.
--   * obj.__hash, when it is a table, is read as alternating key, value
--     entries that are assigned into obj. A trailing key with no value is
--     dropped.
--
-- Either member is left untouched when it is not a table. Decoded input
-- such as {"__array": 5} would otherwise reach ipairs() with a non-table
-- argument and raise a Lua error.
-- Returns `obj` (modified in place).
local function _fixobject(obj)
	local __array = obj.__array;
	if type(__array) == "table" then
		obj.__array = nil;
		for _, v in ipairs(__array) do
			obj[#obj + 1] = v;
		end
	end
	local __hash = obj.__hash;
	if type(__hash) == "table" then
		obj.__hash = nil;
		local k;
		for _, v in ipairs(__hash) do
			if k ~= nil then
				obj[k] = v; k = nil;
			else
				k = v;
			end
		end
	end
	return obj;
end
-- Forward declarations for the mutually recursive readers.
local _readvalue, _readstring;

-- Parses a JSON object whose opening "{" is at `index`.
-- Returns the resulting table and the position just past the closing "}",
-- or nil plus an error message.
--
-- A "}" found where a key is expected ends the object; this covers both the
-- empty object and an object with a trailing comma. That exit now runs
-- _fixobject as well, so "__array" / "__hash" members are unpacked the same
-- way as on the normal exit.
local function _readobject(json, index)
	local o = {};
	while true do
		local key, val;
		index = _skip_whitespace(json, index + 1);
		local first = json:byte(index);
		if first ~= 0x22 then -- "\""
			if first == 0x7d then return _fixobject(o), index + 1; end -- "}"
			return nil, "key expected";
		end
		key, index = _readstring(json, index);
		if key == nil then return nil, index; end
		index = _skip_whitespace(json, index);
		if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":"
		val, index = _readvalue(json, index + 1);
		if val == nil then return nil, index; end
		o[key] = val;
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x7d then return _fixobject(o), index + 1; end -- "}"
		if b ~= 0x2c then return nil, "object eof"; end -- ","
	end
end
-- Parses a JSON array whose opening "[" is at `index`.
-- Returns a table carrying the array_mt metatable and the position just
-- past the closing "]", or nil plus an error message.
local function _readarray(json, index)
	local a = {};
	-- Detect the empty array up front, after skipping whitespace, so that
	-- "[ ]" is accepted as well as "[]".
	local first = _skip_whitespace(json, index + 1);
	if json:byte(first) == 0x5d then -- "]"
		return setmetatable(a, array_mt), first + 1;
	end
	while true do
		local val;
		-- `index` is at "[" or "," here; the value starts after it
		val, index = _readvalue(json, index + 1);
		if val == nil then return val, index; end
		t_insert(a, val);
		index = _skip_whitespace(json, index);
		local b = json:byte(index);
		if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
		if b ~= 0x2c then return nil, "array eof"; end -- ","
	end
end
-- Error flag shared by the string-unescaping code in this file.
local _unescape_error;

-- gsub callback for an escaped UTF-16 surrogate pair, i.e. the twelve
-- characters "\uXXXX\uXXXX". Returns the four-byte UTF-8 encoding of the
-- code point that the pair represents.
local function _unescape_surrogate_func(x)
	local lead = tonumber(x:sub(3, 6), 16);
	local trail = tonumber(x:sub(9, 12), 16);
	-- 0x35FDC00 == 0xD800 * 0x400 + 0xDC00 - 0x10000
	local rest = lead * 0x400 + trail - 0x35FDC00;
	-- Peel off three 6-bit groups, lowest first; what is left over goes
	-- into the lead byte.
	local bytes = {};
	for pos = 4, 2, -1 do
		local low6 = rest % 64;
		bytes[pos] = 0x80 + low6;
		rest = (rest - low6) / 64;
	end
	bytes[1] = 0xF0 + rest;
	return s_char(bytes[1], bytes[2], bytes[3], bytes[4]);
end
-- gsub callback for a single "\uXXXX" escape (`x` may be shorter when the
-- escape is truncated). Returns the UTF-8 encoding of the code point.
-- Sets _unescape_error when four hex digits are missing, in which case
-- nothing is returned and gsub keeps the original text, or when the value
-- lies in the surrogate range 0xD800-0xDFFF.
local function _unescape_func(x)
	local hex = x:match("%x%x%x%x", 3);
	if not hex then
		_unescape_error = true;
		return;
	end
	local codepoint = tonumber(hex, 16);
	if codepoint >= 0xD800 and codepoint <= 0xDFFF then
		_unescape_error = true; -- bad surrogate pair
	end
	return codepoint_to_utf8(codepoint);
end
-- Parses a JSON string whose opening quote is at `index`.
-- Returns the decoded string and the position just past the closing quote,
-- or nil plus an error message.
-- The string is taken to end at the next double quote; this relies on
-- module.decode having rewritten \" to a \uXXXX escape beforehand.
function _readstring(json, index)
	local start = index + 1;
	local endindex = json:find("\"", start, true);
	if not endindex then
		return nil, "string eof";
	end
	local s = json:sub(start, endindex - 1);
	--if s:find("[%z-\31]") then return nil, "control char in string"; end
	-- FIXME handle control characters
	_unescape_error = nil;
	-- Surrogate pairs are handled first so that the generic pass below only
	-- sees stand-alone escapes.
	s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func);
	-- FIXME handle escapes beyond BMP
	s = s:gsub("\\u.?.?.?.?", _unescape_func);
	if _unescape_error then
		return nil, "invalid escape";
	end
	return s, endindex + 1;
end
-- Parses a JSON number starting at `index`.
-- Returns the number and the position just past it, or nil plus
-- "invalid number" when the run of number characters is not something
-- tonumber() accepts (e.g. "1e" or "1.2.3"). Previously that case returned
-- nil together with a numeric index, where callers expect an error message.
local function _readnumber(json, index)
	-- "^" anchors the match at `index` instead of searching forward.
	local m = json:match("^[0-9%.%-eE%+]+", index); -- FIXME do strict checking
	local n = m and tonumber(m);
	if n == nil then return nil, "invalid number"; end
	return n, index + #m;
end
-- Parses the literal null; `index` is the position of its leading "n",
-- which the caller has already checked.
-- Returns the null sentinel and the position just past the literal, or nil
-- plus an error message.
local function _readnull(json, index)
	if json:sub(index + 1, index + 3) == "ull" then
		return null, index + 4;
	end
	return nil, "null parse failed";
end
-- Parses the literal true; `index` is the position of its leading "t",
-- which the caller has already checked.
-- Returns true and the position just past the literal, or nil plus an
-- error message.
local function _readtrue(json, index)
	if json:sub(index + 1, index + 3) == "rue" then
		return true, index + 4;
	end
	return nil, "true parse failed";
end
-- Parses the literal false; `index` is the position of its leading "f",
-- which the caller has already checked.
-- Returns false and the position just past the literal, or nil plus an
-- error message.
local function _readfalse(json, index)
	if json:sub(index + 1, index + 4) == "alse" then
		return false, index + 5;
	end
	return nil, "false parse failed";
end
-- Parses whichever JSON value starts at or after `index`, skipping leading
-- whitespace, by dispatching on its first byte.
-- Returns the value and the position just past it, or nil plus an error
-- message.
function _readvalue(json, index)
	index = _skip_whitespace(json, index);
	local b = json:byte(index);
	if b == nil then
		return nil, "value expected";
	end
	-- TODO try table lookup instead of if-else?
	if b == 0x7B then return _readobject(json, index); end -- "{"
	if b == 0x5B then return _readarray(json, index); end -- "["
	if b == 0x22 then return _readstring(json, index); end -- "\""
	if b == 0x2d or (b >= 0x30 and b <= 0x39) then -- "-" or "0"-"9"
		return _readnumber(json, index);
	end
	if b == 0x6e then return _readnull(json, index); end -- "n"
	if b == 0x74 then return _readtrue(json, index); end -- "t"
	if b == 0x66 then return _readfalse(json, index); end -- "f"
	return nil, "value expected";
end
-- Rewrites each two-character JSON escape as the equivalent \uXXXX escape,
-- so that later parsing only has to deal with one escape form.
-- "\u" maps to itself; sequences with no entry here are left as they are.
local first_escape = {
	["\\\""] = "\\u0022", -- quotation mark
	["\\\\"] = "\\u005c", -- backslash
	["\\/"]  = "\\u002f", -- solidus
	["\\b"]  = "\\u0008", -- backspace
	["\\f"]  = "\\u000C", -- form feed
	["\\n"]  = "\\u000A", -- line feed
	["\\r"]  = "\\u000D", -- carriage return
	["\\t"]  = "\\u0009", -- tab
	["\\u"]  = "\\u",
};
-- Parses the JSON text `json`.
-- Returns the decoded value (JSON null becomes the null sentinel), or nil
-- plus an error message when the text is malformed or has non-whitespace
-- content after the value.
function module.decode(json)
	-- get rid of all escapes except \uXXXX, making string parsing much simpler
	local text = json:gsub("\\.", first_escape);
	--:gsub("[\r\n]", "\t"); -- \r\n\t are equivalent, we care about none of them, and none of them can be in strings

	-- TODO do encoding verification

	local val, index = _readvalue(text, 1);
	if val == nil then
		return val, index; -- `index` carries the error message here
	end
	if text:find("[^ \t\r\n]", index) then
		return nil, "garbage at eof";
	end
	return val;
end
-- Round-trip self check: encodes `object`, decodes the result and encodes it
-- again. Prints the encoding on success, or "FAILED" followed by both
-- encodings on a mismatch. Returns whether the two encodings are equal.
function module.test(object)
	local encoded = module.encode(object);
	local decoded = module.decode(encoded);
	local recoded = module.encode(decoded);
	local matches = (encoded == recoded);
	if matches then
		print(encoded);
	else
		print("FAILED");
		print("encoded:", encoded);
		print("recoded:", recoded);
	end
	return matches;
end
358 return module;