Add ICU message format support
[chromium-blink-merge.git] / third_party / simplejson / encoder.py
blob6b4a6a482cb9aec7c6fff1d16a3a798d07e2fc5c
1 """Implementation of JSONEncoder
2 """
3 import re
4 from decimal import Decimal
6 def _import_speedups():
7 try:
8 from simplejson import _speedups
9 return _speedups.encode_basestring_ascii, _speedups.make_encoder
10 except ImportError:
11 return None, None
12 c_encode_basestring_ascii, c_make_encoder = _import_speedups()
14 from simplejson.decoder import PosInf
# Characters that must be escaped when emitting a (possibly non-ASCII) JSON
# string: control characters, backslash, double quote and the two Unicode
# line separators.  Written as a plain unicode literal with doubled
# backslashes; it denotes the same pattern as the raw form
# ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]'.
ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, and anything outside the printable ASCII range ' ' through '~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes with the high bit set in a byte string.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its JSON escape sequence.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    u'\u2028': '\\u2028',
    u'\u2029': '\\u2029',
}
# Remaining control characters (those without a short escape above) get the
# generic \uXXXX form; setdefault leaves the short escapes untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Function used to render floats that are not NaN or infinite.
FLOAT_REPR = repr
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Byte strings containing high-bit bytes are first decoded as UTF-8.
    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is returned as a unicode string wrapped
    in double quotes.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return u'"' + ESCAPE.sub(replace, s) + u'"'
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    Byte strings containing high-bit bytes are first decoded as UTF-8.
    Characters with an entry in ``ESCAPE_DCT`` use that escape; any other
    character matched by ``ESCAPE_ASCII`` is written as ``\\uXXXX``, using
    a UTF-16 surrogate pair for code points of 0x10000 and above.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u%04x' % (n,)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u%04x\\u%04x' % (s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'


# Prefer the C implementation when the speedups module was importable.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict, namedtuple  | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None,
            use_decimal=True, namedtuple_as_object=True,
            tuple_as_array=True, bigint_as_string=False,
            item_sort_key=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most compact
        representation without any newlines. For backwards compatibility with
        versions of simplejson earlier than 2.1.0, an integer is also accepted
        and is converted to a string with that many spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (the default), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        If namedtuple_as_object is true (the default), objects with
        ``_asdict()`` methods will be encoded as JSON objects.

        If tuple_as_array is true (the default), tuple (and subclasses) will
        be encoded as JSON arrays.

        If bigint_as_string is true (not the default), ints 2**53 and higher
        or lower than -2**53 will be encoded as strings. This is to avoid the
        rounding that happens in Javascript otherwise.

        If specified, item_sort_key is a callable used to sort the items in
        each dictionary. This is useful if you want to sort items other than
        in alphabetical order by key.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        self.namedtuple_as_object = namedtuple_as_object
        self.tuple_as_array = tuple_as_array
        self.bigint_as_string = bigint_as_string
        self.item_sort_key = item_sort_key
        # An integer indent is shorthand for that many spaces.
        if indent is not None and not isinstance(indent, basestring):
            indent = indent * ' '
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Each item goes on its own line when indenting, so the space
            # after the comma in the class default is dropped.
            self.item_separator = ','
        if default is not None:
            # Shadows the default() method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the encoder when an explicit non-UTF-8 encoding is set.
        # This mirrors encode(): with encoding=None byte strings are handed
        # to the string encoder untouched instead of calling decode(None).
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        key_memo = {}
        # The C encoder is only used for one-shot, non-indented output.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                self.bigint_as_string, self.item_sort_key,
                Decimal)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                self.bigint_as_string, self.item_sort_key,
                Decimal=Decimal)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.

    """

    def encode(self, o):
        """Return ``o`` encoded as a single HTML-safe JSON string."""
        # Override JSONEncoder.encode because it has hacks for
        # performance that make things more complicated.
        chunks = self.iterencode(o, True)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Yield the chunks from the base encoder with ``&``, ``<`` and
        ``>`` replaced by their ``\\uXXXX`` escapes.
        """
        chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
        for chunk in chunks:
            chunk = chunk.replace('&', '\\u0026')
            chunk = chunk.replace('<', '\\u003c')
            chunk = chunk.replace('>', '\\u003e')
            yield chunk
343 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
344 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
345 _use_decimal, _namedtuple_as_object, _tuple_as_array,
346 _bigint_as_string, _item_sort_key,
347 ## HACK: hand-optimized bytecode; turn globals into locals
348 False=False,
349 True=True,
350 ValueError=ValueError,
351 basestring=basestring,
352 Decimal=Decimal,
353 dict=dict,
354 float=float,
355 id=id,
356 int=int,
357 isinstance=isinstance,
358 list=list,
359 long=long,
360 str=str,
361 tuple=tuple,
363 if _item_sort_key and not callable(_item_sort_key):
364 raise TypeError("item_sort_key must be None or callable")
366 def _iterencode_list(lst, _current_indent_level):
367 if not lst:
368 yield '[]'
369 return
370 if markers is not None:
371 markerid = id(lst)
372 if markerid in markers:
373 raise ValueError("Circular reference detected")
374 markers[markerid] = lst
375 buf = '['
376 if _indent is not None:
377 _current_indent_level += 1
378 newline_indent = '\n' + (_indent * _current_indent_level)
379 separator = _item_separator + newline_indent
380 buf += newline_indent
381 else:
382 newline_indent = None
383 separator = _item_separator
384 first = True
385 for value in lst:
386 if first:
387 first = False
388 else:
389 buf = separator
390 if isinstance(value, basestring):
391 yield buf + _encoder(value)
392 elif value is None:
393 yield buf + 'null'
394 elif value is True:
395 yield buf + 'true'
396 elif value is False:
397 yield buf + 'false'
398 elif isinstance(value, (int, long)):
399 yield ((buf + str(value))
400 if (not _bigint_as_string or
401 (-1 << 53) < value < (1 << 53))
402 else (buf + '"' + str(value) + '"'))
403 elif isinstance(value, float):
404 yield buf + _floatstr(value)
405 elif _use_decimal and isinstance(value, Decimal):
406 yield buf + str(value)
407 else:
408 yield buf
409 if isinstance(value, list):
410 chunks = _iterencode_list(value, _current_indent_level)
411 else:
412 _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
413 if _asdict and callable(_asdict):
414 chunks = _iterencode_dict(_asdict(),
415 _current_indent_level)
416 elif _tuple_as_array and isinstance(value, tuple):
417 chunks = _iterencode_list(value, _current_indent_level)
418 elif isinstance(value, dict):
419 chunks = _iterencode_dict(value, _current_indent_level)
420 else:
421 chunks = _iterencode(value, _current_indent_level)
422 for chunk in chunks:
423 yield chunk
424 if newline_indent is not None:
425 _current_indent_level -= 1
426 yield '\n' + (_indent * _current_indent_level)
427 yield ']'
428 if markers is not None:
429 del markers[markerid]
431 def _iterencode_dict(dct, _current_indent_level):
432 if not dct:
433 yield '{}'
434 return
435 if markers is not None:
436 markerid = id(dct)
437 if markerid in markers:
438 raise ValueError("Circular reference detected")
439 markers[markerid] = dct
440 yield '{'
441 if _indent is not None:
442 _current_indent_level += 1
443 newline_indent = '\n' + (_indent * _current_indent_level)
444 item_separator = _item_separator + newline_indent
445 yield newline_indent
446 else:
447 newline_indent = None
448 item_separator = _item_separator
449 first = True
450 if _item_sort_key:
451 items = dct.items()
452 items.sort(key=_item_sort_key)
453 elif _sort_keys:
454 items = dct.items()
455 items.sort(key=lambda kv: kv[0])
456 else:
457 items = dct.iteritems()
458 for key, value in items:
459 if isinstance(key, basestring):
460 pass
461 # JavaScript is weakly typed for these, so it makes sense to
462 # also allow them. Many encoders seem to do something like this.
463 elif isinstance(key, float):
464 key = _floatstr(key)
465 elif key is True:
466 key = 'true'
467 elif key is False:
468 key = 'false'
469 elif key is None:
470 key = 'null'
471 elif isinstance(key, (int, long)):
472 key = str(key)
473 elif _skipkeys:
474 continue
475 else:
476 raise TypeError("key " + repr(key) + " is not a string")
477 if first:
478 first = False
479 else:
480 yield item_separator
481 yield _encoder(key)
482 yield _key_separator
483 if isinstance(value, basestring):
484 yield _encoder(value)
485 elif value is None:
486 yield 'null'
487 elif value is True:
488 yield 'true'
489 elif value is False:
490 yield 'false'
491 elif isinstance(value, (int, long)):
492 yield (str(value)
493 if (not _bigint_as_string or
494 (-1 << 53) < value < (1 << 53))
495 else ('"' + str(value) + '"'))
496 elif isinstance(value, float):
497 yield _floatstr(value)
498 elif _use_decimal and isinstance(value, Decimal):
499 yield str(value)
500 else:
501 if isinstance(value, list):
502 chunks = _iterencode_list(value, _current_indent_level)
503 else:
504 _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
505 if _asdict and callable(_asdict):
506 chunks = _iterencode_dict(_asdict(),
507 _current_indent_level)
508 elif _tuple_as_array and isinstance(value, tuple):
509 chunks = _iterencode_list(value, _current_indent_level)
510 elif isinstance(value, dict):
511 chunks = _iterencode_dict(value, _current_indent_level)
512 else:
513 chunks = _iterencode(value, _current_indent_level)
514 for chunk in chunks:
515 yield chunk
516 if newline_indent is not None:
517 _current_indent_level -= 1
518 yield '\n' + (_indent * _current_indent_level)
519 yield '}'
520 if markers is not None:
521 del markers[markerid]
523 def _iterencode(o, _current_indent_level):
524 if isinstance(o, basestring):
525 yield _encoder(o)
526 elif o is None:
527 yield 'null'
528 elif o is True:
529 yield 'true'
530 elif o is False:
531 yield 'false'
532 elif isinstance(o, (int, long)):
533 yield (str(o)
534 if (not _bigint_as_string or
535 (-1 << 53) < o < (1 << 53))
536 else ('"' + str(o) + '"'))
537 elif isinstance(o, float):
538 yield _floatstr(o)
539 elif isinstance(o, list):
540 for chunk in _iterencode_list(o, _current_indent_level):
541 yield chunk
542 else:
543 _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
544 if _asdict and callable(_asdict):
545 for chunk in _iterencode_dict(_asdict(), _current_indent_level):
546 yield chunk
547 elif (_tuple_as_array and isinstance(o, tuple)):
548 for chunk in _iterencode_list(o, _current_indent_level):
549 yield chunk
550 elif isinstance(o, dict):
551 for chunk in _iterencode_dict(o, _current_indent_level):
552 yield chunk
553 elif _use_decimal and isinstance(o, Decimal):
554 yield str(o)
555 else:
556 if markers is not None:
557 markerid = id(o)
558 if markerid in markers:
559 raise ValueError("Circular reference detected")
560 markers[markerid] = o
561 o = _default(o)
562 for chunk in _iterencode(o, _current_indent_level):
563 yield chunk
564 if markers is not None:
565 del markers[markerid]
567 return _iterencode