"""Implementation of JSONEncoder"""
import re
from decimal import Decimal
def _import_speedups():
    """Return the optional C accelerators, or ``(None, None)`` without them.

    The result is the pair ``(encode_basestring_ascii, make_encoder)`` taken
    from ``simplejson._speedups``.  When that extension module cannot be
    imported, both entries are ``None`` so the module can fall back to the
    pure-Python implementations.
    """
    try:
        from simplejson import _speedups
    except ImportError:
        # The extension is optional: the ``c_... or py_...`` selection and
        # the ``c_make_encoder is not None`` test both expect None here.
        return None, None
    return _speedups.encode_basestring_ascii, _speedups.make_encoder


c_encode_basestring_ascii, c_make_encoder = _import_speedups()
# Positive infinity as defined by the decoder; JSONEncoder.iterencode uses it
# (and its negation) to recognise infinite floats.
from simplejson.decoder import PosInf
# Characters that must be escaped when the output may contain non-ASCII
# text: control characters, backslash, double quote and the two Unicode line
# separators.  Written as a plain unicode literal with doubled backslashes
# instead of the ``ur''`` prefix (a syntax error on Python 3); the pattern
# text handed to ``re`` is unchanged.
ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
# For ASCII-only output: backslash, double quote, and anything outside the
# printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects byte strings containing high-bit bytes.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Replacement text for each character that has a dedicated escape.
# NOTE(review): this literal was not visible in the reviewed text; its keys
# mirror the characters named explicitly in ESCAPE -- confirm.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    u'\u2028': '\\u2028',
    u'\u2029': '\\u2029',
}
# Every other control character gets the generic \uXXXX form; setdefault()
# leaves the short escapes above untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Float formatter used by JSONEncoder.iterencode.
# NOTE(review): definition was not visible in the reviewed text; assumed to
# be the builtin ``repr`` -- confirm.
FLOAT_REPR = repr
def encode_basestring(s):
    """Return a JSON representation of a Python string

    The result is a unicode string wrapped in double quotes in which every
    character matched by ``ESCAPE`` is replaced by its ``ESCAPE_DCT`` entry.
    Other non-ASCII characters are passed through unchanged.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        # Byte string with high-bit bytes: interpret it as UTF-8 so the
        # substitution below operates on characters rather than raw bytes.
        s = s.decode('utf-8')

    def replace(match):
        return ESCAPE_DCT[match.group(0)]

    return u'"' + ESCAPE.sub(replace, s) + u'"'
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    Characters with a dedicated entry in ``ESCAPE_DCT`` use it; every other
    character matched by ``ESCAPE_ASCII`` becomes a ``\\uXXXX`` escape, with
    code points above U+FFFF written as a UTF-16 surrogate pair.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        # Byte string with high-bit bytes: interpret it as UTF-8 first.
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u%04x' % (n,)
            # Outside the Basic Multilingual Plane: emit a surrogate pair.
            n -= 0x10000
            s1 = 0xd800 | ((n >> 10) & 0x3ff)
            s2 = 0xdc00 | (n & 0x3ff)
            return '\\u%04x\\u%04x' % (s1, s2)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
# Use the C implementation when it was importable, otherwise the
# pure-Python one.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict, namedtuple  | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` or ``indent`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, encoding='utf-8', default=None,
                 use_decimal=True, namedtuple_as_object=True,
                 tuple_as_array=True, bigint_as_string=False,
                 item_sort_key=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting.  ``None`` (the default) selects the most
        compact representation without any newlines.  For backwards
        compatibility with versions of simplejson earlier than 2.1.0, an
        integer is also accepted and is converted to a string with that many
        spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (the default), ``decimal.Decimal`` will
        be supported directly by the encoder.  For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        If namedtuple_as_object is true (the default), objects with
        ``_asdict()`` methods will be encoded as JSON objects.

        If tuple_as_array is true (the default), tuple (and subclasses) will
        be encoded as JSON arrays.

        If bigint_as_string is true (not the default), ints 2**53 and higher
        or -2**53 and lower will be encoded as strings.  This is to avoid the
        rounding that happens in Javascript otherwise.

        If specified, item_sort_key is a callable used to sort the items in
        each dictionary.  This is useful if you want to sort items other than
        in alphabetical order by key.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        self.namedtuple_as_object = namedtuple_as_object
        self.tuple_as_array = tuple_as_array
        self.bigint_as_string = bigint_as_string
        self.item_sort_key = item_sort_key
        if indent is not None and not isinstance(indent, basestring):
            # Backwards compatibility: an integer means "that many spaces".
            indent = indent * ' '
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Pretty-printed output ends every item with a newline, so the
            # space after the comma would only be trailing whitespace.
            self.item_separator = ','
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Wrap the string encoder only for a real, non-UTF-8 encoding.
        # ``encoding=None`` means "do not transcode" (as in encode()), so it
        # must not reach ``o.decode(None)``.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        key_memo = {}
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            # NOTE(review): the final positional argument was not visible in
            # the reviewed text; ``Decimal`` is assumed -- confirm against
            # the signature of _speedups.make_encoder.
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                self.bigint_as_string, self.item_sort_key,
                Decimal)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                self.bigint_as_string, self.item_sort_key)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.  They are written as the JSON escapes
    ``\\u0026``, ``\\u003c`` and ``\\u003e`` instead.
    """

    def encode(self, o):
        """Return the HTML-safe JSON string representation of ``o``."""
        # Override JSONEncoder.encode because it has hacks for
        # performance that make things more complicated.
        chunks = self.iterencode(o, True)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Yield the chunks of the base encoder with &, < and > escaped."""
        chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
        for chunk in chunks:
            chunk = chunk.replace('&', '\\u0026')
            chunk = chunk.replace('<', '\\u003c')
            chunk = chunk.replace('>', '\\u003e')
            yield chunk
343 def _make_iterencode(markers
, _default
, _encoder
, _indent
, _floatstr
,
344 _key_separator
, _item_separator
, _sort_keys
, _skipkeys
, _one_shot
,
345 _use_decimal
, _namedtuple_as_object
, _tuple_as_array
,
346 _bigint_as_string
, _item_sort_key
,
347 ## HACK: hand-optimized bytecode; turn globals into locals
350 ValueError=ValueError,
351 basestring
=basestring
,
357 isinstance=isinstance,
363 if _item_sort_key
and not callable(_item_sort_key
):
364 raise TypeError("item_sort_key must be None or callable")
366 def _iterencode_list(lst
, _current_indent_level
):
370 if markers
is not None:
372 if markerid
in markers
:
373 raise ValueError("Circular reference detected")
374 markers
[markerid
] = lst
376 if _indent
is not None:
377 _current_indent_level
+= 1
378 newline_indent
= '\n' + (_indent
* _current_indent_level
)
379 separator
= _item_separator
+ newline_indent
380 buf
+= newline_indent
382 newline_indent
= None
383 separator
= _item_separator
390 if isinstance(value
, basestring
):
391 yield buf
+ _encoder(value
)
398 elif isinstance(value
, (int, long)):
399 yield ((buf
+ str(value
))
400 if (not _bigint_as_string
or
401 (-1 << 53) < value
< (1 << 53))
402 else (buf
+ '"' + str(value
) + '"'))
403 elif isinstance(value
, float):
404 yield buf
+ _floatstr(value
)
405 elif _use_decimal
and isinstance(value
, Decimal
):
406 yield buf
+ str(value
)
409 if isinstance(value
, list):
410 chunks
= _iterencode_list(value
, _current_indent_level
)
412 _asdict
= _namedtuple_as_object
and getattr(value
, '_asdict', None)
413 if _asdict
and callable(_asdict
):
414 chunks
= _iterencode_dict(_asdict(),
415 _current_indent_level
)
416 elif _tuple_as_array
and isinstance(value
, tuple):
417 chunks
= _iterencode_list(value
, _current_indent_level
)
418 elif isinstance(value
, dict):
419 chunks
= _iterencode_dict(value
, _current_indent_level
)
421 chunks
= _iterencode(value
, _current_indent_level
)
424 if newline_indent
is not None:
425 _current_indent_level
-= 1
426 yield '\n' + (_indent
* _current_indent_level
)
428 if markers
is not None:
429 del markers
[markerid
]
431 def _iterencode_dict(dct
, _current_indent_level
):
435 if markers
is not None:
437 if markerid
in markers
:
438 raise ValueError("Circular reference detected")
439 markers
[markerid
] = dct
441 if _indent
is not None:
442 _current_indent_level
+= 1
443 newline_indent
= '\n' + (_indent
* _current_indent_level
)
444 item_separator
= _item_separator
+ newline_indent
447 newline_indent
= None
448 item_separator
= _item_separator
452 items
.sort(key
=_item_sort_key
)
455 items
.sort(key
=lambda kv
: kv
[0])
457 items
= dct
.iteritems()
458 for key
, value
in items
:
459 if isinstance(key
, basestring
):
461 # JavaScript is weakly typed for these, so it makes sense to
462 # also allow them. Many encoders seem to do something like this.
463 elif isinstance(key
, float):
471 elif isinstance(key
, (int, long)):
476 raise TypeError("key " + repr(key
) + " is not a string")
483 if isinstance(value
, basestring
):
484 yield _encoder(value
)
491 elif isinstance(value
, (int, long)):
493 if (not _bigint_as_string
or
494 (-1 << 53) < value
< (1 << 53))
495 else ('"' + str(value
) + '"'))
496 elif isinstance(value
, float):
497 yield _floatstr(value
)
498 elif _use_decimal
and isinstance(value
, Decimal
):
501 if isinstance(value
, list):
502 chunks
= _iterencode_list(value
, _current_indent_level
)
504 _asdict
= _namedtuple_as_object
and getattr(value
, '_asdict', None)
505 if _asdict
and callable(_asdict
):
506 chunks
= _iterencode_dict(_asdict(),
507 _current_indent_level
)
508 elif _tuple_as_array
and isinstance(value
, tuple):
509 chunks
= _iterencode_list(value
, _current_indent_level
)
510 elif isinstance(value
, dict):
511 chunks
= _iterencode_dict(value
, _current_indent_level
)
513 chunks
= _iterencode(value
, _current_indent_level
)
516 if newline_indent
is not None:
517 _current_indent_level
-= 1
518 yield '\n' + (_indent
* _current_indent_level
)
520 if markers
is not None:
521 del markers
[markerid
]
523 def _iterencode(o
, _current_indent_level
):
524 if isinstance(o
, basestring
):
532 elif isinstance(o
, (int, long)):
534 if (not _bigint_as_string
or
535 (-1 << 53) < o
< (1 << 53))
536 else ('"' + str(o
) + '"'))
537 elif isinstance(o
, float):
539 elif isinstance(o
, list):
540 for chunk
in _iterencode_list(o
, _current_indent_level
):
543 _asdict
= _namedtuple_as_object
and getattr(o
, '_asdict', None)
544 if _asdict
and callable(_asdict
):
545 for chunk
in _iterencode_dict(_asdict(), _current_indent_level
):
547 elif (_tuple_as_array
and isinstance(o
, tuple)):
548 for chunk
in _iterencode_list(o
, _current_indent_level
):
550 elif isinstance(o
, dict):
551 for chunk
in _iterencode_dict(o
, _current_indent_level
):
553 elif _use_decimal
and isinstance(o
, Decimal
):
556 if markers
is not None:
558 if markerid
in markers
:
559 raise ValueError("Circular reference detected")
560 markers
[markerid
] = o
562 for chunk
in _iterencode(o
, _current_indent_level
):
564 if markers
is not None:
565 del markers
[markerid
]