1 """Implementation of JSONEncoder
6 from simplejson
._speedups
import encode_basestring_ascii
as c_encode_basestring_ascii
8 c_encode_basestring_ascii
= None
10 from simplejson
._speedups
import make_encoder
as c_make_encoder
14 ESCAPE
= re
.compile(r
'[\x00-\x1f\\"\b\f\n\r\t]')
15 ESCAPE_ASCII
= re
.compile(r
'([\\"]|[^\ -~])')
16 HAS_UTF8
= re
.compile(r
'[\x80-\xff]')
27 #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
28 ESCAPE_DCT
.setdefault(chr(i
), '\\u%04x' % (i
,))
30 # Assume this produces an infinity on all machines (probably not guaranteed)
31 INFINITY
= float('1e66666')
34 def encode_basestring(s
):
35 """Return a JSON representation of a Python string
39 return ESCAPE_DCT
[match
.group(0)]
40 return '"' + ESCAPE
.sub(replace
, s
) + '"'
43 def py_encode_basestring_ascii(s
):
44 """Return an ASCII-only JSON representation of a Python string
47 if isinstance(s
, str) and HAS_UTF8
.search(s
) is not None:
56 #return '\\u{0:04x}'.format(n)
57 return '\\u%04x' % (n
,)
61 s1
= 0xd800 |
((n
>> 10) & 0x3ff)
62 s2
= 0xdc00 |
(n
& 0x3ff)
63 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
64 return '\\u%04x\\u%04x' % (s1
, s2
)
65 return '"' + str(ESCAPE_ASCII
.sub(replace
, s
)) + '"'
68 encode_basestring_ascii
= c_encode_basestring_ascii
or py_encode_basestring_ascii
70 class JSONEncoder(object):
71 """Extensible JSON <http://json.org> encoder for Python data structures.
73 Supports the following objects and types by default:
75 +-------------------+---------------+
77 +===================+===============+
79 +-------------------+---------------+
80 | list, tuple | array |
81 +-------------------+---------------+
82 | str, unicode | string |
83 +-------------------+---------------+
84 | int, long, float | number |
85 +-------------------+---------------+
87 +-------------------+---------------+
89 +-------------------+---------------+
91 +-------------------+---------------+
93 To extend this to recognize other objects, subclass and implement a
94 ``.default()`` method with another method that returns a serializable
95 object for ``o`` if possible, otherwise it should call the superclass
96 implementation (to raise ``TypeError``).
101 def __init__(self
, skipkeys
=False, ensure_ascii
=True,
102 check_circular
=True, allow_nan
=True, sort_keys
=False,
103 indent
=None, separators
=None, encoding
='utf-8', default
=None):
104 """Constructor for JSONEncoder, with sensible defaults.
106 If skipkeys is false, then it is a TypeError to attempt
107 encoding of keys that are not str, int, long, float or None. If
108 skipkeys is True, such items are simply skipped.
110 If ensure_ascii is true, the output is guaranteed to be str
111 objects with all incoming unicode characters escaped. If
112 ensure_ascii is false, the output will be unicode object.
114 If check_circular is true, then lists, dicts, and custom encoded
115 objects will be checked for circular references during encoding to
116 prevent an infinite recursion (which would cause an OverflowError).
117 Otherwise, no such check takes place.
119 If allow_nan is true, then NaN, Infinity, and -Infinity will be
120 encoded as such. This behavior is not JSON specification compliant,
121 but is consistent with most JavaScript based encoders and decoders.
122 Otherwise, it will be a ValueError to encode such floats.
124 If sort_keys is true, then the output of dictionaries will be
125 sorted by key; this is useful for regression tests to ensure
126 that JSON serializations can be compared on a day-to-day basis.
128 If indent is a non-negative integer, then JSON array
129 elements and object members will be pretty-printed with that
130 indent level. An indent level of 0 will only insert newlines.
131 None is the most compact representation.
133 If specified, separators should be a (item_separator, key_separator)
134 tuple. The default is (', ', ': '). To get the most compact JSON
135 representation you should specify (',', ':') to eliminate whitespace.
137 If specified, default is a function that gets called for objects
138 that can't otherwise be serialized. It should return a JSON encodable
139 version of the object or raise a ``TypeError``.
141 If encoding is not None, then all input strings will be
142 transformed into unicode using that encoding prior to JSON-encoding.
143 The default is UTF-8.
147 self
.skipkeys
= skipkeys
148 self
.ensure_ascii
= ensure_ascii
149 self
.check_circular
= check_circular
150 self
.allow_nan
= allow_nan
151 self
.sort_keys
= sort_keys
153 if separators
is not None:
154 self
.item_separator
, self
.key_separator
= separators
155 if default
is not None:
156 self
.default
= default
157 self
.encoding
= encoding
159 def default(self
, o
):
160 """Implement this method in a subclass such that it returns
161 a serializable object for ``o``, or calls the base implementation
162 (to raise a ``TypeError``).
164 For example, to support arbitrary iterators, you could
165 implement default like this::
167 def default(self, o):
173 return list(iterable)
174 return JSONEncoder.default(self, o)
177 raise TypeError(repr(o
) + " is not JSON serializable")
180 """Return a JSON string representation of a Python data structure.
182 >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
183 '{"foo": ["bar", "baz"]}'
186 # This is for extremely simple cases and benchmarks.
187 if isinstance(o
, basestring
):
188 if isinstance(o
, str):
189 _encoding
= self
.encoding
190 if (_encoding
is not None
191 and not (_encoding
== 'utf-8')):
192 o
= o
.decode(_encoding
)
193 if self
.ensure_ascii
:
194 return encode_basestring_ascii(o
)
196 return encode_basestring(o
)
197 # This doesn't pass the iterator directly to ''.join() because the
198 # exceptions aren't as detailed. The list call should be roughly
199 # equivalent to the PySequence_Fast that ''.join() would do.
200 chunks
= self
.iterencode(o
, _one_shot
=True)
201 if not isinstance(chunks
, (list, tuple)):
202 chunks
= list(chunks
)
203 return ''.join(chunks
)
205 def iterencode(self
, o
, _one_shot
=False):
206 """Encode the given object and yield each string
207 representation as available.
211 for chunk in JSONEncoder().iterencode(bigobject):
212 mysocket.write(chunk)
215 if self
.check_circular
:
219 if self
.ensure_ascii
:
220 _encoder
= encode_basestring_ascii
222 _encoder
= encode_basestring
223 if self
.encoding
!= 'utf-8':
224 def _encoder(o
, _orig_encoder
=_encoder
, _encoding
=self
.encoding
):
225 if isinstance(o
, str):
226 o
= o
.decode(_encoding
)
227 return _orig_encoder(o
)
229 def floatstr(o
, allow_nan
=self
.allow_nan
, _repr
=FLOAT_REPR
, _inf
=INFINITY
, _neginf
=-INFINITY
):
230 # Check for specials. Note that this type of test is processor- and/or
231 # platform-specific, so do tests which don't depend on the internals.
244 "Out of range float values are not JSON compliant: " +
250 if _one_shot
and c_make_encoder
is not None and not self
.indent
and not self
.sort_keys
:
251 _iterencode
= c_make_encoder(
252 markers
, self
.default
, _encoder
, self
.indent
,
253 self
.key_separator
, self
.item_separator
, self
.sort_keys
,
254 self
.skipkeys
, self
.allow_nan
)
256 _iterencode
= _make_iterencode(
257 markers
, self
.default
, _encoder
, self
.indent
, floatstr
,
258 self
.key_separator
, self
.item_separator
, self
.sort_keys
,
259 self
.skipkeys
, _one_shot
)
260 return _iterencode(o
, 0)
262 def _make_iterencode(markers
, _default
, _encoder
, _indent
, _floatstr
, _key_separator
, _item_separator
, _sort_keys
, _skipkeys
, _one_shot
,
263 ## HACK: hand-optimized bytecode; turn globals into locals
266 ValueError=ValueError,
267 basestring
=basestring
,
272 isinstance=isinstance,
279 def _iterencode_list(lst
, _current_indent_level
):
283 if markers
is not None:
285 if markerid
in markers
:
286 raise ValueError("Circular reference detected")
287 markers
[markerid
] = lst
289 if _indent
is not None:
290 _current_indent_level
+= 1
291 newline_indent
= '\n' + (' ' * (_indent
* _current_indent_level
))
292 separator
= _item_separator
+ newline_indent
293 buf
+= newline_indent
295 newline_indent
= None
296 separator
= _item_separator
303 if isinstance(value
, basestring
):
304 yield buf
+ _encoder(value
)
311 elif isinstance(value
, (int, long)):
312 yield buf
+ str(value
)
313 elif isinstance(value
, float):
314 yield buf
+ _floatstr(value
)
317 if isinstance(value
, (list, tuple)):
318 chunks
= _iterencode_list(value
, _current_indent_level
)
319 elif isinstance(value
, dict):
320 chunks
= _iterencode_dict(value
, _current_indent_level
)
322 chunks
= _iterencode(value
, _current_indent_level
)
325 if newline_indent
is not None:
326 _current_indent_level
-= 1
327 yield '\n' + (' ' * (_indent
* _current_indent_level
))
329 if markers
is not None:
330 del markers
[markerid
]
332 def _iterencode_dict(dct
, _current_indent_level
):
336 if markers
is not None:
338 if markerid
in markers
:
339 raise ValueError("Circular reference detected")
340 markers
[markerid
] = dct
342 if _indent
is not None:
343 _current_indent_level
+= 1
344 newline_indent
= '\n' + (' ' * (_indent
* _current_indent_level
))
345 item_separator
= _item_separator
+ newline_indent
348 newline_indent
= None
349 item_separator
= _item_separator
353 items
.sort(key
=lambda kv
: kv
[0])
355 items
= dct
.iteritems()
356 for key
, value
in items
:
357 if isinstance(key
, basestring
):
359 # JavaScript is weakly typed for these, so it makes sense to
360 # also allow them. Many encoders seem to do something like this.
361 elif isinstance(key
, float):
369 elif isinstance(key
, (int, long)):
374 raise TypeError("key " + repr(key
) + " is not a string")
381 if isinstance(value
, basestring
):
382 yield _encoder(value
)
389 elif isinstance(value
, (int, long)):
391 elif isinstance(value
, float):
392 yield _floatstr(value
)
394 if isinstance(value
, (list, tuple)):
395 chunks
= _iterencode_list(value
, _current_indent_level
)
396 elif isinstance(value
, dict):
397 chunks
= _iterencode_dict(value
, _current_indent_level
)
399 chunks
= _iterencode(value
, _current_indent_level
)
402 if newline_indent
is not None:
403 _current_indent_level
-= 1
404 yield '\n' + (' ' * (_indent
* _current_indent_level
))
406 if markers
is not None:
407 del markers
[markerid
]
409 def _iterencode(o
, _current_indent_level
):
410 if isinstance(o
, basestring
):
418 elif isinstance(o
, (int, long)):
420 elif isinstance(o
, float):
422 elif isinstance(o
, (list, tuple)):
423 for chunk
in _iterencode_list(o
, _current_indent_level
):
425 elif isinstance(o
, dict):
426 for chunk
in _iterencode_dict(o
, _current_indent_level
):
429 if markers
is not None:
431 if markerid
in markers
:
432 raise ValueError("Circular reference detected")
433 markers
[markerid
] = o
435 for chunk
in _iterencode(o
, _current_indent_level
):
437 if markers
is not None:
438 del markers
[markerid
]