1 """Implementation of JSONEncoder
4 from decimal
import Decimal
def _import_speedups():
    """Return the optional C accelerators as a 2-tuple.

    The result is ``(encode_basestring_ascii, make_encoder)`` taken from
    the sibling ``_speedups`` extension module.  When that module cannot
    be imported, ``(None, None)`` is returned instead, so that callers
    can fall back to the pure-Python implementations (the module-level
    code tests the results with ``or`` / ``is not None``).
    """
    try:
        from . import _speedups
    except ImportError:
        # The extension is optional; its absence is not an error.
        return None, None
    return _speedups.encode_basestring_ascii, _speedups.make_encoder
12 c_encode_basestring_ascii
, c_make_encoder
= _import_speedups()
14 from .decoder
import PosInf
16 ESCAPE
= re
.compile(ur
'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
17 ESCAPE_ASCII
= re
.compile(r
'([\\"]|[^\ -~])')
18 HAS_UTF8
= re
.compile(r
'[\x80-\xff]')
31 #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
32 ESCAPE_DCT
.setdefault(chr(i
), '\\u%04x' % (i
,))
36 def encode_basestring(s
):
37 """Return a JSON representation of a Python string
40 if isinstance(s
, str) and HAS_UTF8
.search(s
) is not None:
43 return ESCAPE_DCT
[match
.group(0)]
44 return u
'"' + ESCAPE
.sub(replace
, s
) + u
'"'
47 def py_encode_basestring_ascii(s
):
48 """Return an ASCII-only JSON representation of a Python string
51 if isinstance(s
, str) and HAS_UTF8
.search(s
) is not None:
60 #return '\\u{0:04x}'.format(n)
61 return '\\u%04x' % (n
,)
65 s1
= 0xd800 |
((n
>> 10) & 0x3ff)
66 s2
= 0xdc00 |
(n
& 0x3ff)
67 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
68 return '\\u%04x\\u%04x' % (s1
, s2
)
69 return '"' + str(ESCAPE_ASCII
.sub(replace
, s
)) + '"'
72 encode_basestring_ascii
= (
73 c_encode_basestring_ascii
or py_encode_basestring_ascii
)
75 class JSONEncoder(object):
76 """Extensible JSON <http://json.org> encoder for Python data structures.
78 Supports the following objects and types by default:
80 +-------------------+---------------+
82 +===================+===============+
83 | dict, namedtuple | object |
84 +-------------------+---------------+
85 | list, tuple | array |
86 +-------------------+---------------+
87 | str, unicode | string |
88 +-------------------+---------------+
89 | int, long, float | number |
90 +-------------------+---------------+
92 +-------------------+---------------+
94 +-------------------+---------------+
96 +-------------------+---------------+
98 To extend this to recognize other objects, subclass and implement a
99 ``.default()`` method with another method that returns a serializable
100 object for ``o`` if possible, otherwise it should call the superclass
101 implementation (to raise ``TypeError``).
104 item_separator
= ', '
def __init__(self, skipkeys=False, ensure_ascii=True,
        check_circular=True, allow_nan=True, sort_keys=False,
        indent=None, separators=None, encoding='utf-8', default=None,
        use_decimal=True, namedtuple_as_object=True,
        tuple_as_array=True):
    """Constructor for JSONEncoder, with sensible defaults.

    If skipkeys is false, then it is a TypeError to attempt
    encoding of keys that are not str, int, long, float or None.  If
    skipkeys is True, such items are simply skipped.

    If ensure_ascii is true, the output is guaranteed to be str
    objects with all incoming unicode characters escaped.  If
    ensure_ascii is false, the output will be a unicode object.

    If check_circular is true, then lists, dicts, and custom encoded
    objects will be checked for circular references during encoding to
    prevent infinite recursion.  Otherwise, no such check takes place.

    If allow_nan is true, then NaN, Infinity, and -Infinity will be
    encoded as such.  This behavior is not JSON specification compliant,
    but is consistent with most JavaScript based encoders and decoders.
    Otherwise, it will be a ValueError to encode such floats.

    If sort_keys is true, then the output of dictionaries will be
    sorted by key; this is useful for regression tests to ensure
    that JSON serializations can be compared on a day-to-day basis.

    If indent is a string, then JSON array elements and object members
    will be pretty-printed with a newline followed by that string repeated
    for each level of nesting.  ``None`` (the default) selects the most
    compact representation without any newlines.  For backwards
    compatibility with versions of simplejson earlier than 2.1.0, an
    integer is also accepted and is converted to a string with that many
    spaces.

    If specified, separators should be an (item_separator, key_separator)
    tuple.  The default is (', ', ': ').  To get the most compact JSON
    representation you should specify (',', ':') to eliminate whitespace.
    When separators is omitted but indent is given, the item separator
    becomes ',' (no trailing space before the newline).

    If specified, default is a function that gets called for objects
    that can't otherwise be serialized.  It should return a JSON encodable
    version of the object or raise a ``TypeError``.

    If encoding is not None, then all input strings will be
    transformed into unicode using that encoding prior to JSON-encoding.
    The default is UTF-8.

    If use_decimal is true (the default), ``decimal.Decimal`` will
    be supported directly by the encoder.  For the inverse, decode JSON
    with ``parse_float=decimal.Decimal``.

    If namedtuple_as_object is true (the default), objects with
    ``_asdict()`` methods will be encoded as JSON objects.

    If tuple_as_array is true (the default), tuple (and subclasses) will
    be encoded as JSON arrays.
    """
    self.skipkeys = skipkeys
    self.ensure_ascii = ensure_ascii
    self.check_circular = check_circular
    self.allow_nan = allow_nan
    self.sort_keys = sort_keys
    self.use_decimal = use_decimal
    self.namedtuple_as_object = namedtuple_as_object
    self.tuple_as_array = tuple_as_array
    # Legacy API: an integer indent means "that many spaces".
    # (``long`` is the Python 2 builtin; this module targets Python 2.)
    if isinstance(indent, (int, long)):
        indent = ' ' * indent
    # iterencode() reads self.indent, so the normalized value is stored.
    self.indent = indent
    if separators is not None:
        self.item_separator, self.key_separator = separators
    elif indent is not None:
        # Pretty-printing puts a newline after every item, so the
        # class-level ', ' would leave trailing whitespace on each line.
        self.item_separator = ','
    if default is not None:
        # Shadow the default() method with the caller-supplied function.
        self.default = default
    self.encoding = encoding
def default(self, o):
    """Fallback hook for objects the encoder cannot serialize itself.

    Subclasses override this method to return a serializable object
    for ``o``; when they cannot handle ``o`` either, they should call
    this base implementation, which always raises ``TypeError``.

    For example, to support arbitrary iterators, you could
    implement default like this::

        def default(self, o):
            try:
                iterable = iter(o)
            except TypeError:
                pass
            else:
                return list(iterable)
            return JSONEncoder.default(self, o)

    """
    message = repr(o) + " is not JSON serializable"
    raise TypeError(message)
def encode(self, o):
    """Return a JSON string representation of a Python data structure.

    >>> from simplejson import JSONEncoder
    >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
    '{"foo": ["bar", "baz"]}'

    """
    # This is for extremely simple cases and benchmarks.
    if isinstance(o, basestring):
        if isinstance(o, str):
            _encoding = self.encoding
            # Byte strings are decoded here only for non-UTF-8 encodings.
            # NOTE(review): presumably the encode_basestring* helpers deal
            # with UTF-8 byte strings themselves -- confirm.
            if (_encoding is not None
                    and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
        if self.ensure_ascii:
            return encode_basestring_ascii(o)
        else:
            return encode_basestring(o)
    # This doesn't pass the iterator directly to ''.join() because the
    # exceptions aren't as detailed.  The list call should be roughly
    # equivalent to the PySequence_Fast that ''.join() would do.
    chunks = self.iterencode(o, _one_shot=True)
    if not isinstance(chunks, (list, tuple)):
        chunks = list(chunks)
    if self.ensure_ascii:
        return ''.join(chunks)
    else:
        return u''.join(chunks)
234 def iterencode(self
, o
, _one_shot
=False):
235 """Encode the given object and yield each string
236 representation as available.
240 for chunk in JSONEncoder().iterencode(bigobject):
241 mysocket.write(chunk)
244 if self
.check_circular
:
248 if self
.ensure_ascii
:
249 _encoder
= encode_basestring_ascii
251 _encoder
= encode_basestring
252 if self
.encoding
!= 'utf-8':
253 def _encoder(o
, _orig_encoder
=_encoder
, _encoding
=self
.encoding
):
254 if isinstance(o
, str):
255 o
= o
.decode(_encoding
)
256 return _orig_encoder(o
)
258 def floatstr(o
, allow_nan
=self
.allow_nan
,
259 _repr
=FLOAT_REPR
, _inf
=PosInf
, _neginf
=-PosInf
):
260 # Check for specials. Note that this type of test is processor
261 # and/or platform-specific, so do tests which don't depend on
275 "Out of range float values are not JSON compliant: " +
282 if (_one_shot
and c_make_encoder
is not None
283 and self
.indent
is None):
284 _iterencode
= c_make_encoder(
285 markers
, self
.default
, _encoder
, self
.indent
,
286 self
.key_separator
, self
.item_separator
, self
.sort_keys
,
287 self
.skipkeys
, self
.allow_nan
, key_memo
, self
.use_decimal
,
288 self
.namedtuple_as_object
, self
.tuple_as_array
)
290 _iterencode
= _make_iterencode(
291 markers
, self
.default
, _encoder
, self
.indent
, floatstr
,
292 self
.key_separator
, self
.item_separator
, self
.sort_keys
,
293 self
.skipkeys
, _one_shot
, self
.use_decimal
,
294 self
.namedtuple_as_object
, self
.tuple_as_array
)
296 return _iterencode(o
, 0)
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped.  They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.  They are emitted as the JSON escapes
    ``\\u0026``, ``\\u003c`` and ``\\u003e`` instead.
    """

    def encode(self, o):
        """Return the HTML-safe JSON text for ``o`` as a single string."""
        # Override JSONEncoder.encode because it has hacks for
        # performance that make things more complicated.
        chunks = self.iterencode(o, True)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Yield the parent's chunks with ``&``, ``<`` and ``>`` escaped."""
        chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
        for chunk in chunks:
            chunk = chunk.replace('&', '\\u0026')
            chunk = chunk.replace('<', '\\u003c')
            chunk = chunk.replace('>', '\\u003e')
            yield chunk
328 def _make_iterencode(markers
, _default
, _encoder
, _indent
, _floatstr
,
329 _key_separator
, _item_separator
, _sort_keys
, _skipkeys
, _one_shot
,
330 _use_decimal
, _namedtuple_as_object
, _tuple_as_array
,
331 ## HACK: hand-optimized bytecode; turn globals into locals
334 ValueError=ValueError,
335 basestring
=basestring
,
341 isinstance=isinstance,
348 def _iterencode_list(lst
, _current_indent_level
):
352 if markers
is not None:
354 if markerid
in markers
:
355 raise ValueError("Circular reference detected")
356 markers
[markerid
] = lst
358 if _indent
is not None:
359 _current_indent_level
+= 1
360 newline_indent
= '\n' + (_indent
* _current_indent_level
)
361 separator
= _item_separator
+ newline_indent
362 buf
+= newline_indent
364 newline_indent
= None
365 separator
= _item_separator
372 if isinstance(value
, basestring
):
373 yield buf
+ _encoder(value
)
380 elif isinstance(value
, (int, long)):
381 yield buf
+ str(value
)
382 elif isinstance(value
, float):
383 yield buf
+ _floatstr(value
)
384 elif _use_decimal
and isinstance(value
, Decimal
):
385 yield buf
+ str(value
)
388 if isinstance(value
, list):
389 chunks
= _iterencode_list(value
, _current_indent_level
)
391 _asdict
= _namedtuple_as_object
and getattr(value
, '_asdict', None)
392 if _asdict
and callable(_asdict
):
393 chunks
= _iterencode_dict(_asdict(),
394 _current_indent_level
)
395 elif _tuple_as_array
and isinstance(value
, tuple):
396 chunks
= _iterencode_list(value
, _current_indent_level
)
397 elif isinstance(value
, dict):
398 chunks
= _iterencode_dict(value
, _current_indent_level
)
400 chunks
= _iterencode(value
, _current_indent_level
)
403 if newline_indent
is not None:
404 _current_indent_level
-= 1
405 yield '\n' + (_indent
* _current_indent_level
)
407 if markers
is not None:
408 del markers
[markerid
]
410 def _iterencode_dict(dct
, _current_indent_level
):
414 if markers
is not None:
416 if markerid
in markers
:
417 raise ValueError("Circular reference detected")
418 markers
[markerid
] = dct
420 if _indent
is not None:
421 _current_indent_level
+= 1
422 newline_indent
= '\n' + (_indent
* _current_indent_level
)
423 item_separator
= _item_separator
+ newline_indent
426 newline_indent
= None
427 item_separator
= _item_separator
431 items
.sort(key
=lambda kv
: kv
[0])
433 items
= dct
.iteritems()
434 for key
, value
in items
:
435 if isinstance(key
, basestring
):
437 # JavaScript is weakly typed for these, so it makes sense to
438 # also allow them. Many encoders seem to do something like this.
439 elif isinstance(key
, float):
447 elif isinstance(key
, (int, long)):
452 raise TypeError("key " + repr(key
) + " is not a string")
459 if isinstance(value
, basestring
):
460 yield _encoder(value
)
467 elif isinstance(value
, (int, long)):
469 elif isinstance(value
, float):
470 yield _floatstr(value
)
471 elif _use_decimal
and isinstance(value
, Decimal
):
474 if isinstance(value
, list):
475 chunks
= _iterencode_list(value
, _current_indent_level
)
477 _asdict
= _namedtuple_as_object
and getattr(value
, '_asdict', None)
478 if _asdict
and callable(_asdict
):
479 chunks
= _iterencode_dict(_asdict(),
480 _current_indent_level
)
481 elif _tuple_as_array
and isinstance(value
, tuple):
482 chunks
= _iterencode_list(value
, _current_indent_level
)
483 elif isinstance(value
, dict):
484 chunks
= _iterencode_dict(value
, _current_indent_level
)
486 chunks
= _iterencode(value
, _current_indent_level
)
489 if newline_indent
is not None:
490 _current_indent_level
-= 1
491 yield '\n' + (_indent
* _current_indent_level
)
493 if markers
is not None:
494 del markers
[markerid
]
496 def _iterencode(o
, _current_indent_level
):
497 if isinstance(o
, basestring
):
505 elif isinstance(o
, (int, long)):
507 elif isinstance(o
, float):
509 elif isinstance(o
, list):
510 for chunk
in _iterencode_list(o
, _current_indent_level
):
513 _asdict
= _namedtuple_as_object
and getattr(o
, '_asdict', None)
514 if _asdict
and callable(_asdict
):
515 for chunk
in _iterencode_dict(_asdict(), _current_indent_level
):
517 elif (_tuple_as_array
and isinstance(o
, tuple)):
518 for chunk
in _iterencode_list(o
, _current_indent_level
):
520 elif isinstance(o
, dict):
521 for chunk
in _iterencode_dict(o
, _current_indent_level
):
523 elif _use_decimal
and isinstance(o
, Decimal
):
526 if markers
is not None:
528 if markerid
in markers
:
529 raise ValueError("Circular reference detected")
530 markers
[markerid
] = o
532 for chunk
in _iterencode(o
, _current_indent_level
):
534 if markers
is not None:
535 del markers
[markerid
]