"""Implementation of JSONDecoder
"""
import binascii
import re
import struct
import sys

from simplejson.scanner import make_scanner
8 def _import_c_scanstring():
10 from simplejson
._speedups
import scanstring
# C implementation of scanstring, or None when _speedups is not importable.
c_scanstring = _import_c_scanstring()

__all__ = ['JSONDecoder']

# Flags passed to every regular expression compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
20 def _floatconstants():
21 _BYTES
= '7FF80000000000007FF0000000000000'.decode('hex')
22 # The struct module in Python 2.4 would get frexp() out of range here
23 # when an endian is specified in the format string. Fixed in Python 2.5+
24 if sys
.byteorder
!= 'big':
25 _BYTES
= _BYTES
[:8][::-1] + _BYTES
[8:][::-1]
26 nan
, inf
= struct
.unpack('dd', _BYTES
)
# Float values for the non-standard JSON constants NaN / Infinity / -Infinity.
(NaN, PosInf, NegInf) = _floatconstants()
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # The ValueError message is the fully formatted, positioned text.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None

    def __reduce__(self):
        # Default exception reduction would replay only the formatted
        # message into __init__, which requires msg, doc and pos, so
        # unpickling would fail. Rebuild from the original arguments.
        return self.__class__, (self.msg, self.doc, self.pos, self.end)
def linecol(doc, pos):
    """Return ``(lineno, colno)`` for the character index *pos* in *doc*.

    Both values are 1-based: ``lineno`` is one more than the number of
    newlines before *pos*, and ``colno`` is the distance from the newline
    preceding *pos*.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes pos (rindex() would raise
    # ValueError there), so the first line is handled without a special
    # case and gets the same 1-based column as every other line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Format *msg* with the line/column position of *pos* in *doc*.

    When *end* is given the message describes the span from *pos* to
    *end*; otherwise only the starting position is reported.
    """
    # Note that this function is called from _speedups
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        span_fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return span_fmt % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    point_fmt = '%s: line %d column %d (char %d)'
    return point_fmt % (msg, start_line, start_col, pos)
# Group 1: a run of ordinary characters; group 2: the character that ends the
# run (closing quote, backslash, or a literal control character).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character after the backslash -> its value.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

DEFAULT_ENCODING = "utf-8"
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    (as JSONDecodeError) on attempt to decode an invalid string. If strict
    is False then literal control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting unterminated strings
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            if len(esc) != 4:
                raise JSONDecodeError(msg, s, end)
            try:
                uni = int(esc, 16)
            except ValueError:
                # Non-hex digits: report with the document position instead
                # of leaking int()'s bare ValueError
                raise JSONDecodeError(msg, s, end)
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise JSONDecodeError(msg, s, end)
                try:
                    uni2 = int(esc2, 16)
                except ValueError:
                    raise JSONDecodeError(msg, s, end)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
# Prefer the C scanstring; fall back to the pure-Python one when it is None.
scanstring = c_scanstring or py_scanstring

# The four JSON whitespace characters, as a membership string and as a
# pattern matching a (possibly empty) run of them.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
def JSONObject(state, encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose opening ``{`` has already been consumed.

    *state* is an ``(s, end)`` pair: the document and the index just after
    the opening brace. Returns ``(obj, end)`` where *end* is the index just
    after the closing brace. ``object_pairs_hook``, when given, receives the
    ordered list of ``(key, value)`` pairs and takes priority over
    ``object_hook``, which receives the resulting dict.

    Raises JSONDecodeError on malformed input.
    """
    # A plain parameter unpacked here replaces the tuple-parameter signature
    # (removed from the language by PEP 3113); positional callers are
    # unaffected.
    s, end = state
    # Backwards compatibility
    if memo is None:
        memo = {}
    # setdefault(key, key) makes equal keys share a single string object
    memo_get = memo.setdefault
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = memo_get(key, key)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)

        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the document; scan_once reports it below
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        pairs.append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end - 1)

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array whose opening ``[`` has already been consumed.

    *state* is an ``(s, end)`` pair: the document and the index just after
    the opening bracket. Returns ``(values, end)`` where *values* is a list
    and *end* is the index just after the closing bracket.

    Raises JSONDecodeError on malformed input.
    """
    # A plain parameter unpacked here replaces the tuple-parameter signature
    # (removed from the language by PEP 3113); positional callers are
    # unaffected.
    s, end = state
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character, so
            # report end - 1, the same position JSONObject reports
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the document; scan_once reports it above
            pass

    return values, end
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding performs these translations:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity``, and ``-Infinity`` are also accepted and mapped to
    the corresponding ``float`` values, although they are outside the JSON
    spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """
        *encoding* is the encoding used to interpret :class:`str` objects
        decoded by this instance (``'utf-8'`` by default). It has no effect
        when decoding :class:`unicode` objects.

        Only encodings that are a superset of ASCII currently work; strings
        in other encodings should be passed in as :class:`unicode`.

        *object_hook*, if given, is called with the result of every JSON
        object decoded, and its return value is used in place of the
        :class:`dict`. This allows custom deserializations (e.g. JSON-RPC
        class hinting).

        *object_pairs_hook*, if given, is called with the ordered list of
        pairs of every object literal decoded, and its return value is used
        instead of the :class:`dict`. Decoders that depend on key order can
        use it (for example with :func:`collections.OrderedDict`). If
        *object_hook* is also given, *object_pairs_hook* takes priority.

        *parse_float*, if given, is called with the string of every JSON
        float to be decoded. The default is equivalent to
        ``float(num_str)``; another datatype or parser can be substituted
        (e.g. :class:`decimal.Decimal`).

        *parse_int*, if given, is called with the string of every JSON int
        to be decoded. The default is equivalent to ``int(num_str)``;
        another datatype or parser can be substituted (e.g. :class:`float`).

        *parse_constant*, if given, is called with one of the strings
        ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. It can be used to raise
        an exception when invalid JSON numbers are encountered.

        *strict* controls what happens when an unescaped control character
        appears inside a string: with ``True`` (the default) it is a parse
        error, with ``False`` it is allowed.

        """
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # A falsy hook falls back to the built-in converter.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives this decoder after every
        # attribute above has been set.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        Raises JSONDecodeError if anything other than whitespace follows
        the document.

        """
        result, stop = self.raw_decode(s)
        # Trailing whitespace is allowed; anything else is extra data.
        stop = _w(s, stop).end()
        if stop == len(s):
            return result
        raise JSONDecodeError("Extra data", s, stop, len(s))

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.
        Optionally, ``idx`` can be used to specify an offset in ``s`` where
        the JSON document begins.

        Unlike :meth:`decode`, data after the document is not an error, so
        this can be used on a string with extraneous data at the end.

        """
        try:
            result, stop = self.scan_once(s, idx=_w(s, idx).end())
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        return result, stop