# Source: [mygpo-feedservice.git] / feedservice / simplejson / decoder.py
# blob b769ea486ca932cd83b1689ef8e055ae2658aa72 (repository page links to the bug tracker)
"""Implementation of JSONDecoder
"""
3 import re
4 import sys
5 import struct
7 from simplejson.scanner import make_scanner
8 try:
9 from simplejson._speedups import scanstring as c_scanstring
10 except ImportError:
11 c_scanstring = None
13 __all__ = ['JSONDecoder']
15 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
17 def _floatconstants():
18 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19 if sys.byteorder != 'big':
20 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21 nan, inf = struct.unpack('dd', _BYTES)
22 return nan, inf, -inf
24 NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
    """Translate a character offset into a ``(lineno, colno)`` pair.

    ``doc`` is the full document text and ``pos`` an index into it.  Both
    returned numbers are 1-based: ``lineno`` is one more than the number of
    newlines before ``pos``, and ``colno`` is the distance from the previous
    newline (or from just before the start of the document when there is
    none).

    Previously the column was 0-based on the first line but 1-based on all
    later lines; ``rfind`` returning -1 on the first line makes the two
    cases agree.
    """
    lineno = doc.count('\n', 0, pos) + 1
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Build an error message that pinpoints a location in ``doc``.

    ``msg`` is the description, ``pos`` the offending character offset and
    ``end`` an optional second offset.  With only ``pos`` the result names a
    single line/column; with ``end`` it names the span from ``pos`` to
    ``end``.  Line and column numbers come from ``linecol``.
    """
    # Note that this function is called from _speedups
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '%s: line %d column %d (char %d)'
    return template % (msg, start_line, start_col, pos)
# Non-standard JSON literals and the float each one decodes to.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Group 1: a (possibly empty) run of ordinary string content.
# Group 2: the character that ended the run -- a double quote, a backslash,
# or a control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Character following a backslash -> the text that escape stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding assumed for byte strings when the caller does not give one.
DEFAULT_ENCODING = "utf-8"
def _hex4(digits):
    """Return the value of exactly four hexadecimal digits, or None.

    ``int(x, 16)`` on its own is too lenient for JSON escapes: it also
    accepts a sign, surrounding whitespace and a ``0x`` prefix, so each
    character is checked explicitly before converting.
    """
    if len(digits) != 4:
        return None
    for ch in digits:
        if ch not in '0123456789abcdefABCDEF':
            return None
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode non-unicode content and defaults to
    DEFAULT_ENCODING.  ``_b`` (escape lookup table) and ``_m`` (chunk
    matcher) are pre-bound module objects and are not meant to be passed
    by callers.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote.

    Raises ValueError for an unterminated string, a literal control
    character in strict mode, an unknown backslash escape, a malformed
    four-hex-digit unicode escape, or an invalid surrogate pair."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting unterminated strings.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence: exactly four hex digits must follow.
            uni = _hex4(s[end + 1:end + 5])
            next_end = end + 5
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                uni2 = _hex4(s[end + 7:end + 11])
                # The second escape must be a low surrogate; anything else
                # would be combined into a meaningless code point.
                if uni2 is None or not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
# Prefer the C implementation when the import at the top of the module
# succeeded; c_scanstring is None otherwise, which selects the Python one.
scanstring = c_scanstring or py_scanstring

# Pattern and plain-string forms of the four JSON whitespace characters.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` tuple passed as the first positional
    argument: ``s`` is the document and ``end`` the index at which scanning
    of the object body starts.  ``encoding`` and ``strict`` are forwarded
    to ``scanstring`` for the keys.  ``scan_once(s, idx)`` must return
    ``(value, end)`` or raise StopIteration when no value starts at
    ``idx``.  ``object_hook``, when not None, is called with the decoded
    dict and its return value is used instead -- including for the empty
    object.  ``_w`` and ``_ws`` are pre-bound whitespace helpers and are
    not meant to be passed by callers.

    ``end`` in the result is the index just past the closing ``}``.

    Raises ValueError when a property name, ``:`` delimiter, value or
    ``,`` delimiter is missing.
    """
    s, end = s_and_end
    pairs = {}
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object; the hook still applies to it.
        if nextchar == '}':
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))

        end += 1

        # Skip whitespace after the colon; running off the end of the
        # document is reported by the value scan below.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))

    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` tuple passed as the first positional
    argument: ``s`` is the document and ``end`` the index at which scanning
    of the array body starts.  ``scan_once(s, idx)`` must return
    ``(value, end)`` or raise StopIteration when no value starts at
    ``idx``.  ``_w`` and ``_ws`` are pre-bound whitespace helpers and are
    not meant to be passed by callers.

    ``values`` is a list and ``end`` the index just past the closing ``]``.

    Raises ValueError when an element or a ``,`` delimiter is missing.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # ``end`` was already advanced past the offending character,
            # so step back one to report its actual position.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace after the comma; running off the end of the
        # document is reported by the next value scan.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding maps JSON types to Python types as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded
    to the corresponding ``float`` values, although they are not part of
    the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True):
        """Configure the decoder.

        ``encoding`` names the encoding used to interpret ``str`` objects
        decoded by this instance (utf-8 by default).  It has no effect on
        ``unicode`` input.  Only encodings that are a superset of ASCII
        currently work; text in any other encoding should be passed in as
        ``unicode``.

        ``object_hook``, if given, is called with the result of every JSON
        object decoded, and its return value replaces the ``dict``.  This
        allows custom deserialization (e.g. JSON-RPC class hinting).

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded.  The default is equivalent to
        ``float(num_str)``; another type or parser such as
        ``decimal.Decimal`` may be substituted.

        ``parse_int``, if given, is called with the string of every JSON
        int to be decoded.  The default is equivalent to ``int(num_str)``;
        another type or parser such as ``float`` may be substituted.

        ``parse_constant``, if given, is called with one of the strings
        ``-Infinity``, ``Infinity`` or ``NaN``.  It can be used to raise an
        exception when such non-standard numbers are encountered.

        ``strict`` is stored on the instance; when it is false the string
        scanner in this module accepts literal control characters inside
        strings.

        """
        # Plain configuration, stored as given.
        self.encoding = encoding
        self.object_hook = object_hook
        self.strict = strict
        # Parsers fall back to the built-in behaviour when not supplied.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        # Module-level parsing routines, exposed as instance attributes.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Built last: make_scanner receives this instance after all of the
        # attributes above have been set.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document).

        Leading and trailing whitespace is skipped.  Raises ValueError
        ("Extra data") when anything other than whitespace follows the
        document.

        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, stop, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document), starting at index ``idx``.

        Returns a 2-tuple of the Python representation and the index in
        ``s`` where the document ended, so it can be used on a string that
        has extraneous data after the document.  Raises ValueError when no
        document can be decoded.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, end