Add ICU message format support
[chromium-blink-merge.git] / third_party / simplejson / _speedups.c
blobbe68b2dadac762f4afd4ef60d8ea12fbbb15e690
1 #include "Python.h"
2 #include "structmember.h"
3 #if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double)
4 #define PyOS_string_to_double json_PyOS_string_to_double
5 static double
6 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
7 static double
8 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) {
9 double x;
10 assert(endptr == NULL);
11 assert(overflow_exception == NULL);
12 PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
13 x = PyOS_ascii_atof(s);
14 PyFPE_END_PROTECT(x)
15 return x;
17 #endif
/* Compatibility shims: supply Py_TYPE and Py_SIZE when building against
   an interpreter older than 2.6 that does not already define them. */
18 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
19 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
20 #endif
21 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE)
22 #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
23 #endif
/* Before 2.5 there is no Py_ssize_t: alias it to int and map the
   Ssize_t conversion helpers onto their long-based equivalents. */
24 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
25 typedef int Py_ssize_t;
26 #define PY_SSIZE_T_MAX INT_MAX
27 #define PY_SSIZE_T_MIN INT_MIN
28 #define PyInt_FromSsize_t PyInt_FromLong
29 #define PyInt_AsSsize_t PyInt_AsLong
30 #endif
/* Finite means neither infinite nor NaN. */
31 #ifndef Py_IS_FINITE
32 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
33 #endif
/* UNUSED marks a parameter as intentionally unused; it expands to a GCC
   attribute when __GNUC__ is defined and to nothing otherwise. */
35 #ifdef __GNUC__
36 #define UNUSED __attribute__((__unused__))
37 #else
38 #define UNUSED
39 #endif
/* Encoding substituted by py_scanstring when the caller passes none. */
41 #define DEFAULT_ENCODING "utf-8"
/* Type checks for the two extension types: *_Check accepts subclasses
   (PyObject_TypeCheck), *_CheckExact requires the exact type. */
43 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
44 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
45 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
46 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
/* Forward declarations so the macros above can take the types' addresses. */
48 static PyTypeObject PyScannerType;
49 static PyTypeObject PyEncoderType;
/* Instance layout of the scanner (decoder) type.
   Every field is an object reference that scanner_traverse visits and
   scanner_clear releases. All fields except memo are exposed read-only
   to Python through scanner_members. */
51 typedef struct _PyScannerObject {
52 PyObject_HEAD
/* read with PyString_AS_STRING by _parse_object_str */
53 PyObject *encoding;
/* truth-tested with PyObject_IsTrue before scanning strings */
54 PyObject *strict;
/* called with the finished dict unless it is Py_None */
55 PyObject *object_hook;
/* called with the list of (key, value) tuples unless it is Py_None;
   exposed to Python as "object_pairs_hook" */
56 PyObject *pairs_hook;
57 PyObject *parse_float;
58 PyObject *parse_int;
59 PyObject *parse_constant;
/* dict used by the object parsers to reuse equal key objects */
60 PyObject *memo;
61 } PyScannerObject;
/* Read-only attribute table for PyScannerObject. Each entry maps a Python
   attribute name to the struct field at the given offset; note that the
   pairs_hook field is published as "object_pairs_hook". The {NULL} entry
   terminates the table. */
63 static PyMemberDef scanner_members[] = {
64 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
65 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
66 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
67 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
68 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
69 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
70 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
71 {NULL}
/* Instance layout of the encoder type.
   The PyObject* fields other than Decimal are exposed read-only through
   encoder_members (defaultfn under the name "default"); the int fields
   are plain C flags and are not exposed there. */
74 typedef struct _PyEncoderObject {
75 PyObject_HEAD
76 PyObject *markers;
/* published to Python as "default" */
77 PyObject *defaultfn;
78 PyObject *encoder;
79 PyObject *indent;
80 PyObject *key_separator;
81 PyObject *item_separator;
82 PyObject *sort_keys;
83 PyObject *skipkeys;
84 PyObject *key_memo;
/* NOTE(review): presumably the decimal.Decimal type used when
   use_decimal is set -- confirm against encoder_init */
85 PyObject *Decimal;
86 int fast_encode;
87 int allow_nan;
88 int use_decimal;
89 int namedtuple_as_object;
90 int tuple_as_array;
91 int bigint_as_string;
92 PyObject *item_sort_key;
93 } PyEncoderObject;
/* Read-only attribute table for PyEncoderObject. Only object-valued
   fields are listed; the defaultfn field is published as "default".
   The {NULL} entry terminates the table. */
95 static PyMemberDef encoder_members[] = {
96 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
97 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
98 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
99 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
100 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
101 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
102 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
103 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
104 {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
105 {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
106 {NULL}
109 static PyObject *
110 maybe_quote_bigint(PyObject *encoded, PyObject *obj);
112 static Py_ssize_t
113 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
114 static PyObject *
115 ascii_escape_unicode(PyObject *pystr);
116 static PyObject *
117 ascii_escape_str(PyObject *pystr);
118 static PyObject *
119 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
120 void init_speedups(void);
121 static PyObject *
122 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
123 static PyObject *
124 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
125 static PyObject *
126 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
127 static PyObject *
128 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
129 static int
130 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
131 static void
132 scanner_dealloc(PyObject *self);
133 static int
134 scanner_clear(PyObject *self);
135 static PyObject *
136 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
137 static int
138 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
139 static void
140 encoder_dealloc(PyObject *self);
141 static int
142 encoder_clear(PyObject *self);
143 static int
144 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
145 static int
146 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
147 static int
148 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
149 static PyObject *
150 _encoded_const(PyObject *obj);
151 static void
152 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
153 static PyObject *
154 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
155 static int
156 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
157 static PyObject *
158 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
159 static PyObject *
160 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
161 static int
162 _is_namedtuple(PyObject *obj);
/* S_CHAR(c): true when c is a printable ASCII character (' ' through '~')
   that can be copied into a JSON string without escaping, i.e. anything
   in that range except backslash and double quote.
   The argument is parenthesized so expression arguments parse correctly;
   it is still evaluated more than once, so pass side-effect-free values. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for the four JSON whitespace characters. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Worst-case output bytes per input character when escaping:
   "\uXXXX" is 6 bytes; on wide-unicode builds a character may need a
   surrogate pair "\uXXXX\uXXXX", doubling that. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
174 static PyObject *
175 maybe_quote_bigint(PyObject *encoded, PyObject *obj)
177 static PyObject *big_long = NULL;
178 static PyObject *small_long = NULL;
179 if (big_long == NULL) {
180 big_long = PyLong_FromLongLong(1LL << 53);
181 if (big_long == NULL) {
182 Py_DECREF(encoded);
183 return NULL;
186 if (small_long == NULL) {
187 small_long = PyLong_FromLongLong(-1LL << 53);
188 if (small_long == NULL) {
189 Py_DECREF(encoded);
190 return NULL;
193 if (PyObject_RichCompareBool(obj, big_long, Py_GE) ||
194 PyObject_RichCompareBool(obj, small_long, Py_LE)) {
195 PyObject* quoted = PyString_FromFormat("\"%s\"",
196 PyString_AsString(encoded));
197 Py_DECREF(encoded);
198 encoded = quoted;
200 return encoded;
203 static int
204 _is_namedtuple(PyObject *obj)
206 int rval = 0;
207 PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict");
208 if (_asdict == NULL) {
209 PyErr_Clear();
210 return 0;
212 rval = PyCallable_Check(_asdict);
213 Py_DECREF(_asdict);
214 return rval;
217 static int
218 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
220 /* PyObject to Py_ssize_t converter */
221 *size_ptr = PyInt_AsSsize_t(o);
222 if (*size_ptr == -1 && PyErr_Occurred())
223 return 0;
224 return 1;
227 static PyObject *
228 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
230 /* Py_ssize_t to PyObject converter */
231 return PyInt_FromSsize_t(*size_ptr);
234 static Py_ssize_t
235 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
237 /* Escape unicode code point c to ASCII escape sequences
238 in char *output. output must have at least 12 bytes unused to
239 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
240 output[chars++] = '\\';
241 switch (c) {
242 case '\\': output[chars++] = (char)c; break;
243 case '"': output[chars++] = (char)c; break;
244 case '\b': output[chars++] = 'b'; break;
245 case '\f': output[chars++] = 'f'; break;
246 case '\n': output[chars++] = 'n'; break;
247 case '\r': output[chars++] = 'r'; break;
248 case '\t': output[chars++] = 't'; break;
249 default:
250 #ifdef Py_UNICODE_WIDE
251 if (c >= 0x10000) {
252 /* UTF-16 surrogate pair */
253 Py_UNICODE v = c - 0x10000;
254 c = 0xd800 | ((v >> 10) & 0x3ff);
255 output[chars++] = 'u';
256 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
257 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
258 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
259 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
260 c = 0xdc00 | (v & 0x3ff);
261 output[chars++] = '\\';
263 #endif
264 output[chars++] = 'u';
265 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
266 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
267 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
268 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
270 return chars;
273 static PyObject *
274 ascii_escape_unicode(PyObject *pystr)
276 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
277 Py_ssize_t i;
278 Py_ssize_t input_chars;
279 Py_ssize_t output_size;
280 Py_ssize_t max_output_size;
281 Py_ssize_t chars;
282 PyObject *rval;
283 char *output;
284 Py_UNICODE *input_unicode;
286 input_chars = PyUnicode_GET_SIZE(pystr);
287 input_unicode = PyUnicode_AS_UNICODE(pystr);
289 /* One char input can be up to 6 chars output, estimate 4 of these */
290 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
291 max_output_size = 2 + (input_chars * MAX_EXPANSION);
292 rval = PyString_FromStringAndSize(NULL, output_size);
293 if (rval == NULL) {
294 return NULL;
296 output = PyString_AS_STRING(rval);
297 chars = 0;
298 output[chars++] = '"';
299 for (i = 0; i < input_chars; i++) {
300 Py_UNICODE c = input_unicode[i];
301 if (S_CHAR(c)) {
302 output[chars++] = (char)c;
304 else {
305 chars = ascii_escape_char(c, output, chars);
307 if (output_size - chars < (1 + MAX_EXPANSION)) {
308 /* There's more than four, so let's resize by a lot */
309 Py_ssize_t new_output_size = output_size * 2;
310 /* This is an upper bound */
311 if (new_output_size > max_output_size) {
312 new_output_size = max_output_size;
314 /* Make sure that the output size changed before resizing */
315 if (new_output_size != output_size) {
316 output_size = new_output_size;
317 if (_PyString_Resize(&rval, output_size) == -1) {
318 return NULL;
320 output = PyString_AS_STRING(rval);
324 output[chars++] = '"';
325 if (_PyString_Resize(&rval, chars) == -1) {
326 return NULL;
328 return rval;
331 static PyObject *
332 ascii_escape_str(PyObject *pystr)
334 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
335 Py_ssize_t i;
336 Py_ssize_t input_chars;
337 Py_ssize_t output_size;
338 Py_ssize_t chars;
339 PyObject *rval;
340 char *output;
341 char *input_str;
343 input_chars = PyString_GET_SIZE(pystr);
344 input_str = PyString_AS_STRING(pystr);
346 /* Fast path for a string that's already ASCII */
347 for (i = 0; i < input_chars; i++) {
348 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
349 if (!S_CHAR(c)) {
350 /* If we have to escape something, scan the string for unicode */
351 Py_ssize_t j;
352 for (j = i; j < input_chars; j++) {
353 c = (Py_UNICODE)(unsigned char)input_str[j];
354 if (c > 0x7f) {
355 /* We hit a non-ASCII character, bail to unicode mode */
356 PyObject *uni;
357 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
358 if (uni == NULL) {
359 return NULL;
361 rval = ascii_escape_unicode(uni);
362 Py_DECREF(uni);
363 return rval;
366 break;
370 if (i == input_chars) {
371 /* Input is already ASCII */
372 output_size = 2 + input_chars;
374 else {
375 /* One char input can be up to 6 chars output, estimate 4 of these */
376 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
378 rval = PyString_FromStringAndSize(NULL, output_size);
379 if (rval == NULL) {
380 return NULL;
382 output = PyString_AS_STRING(rval);
383 output[0] = '"';
385 /* We know that everything up to i is ASCII already */
386 chars = i + 1;
387 memcpy(&output[1], input_str, i);
389 for (; i < input_chars; i++) {
390 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
391 if (S_CHAR(c)) {
392 output[chars++] = (char)c;
394 else {
395 chars = ascii_escape_char(c, output, chars);
397 /* An ASCII char can't possibly expand to a surrogate! */
398 if (output_size - chars < (1 + MIN_EXPANSION)) {
399 /* There's more than four, so let's resize by a lot */
400 output_size *= 2;
401 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
402 output_size = 2 + (input_chars * MIN_EXPANSION);
404 if (_PyString_Resize(&rval, output_size) == -1) {
405 return NULL;
407 output = PyString_AS_STRING(rval);
410 output[chars++] = '"';
411 if (_PyString_Resize(&rval, chars) == -1) {
412 return NULL;
414 return rval;
417 static void
418 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
420 /* Use the Python function simplejson.decoder.errmsg to raise a nice
421 looking ValueError exception */
422 static PyObject *JSONDecodeError = NULL;
423 PyObject *exc;
424 if (JSONDecodeError == NULL) {
425 PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
426 if (decoder == NULL)
427 return;
428 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
429 Py_DECREF(decoder);
430 if (JSONDecodeError == NULL)
431 return;
433 exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
434 if (exc) {
435 PyErr_SetObject(JSONDecodeError, exc);
436 Py_DECREF(exc);
440 static PyObject *
441 join_list_unicode(PyObject *lst)
443 /* return u''.join(lst) */
444 static PyObject *joinfn = NULL;
445 if (joinfn == NULL) {
446 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
447 if (ustr == NULL)
448 return NULL;
450 joinfn = PyObject_GetAttrString(ustr, "join");
451 Py_DECREF(ustr);
452 if (joinfn == NULL)
453 return NULL;
455 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
458 static PyObject *
459 join_list_string(PyObject *lst)
461 /* return ''.join(lst) */
462 static PyObject *joinfn = NULL;
463 if (joinfn == NULL) {
464 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
465 if (ustr == NULL)
466 return NULL;
468 joinfn = PyObject_GetAttrString(ustr, "join");
469 Py_DECREF(ustr);
470 if (joinfn == NULL)
471 return NULL;
473 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
476 static PyObject *
477 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
478 /* return (rval, idx) tuple, stealing reference to rval */
479 PyObject *tpl;
480 PyObject *pyidx;
482 steal a reference to rval, returns (rval, idx)
484 if (rval == NULL) {
485 return NULL;
487 pyidx = PyInt_FromSsize_t(idx);
488 if (pyidx == NULL) {
489 Py_DECREF(rval);
490 return NULL;
492 tpl = PyTuple_New(2);
493 if (tpl == NULL) {
494 Py_DECREF(pyidx);
495 Py_DECREF(rval);
496 return NULL;
498 PyTuple_SET_ITEM(tpl, 0, rval);
499 PyTuple_SET_ITEM(tpl, 1, pyidx);
500 return tpl;
/* APPEND_OLD_CHUNK: flush the pending piece of a string being scanned.
   Expects locals `chunk` and `chunks` and a `bail` label in the enclosing
   function. When chunk is non-NULL it is appended to the chunks list
   (created on first use) and chunk is then cleared; any failure jumps
   to bail. */
503 #define APPEND_OLD_CHUNK \
504 if (chunk != NULL) { \
505 if (chunks == NULL) { \
506 chunks = PyList_New(0); \
507 if (chunks == NULL) { \
508 goto bail; \
511 if (PyList_Append(chunks, chunk)) { \
512 goto bail; \
514 Py_CLEAR(chunk); \
517 static PyObject *
518 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
520 /* Read the JSON string from PyString pystr.
521 end is the index of the first character after the quote.
522 encoding is the encoding of pystr (must be an ASCII superset)
523 if strict is zero then literal control characters are allowed
524 *next_end_ptr is a return-by-reference index of the character
525 after the end quote
527 Return value is a new PyString (if ASCII-only) or PyUnicode
529 PyObject *rval;
530 Py_ssize_t len = PyString_GET_SIZE(pystr);
531 Py_ssize_t begin = end - 1;
532 Py_ssize_t next = begin;
533 int has_unicode = 0;
534 char *buf = PyString_AS_STRING(pystr);
535 PyObject *chunks = NULL;
536 PyObject *chunk = NULL;
538 if (len == end) {
539 raise_errmsg("Unterminated string starting at", pystr, begin);
541 else if (end < 0 || len < end) {
542 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
543 goto bail;
545 while (1) {
546 /* Find the end of the string or the next escape */
547 Py_UNICODE c = 0;
548 for (next = end; next < len; next++) {
549 c = (unsigned char)buf[next];
550 if (c == '"' || c == '\\') {
551 break;
553 else if (strict && c <= 0x1f) {
554 raise_errmsg("Invalid control character at", pystr, next);
555 goto bail;
557 else if (c > 0x7f) {
558 has_unicode = 1;
561 if (!(c == '"' || c == '\\')) {
562 raise_errmsg("Unterminated string starting at", pystr, begin);
563 goto bail;
565 /* Pick up this chunk if it's not zero length */
566 if (next != end) {
567 PyObject *strchunk;
568 APPEND_OLD_CHUNK
569 strchunk = PyString_FromStringAndSize(&buf[end], next - end);
570 if (strchunk == NULL) {
571 goto bail;
573 if (has_unicode) {
574 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
575 Py_DECREF(strchunk);
576 if (chunk == NULL) {
577 goto bail;
580 else {
581 chunk = strchunk;
584 next++;
585 if (c == '"') {
586 end = next;
587 break;
589 if (next == len) {
590 raise_errmsg("Unterminated string starting at", pystr, begin);
591 goto bail;
593 c = buf[next];
594 if (c != 'u') {
595 /* Non-unicode backslash escapes */
596 end = next + 1;
597 switch (c) {
598 case '"': break;
599 case '\\': break;
600 case '/': break;
601 case 'b': c = '\b'; break;
602 case 'f': c = '\f'; break;
603 case 'n': c = '\n'; break;
604 case 'r': c = '\r'; break;
605 case 't': c = '\t'; break;
606 default: c = 0;
608 if (c == 0) {
609 raise_errmsg("Invalid \\escape", pystr, end - 2);
610 goto bail;
613 else {
614 c = 0;
615 next++;
616 end = next + 4;
617 if (end >= len) {
618 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
619 goto bail;
621 /* Decode 4 hex digits */
622 for (; next < end; next++) {
623 Py_UNICODE digit = buf[next];
624 c <<= 4;
625 switch (digit) {
626 case '0': case '1': case '2': case '3': case '4':
627 case '5': case '6': case '7': case '8': case '9':
628 c |= (digit - '0'); break;
629 case 'a': case 'b': case 'c': case 'd': case 'e':
630 case 'f':
631 c |= (digit - 'a' + 10); break;
632 case 'A': case 'B': case 'C': case 'D': case 'E':
633 case 'F':
634 c |= (digit - 'A' + 10); break;
635 default:
636 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
637 goto bail;
640 #ifdef Py_UNICODE_WIDE
641 /* Surrogate pair */
642 if ((c & 0xfc00) == 0xd800) {
643 Py_UNICODE c2 = 0;
644 if (end + 6 >= len) {
645 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
646 goto bail;
648 if (buf[next++] != '\\' || buf[next++] != 'u') {
649 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
650 goto bail;
652 end += 6;
653 /* Decode 4 hex digits */
654 for (; next < end; next++) {
655 c2 <<= 4;
656 Py_UNICODE digit = buf[next];
657 switch (digit) {
658 case '0': case '1': case '2': case '3': case '4':
659 case '5': case '6': case '7': case '8': case '9':
660 c2 |= (digit - '0'); break;
661 case 'a': case 'b': case 'c': case 'd': case 'e':
662 case 'f':
663 c2 |= (digit - 'a' + 10); break;
664 case 'A': case 'B': case 'C': case 'D': case 'E':
665 case 'F':
666 c2 |= (digit - 'A' + 10); break;
667 default:
668 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
669 goto bail;
672 if ((c2 & 0xfc00) != 0xdc00) {
673 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
674 goto bail;
676 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
678 else if ((c & 0xfc00) == 0xdc00) {
679 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
680 goto bail;
682 #endif
684 if (c > 0x7f) {
685 has_unicode = 1;
687 APPEND_OLD_CHUNK
688 if (has_unicode) {
689 chunk = PyUnicode_FromUnicode(&c, 1);
690 if (chunk == NULL) {
691 goto bail;
694 else {
695 char c_char = Py_CHARMASK(c);
696 chunk = PyString_FromStringAndSize(&c_char, 1);
697 if (chunk == NULL) {
698 goto bail;
703 if (chunks == NULL) {
704 if (chunk != NULL)
705 rval = chunk;
706 else
707 rval = PyString_FromStringAndSize("", 0);
709 else {
710 APPEND_OLD_CHUNK
711 rval = join_list_string(chunks);
712 if (rval == NULL) {
713 goto bail;
715 Py_CLEAR(chunks);
718 *next_end_ptr = end;
719 return rval;
720 bail:
721 *next_end_ptr = -1;
722 Py_XDECREF(chunk);
723 Py_XDECREF(chunks);
724 return NULL;
728 static PyObject *
729 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
731 /* Read the JSON string from PyUnicode pystr.
732 end is the index of the first character after the quote.
733 if strict is zero then literal control characters are allowed
734 *next_end_ptr is a return-by-reference index of the character
735 after the end quote
737 Return value is a new PyUnicode
739 PyObject *rval;
740 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
741 Py_ssize_t begin = end - 1;
742 Py_ssize_t next = begin;
743 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
744 PyObject *chunks = NULL;
745 PyObject *chunk = NULL;
747 if (len == end) {
748 raise_errmsg("Unterminated string starting at", pystr, begin);
750 else if (end < 0 || len < end) {
751 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
752 goto bail;
754 while (1) {
755 /* Find the end of the string or the next escape */
756 Py_UNICODE c = 0;
757 for (next = end; next < len; next++) {
758 c = buf[next];
759 if (c == '"' || c == '\\') {
760 break;
762 else if (strict && c <= 0x1f) {
763 raise_errmsg("Invalid control character at", pystr, next);
764 goto bail;
767 if (!(c == '"' || c == '\\')) {
768 raise_errmsg("Unterminated string starting at", pystr, begin);
769 goto bail;
771 /* Pick up this chunk if it's not zero length */
772 if (next != end) {
773 APPEND_OLD_CHUNK
774 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
775 if (chunk == NULL) {
776 goto bail;
779 next++;
780 if (c == '"') {
781 end = next;
782 break;
784 if (next == len) {
785 raise_errmsg("Unterminated string starting at", pystr, begin);
786 goto bail;
788 c = buf[next];
789 if (c != 'u') {
790 /* Non-unicode backslash escapes */
791 end = next + 1;
792 switch (c) {
793 case '"': break;
794 case '\\': break;
795 case '/': break;
796 case 'b': c = '\b'; break;
797 case 'f': c = '\f'; break;
798 case 'n': c = '\n'; break;
799 case 'r': c = '\r'; break;
800 case 't': c = '\t'; break;
801 default: c = 0;
803 if (c == 0) {
804 raise_errmsg("Invalid \\escape", pystr, end - 2);
805 goto bail;
808 else {
809 c = 0;
810 next++;
811 end = next + 4;
812 if (end >= len) {
813 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
814 goto bail;
816 /* Decode 4 hex digits */
817 for (; next < end; next++) {
818 Py_UNICODE digit = buf[next];
819 c <<= 4;
820 switch (digit) {
821 case '0': case '1': case '2': case '3': case '4':
822 case '5': case '6': case '7': case '8': case '9':
823 c |= (digit - '0'); break;
824 case 'a': case 'b': case 'c': case 'd': case 'e':
825 case 'f':
826 c |= (digit - 'a' + 10); break;
827 case 'A': case 'B': case 'C': case 'D': case 'E':
828 case 'F':
829 c |= (digit - 'A' + 10); break;
830 default:
831 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
832 goto bail;
835 #ifdef Py_UNICODE_WIDE
836 /* Surrogate pair */
837 if ((c & 0xfc00) == 0xd800) {
838 Py_UNICODE c2 = 0;
839 if (end + 6 >= len) {
840 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
841 goto bail;
843 if (buf[next++] != '\\' || buf[next++] != 'u') {
844 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
845 goto bail;
847 end += 6;
848 /* Decode 4 hex digits */
849 for (; next < end; next++) {
850 c2 <<= 4;
851 Py_UNICODE digit = buf[next];
852 switch (digit) {
853 case '0': case '1': case '2': case '3': case '4':
854 case '5': case '6': case '7': case '8': case '9':
855 c2 |= (digit - '0'); break;
856 case 'a': case 'b': case 'c': case 'd': case 'e':
857 case 'f':
858 c2 |= (digit - 'a' + 10); break;
859 case 'A': case 'B': case 'C': case 'D': case 'E':
860 case 'F':
861 c2 |= (digit - 'A' + 10); break;
862 default:
863 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
864 goto bail;
867 if ((c2 & 0xfc00) != 0xdc00) {
868 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
869 goto bail;
871 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
873 else if ((c & 0xfc00) == 0xdc00) {
874 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
875 goto bail;
877 #endif
879 APPEND_OLD_CHUNK
880 chunk = PyUnicode_FromUnicode(&c, 1);
881 if (chunk == NULL) {
882 goto bail;
886 if (chunks == NULL) {
887 if (chunk != NULL)
888 rval = chunk;
889 else
890 rval = PyUnicode_FromUnicode(NULL, 0);
892 else {
893 APPEND_OLD_CHUNK
894 rval = join_list_unicode(chunks);
895 if (rval == NULL) {
896 goto bail;
898 Py_CLEAR(chunks);
900 *next_end_ptr = end;
901 return rval;
902 bail:
903 *next_end_ptr = -1;
904 Py_XDECREF(chunk);
905 Py_XDECREF(chunks);
906 return NULL;
/* Python-level docstring for scanstring (implemented by py_scanstring). */
909 PyDoc_STRVAR(pydoc_scanstring,
910 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
911 "\n"
912 "Scan the string s for a JSON string. End is the index of the\n"
913 "character in s after the quote that started the JSON string.\n"
914 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
915 "on attempt to decode an invalid string. If strict is False then literal\n"
916 "control characters are allowed in the string.\n"
917 "\n"
918 "Returns a tuple of the decoded string and the index of the character in s\n"
919 "after the end quote."
922 static PyObject *
923 py_scanstring(PyObject* self UNUSED, PyObject *args)
925 PyObject *pystr;
926 PyObject *rval;
927 Py_ssize_t end;
928 Py_ssize_t next_end = -1;
929 char *encoding = NULL;
930 int strict = 1;
931 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
932 return NULL;
934 if (encoding == NULL) {
935 encoding = DEFAULT_ENCODING;
937 if (PyString_Check(pystr)) {
938 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
940 else if (PyUnicode_Check(pystr)) {
941 rval = scanstring_unicode(pystr, end, strict, &next_end);
943 else {
944 PyErr_Format(PyExc_TypeError,
945 "first argument must be a string, not %.80s",
946 Py_TYPE(pystr)->tp_name);
947 return NULL;
949 return _build_rval_index_tuple(rval, next_end);
/* Python-level docstring for encode_basestring_ascii
   (implemented by py_encode_basestring_ascii). */
952 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
953 "encode_basestring_ascii(basestring) -> str\n"
954 "\n"
955 "Return an ASCII-only JSON representation of a Python string"
958 static PyObject *
959 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
961 /* Return an ASCII-only JSON representation of a Python string */
962 /* METH_O */
963 if (PyString_Check(pystr)) {
964 return ascii_escape_str(pystr);
966 else if (PyUnicode_Check(pystr)) {
967 return ascii_escape_unicode(pystr);
969 else {
970 PyErr_Format(PyExc_TypeError,
971 "first argument must be a string, not %.80s",
972 Py_TYPE(pystr)->tp_name);
973 return NULL;
977 static void
978 scanner_dealloc(PyObject *self)
980 /* Deallocate scanner object */
981 scanner_clear(self);
982 Py_TYPE(self)->tp_free(self);
985 static int
986 scanner_traverse(PyObject *self, visitproc visit, void *arg)
988 PyScannerObject *s;
989 assert(PyScanner_Check(self));
990 s = (PyScannerObject *)self;
991 Py_VISIT(s->encoding);
992 Py_VISIT(s->strict);
993 Py_VISIT(s->object_hook);
994 Py_VISIT(s->pairs_hook);
995 Py_VISIT(s->parse_float);
996 Py_VISIT(s->parse_int);
997 Py_VISIT(s->parse_constant);
998 Py_VISIT(s->memo);
999 return 0;
1002 static int
1003 scanner_clear(PyObject *self)
1005 PyScannerObject *s;
1006 assert(PyScanner_Check(self));
1007 s = (PyScannerObject *)self;
1008 Py_CLEAR(s->encoding);
1009 Py_CLEAR(s->strict);
1010 Py_CLEAR(s->object_hook);
1011 Py_CLEAR(s->pairs_hook);
1012 Py_CLEAR(s->parse_float);
1013 Py_CLEAR(s->parse_int);
1014 Py_CLEAR(s->parse_constant);
1015 Py_CLEAR(s->memo);
1016 return 0;
1019 static PyObject *
1020 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1021 /* Read a JSON object from PyString pystr.
1022 idx is the index of the first character after the opening curly brace.
1023 *next_idx_ptr is a return-by-reference index to the first character after
1024 the closing curly brace.
1026 Returns a new PyObject (usually a dict, but object_hook or
1027 object_pairs_hook can change that)
1029 char *str = PyString_AS_STRING(pystr);
1030 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1031 PyObject *rval = NULL;
1032 PyObject *pairs = NULL;
1033 PyObject *item;
1034 PyObject *key = NULL;
1035 PyObject *val = NULL;
1036 char *encoding = PyString_AS_STRING(s->encoding);
1037 int strict = PyObject_IsTrue(s->strict);
1038 int has_pairs_hook = (s->pairs_hook != Py_None);
1039 Py_ssize_t next_idx;
1040 if (has_pairs_hook) {
1041 pairs = PyList_New(0);
1042 if (pairs == NULL)
1043 return NULL;
1045 else {
1046 rval = PyDict_New();
1047 if (rval == NULL)
1048 return NULL;
1051 /* skip whitespace after { */
1052 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1054 /* only loop if the object is non-empty */
1055 if (idx <= end_idx && str[idx] != '}') {
1056 while (idx <= end_idx) {
1057 PyObject *memokey;
1059 /* read key */
1060 if (str[idx] != '"') {
1061 raise_errmsg(
1062 "Expecting property name enclosed in double quotes",
1063 pystr, idx);
1064 goto bail;
1066 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
1067 if (key == NULL)
1068 goto bail;
1069 memokey = PyDict_GetItem(s->memo, key);
1070 if (memokey != NULL) {
1071 Py_INCREF(memokey);
1072 Py_DECREF(key);
1073 key = memokey;
1075 else {
1076 if (PyDict_SetItem(s->memo, key, key) < 0)
1077 goto bail;
1079 idx = next_idx;
1081 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1082 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1083 if (idx > end_idx || str[idx] != ':') {
1084 raise_errmsg("Expecting ':' delimiter", pystr, idx);
1085 goto bail;
1087 idx++;
1088 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1090 /* read any JSON data type */
1091 val = scan_once_str(s, pystr, idx, &next_idx);
1092 if (val == NULL)
1093 goto bail;
1095 if (has_pairs_hook) {
1096 item = PyTuple_Pack(2, key, val);
1097 if (item == NULL)
1098 goto bail;
1099 Py_CLEAR(key);
1100 Py_CLEAR(val);
1101 if (PyList_Append(pairs, item) == -1) {
1102 Py_DECREF(item);
1103 goto bail;
1105 Py_DECREF(item);
1107 else {
1108 if (PyDict_SetItem(rval, key, val) < 0)
1109 goto bail;
1110 Py_CLEAR(key);
1111 Py_CLEAR(val);
1113 idx = next_idx;
1115 /* skip whitespace before } or , */
1116 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1118 /* bail if the object is closed or we didn't get the , delimiter */
1119 if (idx > end_idx) break;
1120 if (str[idx] == '}') {
1121 break;
1123 else if (str[idx] != ',') {
1124 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1125 goto bail;
1127 idx++;
1129 /* skip whitespace after , delimiter */
1130 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1133 /* verify that idx < end_idx, str[idx] should be '}' */
1134 if (idx > end_idx || str[idx] != '}') {
1135 raise_errmsg("Expecting object", pystr, end_idx);
1136 goto bail;
1139 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1140 if (s->pairs_hook != Py_None) {
1141 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1142 if (val == NULL)
1143 goto bail;
1144 Py_DECREF(pairs);
1145 *next_idx_ptr = idx + 1;
1146 return val;
1149 /* if object_hook is not None: rval = object_hook(rval) */
1150 if (s->object_hook != Py_None) {
1151 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1152 if (val == NULL)
1153 goto bail;
1154 Py_DECREF(rval);
1155 rval = val;
1156 val = NULL;
1158 *next_idx_ptr = idx + 1;
1159 return rval;
1160 bail:
1161 Py_XDECREF(rval);
1162 Py_XDECREF(key);
1163 Py_XDECREF(val);
1164 Py_XDECREF(pairs);
1165 return NULL;
1168 static PyObject *
1169 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1170 /* Read a JSON object from PyUnicode pystr.
1171 idx is the index of the first character after the opening curly brace.
1172 *next_idx_ptr is a return-by-reference index to the first character after
1173 the closing curly brace.
1175 Returns a new PyObject (usually a dict, but object_hook can change that)
1177 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1178 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1179 PyObject *rval = NULL;
1180 PyObject *pairs = NULL;
1181 PyObject *item;
1182 PyObject *key = NULL;
1183 PyObject *val = NULL;
1184 int strict = PyObject_IsTrue(s->strict);
1185 int has_pairs_hook = (s->pairs_hook != Py_None);
1186 Py_ssize_t next_idx;
1188 if (has_pairs_hook) {
1189 pairs = PyList_New(0);
1190 if (pairs == NULL)
1191 return NULL;
1193 else {
1194 rval = PyDict_New();
1195 if (rval == NULL)
1196 return NULL;
1199 /* skip whitespace after { */
1200 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1202 /* only loop if the object is non-empty */
1203 if (idx <= end_idx && str[idx] != '}') {
1204 while (idx <= end_idx) {
1205 PyObject *memokey;
1207 /* read key */
1208 if (str[idx] != '"') {
1209 raise_errmsg(
1210 "Expecting property name enclosed in double quotes",
1211 pystr, idx);
1212 goto bail;
1214 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1215 if (key == NULL)
1216 goto bail;
1217 memokey = PyDict_GetItem(s->memo, key);
1218 if (memokey != NULL) {
1219 Py_INCREF(memokey);
1220 Py_DECREF(key);
1221 key = memokey;
1223 else {
1224 if (PyDict_SetItem(s->memo, key, key) < 0)
1225 goto bail;
1227 idx = next_idx;
1229 /* skip whitespace between key and : delimiter, read :, skip
1230 whitespace */
1231 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232 if (idx > end_idx || str[idx] != ':') {
1233 raise_errmsg("Expecting ':' delimiter", pystr, idx);
1234 goto bail;
1236 idx++;
1237 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1239 /* read any JSON term */
1240 val = scan_once_unicode(s, pystr, idx, &next_idx);
1241 if (val == NULL)
1242 goto bail;
1244 if (has_pairs_hook) {
1245 item = PyTuple_Pack(2, key, val);
1246 if (item == NULL)
1247 goto bail;
1248 Py_CLEAR(key);
1249 Py_CLEAR(val);
1250 if (PyList_Append(pairs, item) == -1) {
1251 Py_DECREF(item);
1252 goto bail;
1254 Py_DECREF(item);
1256 else {
1257 if (PyDict_SetItem(rval, key, val) < 0)
1258 goto bail;
1259 Py_CLEAR(key);
1260 Py_CLEAR(val);
1262 idx = next_idx;
1264 /* skip whitespace before } or , */
1265 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1267 /* bail if the object is closed or we didn't get the ,
1268 delimiter */
1269 if (idx > end_idx) break;
1270 if (str[idx] == '}') {
1271 break;
1273 else if (str[idx] != ',') {
1274 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1275 goto bail;
1277 idx++;
1279 /* skip whitespace after , delimiter */
1280 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1284 /* verify that idx < end_idx, str[idx] should be '}' */
1285 if (idx > end_idx || str[idx] != '}') {
1286 raise_errmsg("Expecting object", pystr, end_idx);
1287 goto bail;
1290 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1291 if (s->pairs_hook != Py_None) {
1292 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1293 if (val == NULL)
1294 goto bail;
1295 Py_DECREF(pairs);
1296 *next_idx_ptr = idx + 1;
1297 return val;
1300 /* if object_hook is not None: rval = object_hook(rval) */
1301 if (s->object_hook != Py_None) {
1302 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1303 if (val == NULL)
1304 goto bail;
1305 Py_DECREF(rval);
1306 rval = val;
1307 val = NULL;
1309 *next_idx_ptr = idx + 1;
1310 return rval;
1311 bail:
1312 Py_XDECREF(rval);
1313 Py_XDECREF(key);
1314 Py_XDECREF(val);
1315 Py_XDECREF(pairs);
1316 return NULL;
1319 static PyObject *
1320 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1321 /* Read a JSON array from PyString pystr.
1322 idx is the index of the first character after the opening brace.
1323 *next_idx_ptr is a return-by-reference index to the first character after
1324 the closing brace.
1326 Returns a new PyList
1328 char *str = PyString_AS_STRING(pystr);
1329 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1330 PyObject *val = NULL;
1331 PyObject *rval = PyList_New(0);
1332 Py_ssize_t next_idx;
1333 if (rval == NULL)
1334 return NULL;
1336 /* skip whitespace after [ */
1337 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1339 /* only loop if the array is non-empty */
1340 if (idx <= end_idx && str[idx] != ']') {
1341 while (idx <= end_idx) {
1343 /* read any JSON term and de-tuplefy the (rval, idx) */
1344 val = scan_once_str(s, pystr, idx, &next_idx);
1345 if (val == NULL) {
1346 if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1347 PyErr_Clear();
1348 raise_errmsg("Expecting object", pystr, idx);
1350 goto bail;
1353 if (PyList_Append(rval, val) == -1)
1354 goto bail;
1356 Py_CLEAR(val);
1357 idx = next_idx;
1359 /* skip whitespace between term and , */
1360 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1362 /* bail if the array is closed or we didn't get the , delimiter */
1363 if (idx > end_idx) break;
1364 if (str[idx] == ']') {
1365 break;
1367 else if (str[idx] != ',') {
1368 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1369 goto bail;
1371 idx++;
1373 /* skip whitespace after , */
1374 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1378 /* verify that idx < end_idx, str[idx] should be ']' */
1379 if (idx > end_idx || str[idx] != ']') {
1380 raise_errmsg("Expecting object", pystr, end_idx);
1381 goto bail;
1383 *next_idx_ptr = idx + 1;
1384 return rval;
1385 bail:
1386 Py_XDECREF(val);
1387 Py_DECREF(rval);
1388 return NULL;
1391 static PyObject *
1392 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1393 /* Read a JSON array from PyString pystr.
1394 idx is the index of the first character after the opening brace.
1395 *next_idx_ptr is a return-by-reference index to the first character after
1396 the closing brace.
1398 Returns a new PyList
1400 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1401 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1402 PyObject *val = NULL;
1403 PyObject *rval = PyList_New(0);
1404 Py_ssize_t next_idx;
1405 if (rval == NULL)
1406 return NULL;
1408 /* skip whitespace after [ */
1409 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1411 /* only loop if the array is non-empty */
1412 if (idx <= end_idx && str[idx] != ']') {
1413 while (idx <= end_idx) {
1415 /* read any JSON term */
1416 val = scan_once_unicode(s, pystr, idx, &next_idx);
1417 if (val == NULL) {
1418 if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1419 PyErr_Clear();
1420 raise_errmsg("Expecting object", pystr, idx);
1422 goto bail;
1425 if (PyList_Append(rval, val) == -1)
1426 goto bail;
1428 Py_CLEAR(val);
1429 idx = next_idx;
1431 /* skip whitespace between term and , */
1432 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1434 /* bail if the array is closed or we didn't get the , delimiter */
1435 if (idx > end_idx) break;
1436 if (str[idx] == ']') {
1437 break;
1439 else if (str[idx] != ',') {
1440 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1441 goto bail;
1443 idx++;
1445 /* skip whitespace after , */
1446 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1450 /* verify that idx < end_idx, str[idx] should be ']' */
1451 if (idx > end_idx || str[idx] != ']') {
1452 raise_errmsg("Expecting object", pystr, end_idx);
1453 goto bail;
1455 *next_idx_ptr = idx + 1;
1456 return rval;
1457 bail:
1458 Py_XDECREF(val);
1459 Py_DECREF(rval);
1460 return NULL;
1463 static PyObject *
1464 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1465 /* Read a JSON constant from PyString pystr.
1466 constant is the constant string that was found
1467 ("NaN", "Infinity", "-Infinity").
1468 idx is the index of the first character of the constant
1469 *next_idx_ptr is a return-by-reference index to the first character after
1470 the constant.
1472 Returns the result of parse_constant
1474 PyObject *cstr;
1475 PyObject *rval;
1476 /* constant is "NaN", "Infinity", or "-Infinity" */
1477 cstr = PyString_InternFromString(constant);
1478 if (cstr == NULL)
1479 return NULL;
1481 /* rval = parse_constant(constant) */
1482 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1483 idx += PyString_GET_SIZE(cstr);
1484 Py_DECREF(cstr);
1485 *next_idx_ptr = idx;
1486 return rval;
1489 static PyObject *
1490 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1491 /* Read a JSON number from PyString pystr.
1492 idx is the index of the first character of the number
1493 *next_idx_ptr is a return-by-reference index to the first character after
1494 the number.
1496 Returns a new PyObject representation of that number:
1497 PyInt, PyLong, or PyFloat.
1498 May return other types if parse_int or parse_float are set
1500 char *str = PyString_AS_STRING(pystr);
1501 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1502 Py_ssize_t idx = start;
1503 int is_float = 0;
1504 PyObject *rval;
1505 PyObject *numstr;
1507 /* read a sign if it's there, make sure it's not the end of the string */
1508 if (str[idx] == '-') {
1509 idx++;
1510 if (idx > end_idx) {
1511 PyErr_SetNone(PyExc_StopIteration);
1512 return NULL;
1516 /* read as many integer digits as we find as long as it doesn't start with 0 */
1517 if (str[idx] >= '1' && str[idx] <= '9') {
1518 idx++;
1519 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1521 /* if it starts with 0 we only expect one integer digit */
1522 else if (str[idx] == '0') {
1523 idx++;
1525 /* no integer digits, error */
1526 else {
1527 PyErr_SetNone(PyExc_StopIteration);
1528 return NULL;
1531 /* if the next char is '.' followed by a digit then read all float digits */
1532 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1533 is_float = 1;
1534 idx += 2;
1535 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1538 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1539 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1541 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1542 Py_ssize_t e_start = idx;
1543 idx++;
1545 /* read an exponent sign if present */
1546 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1548 /* read all digits */
1549 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1551 /* if we got a digit, then parse as float. if not, backtrack */
1552 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1553 is_float = 1;
1555 else {
1556 idx = e_start;
1560 /* copy the section we determined to be a number */
1561 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1562 if (numstr == NULL)
1563 return NULL;
1564 if (is_float) {
1565 /* parse as a float using a fast path if available, otherwise call user defined method */
1566 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1567 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1569 else {
1570 /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
1571 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1572 NULL, NULL);
1573 if (d == -1.0 && PyErr_Occurred())
1574 return NULL;
1575 rval = PyFloat_FromDouble(d);
1578 else {
1579 /* parse as an int using a fast path if available, otherwise call user defined method */
1580 if (s->parse_int != (PyObject *)&PyInt_Type) {
1581 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1583 else {
1584 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1587 Py_DECREF(numstr);
1588 *next_idx_ptr = idx;
1589 return rval;
1592 static PyObject *
1593 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1594 /* Read a JSON number from PyUnicode pystr.
1595 idx is the index of the first character of the number
1596 *next_idx_ptr is a return-by-reference index to the first character after
1597 the number.
1599 Returns a new PyObject representation of that number:
1600 PyInt, PyLong, or PyFloat.
1601 May return other types if parse_int or parse_float are set
1603 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1604 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1605 Py_ssize_t idx = start;
1606 int is_float = 0;
1607 PyObject *rval;
1608 PyObject *numstr;
1610 /* read a sign if it's there, make sure it's not the end of the string */
1611 if (str[idx] == '-') {
1612 idx++;
1613 if (idx > end_idx) {
1614 PyErr_SetNone(PyExc_StopIteration);
1615 return NULL;
1619 /* read as many integer digits as we find as long as it doesn't start with 0 */
1620 if (str[idx] >= '1' && str[idx] <= '9') {
1621 idx++;
1622 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1624 /* if it starts with 0 we only expect one integer digit */
1625 else if (str[idx] == '0') {
1626 idx++;
1628 /* no integer digits, error */
1629 else {
1630 PyErr_SetNone(PyExc_StopIteration);
1631 return NULL;
1634 /* if the next char is '.' followed by a digit then read all float digits */
1635 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1636 is_float = 1;
1637 idx += 2;
1638 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1641 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1642 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1643 Py_ssize_t e_start = idx;
1644 idx++;
1646 /* read an exponent sign if present */
1647 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1649 /* read all digits */
1650 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1652 /* if we got a digit, then parse as float. if not, backtrack */
1653 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1654 is_float = 1;
1656 else {
1657 idx = e_start;
1661 /* copy the section we determined to be a number */
1662 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1663 if (numstr == NULL)
1664 return NULL;
1665 if (is_float) {
1666 /* parse as a float using a fast path if available, otherwise call user defined method */
1667 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1668 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1670 else {
1671 rval = PyFloat_FromString(numstr, NULL);
1674 else {
1675 /* no fast path for unicode -> int, just call */
1676 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1678 Py_DECREF(numstr);
1679 *next_idx_ptr = idx;
1680 return rval;
1683 static PyObject *
1684 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1686 /* Read one JSON term (of any kind) from PyString pystr.
1687 idx is the index of the first character of the term
1688 *next_idx_ptr is a return-by-reference index to the first character after
1689 the number.
1691 Returns a new PyObject representation of the term.
1693 char *str = PyString_AS_STRING(pystr);
1694 Py_ssize_t length = PyString_GET_SIZE(pystr);
1695 PyObject *rval = NULL;
1696 int fallthrough = 0;
1697 if (idx >= length) {
1698 PyErr_SetNone(PyExc_StopIteration);
1699 return NULL;
1701 if (Py_EnterRecursiveCall(" while decoding a JSON document"))
1702 return NULL;
1703 switch (str[idx]) {
1704 case '"':
1705 /* string */
1706 rval = scanstring_str(pystr, idx + 1,
1707 PyString_AS_STRING(s->encoding),
1708 PyObject_IsTrue(s->strict),
1709 next_idx_ptr);
1710 break;
1711 case '{':
1712 /* object */
1713 rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1714 break;
1715 case '[':
1716 /* array */
1717 rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1718 break;
1719 case 'n':
1720 /* null */
1721 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1722 Py_INCREF(Py_None);
1723 *next_idx_ptr = idx + 4;
1724 rval = Py_None;
1726 else
1727 fallthrough = 1;
1728 break;
1729 case 't':
1730 /* true */
1731 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1732 Py_INCREF(Py_True);
1733 *next_idx_ptr = idx + 4;
1734 rval = Py_True;
1736 else
1737 fallthrough = 1;
1738 break;
1739 case 'f':
1740 /* false */
1741 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1742 Py_INCREF(Py_False);
1743 *next_idx_ptr = idx + 5;
1744 rval = Py_False;
1746 else
1747 fallthrough = 1;
1748 break;
1749 case 'N':
1750 /* NaN */
1751 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1752 rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
1754 else
1755 fallthrough = 1;
1756 break;
1757 case 'I':
1758 /* Infinity */
1759 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1760 rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
1762 else
1763 fallthrough = 1;
1764 break;
1765 case '-':
1766 /* -Infinity */
1767 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1768 rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1770 else
1771 fallthrough = 1;
1772 break;
1773 default:
1774 fallthrough = 1;
1776 /* Didn't find a string, object, array, or named constant. Look for a number. */
1777 if (fallthrough)
1778 rval = _match_number_str(s, pystr, idx, next_idx_ptr);
1779 Py_LeaveRecursiveCall();
1780 return rval;
1783 static PyObject *
1784 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1786 /* Read one JSON term (of any kind) from PyUnicode pystr.
1787 idx is the index of the first character of the term
1788 *next_idx_ptr is a return-by-reference index to the first character after
1789 the number.
1791 Returns a new PyObject representation of the term.
1793 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1794 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1795 PyObject *rval = NULL;
1796 int fallthrough = 0;
1797 if (idx >= length) {
1798 PyErr_SetNone(PyExc_StopIteration);
1799 return NULL;
1801 if (Py_EnterRecursiveCall(" while decoding a JSON document"))
1802 return NULL;
1803 switch (str[idx]) {
1804 case '"':
1805 /* string */
1806 rval = scanstring_unicode(pystr, idx + 1,
1807 PyObject_IsTrue(s->strict),
1808 next_idx_ptr);
1809 break;
1810 case '{':
1811 /* object */
1812 rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1813 break;
1814 case '[':
1815 /* array */
1816 rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1817 break;
1818 case 'n':
1819 /* null */
1820 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1821 Py_INCREF(Py_None);
1822 *next_idx_ptr = idx + 4;
1823 rval = Py_None;
1825 else
1826 fallthrough = 1;
1827 break;
1828 case 't':
1829 /* true */
1830 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1831 Py_INCREF(Py_True);
1832 *next_idx_ptr = idx + 4;
1833 rval = Py_True;
1835 else
1836 fallthrough = 1;
1837 break;
1838 case 'f':
1839 /* false */
1840 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1841 Py_INCREF(Py_False);
1842 *next_idx_ptr = idx + 5;
1843 rval = Py_False;
1845 else
1846 fallthrough = 1;
1847 break;
1848 case 'N':
1849 /* NaN */
1850 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1851 rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
1853 else
1854 fallthrough = 1;
1855 break;
1856 case 'I':
1857 /* Infinity */
1858 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1859 rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
1861 else
1862 fallthrough = 1;
1863 break;
1864 case '-':
1865 /* -Infinity */
1866 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1867 rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1869 else
1870 fallthrough = 1;
1871 break;
1872 default:
1873 fallthrough = 1;
1875 /* Didn't find a string, object, array, or named constant. Look for a number. */
1876 if (fallthrough)
1877 rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
1878 Py_LeaveRecursiveCall();
1879 return rval;
1882 static PyObject *
1883 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1885 /* Python callable interface to scan_once_{str,unicode} */
1886 PyObject *pystr;
1887 PyObject *rval;
1888 Py_ssize_t idx;
1889 Py_ssize_t next_idx = -1;
1890 static char *kwlist[] = {"string", "idx", NULL};
1891 PyScannerObject *s;
1892 assert(PyScanner_Check(self));
1893 s = (PyScannerObject *)self;
1894 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1895 return NULL;
1897 if (PyString_Check(pystr)) {
1898 rval = scan_once_str(s, pystr, idx, &next_idx);
1900 else if (PyUnicode_Check(pystr)) {
1901 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1903 else {
1904 PyErr_Format(PyExc_TypeError,
1905 "first argument must be a string, not %.80s",
1906 Py_TYPE(pystr)->tp_name);
1907 return NULL;
1909 PyDict_Clear(s->memo);
1910 return _build_rval_index_tuple(rval, next_idx);
1913 static PyObject *
1914 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1916 PyScannerObject *s;
1917 s = (PyScannerObject *)type->tp_alloc(type, 0);
1918 if (s != NULL) {
1919 s->encoding = NULL;
1920 s->strict = NULL;
1921 s->object_hook = NULL;
1922 s->pairs_hook = NULL;
1923 s->parse_float = NULL;
1924 s->parse_int = NULL;
1925 s->parse_constant = NULL;
1927 return (PyObject *)s;
1930 static int
1931 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1933 /* Initialize Scanner object */
1934 PyObject *ctx;
1935 static char *kwlist[] = {"context", NULL};
1936 PyScannerObject *s;
1938 assert(PyScanner_Check(self));
1939 s = (PyScannerObject *)self;
1941 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1942 return -1;
1944 if (s->memo == NULL) {
1945 s->memo = PyDict_New();
1946 if (s->memo == NULL)
1947 goto bail;
1950 /* PyString_AS_STRING is used on encoding */
1951 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1952 if (s->encoding == NULL)
1953 goto bail;
1954 if (s->encoding == Py_None) {
1955 Py_DECREF(Py_None);
1956 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1958 else if (PyUnicode_Check(s->encoding)) {
1959 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1960 Py_DECREF(s->encoding);
1961 s->encoding = tmp;
1963 if (s->encoding == NULL || !PyString_Check(s->encoding))
1964 goto bail;
1966 /* All of these will fail "gracefully" so we don't need to verify them */
1967 s->strict = PyObject_GetAttrString(ctx, "strict");
1968 if (s->strict == NULL)
1969 goto bail;
1970 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1971 if (s->object_hook == NULL)
1972 goto bail;
1973 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1974 if (s->pairs_hook == NULL)
1975 goto bail;
1976 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1977 if (s->parse_float == NULL)
1978 goto bail;
1979 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1980 if (s->parse_int == NULL)
1981 goto bail;
1982 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1983 if (s->parse_constant == NULL)
1984 goto bail;
1986 return 0;
1988 bail:
1989 Py_CLEAR(s->encoding);
1990 Py_CLEAR(s->strict);
1991 Py_CLEAR(s->object_hook);
1992 Py_CLEAR(s->pairs_hook);
1993 Py_CLEAR(s->parse_float);
1994 Py_CLEAR(s->parse_int);
1995 Py_CLEAR(s->parse_constant);
1996 return -1;
1999 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
2001 static
2002 PyTypeObject PyScannerType = {
2003 PyObject_HEAD_INIT(NULL)
2004 0, /* tp_internal */
2005 "simplejson._speedups.Scanner", /* tp_name */
2006 sizeof(PyScannerObject), /* tp_basicsize */
2007 0, /* tp_itemsize */
2008 scanner_dealloc, /* tp_dealloc */
2009 0, /* tp_print */
2010 0, /* tp_getattr */
2011 0, /* tp_setattr */
2012 0, /* tp_compare */
2013 0, /* tp_repr */
2014 0, /* tp_as_number */
2015 0, /* tp_as_sequence */
2016 0, /* tp_as_mapping */
2017 0, /* tp_hash */
2018 scanner_call, /* tp_call */
2019 0, /* tp_str */
2020 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
2021 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
2022 0, /* tp_as_buffer */
2023 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2024 scanner_doc, /* tp_doc */
2025 scanner_traverse, /* tp_traverse */
2026 scanner_clear, /* tp_clear */
2027 0, /* tp_richcompare */
2028 0, /* tp_weaklistoffset */
2029 0, /* tp_iter */
2030 0, /* tp_iternext */
2031 0, /* tp_methods */
2032 scanner_members, /* tp_members */
2033 0, /* tp_getset */
2034 0, /* tp_base */
2035 0, /* tp_dict */
2036 0, /* tp_descr_get */
2037 0, /* tp_descr_set */
2038 0, /* tp_dictoffset */
2039 scanner_init, /* tp_init */
2040 0,/* PyType_GenericAlloc, */ /* tp_alloc */
2041 scanner_new, /* tp_new */
2042 0,/* PyObject_GC_Del, */ /* tp_free */
2045 static PyObject *
2046 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2048 PyEncoderObject *s;
2049 s = (PyEncoderObject *)type->tp_alloc(type, 0);
2050 if (s != NULL) {
2051 s->markers = NULL;
2052 s->defaultfn = NULL;
2053 s->encoder = NULL;
2054 s->indent = NULL;
2055 s->key_separator = NULL;
2056 s->item_separator = NULL;
2057 s->sort_keys = NULL;
2058 s->skipkeys = NULL;
2059 s->key_memo = NULL;
2060 s->item_sort_key = NULL;
2061 s->Decimal = NULL;
2063 return (PyObject *)s;
2066 static int
2067 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
2069 /* initialize Encoder object */
2070 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "Decimal", NULL};
2072 PyEncoderObject *s;
2073 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
2074 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
2075 PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array;
2076 PyObject *bigint_as_string, *item_sort_key, *Decimal;
2078 assert(PyEncoder_Check(self));
2079 s = (PyEncoderObject *)self;
2081 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOO:make_encoder", kwlist,
2082 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
2083 &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
2084 &namedtuple_as_object, &tuple_as_array, &bigint_as_string,
2085 &item_sort_key, &Decimal))
2086 return -1;
2088 s->markers = markers;
2089 s->defaultfn = defaultfn;
2090 s->encoder = encoder;
2091 s->indent = indent;
2092 s->key_separator = key_separator;
2093 s->item_separator = item_separator;
2094 s->sort_keys = sort_keys;
2095 s->skipkeys = skipkeys;
2096 s->key_memo = key_memo;
2097 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
2098 s->allow_nan = PyObject_IsTrue(allow_nan);
2099 s->use_decimal = PyObject_IsTrue(use_decimal);
2100 s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object);
2101 s->tuple_as_array = PyObject_IsTrue(tuple_as_array);
2102 s->bigint_as_string = PyObject_IsTrue(bigint_as_string);
2103 s->item_sort_key = item_sort_key;
2104 s->Decimal = Decimal;
2106 Py_INCREF(s->markers);
2107 Py_INCREF(s->defaultfn);
2108 Py_INCREF(s->encoder);
2109 Py_INCREF(s->indent);
2110 Py_INCREF(s->key_separator);
2111 Py_INCREF(s->item_separator);
2112 Py_INCREF(s->sort_keys);
2113 Py_INCREF(s->skipkeys);
2114 Py_INCREF(s->key_memo);
2115 Py_INCREF(s->item_sort_key);
2116 Py_INCREF(s->Decimal);
2117 return 0;
2120 static PyObject *
2121 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
2123 /* Python callable interface to encode_listencode_obj */
2124 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
2125 PyObject *obj;
2126 PyObject *rval;
2127 Py_ssize_t indent_level;
2128 PyEncoderObject *s;
2129 assert(PyEncoder_Check(self));
2130 s = (PyEncoderObject *)self;
2131 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
2132 &obj, _convertPyInt_AsSsize_t, &indent_level))
2133 return NULL;
2134 rval = PyList_New(0);
2135 if (rval == NULL)
2136 return NULL;
2137 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
2138 Py_DECREF(rval);
2139 return NULL;
2141 return rval;
2144 static PyObject *
2145 _encoded_const(PyObject *obj)
2147 /* Return the JSON string representation of None, True, False */
2148 if (obj == Py_None) {
2149 static PyObject *s_null = NULL;
2150 if (s_null == NULL) {
2151 s_null = PyString_InternFromString("null");
2153 Py_INCREF(s_null);
2154 return s_null;
2156 else if (obj == Py_True) {
2157 static PyObject *s_true = NULL;
2158 if (s_true == NULL) {
2159 s_true = PyString_InternFromString("true");
2161 Py_INCREF(s_true);
2162 return s_true;
2164 else if (obj == Py_False) {
2165 static PyObject *s_false = NULL;
2166 if (s_false == NULL) {
2167 s_false = PyString_InternFromString("false");
2169 Py_INCREF(s_false);
2170 return s_false;
2172 else {
2173 PyErr_SetString(PyExc_ValueError, "not a const");
2174 return NULL;
2178 static PyObject *
2179 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
2181 /* Return the JSON representation of a PyFloat */
2182 double i = PyFloat_AS_DOUBLE(obj);
2183 if (!Py_IS_FINITE(i)) {
2184 if (!s->allow_nan) {
2185 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
2186 return NULL;
2188 if (i > 0) {
2189 return PyString_FromString("Infinity");
2191 else if (i < 0) {
2192 return PyString_FromString("-Infinity");
2194 else {
2195 return PyString_FromString("NaN");
2198 /* Use a better float format here? */
2199 return PyObject_Repr(obj);
2202 static PyObject *
2203 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
2205 /* Return the JSON representation of a string */
2206 if (s->fast_encode)
2207 return py_encode_basestring_ascii(NULL, obj);
2208 else
2209 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
2212 static int
2213 _steal_list_append(PyObject *lst, PyObject *stolen)
2215 /* Append stolen and then decrement its reference count */
2216 int rval = PyList_Append(lst, stolen);
2217 Py_DECREF(stolen);
2218 return rval;
2221 static int
2222 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2224 /* Encode Python object obj to a JSON term, rval is a PyList */
2225 int rv = -1;
2226 if (Py_EnterRecursiveCall(" while encoding a JSON document"))
2227 return rv;
2228 do {
2229 if (obj == Py_None || obj == Py_True || obj == Py_False) {
2230 PyObject *cstr = _encoded_const(obj);
2231 if (cstr != NULL)
2232 rv = _steal_list_append(rval, cstr);
2234 else if (PyString_Check(obj) || PyUnicode_Check(obj))
2236 PyObject *encoded = encoder_encode_string(s, obj);
2237 if (encoded != NULL)
2238 rv = _steal_list_append(rval, encoded);
2240 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2241 PyObject *encoded = PyObject_Str(obj);
2242 if (encoded != NULL) {
2243 if (s->bigint_as_string) {
2244 encoded = maybe_quote_bigint(encoded, obj);
2245 if (encoded == NULL)
2246 break;
2248 rv = _steal_list_append(rval, encoded);
2251 else if (PyFloat_Check(obj)) {
2252 PyObject *encoded = encoder_encode_float(s, obj);
2253 if (encoded != NULL)
2254 rv = _steal_list_append(rval, encoded);
2256 else if (s->namedtuple_as_object && _is_namedtuple(obj)) {
2257 PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL);
2258 if (newobj != NULL) {
2259 rv = encoder_listencode_dict(s, rval, newobj, indent_level);
2260 Py_DECREF(newobj);
2263 else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
2264 rv = encoder_listencode_list(s, rval, obj, indent_level);
2266 else if (PyDict_Check(obj)) {
2267 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2269 else if (s->use_decimal && PyObject_TypeCheck(obj, s->Decimal)) {
2270 PyObject *encoded = PyObject_Str(obj);
2271 if (encoded != NULL)
2272 rv = _steal_list_append(rval, encoded);
2274 else {
2275 PyObject *ident = NULL;
2276 PyObject *newobj;
2277 if (s->markers != Py_None) {
2278 int has_key;
2279 ident = PyLong_FromVoidPtr(obj);
2280 if (ident == NULL)
2281 break;
2282 has_key = PyDict_Contains(s->markers, ident);
2283 if (has_key) {
2284 if (has_key != -1)
2285 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2286 Py_DECREF(ident);
2287 break;
2289 if (PyDict_SetItem(s->markers, ident, obj)) {
2290 Py_DECREF(ident);
2291 break;
2294 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2295 if (newobj == NULL) {
2296 Py_XDECREF(ident);
2297 break;
2299 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2300 Py_DECREF(newobj);
2301 if (rv) {
2302 Py_XDECREF(ident);
2303 rv = -1;
2305 else if (ident != NULL) {
2306 if (PyDict_DelItem(s->markers, ident)) {
2307 Py_XDECREF(ident);
2308 rv = -1;
2310 Py_XDECREF(ident);
2313 } while (0);
2314 Py_LeaveRecursiveCall();
2315 return rv;
2318 static int
2319 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2321 /* Encode Python dict dct a JSON term, rval is a PyList */
2322 static PyObject *open_dict = NULL;
2323 static PyObject *close_dict = NULL;
2324 static PyObject *empty_dict = NULL;
2325 static PyObject *iteritems = NULL;
2326 PyObject *kstr = NULL;
2327 PyObject *ident = NULL;
2328 PyObject *iter = NULL;
2329 PyObject *item = NULL;
2330 PyObject *items = NULL;
2331 PyObject *encoded = NULL;
2332 int skipkeys;
2333 Py_ssize_t idx;
2335 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) {
2336 open_dict = PyString_InternFromString("{");
2337 close_dict = PyString_InternFromString("}");
2338 empty_dict = PyString_InternFromString("{}");
2339 iteritems = PyString_InternFromString("iteritems");
2340 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL)
2341 return -1;
2343 if (PyDict_Size(dct) == 0)
2344 return PyList_Append(rval, empty_dict);
2346 if (s->markers != Py_None) {
2347 int has_key;
2348 ident = PyLong_FromVoidPtr(dct);
2349 if (ident == NULL)
2350 goto bail;
2351 has_key = PyDict_Contains(s->markers, ident);
2352 if (has_key) {
2353 if (has_key != -1)
2354 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2355 goto bail;
2357 if (PyDict_SetItem(s->markers, ident, dct)) {
2358 goto bail;
2362 if (PyList_Append(rval, open_dict))
2363 goto bail;
2365 if (s->indent != Py_None) {
2366 /* TODO: DOES NOT RUN */
2367 indent_level += 1;
2369 newline_indent = '\n' + (_indent * _current_indent_level)
2370 separator = _item_separator + newline_indent
2371 buf += newline_indent
2375 if (PyCallable_Check(s->item_sort_key)) {
2376 if (PyDict_CheckExact(dct))
2377 items = PyDict_Items(dct);
2378 else
2379 items = PyMapping_Items(dct);
2380 PyObject_CallMethod(items, "sort", "OO", Py_None, s->item_sort_key);
2382 else if (PyObject_IsTrue(s->sort_keys)) {
2383 /* First sort the keys then replace them with (key, value) tuples. */
2384 Py_ssize_t i, nitems;
2385 if (PyDict_CheckExact(dct))
2386 items = PyDict_Keys(dct);
2387 else
2388 items = PyMapping_Keys(dct);
2389 if (items == NULL)
2390 goto bail;
2391 if (!PyList_Check(items)) {
2392 PyErr_SetString(PyExc_ValueError, "keys must return list");
2393 goto bail;
2395 if (PyList_Sort(items) < 0)
2396 goto bail;
2397 nitems = PyList_GET_SIZE(items);
2398 for (i = 0; i < nitems; i++) {
2399 PyObject *key, *value;
2400 key = PyList_GET_ITEM(items, i);
2401 value = PyDict_GetItem(dct, key);
2402 item = PyTuple_Pack(2, key, value);
2403 if (item == NULL)
2404 goto bail;
2405 PyList_SET_ITEM(items, i, item);
2406 Py_DECREF(key);
2409 else {
2410 if (PyDict_CheckExact(dct))
2411 items = PyDict_Items(dct);
2412 else
2413 items = PyMapping_Items(dct);
2415 if (items == NULL)
2416 goto bail;
2417 iter = PyObject_GetIter(items);
2418 Py_DECREF(items);
2419 if (iter == NULL)
2420 goto bail;
2422 skipkeys = PyObject_IsTrue(s->skipkeys);
2423 idx = 0;
2424 while ((item = PyIter_Next(iter))) {
2425 PyObject *encoded, *key, *value;
2426 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
2427 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
2428 goto bail;
2430 key = PyTuple_GET_ITEM(item, 0);
2431 if (key == NULL)
2432 goto bail;
2433 value = PyTuple_GET_ITEM(item, 1);
2434 if (value == NULL)
2435 goto bail;
2437 encoded = PyDict_GetItem(s->key_memo, key);
2438 if (encoded != NULL) {
2439 Py_INCREF(encoded);
2441 else if (PyString_Check(key) || PyUnicode_Check(key)) {
2442 Py_INCREF(key);
2443 kstr = key;
2445 else if (PyFloat_Check(key)) {
2446 kstr = encoder_encode_float(s, key);
2447 if (kstr == NULL)
2448 goto bail;
2450 else if (key == Py_True || key == Py_False || key == Py_None) {
2451 /* This must come before the PyInt_Check because
2452 True and False are also 1 and 0.*/
2453 kstr = _encoded_const(key);
2454 if (kstr == NULL)
2455 goto bail;
2457 else if (PyInt_Check(key) || PyLong_Check(key)) {
2458 kstr = PyObject_Str(key);
2459 if (kstr == NULL)
2460 goto bail;
2462 else if (skipkeys) {
2463 Py_DECREF(item);
2464 continue;
2466 else {
2467 /* TODO: include repr of key */
2468 PyErr_SetString(PyExc_TypeError, "keys must be a string");
2469 goto bail;
2472 if (idx) {
2473 if (PyList_Append(rval, s->item_separator))
2474 goto bail;
2477 if (encoded == NULL) {
2478 encoded = encoder_encode_string(s, kstr);
2479 Py_CLEAR(kstr);
2480 if (encoded == NULL)
2481 goto bail;
2482 if (PyDict_SetItem(s->key_memo, key, encoded))
2483 goto bail;
2485 if (PyList_Append(rval, encoded)) {
2486 goto bail;
2488 Py_CLEAR(encoded);
2489 if (PyList_Append(rval, s->key_separator))
2490 goto bail;
2491 if (encoder_listencode_obj(s, rval, value, indent_level))
2492 goto bail;
2493 Py_CLEAR(item);
2494 idx += 1;
2496 Py_CLEAR(iter);
2497 if (PyErr_Occurred())
2498 goto bail;
2499 if (ident != NULL) {
2500 if (PyDict_DelItem(s->markers, ident))
2501 goto bail;
2502 Py_CLEAR(ident);
2504 if (s->indent != Py_None) {
2505 /* TODO: DOES NOT RUN */
2506 indent_level -= 1;
2508 yield '\n' + (_indent * _current_indent_level)
2511 if (PyList_Append(rval, close_dict))
2512 goto bail;
2513 return 0;
2515 bail:
2516 Py_XDECREF(encoded);
2517 Py_XDECREF(items);
2518 Py_XDECREF(iter);
2519 Py_XDECREF(kstr);
2520 Py_XDECREF(ident);
2521 return -1;
2525 static int
2526 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2528 /* Encode Python list seq to a JSON term, rval is a PyList */
2529 static PyObject *open_array = NULL;
2530 static PyObject *close_array = NULL;
2531 static PyObject *empty_array = NULL;
2532 PyObject *ident = NULL;
2533 PyObject *iter = NULL;
2534 PyObject *obj = NULL;
2535 int is_true;
2536 int i = 0;
2538 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2539 open_array = PyString_InternFromString("[");
2540 close_array = PyString_InternFromString("]");
2541 empty_array = PyString_InternFromString("[]");
2542 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2543 return -1;
2545 ident = NULL;
2546 is_true = PyObject_IsTrue(seq);
2547 if (is_true == -1)
2548 return -1;
2549 else if (is_true == 0)
2550 return PyList_Append(rval, empty_array);
2552 if (s->markers != Py_None) {
2553 int has_key;
2554 ident = PyLong_FromVoidPtr(seq);
2555 if (ident == NULL)
2556 goto bail;
2557 has_key = PyDict_Contains(s->markers, ident);
2558 if (has_key) {
2559 if (has_key != -1)
2560 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2561 goto bail;
2563 if (PyDict_SetItem(s->markers, ident, seq)) {
2564 goto bail;
2568 iter = PyObject_GetIter(seq);
2569 if (iter == NULL)
2570 goto bail;
2572 if (PyList_Append(rval, open_array))
2573 goto bail;
2574 if (s->indent != Py_None) {
2575 /* TODO: DOES NOT RUN */
2576 indent_level += 1;
2578 newline_indent = '\n' + (_indent * _current_indent_level)
2579 separator = _item_separator + newline_indent
2580 buf += newline_indent
2583 while ((obj = PyIter_Next(iter))) {
2584 if (i) {
2585 if (PyList_Append(rval, s->item_separator))
2586 goto bail;
2588 if (encoder_listencode_obj(s, rval, obj, indent_level))
2589 goto bail;
2590 i++;
2591 Py_CLEAR(obj);
2593 Py_CLEAR(iter);
2594 if (PyErr_Occurred())
2595 goto bail;
2596 if (ident != NULL) {
2597 if (PyDict_DelItem(s->markers, ident))
2598 goto bail;
2599 Py_CLEAR(ident);
2601 if (s->indent != Py_None) {
2602 /* TODO: DOES NOT RUN */
2603 indent_level -= 1;
2605 yield '\n' + (_indent * _current_indent_level)
2608 if (PyList_Append(rval, close_array))
2609 goto bail;
2610 return 0;
2612 bail:
2613 Py_XDECREF(obj);
2614 Py_XDECREF(iter);
2615 Py_XDECREF(ident);
2616 return -1;
2619 static void
2620 encoder_dealloc(PyObject *self)
2622 /* Deallocate Encoder */
2623 encoder_clear(self);
2624 Py_TYPE(self)->tp_free(self);
2627 static int
2628 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2630 PyEncoderObject *s;
2631 assert(PyEncoder_Check(self));
2632 s = (PyEncoderObject *)self;
2633 Py_VISIT(s->markers);
2634 Py_VISIT(s->defaultfn);
2635 Py_VISIT(s->encoder);
2636 Py_VISIT(s->indent);
2637 Py_VISIT(s->key_separator);
2638 Py_VISIT(s->item_separator);
2639 Py_VISIT(s->sort_keys);
2640 Py_VISIT(s->skipkeys);
2641 Py_VISIT(s->key_memo);
2642 Py_VISIT(s->item_sort_key);
2643 return 0;
2646 static int
2647 encoder_clear(PyObject *self)
2649 /* Deallocate Encoder */
2650 PyEncoderObject *s;
2651 assert(PyEncoder_Check(self));
2652 s = (PyEncoderObject *)self;
2653 Py_CLEAR(s->markers);
2654 Py_CLEAR(s->defaultfn);
2655 Py_CLEAR(s->encoder);
2656 Py_CLEAR(s->indent);
2657 Py_CLEAR(s->key_separator);
2658 Py_CLEAR(s->item_separator);
2659 Py_CLEAR(s->sort_keys);
2660 Py_CLEAR(s->skipkeys);
2661 Py_CLEAR(s->key_memo);
2662 Py_CLEAR(s->item_sort_key);
2663 Py_CLEAR(s->Decimal);
2664 return 0;
2667 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2669 static
2670 PyTypeObject PyEncoderType = {
2671 PyObject_HEAD_INIT(NULL)
2672 0, /* tp_internal */
2673 "simplejson._speedups.Encoder", /* tp_name */
2674 sizeof(PyEncoderObject), /* tp_basicsize */
2675 0, /* tp_itemsize */
2676 encoder_dealloc, /* tp_dealloc */
2677 0, /* tp_print */
2678 0, /* tp_getattr */
2679 0, /* tp_setattr */
2680 0, /* tp_compare */
2681 0, /* tp_repr */
2682 0, /* tp_as_number */
2683 0, /* tp_as_sequence */
2684 0, /* tp_as_mapping */
2685 0, /* tp_hash */
2686 encoder_call, /* tp_call */
2687 0, /* tp_str */
2688 0, /* tp_getattro */
2689 0, /* tp_setattro */
2690 0, /* tp_as_buffer */
2691 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2692 encoder_doc, /* tp_doc */
2693 encoder_traverse, /* tp_traverse */
2694 encoder_clear, /* tp_clear */
2695 0, /* tp_richcompare */
2696 0, /* tp_weaklistoffset */
2697 0, /* tp_iter */
2698 0, /* tp_iternext */
2699 0, /* tp_methods */
2700 encoder_members, /* tp_members */
2701 0, /* tp_getset */
2702 0, /* tp_base */
2703 0, /* tp_dict */
2704 0, /* tp_descr_get */
2705 0, /* tp_descr_set */
2706 0, /* tp_dictoffset */
2707 encoder_init, /* tp_init */
2708 0, /* tp_alloc */
2709 encoder_new, /* tp_new */
2710 0, /* tp_free */
2713 static PyMethodDef speedups_methods[] = {
2714 {"encode_basestring_ascii",
2715 (PyCFunction)py_encode_basestring_ascii,
2716 METH_O,
2717 pydoc_encode_basestring_ascii},
2718 {"scanstring",
2719 (PyCFunction)py_scanstring,
2720 METH_VARARGS,
2721 pydoc_scanstring},
2722 {NULL, NULL, 0, NULL}
2725 PyDoc_STRVAR(module_doc,
2726 "simplejson speedups\n");
2728 void
2729 init_speedups(void)
2731 PyObject *m;
2732 PyScannerType.tp_new = PyType_GenericNew;
2733 if (PyType_Ready(&PyScannerType) < 0)
2734 return;
2735 PyEncoderType.tp_new = PyType_GenericNew;
2736 if (PyType_Ready(&PyEncoderType) < 0)
2737 return;
2740 m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2741 Py_INCREF((PyObject*)&PyScannerType);
2742 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2743 Py_INCREF((PyObject*)&PyEncoderType);
2744 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);