2 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
4 * Jansson is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
12 #include "jansson_private.h"
25 #include "strbuffer.h"
/* Stream states. stream_get() returns the state itself once it is no longer
   STREAM_STATE_OK, so the two failure states are negative and cannot collide
   with a byte value. Negative values are parenthesized so the macros stay
   safe inside larger expressions. */
#define STREAM_STATE_OK    0
#define STREAM_STATE_EOF   (-1)
#define STREAM_STATE_ERROR (-2)

/* Token types. Single-character tokens are stored as the character itself
   (lex->token is compared against '{', '}', ':' ...), so the named tokens
   below use values outside the single-byte range. */
#define TOKEN_INVALID (-1)
#define TOKEN_STRING  256
#define TOKEN_INTEGER 257
#define TOKEN_REAL    258
#define TOKEN_TRUE    259
#define TOKEN_FALSE   260
#define TOKEN_NULL    261
/* Locale independent versions of isxxx() functions.
   These are macros and evaluate their argument more than once, so only pass
   expressions without side effects. */
#define l_isupper(c) ('A' <= (c) && (c) <= 'Z')
#define l_islower(c) ('a' <= (c) && (c) <= 'z')
#define l_isalpha(c) (l_isupper(c) || l_islower(c))
#define l_isdigit(c) ('0' <= (c) && (c) <= '9')
#define l_isxdigit(c)                                                          \
    (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f'))
/* Input callback type: reads one byte from the source identified by data,
   converts it to unsigned char and then to int, and returns it. Returns EOF
   at end of file. This matches the behaviour of fgetc(). */
typedef int (*get_func)(void *data);
61 int column
, last_column
;
67 strbuffer_t saved_text
;
/* Recover the enclosing lex_t from a pointer to its embedded `stream` member.
   The macro parameter is deliberately not named `stream`: if it were, the
   member-name argument passed to container_of() would be substituted as
   well, and the macro would only work when the caller's expression happened
   to be spelled exactly `stream`. */
#define stream_to_lex(stream_ptr) container_of(stream_ptr, lex_t, stream)
84 /*** error reporting ***/
86 static void error_set(json_error_t
*error
, const lex_t
*lex
,
87 enum json_error_code code
,
88 const char *msg
, ...) {
90 char msg_text
[JSON_ERROR_TEXT_LENGTH
];
91 char msg_with_context
[JSON_ERROR_TEXT_LENGTH
];
93 int line
= -1, col
= -1;
95 const char *result
= msg_text
;
101 vsnprintf(msg_text
, JSON_ERROR_TEXT_LENGTH
, msg
, ap
);
102 msg_text
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
106 const char *saved_text
= strbuffer_value(&lex
->saved_text
);
108 line
= lex
->stream
.line
;
109 col
= lex
->stream
.column
;
110 pos
= lex
->stream
.position
;
112 if (saved_text
&& saved_text
[0]) {
113 if (lex
->saved_text
.length
<= 20) {
114 int ret
= snprintf(msg_with_context
, JSON_ERROR_TEXT_LENGTH
, "%s near '%s'", msg_text
, saved_text
);
116 jsonp_error_set(error
, line
, col
, pos
, code
, "%s", "internal snprint error");
119 msg_with_context
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
120 result
= msg_with_context
;
123 if (code
== json_error_invalid_syntax
) {
124 /* More specific error code for premature end of file. */
125 code
= json_error_premature_end_of_input
;
127 if (lex
->stream
.state
== STREAM_STATE_ERROR
) {
128 /* No context for UTF-8 decoding errors */
131 int ret
= snprintf(msg_with_context
, JSON_ERROR_TEXT_LENGTH
, "%s near end of file", msg_text
);
133 jsonp_error_set(error
, line
, col
, pos
, code
, "%s", "internal snprint error");
136 msg_with_context
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
137 result
= msg_with_context
;
142 jsonp_error_set(error
, line
, col
, pos
, code
, "%s", result
);
146 /*** lexical analyzer ***/
149 stream_init(stream_t
*stream
, get_func get
, void *data
) {
152 stream
->buffer
[0] = '\0';
153 stream
->buffer_pos
= 0;
155 stream
->state
= STREAM_STATE_OK
;
158 stream
->position
= 0;
161 static int stream_get(stream_t
*stream
, json_error_t
*error
) {
164 if (stream
->state
!= STREAM_STATE_OK
)
165 return stream
->state
;
167 if (!stream
->buffer
[stream
->buffer_pos
]) {
168 c
= stream
->get(stream
->data
);
170 stream
->state
= STREAM_STATE_EOF
;
171 return STREAM_STATE_EOF
;
174 stream
->buffer
[0] = c
;
175 stream
->buffer_pos
= 0;
177 if (0x80 <= c
&& c
<= 0xFF) {
178 /* multi-byte UTF-8 sequence */
181 count
= utf8_check_first(c
);
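186 // TODO(review): what if count == 1? The read loop below would then be skipped entirely; confirm utf8_check_first() cannot return 1 for a byte in 0x80..0xFF.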
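191 // Note: with count == 4 the loop below writes buffer[1..3] and exits with i == 4 (not 5); the terminator later goes to buffer[count] == buffer[4], so the buffer must hold at least 5 bytes (its declaration is not visible here - verify).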
192 for (i
= 1; i
< count
; i
++)
193 stream
->buffer
[i
] = stream
->get(stream
->data
);
195 if (!utf8_check_full(stream
->buffer
, count
, NULL
))
198 stream
->buffer
[count
] = '\0';
200 stream
->buffer
[1] = '\0';
203 c
= stream
->buffer
[stream
->buffer_pos
++];
208 stream
->last_column
= stream
->column
;
210 } else if (utf8_check_first(c
)) {
211 /* track the Unicode character column, so increment only if
212 this is the first character of a UTF-8 sequence */
219 stream
->state
= STREAM_STATE_ERROR
;
220 error_set(error
, stream_to_lex(stream
), json_error_invalid_utf8
, "unable to decode byte 0x%x", c
);
221 return STREAM_STATE_ERROR
;
224 static void stream_unget(stream_t
*stream
, int c
) {
225 if (c
== STREAM_STATE_EOF
|| c
== STREAM_STATE_ERROR
)
231 stream
->column
= stream
->last_column
;
232 } else if (utf8_check_first(c
))
235 assert(stream
->buffer_pos
> 0);
236 stream
->buffer_pos
--;
237 assert(stream
->buffer
[stream
->buffer_pos
] == c
);
241 static int lex_get(lex_t
*lex
, json_error_t
*error
) {
242 return stream_get(&lex
->stream
, error
);
245 static void lex_save(lex_t
*lex
, int c
) {
246 strbuffer_append_byte(&lex
->saved_text
, c
);
249 static int lex_get_save(lex_t
*lex
, json_error_t
*error
) {
250 int c
= stream_get(&lex
->stream
, error
);
251 if (c
!= STREAM_STATE_EOF
&& c
!= STREAM_STATE_ERROR
)
256 static void lex_unget(lex_t
*lex
, int c
) {
257 stream_unget(&lex
->stream
, c
);
260 static void lex_unget_unsave(lex_t
*lex
, int c
) {
261 if (c
!= STREAM_STATE_EOF
&& c
!= STREAM_STATE_ERROR
) {
262 /* Since we treat warnings as errors, when assertions are turned
263 * off the "d" variable would be set but never used. Which is
264 * treated as an error by GCC.
269 stream_unget(&lex
->stream
, c
);
273 strbuffer_pop(&lex
->saved_text
);
278 static void lex_save_cached(lex_t
*lex
) {
279 while (lex
->stream
.buffer
[lex
->stream
.buffer_pos
] != '\0') {
280 lex_save(lex
, lex
->stream
.buffer
[lex
->stream
.buffer_pos
]);
281 lex
->stream
.buffer_pos
++;
282 lex
->stream
.position
++;
286 static void lex_free_string(lex_t
*lex
) {
287 jsonp_free(lex
->value
.string
.val
);
288 lex
->value
.string
.val
= NULL
;
289 lex
->value
.string
.len
= 0;
292 /* assumes that str points to 'u' plus at least 4 valid hex digits */
293 static int32_t decode_unicode_escape(const char *str
) {
297 assert(str
[0] == 'u');
299 for (i
= 1; i
<= 4; i
++) {
304 else if (l_islower(c
))
305 value
+= c
- 'a' + 10;
306 else if (l_isupper(c
))
307 value
+= c
- 'A' + 10;
315 static void lex_scan_string(lex_t
*lex
, json_error_t
*error
) {
321 lex
->value
.string
.val
= NULL
;
322 lex
->token
= TOKEN_INVALID
;
324 c
= lex_get_save(lex
, error
);
327 if (c
== STREAM_STATE_ERROR
)
330 else if (c
== STREAM_STATE_EOF
) {
331 error_set(error
, lex
, json_error_premature_end_of_input
, "premature end of input");
335 else if (0 <= c
&& c
<= 0x1F) {
336 /* control character */
337 lex_unget_unsave(lex
, c
);
339 error_set(error
, lex
, json_error_invalid_syntax
, "unexpected newline");
341 error_set(error
, lex
, json_error_invalid_syntax
, "control character 0x%x", c
);
345 else if (c
== '\\') {
346 c
= lex_get_save(lex
, error
);
348 c
= lex_get_save(lex
, error
);
349 for (i
= 0; i
< 4; i
++) {
350 if (!l_isxdigit(c
)) {
351 error_set(error
, lex
, json_error_invalid_syntax
, "invalid escape");
354 c
= lex_get_save(lex
, error
);
356 } else if (c
== '"' || c
== '\\' || c
== '/' || c
== 'b' ||
357 c
== 'f' || c
== 'n' || c
== 'r' || c
== 't')
358 c
= lex_get_save(lex
, error
);
360 error_set(error
, lex
, json_error_invalid_syntax
, "invalid escape");
364 c
= lex_get_save(lex
, error
);
367 /* the actual value is at most of the same length as the source
369 - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
370 - a single \uXXXX escape (length 6) is converted to at most 3 bytes
371 - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
372 are converted to 4 bytes
374 t
= jsonp_malloc(lex
->saved_text
.length
+ 1);
376 /* this is not very nice, since TOKEN_INVALID is returned */
379 lex
->value
.string
.val
= t
;
381 /* + 1 to skip the " */
382 p
= strbuffer_value(&lex
->saved_text
) + 1;
391 value
= decode_unicode_escape(p
);
393 error_set(error
, lex
, json_error_invalid_syntax
, "invalid Unicode escape '%.6s'", p
- 1);
398 if (0xD800 <= value
&& value
<= 0xDBFF) {
400 if (*p
== '\\' && *(p
+ 1) == 'u') {
401 int32_t value2
= decode_unicode_escape(++p
);
403 error_set(error
, lex
, json_error_invalid_syntax
, "invalid Unicode escape '%.6s'", p
- 1);
408 if (0xDC00 <= value2
&& value2
<= 0xDFFF) {
409 /* valid second surrogate */
411 ((value
- 0xD800) << 10) +
415 /* invalid second surrogate */
416 error_set(error
, lex
,
417 json_error_invalid_syntax
,
418 "invalid Unicode '\\u%04X\\u%04X'",
423 /* no second surrogate */
424 error_set(error
, lex
, json_error_invalid_syntax
, "invalid Unicode '\\u%04X'",
428 } else if (0xDC00 <= value
&& value
<= 0xDFFF) {
429 error_set(error
, lex
, json_error_invalid_syntax
, "invalid Unicode '\\u%04X'", value
);
433 if (utf8_encode(value
, t
, &length
))
468 lex
->value
.string
.len
= t
- lex
->value
.string
.val
;
469 lex
->token
= TOKEN_STRING
;
473 lex_free_string(lex
);
/* Pick the string-to-integer conversion used for integer tokens
   (called as json_strtoint(text, &end, 10)): a 64-bit variant when
   JSON_INTEGER_IS_LONG_LONG is set, strtol() otherwise. */
#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
#if JSON_INTEGER_IS_LONG_LONG
#ifdef _MSC_VER /* Microsoft Visual Studio */
#define json_strtoint _strtoi64
#else
#define json_strtoint strtoll
#endif
#else
#define json_strtoint strtol
#endif
#endif
488 static int lex_scan_number(lex_t
*lex
, int c
, json_error_t
*error
) {
489 const char *saved_text
;
493 lex
->token
= TOKEN_INVALID
;
496 c
= lex_get_save(lex
, error
);
499 c
= lex_get_save(lex
, error
);
501 lex_unget_unsave(lex
, c
);
504 } else if (l_isdigit(c
)) {
506 c
= lex_get_save(lex
, error
);
507 while (l_isdigit(c
));
509 lex_unget_unsave(lex
, c
);
513 if (!(lex
->flags
& JSON_DECODE_INT_AS_REAL
) &&
514 c
!= '.' && c
!= 'E' && c
!= 'e') {
517 lex_unget_unsave(lex
, c
);
519 saved_text
= strbuffer_value(&lex
->saved_text
);
522 intval
= json_strtoint(saved_text
, &end
, 10);
523 if (errno
== ERANGE
) {
525 error_set(error
, lex
, json_error_numeric_overflow
, "too big negative integer");
527 error_set(error
, lex
, json_error_numeric_overflow
, "too big integer");
531 assert(end
== saved_text
+ lex
->saved_text
.length
);
533 lex
->token
= TOKEN_INTEGER
;
534 lex
->value
.integer
= intval
;
539 c
= lex_get(lex
, error
);
547 c
= lex_get_save(lex
, error
);
548 while (l_isdigit(c
));
551 if (c
== 'E' || c
== 'e') {
552 c
= lex_get_save(lex
, error
);
553 if (c
== '+' || c
== '-')
554 c
= lex_get_save(lex
, error
);
557 lex_unget_unsave(lex
, c
);
562 c
= lex_get_save(lex
, error
);
563 while (l_isdigit(c
));
566 lex_unget_unsave(lex
, c
);
568 if (jsonp_strtod(&lex
->saved_text
, &doubleval
)) {
569 error_set(error
, lex
, json_error_numeric_overflow
, "real number overflow");
573 lex
->token
= TOKEN_REAL
;
574 lex
->value
.real
= doubleval
;
581 static int lex_scan(lex_t
*lex
, json_error_t
*error
) {
584 strbuffer_clear(&lex
->saved_text
);
586 if (lex
->token
== TOKEN_STRING
)
587 lex_free_string(lex
);
590 c
= lex_get(lex
, error
);
591 while (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r');
593 if (c
== STREAM_STATE_EOF
) {
594 lex
->token
= TOKEN_EOF
;
598 if (c
== STREAM_STATE_ERROR
) {
599 lex
->token
= TOKEN_INVALID
;
605 if (c
== '{' || c
== '}' || c
== '[' || c
== ']' || c
== ':' || c
== ',')
609 lex_scan_string(lex
, error
);
611 else if (l_isdigit(c
) || c
== '-') {
612 if (lex_scan_number(lex
, c
, error
))
616 else if (l_isalpha(c
)) {
617 /* eat up the whole identifier for clearer error messages */
618 const char *saved_text
;
621 c
= lex_get_save(lex
, error
);
622 while (l_isalpha(c
));
623 lex_unget_unsave(lex
, c
);
625 saved_text
= strbuffer_value(&lex
->saved_text
);
627 if (strcmp(saved_text
, "true") == 0)
628 lex
->token
= TOKEN_TRUE
;
629 else if (strcmp(saved_text
, "false") == 0)
630 lex
->token
= TOKEN_FALSE
;
631 else if (strcmp(saved_text
, "null") == 0)
632 lex
->token
= TOKEN_NULL
;
634 lex
->token
= TOKEN_INVALID
;
638 /* save the rest of the input UTF-8 sequence to get an error
639 message of valid UTF-8 */
640 lex_save_cached(lex
);
641 lex
->token
= TOKEN_INVALID
;
648 static char *lex_steal_string(lex_t
*lex
, size_t *out_len
) {
650 if (lex
->token
== TOKEN_STRING
) {
651 result
= lex
->value
.string
.val
;
652 *out_len
= lex
->value
.string
.len
;
653 lex
->value
.string
.val
= NULL
;
654 lex
->value
.string
.len
= 0;
659 static int lex_init(lex_t
*lex
, get_func get
, size_t flags
, void *data
) {
660 stream_init(&lex
->stream
, get
, data
);
661 if (strbuffer_init(&lex
->saved_text
))
665 lex
->token
= TOKEN_INVALID
;
669 static void lex_close(lex_t
*lex
) {
670 if (lex
->token
== TOKEN_STRING
)
671 lex_free_string(lex
);
672 strbuffer_close(&lex
->saved_text
);
678 static json_t
*parse_value(lex_t
*lex
, size_t flags
, json_error_t
*error
);
680 static json_t
*parse_object(lex_t
*lex
, size_t flags
, json_error_t
*error
) {
681 json_t
*object
= json_object();
685 lex_scan(lex
, error
);
686 if (lex
->token
== '}')
694 if (lex
->token
!= TOKEN_STRING
) {
695 error_set(error
, lex
, json_error_invalid_syntax
, "string or '}' expected");
699 key
= lex_steal_string(lex
, &len
);
702 if (memchr(key
, '\0', len
)) {
704 error_set(error
, lex
, json_error_null_byte_in_key
, "NUL byte in object key not supported");
708 if (flags
& JSON_REJECT_DUPLICATES
) {
709 if (json_object_get(object
, key
)) {
711 error_set(error
, lex
, json_error_duplicate_key
, "duplicate object key");
716 lex_scan(lex
, error
);
717 if (lex
->token
!= ':') {
719 error_set(error
, lex
, json_error_invalid_syntax
, "':' expected");
723 lex_scan(lex
, error
);
724 value
= parse_value(lex
, flags
, error
);
730 if (json_object_set_new_nocheck(object
, key
, value
)) {
737 lex_scan(lex
, error
);
738 if (lex
->token
!= ',')
741 lex_scan(lex
, error
);
744 if (lex
->token
!= '}') {
745 error_set(error
, lex
, json_error_invalid_syntax
, "'}' expected");
756 static json_t
*parse_array(lex_t
*lex
, size_t flags
, json_error_t
*error
) {
757 json_t
*array
= json_array();
761 lex_scan(lex
, error
);
762 if (lex
->token
== ']')
766 json_t
*elem
= parse_value(lex
, flags
, error
);
770 if (json_array_append_new(array
, elem
)) {
774 lex_scan(lex
, error
);
775 if (lex
->token
!= ',')
778 lex_scan(lex
, error
);
781 if (lex
->token
!= ']') {
782 error_set(error
, lex
, json_error_invalid_syntax
, "']' expected");
793 static json_t
*parse_value(lex_t
*lex
, size_t flags
, json_error_t
*error
) {
797 if (lex
->depth
> JSON_PARSER_MAX_DEPTH
) {
798 error_set(error
, lex
, json_error_stack_overflow
, "maximum parsing depth reached");
802 switch (lex
->token
) {
804 const char *value
= lex
->value
.string
.val
;
805 size_t len
= lex
->value
.string
.len
;
807 if (!(flags
& JSON_ALLOW_NUL
)) {
808 if (memchr(value
, '\0', len
)) {
809 error_set(error
, lex
, json_error_null_character
, "\\u0000 is not allowed without JSON_ALLOW_NUL");
814 json
= jsonp_stringn_nocheck_own(value
, len
);
815 lex
->value
.string
.val
= NULL
;
816 lex
->value
.string
.len
= 0;
820 case TOKEN_INTEGER
: {
821 json
= json_integer(lex
->value
.integer
);
826 json
= json_real(lex
->value
.real
);
843 json
= parse_object(lex
, flags
, error
);
847 json
= parse_array(lex
, flags
, error
);
851 error_set(error
, lex
, json_error_invalid_syntax
, "invalid token");
855 error_set(error
, lex
, json_error_invalid_syntax
, "unexpected token");
866 static json_t
*parse_json(lex_t
*lex
, size_t flags
, json_error_t
*error
) {
871 lex_scan(lex
, error
);
872 if (!(flags
& JSON_DECODE_ANY
)) {
873 if (lex
->token
!= '[' && lex
->token
!= '{') {
874 error_set(error
, lex
, json_error_invalid_syntax
, "'[' or '{' expected");
879 result
= parse_value(lex
, flags
, error
);
883 if (!(flags
& JSON_DISABLE_EOF_CHECK
)) {
884 lex_scan(lex
, error
);
885 if (lex
->token
!= TOKEN_EOF
) {
886 error_set(error
, lex
, json_error_end_of_input_expected
, "end of file expected");
893 /* Save the position even though there was no error */
894 error
->position
= (int)lex
->stream
.position
;
905 static int string_get(void *data
) {
907 string_data_t
*stream
= (string_data_t
*)data
;
908 c
= stream
->data
[stream
->pos
];
913 return (unsigned char)c
;
917 json_t
*json_loads(const char *string
, size_t flags
, json_error_t
*error
) {
920 string_data_t stream_data
;
922 jsonp_error_init(error
, "<string>");
924 if (string
== NULL
) {
925 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
929 stream_data
.data
= string
;
932 if (lex_init(&lex
, string_get
, flags
, (void *)&stream_data
))
935 result
= parse_json(&lex
, flags
, error
);
947 static int buffer_get(void *data
) {
949 buffer_data_t
*stream
= data
;
950 if (stream
->pos
>= stream
->len
)
953 c
= stream
->data
[stream
->pos
];
955 return (unsigned char)c
;
958 json_t
*json_loadb(const char *buffer
, size_t buflen
, size_t flags
, json_error_t
*error
) {
961 buffer_data_t stream_data
;
963 jsonp_error_init(error
, "<buffer>");
965 if (buffer
== NULL
) {
966 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
970 stream_data
.data
= buffer
;
972 stream_data
.len
= buflen
;
974 if (lex_init(&lex
, buffer_get
, flags
, (void *)&stream_data
))
977 result
= parse_json(&lex
, flags
, error
);
983 json_t
*json_loadf(FILE *input
, size_t flags
, json_error_t
*error
) {
993 jsonp_error_init(error
, source
);
996 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1000 if (lex_init(&lex
, (get_func
)fgetc
, flags
, input
))
1003 result
= parse_json(&lex
, flags
, error
);
1009 static int fd_get_func(int *fd
) {
1010 #ifdef HAVE_UNISTD_H
1012 if (read(*fd
, &c
, 1) == 1)
1018 json_t
*json_loadfd(int input
, size_t flags
, json_error_t
*error
) {
1023 #ifdef HAVE_UNISTD_H
1024 if (input
== STDIN_FILENO
)
1028 source
= "<stream>";
1030 jsonp_error_init(error
, source
);
1033 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1037 if (lex_init(&lex
, (get_func
)fd_get_func
, flags
, &input
))
1040 result
= parse_json(&lex
, flags
, error
);
1046 json_t
*json_load_file(const char *path
, size_t flags
, json_error_t
*error
) {
1048 jsonp_error_init(error
, path
);
1051 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1055 FILE *fp
= fopen(path
, "rb");
1057 error_set(error
, NULL
, json_error_cannot_open_file
, "unable to open %s: %s", path
, strerror(errno
));
1061 result
= json_loadf(fp
, flags
, error
);
/* Size in bytes of the chunk buffer that callback_get() asks the user
   callback to fill on each refill. */
#define MAX_BUF_LEN 1024
1069 char data
[MAX_BUF_LEN
];
1072 json_load_callback_t callback
;
1076 static int callback_get(void *data
) {
1078 callback_data_t
*stream
= data
;
1080 if (stream
->pos
>= stream
->len
) {
1082 stream
->len
= stream
->callback(stream
->data
, MAX_BUF_LEN
, stream
->arg
);
1083 if (stream
->len
== 0 || stream
->len
== (size_t) - 1)
1087 c
= stream
->data
[stream
->pos
];
1089 return (unsigned char)c
;
1092 json_t
*json_load_callback(json_load_callback_t callback
, void *arg
, size_t flags
, json_error_t
*error
) {
1096 callback_data_t stream_data
;
1098 memset(&stream_data
, 0, sizeof(stream_data
));
1099 stream_data
.callback
= callback
;
1100 stream_data
.arg
= arg
;
1102 jsonp_error_init(error
, "<callback>");
1104 if (callback
== NULL
) {
1105 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1109 if (lex_init(&lex
, (get_func
)callback_get
, flags
, &stream_data
))
1112 result
= parse_json(&lex
, flags
, error
);