1 /*-------------------------------------------------------------------------
4 * JSON parser and lexer interfaces
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/common/jsonapi.c
12 *-------------------------------------------------------------------------
17 #include "postgres_fe.h"
20 #include "common/jsonapi.h"
21 #include "mb/pg_wchar.h"
22 #include "port/pg_lfind.h"
24 #ifdef JSONAPI_USE_PQEXPBUFFER
25 #include "pqexpbuffer.h"
27 #include "lib/stringinfo.h"
28 #include "miscadmin.h"
32 * By default, we will use palloc/pfree along with StringInfo. In libpq,
33 * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
35 #ifdef JSONAPI_USE_PQEXPBUFFER
37 #define STRDUP(s) strdup(s)
38 #define ALLOC(size) malloc(size)
39 #define ALLOC0(size) calloc(1, size)
40 #define REALLOC realloc
41 #define FREE(s) free(s)
43 #define jsonapi_appendStringInfo appendPQExpBuffer
44 #define jsonapi_appendBinaryStringInfo appendBinaryPQExpBuffer
45 #define jsonapi_appendStringInfoChar appendPQExpBufferChar
46 /* XXX should we add a macro version to PQExpBuffer? */
47 #define jsonapi_appendStringInfoCharMacro appendPQExpBufferChar
48 #define jsonapi_makeStringInfo createPQExpBuffer
49 #define jsonapi_initStringInfo initPQExpBuffer
50 #define jsonapi_resetStringInfo resetPQExpBuffer
51 #define jsonapi_termStringInfo termPQExpBuffer
52 #define jsonapi_destroyStringInfo destroyPQExpBuffer
54 #else /* !JSONAPI_USE_PQEXPBUFFER */
56 #define STRDUP(s) pstrdup(s)
57 #define ALLOC(size) palloc(size)
58 #define ALLOC0(size) palloc0(size)
59 #define REALLOC repalloc
65 * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
66 * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
67 * argument to avoid future hair-pulling.
69 #define FREE(s) do { \
76 #define jsonapi_appendStringInfo appendStringInfo
77 #define jsonapi_appendBinaryStringInfo appendBinaryStringInfo
78 #define jsonapi_appendStringInfoChar appendStringInfoChar
79 #define jsonapi_appendStringInfoCharMacro appendStringInfoCharMacro
80 #define jsonapi_makeStringInfo makeStringInfo
81 #define jsonapi_initStringInfo initStringInfo
82 #define jsonapi_resetStringInfo resetStringInfo
83 #define jsonapi_termStringInfo(s) pfree((s)->data)
84 #define jsonapi_destroyStringInfo destroyStringInfo
86 #endif /* JSONAPI_USE_PQEXPBUFFER */
89 * The context of the parser is maintained by the recursive descent
90 * mechanism, but is passed explicitly to the error reporting routine
91 * for better diagnostics.
93 typedef enum /* contexts of JSON parser */
95 JSON_PARSE_VALUE
, /* expecting a value */
96 JSON_PARSE_STRING
, /* expecting a string (for a field name) */
97 JSON_PARSE_ARRAY_START
, /* saw '[', expecting value or ']' */
98 JSON_PARSE_ARRAY_NEXT
, /* saw array element, expecting ',' or ']' */
99 JSON_PARSE_OBJECT_START
, /* saw '{', expecting label or '}' */
100 JSON_PARSE_OBJECT_LABEL
, /* saw object label, expecting ':' */
101 JSON_PARSE_OBJECT_NEXT
, /* saw object value, expecting ',' or '}' */
102 JSON_PARSE_OBJECT_COMMA
, /* saw object ',', expecting next label */
103 JSON_PARSE_END
, /* saw the end of a document, expect nothing */
107 * Setup for table-driven parser.
108 * These enums need to be separate from the JsonTokenType and from each other
109 * so we can have all of them on the prediction stack, which consists of
110 * tokens, non-terminals, and semantic action markers.
116 JSON_NT_ARRAY_ELEMENTS
,
117 JSON_NT_MORE_ARRAY_ELEMENTS
,
119 JSON_NT_MORE_KEY_PAIRS
,
124 JSON_SEM_OSTART
= 64,
128 JSON_SEM_OFIELD_INIT
,
129 JSON_SEM_OFIELD_START
,
131 JSON_SEM_AELEM_START
,
133 JSON_SEM_SCALAR_INIT
,
134 JSON_SEM_SCALAR_CALL
,
138 * struct containing the 3 stacks used in non-recursive parsing,
139 * and the token and value for scalars that need to be preserved
142 * typedef appears in jsonapi.h
144 struct JsonParserStack
149 /* these two are indexed by lex_level */
152 JsonTokenType scalar_tok
;
157 * struct containing state used when there is a possible partial token at the
158 * end of a json chunk when we are doing incremental parsing.
160 * typedef appears in jsonapi.h
162 struct JsonIncrementalState
166 bool partial_completed
;
167 jsonapi_StrValType partial_token
;
171 * constants and macros used in the nonrecursive parser
173 #define JSON_NUM_TERMINALS 13
174 #define JSON_NUM_NONTERMINALS 5
175 #define JSON_NT_OFFSET JSON_NT_JSON
176 /* for indexing the table */
177 #define OFS(NT) (NT) - JSON_NT_OFFSET
178 /* classify items we get off the stack */
179 #define IS_SEM(x) ((x) & 0x40)
180 #define IS_NT(x) ((x) & 0x20)
183 * These productions are stored in reverse order right to left so that when
184 * they are pushed on the stack what we expect next is at the top of the stack.
186 static char JSON_PROD_EPSILON
[] = {0}; /* epsilon - an empty production */
189 static char JSON_PROD_SCALAR_STRING
[] = {JSON_SEM_SCALAR_CALL
, JSON_TOKEN_STRING
, JSON_SEM_SCALAR_INIT
, 0};
192 static char JSON_PROD_SCALAR_NUMBER
[] = {JSON_SEM_SCALAR_CALL
, JSON_TOKEN_NUMBER
, JSON_SEM_SCALAR_INIT
, 0};
195 static char JSON_PROD_SCALAR_TRUE
[] = {JSON_SEM_SCALAR_CALL
, JSON_TOKEN_TRUE
, JSON_SEM_SCALAR_INIT
, 0};
197 /* JSON -> 'false' */
198 static char JSON_PROD_SCALAR_FALSE
[] = {JSON_SEM_SCALAR_CALL
, JSON_TOKEN_FALSE
, JSON_SEM_SCALAR_INIT
, 0};
201 static char JSON_PROD_SCALAR_NULL
[] = {JSON_SEM_SCALAR_CALL
, JSON_TOKEN_NULL
, JSON_SEM_SCALAR_INIT
, 0};
203 /* JSON -> '{' KEY_PAIRS '}' */
204 static char JSON_PROD_OBJECT
[] = {JSON_SEM_OEND
, JSON_TOKEN_OBJECT_END
, JSON_NT_KEY_PAIRS
, JSON_TOKEN_OBJECT_START
, JSON_SEM_OSTART
, 0};
206 /* JSON -> '[' ARRAY_ELEMENTS ']' */
207 static char JSON_PROD_ARRAY
[] = {JSON_SEM_AEND
, JSON_TOKEN_ARRAY_END
, JSON_NT_ARRAY_ELEMENTS
, JSON_TOKEN_ARRAY_START
, JSON_SEM_ASTART
, 0};
209 /* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
210 static char JSON_PROD_ARRAY_ELEMENTS
[] = {JSON_NT_MORE_ARRAY_ELEMENTS
, JSON_SEM_AELEM_END
, JSON_NT_JSON
, JSON_SEM_AELEM_START
, 0};
212 /* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
213 static char JSON_PROD_MORE_ARRAY_ELEMENTS
[] = {JSON_NT_MORE_ARRAY_ELEMENTS
, JSON_SEM_AELEM_END
, JSON_NT_JSON
, JSON_SEM_AELEM_START
, JSON_TOKEN_COMMA
, 0};
215 /* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
216 static char JSON_PROD_KEY_PAIRS
[] = {JSON_NT_MORE_KEY_PAIRS
, JSON_SEM_OFIELD_END
, JSON_NT_JSON
, JSON_SEM_OFIELD_START
, JSON_TOKEN_COLON
, JSON_TOKEN_STRING
, JSON_SEM_OFIELD_INIT
, 0};
218 /* MORE_KEY_PAIRS -> ',' string ':' JSON MORE_KEY_PAIRS */
219 static char JSON_PROD_MORE_KEY_PAIRS
[] = {JSON_NT_MORE_KEY_PAIRS
, JSON_SEM_OFIELD_END
, JSON_NT_JSON
, JSON_SEM_OFIELD_START
, JSON_TOKEN_COLON
, JSON_TOKEN_STRING
, JSON_SEM_OFIELD_INIT
, JSON_TOKEN_COMMA
, 0};
222 * Note: there are also epsilon productions for ARRAY_ELEMENTS,
223 * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS
224 * They are all the same as none require any semantic actions.
228 * Table connecting the productions with their director sets of
230 * Any combination not specified here represents an error.
239 #define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
241 static td_entry td_parser_table
[JSON_NUM_NONTERMINALS
][JSON_NUM_TERMINALS
] =
244 [OFS(JSON_NT_JSON
)][JSON_TOKEN_STRING
] = TD_ENTRY(JSON_PROD_SCALAR_STRING
),
245 [OFS(JSON_NT_JSON
)][JSON_TOKEN_NUMBER
] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER
),
246 [OFS(JSON_NT_JSON
)][JSON_TOKEN_TRUE
] = TD_ENTRY(JSON_PROD_SCALAR_TRUE
),
247 [OFS(JSON_NT_JSON
)][JSON_TOKEN_FALSE
] = TD_ENTRY(JSON_PROD_SCALAR_FALSE
),
248 [OFS(JSON_NT_JSON
)][JSON_TOKEN_NULL
] = TD_ENTRY(JSON_PROD_SCALAR_NULL
),
249 [OFS(JSON_NT_JSON
)][JSON_TOKEN_ARRAY_START
] = TD_ENTRY(JSON_PROD_ARRAY
),
250 [OFS(JSON_NT_JSON
)][JSON_TOKEN_OBJECT_START
] = TD_ENTRY(JSON_PROD_OBJECT
),
252 [OFS(JSON_NT_ARRAY_ELEMENTS
)][JSON_TOKEN_ARRAY_START
] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS
),
253 [OFS(JSON_NT_ARRAY_ELEMENTS
)][JSON_TOKEN_OBJECT_START
] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS
),
254 [OFS(JSON_NT_ARRAY_ELEMENTS
)][JSON_TOKEN_STRING
] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS
),
255 [OFS(JSON_NT_ARRAY_ELEMENTS
)][JSON_TOKEN_NUMBER
] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS
),
256 [OFS(JSON_NT_ARRAY_ELEMENTS
)][JSON_TOKEN_TRUE
] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS
),
257 [OFS(JSON_NT_ARRAY_ELEMENTS
)][JSON_TOKEN_FALSE
] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS
),
258 [OFS(JSON_NT_ARRAY_ELEMENTS
)][JSON_TOKEN_NULL
] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS
),
259 [OFS(JSON_NT_ARRAY_ELEMENTS
)][JSON_TOKEN_ARRAY_END
] = TD_ENTRY(JSON_PROD_EPSILON
),
260 /* MORE_ARRAY_ELEMENTS */
261 [OFS(JSON_NT_MORE_ARRAY_ELEMENTS
)][JSON_TOKEN_COMMA
] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS
),
262 [OFS(JSON_NT_MORE_ARRAY_ELEMENTS
)][JSON_TOKEN_ARRAY_END
] = TD_ENTRY(JSON_PROD_EPSILON
),
264 [OFS(JSON_NT_KEY_PAIRS
)][JSON_TOKEN_STRING
] = TD_ENTRY(JSON_PROD_KEY_PAIRS
),
265 [OFS(JSON_NT_KEY_PAIRS
)][JSON_TOKEN_OBJECT_END
] = TD_ENTRY(JSON_PROD_EPSILON
),
267 [OFS(JSON_NT_MORE_KEY_PAIRS
)][JSON_TOKEN_COMMA
] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS
),
268 [OFS(JSON_NT_MORE_KEY_PAIRS
)][JSON_TOKEN_OBJECT_END
] = TD_ENTRY(JSON_PROD_EPSILON
),
271 /* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
272 static char JSON_PROD_GOAL
[] = {JSON_TOKEN_END
, JSON_NT_JSON
, 0};
274 static inline JsonParseErrorType
json_lex_string(JsonLexContext
*lex
);
275 static inline JsonParseErrorType
json_lex_number(JsonLexContext
*lex
, const char *s
,
276 bool *num_err
, size_t *total_len
);
277 static inline JsonParseErrorType
parse_scalar(JsonLexContext
*lex
, const JsonSemAction
*sem
);
278 static JsonParseErrorType
parse_object_field(JsonLexContext
*lex
, const JsonSemAction
*sem
);
279 static JsonParseErrorType
parse_object(JsonLexContext
*lex
, const JsonSemAction
*sem
);
280 static JsonParseErrorType
parse_array_element(JsonLexContext
*lex
, const JsonSemAction
*sem
);
281 static JsonParseErrorType
parse_array(JsonLexContext
*lex
, const JsonSemAction
*sem
);
282 static JsonParseErrorType
report_parse_error(JsonParseContext ctx
, JsonLexContext
*lex
);
283 static bool allocate_incremental_state(JsonLexContext
*lex
);
284 static inline void set_fname(JsonLexContext
*lex
, char *fname
);
286 /* the null action object used for pure validation */
287 const JsonSemAction nullSemAction
=
289 NULL
, NULL
, NULL
, NULL
, NULL
,
290 NULL
, NULL
, NULL
, NULL
, NULL
293 /* sentinels used for out-of-memory conditions */
294 static JsonLexContext failed_oom
;
295 static JsonIncrementalState failed_inc_oom
;
297 /* Parser support routines */
302 * what is the current look_ahead token?
304 static inline JsonTokenType
305 lex_peek(JsonLexContext
*lex
)
307 return lex
->token_type
;
313 * move the lexer to the next token if the current look_ahead token matches
314 * the parameter token. Otherwise, report an error.
316 static inline JsonParseErrorType
317 lex_expect(JsonParseContext ctx
, JsonLexContext
*lex
, JsonTokenType token
)
319 if (lex_peek(lex
) == token
)
320 return json_lex(lex
);
322 return report_parse_error(ctx
, lex
);
325 /* chars to consider as part of an alphanumeric token */
326 #define JSON_ALPHANUMERIC_CHAR(c) \
327 (((c) >= 'a' && (c) <= 'z') || \
328 ((c) >= 'A' && (c) <= 'Z') || \
329 ((c) >= '0' && (c) <= '9') || \
334 * Utility function to check if a string is a valid JSON number.
336 * str is of length len, and need not be null-terminated.
339 IsValidJsonNumber(const char *str
, size_t len
)
343 JsonLexContext dummy_lex
= {0};
349 * json_lex_number expects a leading '-' to have been eaten already.
351 * having to cast away the constness of str is ugly, but there's not much
356 dummy_lex
.input
= str
+ 1;
357 dummy_lex
.input_length
= len
- 1;
361 dummy_lex
.input
= str
;
362 dummy_lex
.input_length
= len
;
365 dummy_lex
.token_start
= dummy_lex
.input
;
367 json_lex_number(&dummy_lex
, dummy_lex
.input
, &numeric_error
, &total_len
);
369 return (!numeric_error
) && (total_len
== dummy_lex
.input_length
);
373 * makeJsonLexContextCstringLen
374 * Initialize the given JsonLexContext object, or create one
376 * If a valid 'lex' pointer is given, it is initialized. This can
377 * be used for stack-allocated structs, saving overhead. If NULL is
378 * given, a new struct is allocated.
380 * If need_escapes is true, ->strval stores the unescaped lexemes.
381 * Unescaping is expensive, so only request it when necessary.
383 * If need_escapes is true or lex was given as NULL, then caller is
384 * responsible for freeing the returned struct, either by calling
385 * freeJsonLexContext() or (in backend environment) via memory context
388 * In shlib code, any out-of-memory failures will be deferred to time
389 * of use; this function is guaranteed to return a valid JsonLexContext.
392 makeJsonLexContextCstringLen(JsonLexContext
*lex
, const char *json
,
393 size_t len
, int encoding
, bool need_escapes
)
397 lex
= ALLOC0(sizeof(JsonLexContext
));
400 lex
->flags
|= JSONLEX_FREE_STRUCT
;
403 memset(lex
, 0, sizeof(JsonLexContext
));
405 lex
->errormsg
= NULL
;
406 lex
->input
= lex
->token_terminator
= lex
->line_start
= json
;
407 lex
->line_number
= 1;
408 lex
->input_length
= len
;
409 lex
->input_encoding
= encoding
;
410 lex
->need_escapes
= need_escapes
;
414 * This call can fail in shlib code. We defer error handling to time
415 * of use (json_lex_string()) since we might not need to parse any
418 lex
->strval
= jsonapi_makeStringInfo();
419 lex
->flags
|= JSONLEX_FREE_STRVAL
;
426 * Allocates the internal bookkeeping structures for incremental parsing. This
427 * can only fail in-band with shlib code.
429 #define JS_STACK_CHUNK_SIZE 64
430 #define JS_MAX_PROD_LEN 10 /* more than we need */
431 #define JSON_TD_MAX_STACK 6400 /* hard coded for now - this is a REALLY high
434 allocate_incremental_state(JsonLexContext
*lex
)
441 lex
->inc_state
= ALLOC0(sizeof(JsonIncrementalState
));
442 pstack
= ALLOC0(sizeof(JsonParserStack
));
443 prediction
= ALLOC(JS_STACK_CHUNK_SIZE
* JS_MAX_PROD_LEN
);
444 fnames
= ALLOC(JS_STACK_CHUNK_SIZE
* sizeof(char *));
445 fnull
= ALLOC(JS_STACK_CHUNK_SIZE
* sizeof(bool));
447 #ifdef JSONAPI_USE_PQEXPBUFFER
454 FREE(lex
->inc_state
);
460 lex
->inc_state
= &failed_inc_oom
;
465 jsonapi_initStringInfo(&(lex
->inc_state
->partial_token
));
466 lex
->pstack
= pstack
;
467 lex
->pstack
->stack_size
= JS_STACK_CHUNK_SIZE
;
468 lex
->pstack
->prediction
= prediction
;
469 lex
->pstack
->fnames
= fnames
;
470 lex
->pstack
->fnull
= fnull
;
473 * fnames between 0 and lex_level must always be defined so that
474 * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
477 Assert(lex
->lex_level
== 0);
478 lex
->pstack
->fnames
[0] = NULL
;
480 lex
->incremental
= true;
486 * makeJsonLexContextIncremental
488 * Similar to above but set up for use in incremental parsing. That means we
489 * need explicit stacks for predictions, field names and null indicators, but
490 * we don't need the input, that will be handed in bit by bit to the
491 * parse routine. We also need an accumulator for partial tokens in case
492 * the boundary between chunks happens to fall in the middle of a token.
494 * In shlib code, any out-of-memory failures will be deferred to time of use;
495 * this function is guaranteed to return a valid JsonLexContext.
498 makeJsonLexContextIncremental(JsonLexContext
*lex
, int encoding
,
503 lex
= ALLOC0(sizeof(JsonLexContext
));
507 lex
->flags
|= JSONLEX_FREE_STRUCT
;
510 memset(lex
, 0, sizeof(JsonLexContext
));
512 lex
->line_number
= 1;
513 lex
->input_encoding
= encoding
;
515 if (!allocate_incremental_state(lex
))
517 if (lex
->flags
& JSONLEX_FREE_STRUCT
)
523 /* lex->inc_state tracks the OOM failure; we can return here. */
527 lex
->need_escapes
= need_escapes
;
531 * This call can fail in shlib code. We defer error handling to time
532 * of use (json_lex_string()) since we might not need to parse any
535 lex
->strval
= jsonapi_makeStringInfo();
536 lex
->flags
|= JSONLEX_FREE_STRVAL
;
543 setJsonLexContextOwnsTokens(JsonLexContext
*lex
, bool owned_by_context
)
545 if (lex
->incremental
&& lex
->inc_state
->started
)
548 * Switching this flag after parsing has already started is a
555 if (owned_by_context
)
556 lex
->flags
|= JSONLEX_CTX_OWNS_TOKENS
;
558 lex
->flags
&= ~JSONLEX_CTX_OWNS_TOKENS
;
562 inc_lex_level(JsonLexContext
*lex
)
564 if (lex
->incremental
&& (lex
->lex_level
+ 1) >= lex
->pstack
->stack_size
)
566 size_t new_stack_size
;
567 char *new_prediction
;
571 new_stack_size
= lex
->pstack
->stack_size
+ JS_STACK_CHUNK_SIZE
;
573 new_prediction
= REALLOC(lex
->pstack
->prediction
,
574 new_stack_size
* JS_MAX_PROD_LEN
);
575 #ifdef JSONAPI_USE_PQEXPBUFFER
579 lex
->pstack
->prediction
= new_prediction
;
581 new_fnames
= REALLOC(lex
->pstack
->fnames
,
582 new_stack_size
* sizeof(char *));
583 #ifdef JSONAPI_USE_PQEXPBUFFER
587 lex
->pstack
->fnames
= new_fnames
;
589 new_fnull
= REALLOC(lex
->pstack
->fnull
, new_stack_size
* sizeof(bool));
590 #ifdef JSONAPI_USE_PQEXPBUFFER
594 lex
->pstack
->fnull
= new_fnull
;
596 lex
->pstack
->stack_size
= new_stack_size
;
601 if (lex
->incremental
)
604 * Ensure freeJsonLexContext() remains safe even if no fname is
605 * assigned at this level.
607 lex
->pstack
->fnames
[lex
->lex_level
] = NULL
;
614 dec_lex_level(JsonLexContext
*lex
)
616 set_fname(lex
, NULL
); /* free the current level's fname, if needed */
621 push_prediction(JsonParserStack
*pstack
, td_entry entry
)
623 memcpy(pstack
->prediction
+ pstack
->pred_index
, entry
.prod
, entry
.len
);
624 pstack
->pred_index
+= entry
.len
;
628 pop_prediction(JsonParserStack
*pstack
)
630 Assert(pstack
->pred_index
> 0);
631 return pstack
->prediction
[--pstack
->pred_index
];
635 next_prediction(JsonParserStack
*pstack
)
637 Assert(pstack
->pred_index
> 0);
638 return pstack
->prediction
[pstack
->pred_index
- 1];
642 have_prediction(JsonParserStack
*pstack
)
644 return pstack
->pred_index
> 0;
648 set_fname(JsonLexContext
*lex
, char *fname
)
650 if (lex
->flags
& JSONLEX_CTX_OWNS_TOKENS
)
653 * Don't leak prior fnames. If one hasn't been assigned yet,
654 * inc_lex_level ensured that it's NULL (and therefore safe to free).
656 FREE(lex
->pstack
->fnames
[lex
->lex_level
]);
659 lex
->pstack
->fnames
[lex
->lex_level
] = fname
;
663 get_fname(JsonLexContext
*lex
)
665 return lex
->pstack
->fnames
[lex
->lex_level
];
669 set_fnull(JsonLexContext
*lex
, bool fnull
)
671 lex
->pstack
->fnull
[lex
->lex_level
] = fnull
;
675 get_fnull(JsonLexContext
*lex
)
677 return lex
->pstack
->fnull
[lex
->lex_level
];
681 * Free memory in a JsonLexContext.
683 * There's no need for this if a *lex pointer was given when the object was
684 * made, need_escapes was false, and json_errdetail() was not called; or if (in
685 * backend environment) a memory context delete/reset is imminent.
688 freeJsonLexContext(JsonLexContext
*lex
)
690 static const JsonLexContext empty
= {0};
692 if (!lex
|| lex
== &failed_oom
)
695 if (lex
->flags
& JSONLEX_FREE_STRVAL
)
696 jsonapi_destroyStringInfo(lex
->strval
);
699 jsonapi_destroyStringInfo(lex
->errormsg
);
701 if (lex
->incremental
)
703 jsonapi_termStringInfo(&lex
->inc_state
->partial_token
);
704 FREE(lex
->inc_state
);
705 FREE(lex
->pstack
->prediction
);
707 if (lex
->flags
& JSONLEX_CTX_OWNS_TOKENS
)
711 /* Clean up any tokens that were left behind. */
712 for (i
= 0; i
<= lex
->lex_level
; i
++)
713 FREE(lex
->pstack
->fnames
[i
]);
716 FREE(lex
->pstack
->fnames
);
717 FREE(lex
->pstack
->fnull
);
718 FREE(lex
->pstack
->scalar_val
);
722 if (lex
->flags
& JSONLEX_FREE_STRUCT
)
731 * Publicly visible entry point for the JSON parser.
733 * lex is a lexing context, set up for the json to be processed by calling
734 * makeJsonLexContext(). sem is a structure of function pointers to semantic
735 * action routines to be called at appropriate spots during parsing, and a
736 * pointer to a state object to be passed to those routines.
738 * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
739 * JSON parser. This is a useful way to validate that it's doing the right
740 * thing at least for non-incremental cases. If this is on we expect to see
741 * regression diffs relating to error messages about stack depth, but no
745 pg_parse_json(JsonLexContext
*lex
, const JsonSemAction
*sem
)
747 #ifdef FORCE_JSON_PSTACK
749 * We don't need partial token processing, there is only one chunk. But we
750 * still need to init the partial token string so that freeJsonLexContext
751 * works, so perform the full incremental initialization.
753 if (!allocate_incremental_state(lex
))
754 return JSON_OUT_OF_MEMORY
;
756 return pg_parse_json_incremental(lex
, sem
, lex
->input
, lex
->input_length
, true);
761 JsonParseErrorType result
;
763 if (lex
== &failed_oom
)
764 return JSON_OUT_OF_MEMORY
;
765 if (lex
->incremental
)
766 return JSON_INVALID_LEXER_TYPE
;
768 /* get the initial token */
769 result
= json_lex(lex
);
770 if (result
!= JSON_SUCCESS
)
775 /* parse by recursive descent */
778 case JSON_TOKEN_OBJECT_START
:
779 result
= parse_object(lex
, sem
);
781 case JSON_TOKEN_ARRAY_START
:
782 result
= parse_array(lex
, sem
);
785 result
= parse_scalar(lex
, sem
); /* json can be a bare scalar */
788 if (result
== JSON_SUCCESS
)
789 result
= lex_expect(JSON_PARSE_END
, lex
, JSON_TOKEN_END
);
796 * json_count_array_elements
798 * Returns number of array elements in lex context at start of array token
799 * until end of array token at same nesting level.
801 * Designed to be called from array_start routines.
804 json_count_array_elements(JsonLexContext
*lex
, int *elements
)
806 JsonLexContext copylex
;
808 JsonParseErrorType result
;
810 if (lex
== &failed_oom
)
811 return JSON_OUT_OF_MEMORY
;
814 * It's safe to do this with a shallow copy because the lexical routines
815 * don't scribble on the input. They do scribble on the other pointers
816 * etc, so doing this with a copy makes that safe.
818 memcpy(©lex
, lex
, sizeof(JsonLexContext
));
819 copylex
.need_escapes
= false; /* not interested in values here */
823 result
= lex_expect(JSON_PARSE_ARRAY_START
, ©lex
,
824 JSON_TOKEN_ARRAY_START
);
825 if (result
!= JSON_SUCCESS
)
827 if (lex_peek(©lex
) != JSON_TOKEN_ARRAY_END
)
832 result
= parse_array_element(©lex
, &nullSemAction
);
833 if (result
!= JSON_SUCCESS
)
835 if (copylex
.token_type
!= JSON_TOKEN_COMMA
)
837 result
= json_lex(©lex
);
838 if (result
!= JSON_SUCCESS
)
842 result
= lex_expect(JSON_PARSE_ARRAY_NEXT
, ©lex
,
843 JSON_TOKEN_ARRAY_END
);
844 if (result
!= JSON_SUCCESS
)
852 * pg_parse_json_incremental
854 * Routine for incremental parsing of json. This uses the non-recursive top
855 * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
856 * the Recursive Descent pattern used above, so we only use it for incremental
859 * The lexing context needs to be set up by a call to
860 * makeJsonLexContextIncremental(). sem is a structure of function pointers
861 * to semantic action routines, which should function exactly as those used
862 * in the recursive descent parser.
864 * This routine can be called repeatedly with chunks of JSON. On the final
865 * chunk is_last must be set to true. len is the length of the json chunk,
866 * which does not need to be null terminated.
869 pg_parse_json_incremental(JsonLexContext
*lex
,
870 const JsonSemAction
*sem
,
876 JsonParseErrorType result
;
877 JsonParseContext ctx
= JSON_PARSE_VALUE
;
878 JsonParserStack
*pstack
= lex
->pstack
;
880 if (lex
== &failed_oom
|| lex
->inc_state
== &failed_inc_oom
)
881 return JSON_OUT_OF_MEMORY
;
882 if (!lex
->incremental
)
883 return JSON_INVALID_LEXER_TYPE
;
885 lex
->input
= lex
->token_terminator
= lex
->line_start
= json
;
886 lex
->input_length
= len
;
887 lex
->inc_state
->is_last_chunk
= is_last
;
888 lex
->inc_state
->started
= true;
890 /* get the initial token */
891 result
= json_lex(lex
);
892 if (result
!= JSON_SUCCESS
)
897 /* use prediction stack for incremental parsing */
899 if (!have_prediction(pstack
))
901 td_entry goal
= TD_ENTRY(JSON_PROD_GOAL
);
903 push_prediction(pstack
, goal
);
906 while (have_prediction(pstack
))
908 char top
= pop_prediction(pstack
);
912 * these first two branches are the guts of the Table Driven method
917 * tok can only be a terminal symbol, so top must be too. the
918 * token matches the top of the stack, so get the next token.
920 if (tok
< JSON_TOKEN_END
)
922 result
= json_lex(lex
);
923 if (result
!= JSON_SUCCESS
)
928 else if (IS_NT(top
) && (entry
= td_parser_table
[OFS(top
)][tok
]).prod
!= NULL
)
931 * the token is in the director set for a production of the
932 * non-terminal at the top of the stack, so push the reversed RHS
933 * of the production onto the stack.
935 push_prediction(pstack
, entry
);
937 else if (IS_SEM(top
))
940 * top is a semantic action marker, so take action accordingly.
941 * It's important to have these markers in the prediction stack
942 * before any token they might need so we don't advance the token
943 * prematurely. Note in a couple of cases we need to do something
944 * both before and after the token.
948 case JSON_SEM_OSTART
:
950 json_struct_action ostart
= sem
->object_start
;
952 if (lex
->lex_level
>= JSON_TD_MAX_STACK
)
953 return JSON_NESTING_TOO_DEEP
;
957 result
= (*ostart
) (sem
->semstate
);
958 if (result
!= JSON_SUCCESS
)
962 if (!inc_lex_level(lex
))
963 return JSON_OUT_OF_MEMORY
;
968 json_struct_action oend
= sem
->object_end
;
973 result
= (*oend
) (sem
->semstate
);
974 if (result
!= JSON_SUCCESS
)
979 case JSON_SEM_ASTART
:
981 json_struct_action astart
= sem
->array_start
;
983 if (lex
->lex_level
>= JSON_TD_MAX_STACK
)
984 return JSON_NESTING_TOO_DEEP
;
988 result
= (*astart
) (sem
->semstate
);
989 if (result
!= JSON_SUCCESS
)
993 if (!inc_lex_level(lex
))
994 return JSON_OUT_OF_MEMORY
;
999 json_struct_action aend
= sem
->array_end
;
1004 result
= (*aend
) (sem
->semstate
);
1005 if (result
!= JSON_SUCCESS
)
1010 case JSON_SEM_OFIELD_INIT
:
1013 * all we do here is save out the field name. We have
1014 * to wait to get past the ':' to see if the next
1015 * value is null so we can call the semantic routine
1018 json_ofield_action ostart
= sem
->object_field_start
;
1019 json_ofield_action oend
= sem
->object_field_end
;
1021 if ((ostart
!= NULL
|| oend
!= NULL
) && lex
->need_escapes
)
1023 fname
= STRDUP(lex
->strval
->data
);
1025 return JSON_OUT_OF_MEMORY
;
1027 set_fname(lex
, fname
);
1030 case JSON_SEM_OFIELD_START
:
1033 * the current token should be the first token of the
1036 bool isnull
= tok
== JSON_TOKEN_NULL
;
1037 json_ofield_action ostart
= sem
->object_field_start
;
1039 set_fnull(lex
, isnull
);
1043 char *fname
= get_fname(lex
);
1045 result
= (*ostart
) (sem
->semstate
, fname
, isnull
);
1046 if (result
!= JSON_SUCCESS
)
1051 case JSON_SEM_OFIELD_END
:
1053 json_ofield_action oend
= sem
->object_field_end
;
1057 char *fname
= get_fname(lex
);
1058 bool isnull
= get_fnull(lex
);
1060 result
= (*oend
) (sem
->semstate
, fname
, isnull
);
1061 if (result
!= JSON_SUCCESS
)
1066 case JSON_SEM_AELEM_START
:
1068 json_aelem_action astart
= sem
->array_element_start
;
1069 bool isnull
= tok
== JSON_TOKEN_NULL
;
1071 set_fnull(lex
, isnull
);
1075 result
= (*astart
) (sem
->semstate
, isnull
);
1076 if (result
!= JSON_SUCCESS
)
1081 case JSON_SEM_AELEM_END
:
1083 json_aelem_action aend
= sem
->array_element_end
;
1087 bool isnull
= get_fnull(lex
);
1089 result
= (*aend
) (sem
->semstate
, isnull
);
1090 if (result
!= JSON_SUCCESS
)
1095 case JSON_SEM_SCALAR_INIT
:
1097 json_scalar_action sfunc
= sem
->scalar
;
1099 pstack
->scalar_val
= NULL
;
1104 * extract the de-escaped string value, or the raw
1108 * XXX copied from RD parser but looks like a
1111 if (tok
== JSON_TOKEN_STRING
)
1113 if (lex
->need_escapes
)
1115 pstack
->scalar_val
= STRDUP(lex
->strval
->data
);
1116 if (pstack
->scalar_val
== NULL
)
1117 return JSON_OUT_OF_MEMORY
;
1122 ptrdiff_t tlen
= (lex
->token_terminator
- lex
->token_start
);
1124 pstack
->scalar_val
= ALLOC(tlen
+ 1);
1125 if (pstack
->scalar_val
== NULL
)
1126 return JSON_OUT_OF_MEMORY
;
1128 memcpy(pstack
->scalar_val
, lex
->token_start
, tlen
);
1129 pstack
->scalar_val
[tlen
] = '\0';
1131 pstack
->scalar_tok
= tok
;
1135 case JSON_SEM_SCALAR_CALL
:
1138 * We'd like to be able to get rid of this business of
1139 * two bits of scalar action, but we can't. It breaks
1140 * certain semantic actions which expect that when
1141 * called the lexer has consumed the item. See for
1142 * example get_scalar() in jsonfuncs.c.
1144 json_scalar_action sfunc
= sem
->scalar
;
1148 result
= (*sfunc
) (sem
->semstate
, pstack
->scalar_val
, pstack
->scalar_tok
);
1151 * Either ownership of the token passed to the
1152 * callback, or we need to free it now. Either
1153 * way, clear our pointer to it so it doesn't get
1154 * freed in the future.
1156 if (lex
->flags
& JSONLEX_CTX_OWNS_TOKENS
)
1157 FREE(pstack
->scalar_val
);
1158 pstack
->scalar_val
= NULL
;
1160 if (result
!= JSON_SUCCESS
)
1166 /* should not happen */
1173 * The token didn't match the stack top if it's a terminal nor a
1174 * production for the stack top if it's a non-terminal.
1176 * Various cases here are Asserted to be not possible, as the
1177 * token would not appear at the top of the prediction stack
1178 * unless the lookahead matched.
1182 case JSON_TOKEN_STRING
:
1183 if (next_prediction(pstack
) == JSON_TOKEN_COLON
)
1184 ctx
= JSON_PARSE_STRING
;
1188 ctx
= JSON_PARSE_VALUE
;
1191 case JSON_TOKEN_NUMBER
:
1192 case JSON_TOKEN_TRUE
:
1193 case JSON_TOKEN_FALSE
:
1194 case JSON_TOKEN_NULL
:
1195 case JSON_TOKEN_ARRAY_START
:
1196 case JSON_TOKEN_OBJECT_START
:
1198 ctx
= JSON_PARSE_VALUE
;
1200 case JSON_TOKEN_ARRAY_END
:
1202 ctx
= JSON_PARSE_ARRAY_NEXT
;
1204 case JSON_TOKEN_OBJECT_END
:
1206 ctx
= JSON_PARSE_OBJECT_NEXT
;
1208 case JSON_TOKEN_COMMA
:
1210 if (next_prediction(pstack
) == JSON_TOKEN_STRING
)
1211 ctx
= JSON_PARSE_OBJECT_NEXT
;
1213 ctx
= JSON_PARSE_ARRAY_NEXT
;
1215 case JSON_TOKEN_COLON
:
1216 ctx
= JSON_PARSE_OBJECT_LABEL
;
1218 case JSON_TOKEN_END
:
1219 ctx
= JSON_PARSE_END
;
1221 case JSON_NT_MORE_ARRAY_ELEMENTS
:
1222 ctx
= JSON_PARSE_ARRAY_NEXT
;
1224 case JSON_NT_ARRAY_ELEMENTS
:
1225 ctx
= JSON_PARSE_ARRAY_START
;
1227 case JSON_NT_MORE_KEY_PAIRS
:
1228 ctx
= JSON_PARSE_OBJECT_NEXT
;
1230 case JSON_NT_KEY_PAIRS
:
1231 ctx
= JSON_PARSE_OBJECT_START
;
1234 ctx
= JSON_PARSE_VALUE
;
1236 return report_parse_error(ctx
, lex
);
1240 return JSON_SUCCESS
;
1244 * Recursive Descent parse routines. There is one for each structural
1245 * element in a json document:
1246 * - scalar (string, number, true, false, null)
/*
 * Parse a single scalar value (string, number, true, false, or null).
 *
 * If the semantic action table supplies a scalar callback, the token's text
 * (de-escaped for strings when lex->need_escapes, raw lexeme otherwise) is
 * duplicated and passed to it; the callback may take ownership of the copy
 * unless JSONLEX_CTX_OWNS_TOKENS is set, in which case we free it here.
 */
static inline JsonParseErrorType
parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
{
    char       *val = NULL;
    json_scalar_action sfunc = sem->scalar;
    JsonTokenType tok = lex_peek(lex);
    JsonParseErrorType result;

    /* a scalar must be a string, a number, true, false, or null */
    if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
        tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
        tok != JSON_TOKEN_NULL)
        return report_parse_error(JSON_PARSE_VALUE, lex);

    /* if no semantic function, just consume the token */
    if (sfunc == NULL)
        return json_lex(lex);

    /* extract the de-escaped string value, or the raw lexeme */
    if (lex_peek(lex) == JSON_TOKEN_STRING)
    {
        if (lex->need_escapes)
        {
            /* STRDUP may return NULL on out-of-memory in frontend builds */
            val = STRDUP(lex->strval->data);
            if (val == NULL)
                return JSON_OUT_OF_MEMORY;
        }
    }
    else
    {
        int         len = (lex->token_terminator - lex->token_start);

        val = ALLOC(len + 1);
        if (val == NULL)
            return JSON_OUT_OF_MEMORY;

        memcpy(val, lex->token_start, len);
        val[len] = '\0';
    }

    /* consume the token */
    result = json_lex(lex);
    if (result != JSON_SUCCESS)
    {
        FREE(val);
        return result;
    }

    /* invoke the callback, which may take ownership of val */
    result = (*sfunc) (sem->semstate, val, tok);

    /* if the lex context owns tokens, the callback did NOT take ownership */
    if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
        FREE(val);

    return result;
}
/*
 * Parse one "fieldname" : value pair inside an object, invoking the
 * object_field_start/object_field_end callbacks around the value parse.
 */
static JsonParseErrorType
parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
{
    /*
     * An object field is "fieldname" : value where value can be a scalar,
     * object or array.  Note: in user-facing docs and error messages, we
     * generally call a field name a "key".
     */

    char       *fname = NULL;   /* duplicated key, owned per flags below */
    json_ofield_action ostart = sem->object_field_start;
    json_ofield_action oend = sem->object_field_end;
    bool        isnull;
    JsonTokenType tok;
    JsonParseErrorType result;

    if (lex_peek(lex) != JSON_TOKEN_STRING)
        return report_parse_error(JSON_PARSE_STRING, lex);
    /* only duplicate the key if some callback actually wants it */
    if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    {
        /* fname may be NULL on out-of-memory in frontend builds */
        fname = STRDUP(lex->strval->data);
        if (fname == NULL)
            return JSON_OUT_OF_MEMORY;
    }
    result = json_lex(lex);
    if (result != JSON_SUCCESS)
    {
        FREE(fname);
        return result;
    }

    result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
    if (result != JSON_SUCCESS)
    {
        FREE(fname);
        return result;
    }

    tok = lex_peek(lex);
    isnull = tok == JSON_TOKEN_NULL;

    if (ostart != NULL)
    {
        result = (*ostart) (sem->semstate, fname, isnull);
        if (result != JSON_SUCCESS)
            goto ofield_cleanup;
    }

    switch (tok)
    {
        case JSON_TOKEN_OBJECT_START:
            result = parse_object(lex, sem);
            break;
        case JSON_TOKEN_ARRAY_START:
            result = parse_array(lex, sem);
            break;
        default:
            result = parse_scalar(lex, sem);
    }
    if (result != JSON_SUCCESS)
        goto ofield_cleanup;

    if (oend != NULL)
    {
        result = (*oend) (sem->semstate, fname, isnull);
        if (result != JSON_SUCCESS)
            goto ofield_cleanup;
    }

ofield_cleanup:
    /* if the lex context owns tokens, callbacks did not take ownership */
    if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
        FREE(fname);
    return result;
}
/*
 * Parse a JSON object: '{' [ field ( ',' field )* ] '}'.
 *
 * Invokes the object_start/object_end semantic callbacks around the
 * contents; lex_level is bumped for the interior so nesting is tracked.
 */
static JsonParseErrorType
parse_object(JsonLexContext *lex, const JsonSemAction *sem)
{
    /*
     * an object is a possibly empty sequence of object fields, separated by
     * commas and surrounded by curly braces.
     */
    json_struct_action ostart = sem->object_start;
    json_struct_action oend = sem->object_end;
    JsonTokenType tok;
    JsonParseErrorType result;

    /*
     * TODO: clients need some way to put a bound on stack growth. Parse level
     * limits maybe?
     */
    check_stack_depth();

    if (ostart != NULL)
    {
        result = (*ostart) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    /*
     * Data inside an object is at a higher nesting level than the object
     * itself. Note that we increment this after we call the semantic routine
     * for the object start and restore it before we call the routine for the
     * object end.
     */
    lex->lex_level++;

    Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
    result = json_lex(lex);
    if (result != JSON_SUCCESS)
        return result;

    tok = lex_peek(lex);
    switch (tok)
    {
        case JSON_TOKEN_STRING:
            result = parse_object_field(lex, sem);
            while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
            {
                result = json_lex(lex);
                if (result != JSON_SUCCESS)
                    break;
                result = parse_object_field(lex, sem);
            }
            break;
        case JSON_TOKEN_OBJECT_END:
            /* empty object: nothing to do */
            break;
        default:
            /* case of an invalid initial token inside the object */
            result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
    }
    if (result != JSON_SUCCESS)
        return result;

    result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
    if (result != JSON_SUCCESS)
        return result;

    lex->lex_level--;

    if (oend != NULL)
    {
        result = (*oend) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    return JSON_SUCCESS;
}
1463 static JsonParseErrorType
1464 parse_array_element(JsonLexContext
*lex
, const JsonSemAction
*sem
)
1466 json_aelem_action astart
= sem
->array_element_start
;
1467 json_aelem_action aend
= sem
->array_element_end
;
1468 JsonTokenType tok
= lex_peek(lex
);
1469 JsonParseErrorType result
;
1472 isnull
= tok
== JSON_TOKEN_NULL
;
1476 result
= (*astart
) (sem
->semstate
, isnull
);
1477 if (result
!= JSON_SUCCESS
)
1481 /* an array element is any object, array or scalar */
1484 case JSON_TOKEN_OBJECT_START
:
1485 result
= parse_object(lex
, sem
);
1487 case JSON_TOKEN_ARRAY_START
:
1488 result
= parse_array(lex
, sem
);
1491 result
= parse_scalar(lex
, sem
);
1494 if (result
!= JSON_SUCCESS
)
1499 result
= (*aend
) (sem
->semstate
, isnull
);
1500 if (result
!= JSON_SUCCESS
)
1504 return JSON_SUCCESS
;
/*
 * Parse a JSON array: '[' [ element ( ',' element )* ] ']'.
 *
 * Invokes the array_start/array_end semantic callbacks around the contents;
 * lex_level is bumped for the interior so nesting is tracked.
 */
static JsonParseErrorType
parse_array(JsonLexContext *lex, const JsonSemAction *sem)
{
    /*
     * an array is a possibly empty sequence of array elements, separated by
     * commas and surrounded by square brackets.
     */
    json_struct_action astart = sem->array_start;
    json_struct_action aend = sem->array_end;
    JsonParseErrorType result;

    check_stack_depth();

    if (astart != NULL)
    {
        result = (*astart) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    /*
     * Data inside an array is at a higher nesting level than the array
     * itself. Note that we increment this after we call the semantic routine
     * for the array start and restore it before we call the routine for the
     * array end.
     */
    lex->lex_level++;

    result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
    if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
    {
        result = parse_array_element(lex, sem);

        while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
        {
            result = json_lex(lex);
            if (result != JSON_SUCCESS)
                break;
            result = parse_array_element(lex, sem);
        }
    }
    if (result != JSON_SUCCESS)
        return result;

    result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
    if (result != JSON_SUCCESS)
        return result;

    lex->lex_level--;

    if (aend != NULL)
    {
        result = (*aend) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    return JSON_SUCCESS;
}
/*
 * Lex one token from the input stream.
 *
 * When doing incremental parsing, we can reach the end of the input string
 * without having (or knowing we have) a complete token. If it's not the
 * final chunk of input, the partial token is then saved to the lex
 * structure's ptok StringInfo. On subsequent calls input is appended to this
 * buffer until we have something that we think is a complete token,
 * which is then lexed using a recursive call to json_lex. Processing then
 * continues as normal on subsequent calls.
 *
 * Note that when doing incremental processing, the lex.prev_token_terminator
 * should not be relied on. It could point into a previous input chunk or
 * worse.
 */
JsonParseErrorType
json_lex(JsonLexContext *lex)
{
    const char *s;
    const char *const end = lex->input + lex->input_length;
    JsonParseErrorType result;

    /* Sentinel contexts indicate a previous allocation failure. */
    if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
        return JSON_OUT_OF_MEMORY;

    if (lex->incremental)
    {
        if (lex->inc_state->partial_completed)
        {
            /*
             * We just lexed a completed partial token on the last call, so
             * reset everything.
             */
            jsonapi_resetStringInfo(&(lex->inc_state->partial_token));
            lex->token_terminator = lex->input;
            lex->inc_state->partial_completed = false;
        }

#ifdef JSONAPI_USE_PQEXPBUFFER
        /* Make sure our partial token buffer is valid before using it below. */
        if (PQExpBufferDataBroken(lex->inc_state->partial_token))
            return JSON_OUT_OF_MEMORY;
#endif
    }

    s = lex->token_terminator;

    if (lex->incremental && lex->inc_state->partial_token.len)
    {
        /*
         * We have a partial token. Extend it and if completed lex it by a
         * recursive call.
         */
        jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
        size_t      added = 0;
        bool        tok_done = false;
        JsonLexContext dummy_lex = {0};
        JsonParseErrorType partial_result;

        if (ptok->data[0] == '"')
        {
            /*
             * It's a string. Accumulate characters until we reach an
             * unescaped '"'.
             */
            int         escapes = 0;

            for (int i = ptok->len - 1; i > 0; i--)
            {
                /* count the trailing backslashes on the partial token */
                if (ptok->data[i] == '\\')
                    escapes++;
                else
                    break;
            }

            for (size_t i = 0; i < lex->input_length; i++)
            {
                char        c = lex->input[i];

                jsonapi_appendStringInfoCharMacro(ptok, c);
                added++;
                if (c == '"' && escapes % 2 == 0)
                {
                    /* unescaped quote: token is complete */
                    tok_done = true;
                    break;
                }

                if (c == '\\')
                    escapes++;
                else
                    escapes = 0;
            }
        }
        else
        {
            /* not a string */
            char        c = ptok->data[0];

            if (c == '-' || (c >= '0' && c <= '9'))
            {
                /* for numbers look for possible numeric continuations */

                bool        numend = false;

                for (size_t i = 0; i < lex->input_length && !numend; i++)
                {
                    char        cc = lex->input[i];

                    switch (cc)
                    {
                        case '+':
                        case '-':
                        case 'e':
                        case 'E':
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                            {
                                jsonapi_appendStringInfoCharMacro(ptok, cc);
                                added++;
                            }
                            break;
                        default:
                            numend = true;
                    }
                }
            }

            /*
             * Add any remaining alphanumeric chars. This takes care of the
             * {null, false, true} literals as well as any trailing
             * alphanumeric junk on non-string tokens.
             */
            for (size_t i = added; i < lex->input_length; i++)
            {
                char        cc = lex->input[i];

                if (JSON_ALPHANUMERIC_CHAR(cc))
                {
                    jsonapi_appendStringInfoCharMacro(ptok, cc);
                    added++;
                }
                else
                {
                    tok_done = true;
                    break;
                }
            }
            if (added == lex->input_length &&
                lex->inc_state->is_last_chunk)
            {
                tok_done = true;
            }
        }

        if (!tok_done)
        {
            /* We should have consumed the whole chunk in this case. */
            Assert(added == lex->input_length);

            if (!lex->inc_state->is_last_chunk)
                return JSON_INCOMPLETE;

            /* json_errdetail() needs access to the accumulated token. */
            lex->token_start = ptok->data;
            lex->token_terminator = ptok->data + ptok->len;
            return JSON_INVALID_TOKEN;
        }

        /*
         * Everything up to lex->input[added] has been added to the partial
         * token, so move the input past it.
         */
        lex->input += added;
        lex->input_length -= added;

        /* lex the completed token in isolation via a throwaway context */
        dummy_lex.input = dummy_lex.token_terminator =
            dummy_lex.line_start = ptok->data;
        dummy_lex.line_number = lex->line_number;
        dummy_lex.input_length = ptok->len;
        dummy_lex.input_encoding = lex->input_encoding;
        dummy_lex.incremental = false;
        dummy_lex.need_escapes = lex->need_escapes;
        dummy_lex.strval = lex->strval;

        partial_result = json_lex(&dummy_lex);

        /*
         * We either have a complete token or an error. In either case we need
         * to point to the partial token data for the semantic or error
         * routines. If it's not an error we'll readjust on the next call to
         * json_lex.
         */
        lex->token_type = dummy_lex.token_type;
        lex->line_number = dummy_lex.line_number;

        /*
         * We know the prev_token_terminator must be back in some previous
         * piece of input, so we just make it NULL.
         */
        lex->prev_token_terminator = NULL;

        /*
         * Normally token_start would be ptok->data, but it could be later,
         * see json_lex_string's handling of invalid escapes.
         */
        lex->token_start = dummy_lex.token_start;
        lex->token_terminator = dummy_lex.token_terminator;
        if (partial_result == JSON_SUCCESS)
        {
            /* make sure we've used all the input */
            if (lex->token_terminator - lex->token_start != ptok->len)
            {
                Assert(false);
                return JSON_INVALID_TOKEN;
            }

            lex->inc_state->partial_completed = true;
        }
        return partial_result;
        /* end of partial token processing */
    }

    /* Skip leading whitespace. */
    while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
    {
        if (*s == '\n')
            ++lex->line_number;
        ++s;
    }
    lex->line_start = s;
    lex->token_start = s;

    /* Determine token type. */
    if (s >= end)
    {
        lex->token_start = NULL;
        lex->prev_token_terminator = lex->token_terminator;
        lex->token_terminator = s;
        lex->token_type = JSON_TOKEN_END;
    }
    else
    {
        switch (*s)
        {
                /* Single-character token, some kind of punctuation mark. */
            case '{':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_OBJECT_START;
                break;
            case '}':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_OBJECT_END;
                break;
            case '[':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_ARRAY_START;
                break;
            case ']':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_ARRAY_END;
                break;
            case ',':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_COMMA;
                break;
            case ':':
                lex->prev_token_terminator = lex->token_terminator;
                lex->token_terminator = s + 1;
                lex->token_type = JSON_TOKEN_COLON;
                break;
            case '"':
                /* string */
                result = json_lex_string(lex);
                if (result != JSON_SUCCESS)
                    return result;
                lex->token_type = JSON_TOKEN_STRING;
                break;
            case '-':
                /* Negative number. */
                result = json_lex_number(lex, s + 1, NULL, NULL);
                if (result != JSON_SUCCESS)
                    return result;
                lex->token_type = JSON_TOKEN_NUMBER;
                break;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                /* Positive number. */
                result = json_lex_number(lex, s, NULL, NULL);
                if (result != JSON_SUCCESS)
                    return result;
                lex->token_type = JSON_TOKEN_NUMBER;
                break;
            default:
                {
                    const char *p;

                    /*
                     * We're not dealing with a string, number, legal
                     * punctuation mark, or end of string. The only legal
                     * tokens we might find here are true, false, and null,
                     * but for error reporting purposes we scan until we see a
                     * non-alphanumeric character. That way, we can report
                     * the whole word as an unexpected token, rather than just
                     * some unintuitive prefix thereof.
                     */
                    for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
                         /* skip */ ;

                    /*
                     * We got some sort of unexpected punctuation or an
                     * otherwise unexpected character, so just complain about
                     * that one character.
                     */
                    if (p == s)
                    {
                        lex->prev_token_terminator = lex->token_terminator;
                        lex->token_terminator = s + 1;
                        return JSON_INVALID_TOKEN;
                    }

                    /* word runs to the end of a non-final chunk: save it */
                    if (lex->incremental && !lex->inc_state->is_last_chunk &&
                        p == lex->input + lex->input_length)
                    {
                        jsonapi_appendBinaryStringInfo(&(lex->inc_state->partial_token), s, end - s);
                        return JSON_INCOMPLETE;
                    }

                    /*
                     * We've got a real alphanumeric token here. If it
                     * happens to be true, false, or null, all is well. If
                     * not, error out.
                     */
                    lex->prev_token_terminator = lex->token_terminator;
                    lex->token_terminator = p;
                    if (p - s == 4)
                    {
                        if (memcmp(s, "true", 4) == 0)
                            lex->token_type = JSON_TOKEN_TRUE;
                        else if (memcmp(s, "null", 4) == 0)
                            lex->token_type = JSON_TOKEN_NULL;
                        else
                            return JSON_INVALID_TOKEN;
                    }
                    else if (p - s == 5 && memcmp(s, "false", 5) == 0)
                        lex->token_type = JSON_TOKEN_FALSE;
                    else
                        return JSON_INVALID_TOKEN;
                }
        }                       /* end of switch */
    }

    if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
        return JSON_INCOMPLETE;
    else
        return JSON_SUCCESS;
}
/*
 * The next token in the input stream is known to be a string; lex it.
 *
 * If lex->strval isn't NULL, fill it with the decoded string.
 * Set lex->token_terminator to the end of the decoded input, and in
 * success cases, transfer its previous value to lex->prev_token_terminator.
 * Return JSON_SUCCESS or an error code.
 *
 * Note: be careful that all error exits advance lex->token_terminator
 * to the point after the character we detected the error on.
 */
static inline JsonParseErrorType
json_lex_string(JsonLexContext *lex)
{
    const char *s;
    const char *const end = lex->input + lex->input_length;
    int         hi_surrogate = -1;  /* pending first half of a UTF-16 pair */

    /* Convenience macros for error exits */
#define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
    do { \
        if (lex->incremental && !lex->inc_state->is_last_chunk) \
        { \
            jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
                                           lex->token_start, \
                                           end - lex->token_start); \
            return JSON_INCOMPLETE; \
        } \
        lex->token_terminator = s; \
        return code; \
    } while (0)
#define FAIL_AT_CHAR_END(code) \
    do { \
        const char *term = s + pg_encoding_mblen(lex->input_encoding, s); \
        lex->token_terminator = (term <= end) ? term : end; \
        return code; \
    } while (0)

    if (lex->need_escapes)
    {
#ifdef JSONAPI_USE_PQEXPBUFFER
        /* make sure initialization succeeded */
        if (lex->strval == NULL)
            return JSON_OUT_OF_MEMORY;
#endif
        jsonapi_resetStringInfo(lex->strval);
    }

    Assert(lex->input_length > 0);
    s = lex->token_start;
    for (;;)
    {
        s++;
        /* Premature end of the string. */
        if (s >= end)
            FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
        else if (*s == '"')
            break;
        else if (*s == '\\')
        {
            /* OK, we have an escape character. */
            s++;
            if (s >= end)
                FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
            else if (*s == 'u')
            {
                int         i;
                int         ch = 0;

                /* accumulate exactly four hex digits */
                for (i = 1; i <= 4; i++)
                {
                    s++;
                    if (s >= end)
                        FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
                    else if (*s >= '0' && *s <= '9')
                        ch = (ch * 16) + (*s - '0');
                    else if (*s >= 'a' && *s <= 'f')
                        ch = (ch * 16) + (*s - 'a') + 10;
                    else if (*s >= 'A' && *s <= 'F')
                        ch = (ch * 16) + (*s - 'A') + 10;
                    else
                        FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
                }
                if (lex->need_escapes)
                {
                    /*
                     * Combine surrogate pairs.
                     */
                    if (is_utf16_surrogate_first(ch))
                    {
                        if (hi_surrogate != -1)
                            FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
                        hi_surrogate = ch;
                        continue;
                    }
                    else if (is_utf16_surrogate_second(ch))
                    {
                        if (hi_surrogate == -1)
                            FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
                        ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
                        hi_surrogate = -1;
                    }

                    if (hi_surrogate != -1)
                        FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

                    /*
                     * Reject invalid cases. We can't have a value above
                     * 0xFFFF here (since we only accepted 4 hex digits
                     * above), so no need to test for out-of-range chars.
                     */
                    if (ch == 0)
                    {
                        /* We can't allow this, since our TEXT type doesn't */
                        FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
                    }

                    /*
                     * Add the represented character to lex->strval. In the
                     * backend, we can let pg_unicode_to_server_noerror()
                     * handle any required character set conversion; in
                     * frontend, we can only deal with trivial conversions.
                     */
#ifndef FRONTEND
                    {
                        char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];

                        if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
                            FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
                        appendStringInfoString(lex->strval, cbuf);
                    }
#else
                    if (lex->input_encoding == PG_UTF8)
                    {
                        /* OK, we can map the code point to UTF8 easily */
                        char        utf8str[5];
                        int         utf8len;

                        unicode_to_utf8(ch, (unsigned char *) utf8str);
                        utf8len = pg_utf_mblen((unsigned char *) utf8str);
                        jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
                    }
                    else if (ch <= 0x007f)
                    {
                        /* The ASCII range is the same in all encodings */
                        jsonapi_appendStringInfoChar(lex->strval, (char) ch);
                    }
                    else
                        FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
#endif                          /* FRONTEND */
                }
            }
            else if (lex->need_escapes)
            {
                if (hi_surrogate != -1)
                    FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

                switch (*s)
                {
                    case '"':
                    case '\\':
                    case '/':
                        jsonapi_appendStringInfoChar(lex->strval, *s);
                        break;
                    case 'b':
                        jsonapi_appendStringInfoChar(lex->strval, '\b');
                        break;
                    case 'f':
                        jsonapi_appendStringInfoChar(lex->strval, '\f');
                        break;
                    case 'n':
                        jsonapi_appendStringInfoChar(lex->strval, '\n');
                        break;
                    case 'r':
                        jsonapi_appendStringInfoChar(lex->strval, '\r');
                        break;
                    case 't':
                        jsonapi_appendStringInfoChar(lex->strval, '\t');
                        break;
                    default:

                        /*
                         * Not a valid string escape, so signal error. We
                         * adjust token_start so that just the escape sequence
                         * is reported, not the whole string.
                         */
                        lex->token_start = s;
                        FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
                }
            }
            else if (strchr("\"\\/bfnrt", *s) == NULL)
            {
                /*
                 * Simpler processing if we're not bothered about de-escaping
                 *
                 * It's very tempting to remove the strchr() call here and
                 * replace it with a switch statement, but testing so far has
                 * shown it's not a performance win.
                 */
                lex->token_start = s;
                FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
            }
        }
        else
        {
            const char *p = s;

            if (hi_surrogate != -1)
                FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

            /*
             * Skip to the first byte that requires special handling, so we
             * can batch calls to jsonapi_appendBinaryStringInfo.
             */
            while (p < end - sizeof(Vector8) &&
                   !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
                   !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
                   !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
                p += sizeof(Vector8);

            for (; p < end; p++)
            {
                if (*p == '\\' || *p == '"')
                    break;
                else if ((unsigned char) *p <= 31)
                {
                    /* Per RFC4627, these characters MUST be escaped. */
                    /*
                     * Since *p isn't printable, exclude it from the context
                     * string
                     */
                    lex->token_terminator = p;
                    return JSON_ESCAPING_REQUIRED;
                }
            }

            if (lex->need_escapes)
                jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);

            /*
             * s will be incremented at the top of the loop, so set it to just
             * behind our lookahead position
             */
            s = p - 1;
        }
    }

    if (hi_surrogate != -1)
    {
        lex->token_terminator = s + 1;
        return JSON_UNICODE_LOW_SURROGATE;
    }

#ifdef JSONAPI_USE_PQEXPBUFFER
    if (lex->need_escapes && PQExpBufferBroken(lex->strval))
        return JSON_OUT_OF_MEMORY;
#endif

    /* Hooray, we found the end of the string! */
    lex->prev_token_terminator = lex->token_terminator;
    lex->token_terminator = s + 1;
    return JSON_SUCCESS;

#undef FAIL_OR_INCOMPLETE_AT_CHAR_START
#undef FAIL_AT_CHAR_END
}
/*
 * The next token in the input stream is known to be a number; lex it.
 *
 * In JSON, a number consists of four parts:
 *
 * (1) An optional minus sign ('-').
 *
 * (2) Either a single '0', or a string of one or more digits that does not
 *     begin with a '0'.
 *
 * (3) An optional decimal part, consisting of a period ('.') followed by
 *     one or more digits. (Note: While this part can be omitted
 *     completely, it's not OK to have only the decimal point without
 *     any digits afterwards.)
 *
 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
 *     followed by '+' or '-', followed by one or more digits. (Note:
 *     As with the decimal part, if 'e' or 'E' is present, it must be
 *     followed by at least one digit.)
 *
 * The 's' argument to this function points to the ostensible beginning
 * of part 2 - i.e. the character after any optional minus sign, or the
 * first character of the string if there is none.
 *
 * If num_err is not NULL, we return an error flag to *num_err rather than
 * raising an error for a badly-formed number. Also, if total_len is not NULL
 * the distance from lex->input to the token end+1 is returned to *total_len.
 */
static inline JsonParseErrorType
json_lex_number(JsonLexContext *lex, const char *s,
                bool *num_err, size_t *total_len)
{
    bool        error = false;
    int         len = s - lex->input;   /* offset of s within input; kept in
                                         * lockstep with s below */

    /* Part (1): leading sign indicator. */
    /* Caller already did this for us; so do nothing. */

    /* Part (2): parse main digit string. */
    if (len < lex->input_length && *s == '0')
    {
        s++;
        len++;
    }
    else if (len < lex->input_length && *s >= '1' && *s <= '9')
    {
        do
        {
            s++;
            len++;
        } while (len < lex->input_length && *s >= '0' && *s <= '9');
    }
    else
        error = true;

    /* Part (3): parse optional decimal portion. */
    if (len < lex->input_length && *s == '.')
    {
        s++;
        len++;
        if (len == lex->input_length || *s < '0' || *s > '9')
            error = true;
        else
        {
            do
            {
                s++;
                len++;
            } while (len < lex->input_length && *s >= '0' && *s <= '9');
        }
    }

    /* Part (4): parse optional exponent. */
    if (len < lex->input_length && (*s == 'e' || *s == 'E'))
    {
        s++;
        len++;
        if (len < lex->input_length && (*s == '+' || *s == '-'))
        {
            s++;
            len++;
        }
        if (len == lex->input_length || *s < '0' || *s > '9')
            error = true;
        else
        {
            do
            {
                s++;
                len++;
            } while (len < lex->input_length && *s >= '0' && *s <= '9');
        }
    }

    /*
     * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
     * here should be considered part of the token for error-reporting
     * purposes.
     */
    for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
        error = true;

    if (total_len != NULL)
        *total_len = len;

    if (lex->incremental && !lex->inc_state->is_last_chunk &&
        len >= lex->input_length)
    {
        /* number may continue in the next chunk: stash what we have */
        jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token,
                                       lex->token_start, s - lex->token_start);
        if (num_err != NULL)
            *num_err = error;

        return JSON_INCOMPLETE;
    }
    else if (num_err != NULL)
    {
        /* let the caller handle any error */
        *num_err = error;
    }
    else
    {
        /* return token endpoint */
        lex->prev_token_terminator = lex->token_terminator;
        lex->token_terminator = s;
        /* handle error if any */
        if (error)
            return JSON_INVALID_TOKEN;
    }

    return JSON_SUCCESS;
}
/*
 * Report a parse error.
 *
 * lex->token_start and lex->token_terminator must identify the current token.
 * Maps the parsing context in which the unexpected token appeared to the
 * most descriptive error code.
 */
static JsonParseErrorType
report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
{
    /* Handle case where the input ended prematurely. */
    if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
        return JSON_EXPECTED_MORE;

    /* Otherwise choose the error type based on the parsing context. */
    switch (ctx)
    {
        case JSON_PARSE_END:
            return JSON_EXPECTED_END;
        case JSON_PARSE_VALUE:
            return JSON_EXPECTED_JSON;
        case JSON_PARSE_STRING:
            return JSON_EXPECTED_STRING;
        case JSON_PARSE_ARRAY_START:
            return JSON_EXPECTED_ARRAY_FIRST;
        case JSON_PARSE_ARRAY_NEXT:
            return JSON_EXPECTED_ARRAY_NEXT;
        case JSON_PARSE_OBJECT_START:
            return JSON_EXPECTED_OBJECT_FIRST;
        case JSON_PARSE_OBJECT_LABEL:
            return JSON_EXPECTED_COLON;
        case JSON_PARSE_OBJECT_NEXT:
            return JSON_EXPECTED_OBJECT_NEXT;
        case JSON_PARSE_OBJECT_COMMA:
            return JSON_EXPECTED_STRING;
    }

    /*
     * We don't use a default: case, so that the compiler will warn about
     * unhandled enum values.
     */
    Assert(false);
    return JSON_SUCCESS;        /* silence stupider compilers */
}
/*
 * Construct an (already translated) detail message for a JSON error.
 *
 * The returned pointer should not be freed, the allocation is either static
 * or owned by the JsonLexContext.
 */
char *
json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
{
    if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
    {
        /* Short circuit. Allocating anything for this case is unhelpful. */
        return _("out of memory");
    }

    if (lex->errormsg)
        jsonapi_resetStringInfo(lex->errormsg);
    else
        lex->errormsg = jsonapi_makeStringInfo();

    /*
     * A helper for error messages that should print the current token. The
     * format must contain exactly one %.*s specifier.
     */
#define json_token_error(lex, format) \
    jsonapi_appendStringInfo((lex)->errormsg, _(format), \
                             (int) ((lex)->token_terminator - (lex)->token_start), \
                             (lex)->token_start);

    switch (error)
    {
        case JSON_INCOMPLETE:
        case JSON_SUCCESS:
            /* fall through to the error code after switch */
            break;
        case JSON_INVALID_LEXER_TYPE:
            if (lex->incremental)
                return _("Recursive descent parser cannot use incremental lexer.");
            else
                return _("Incremental parser requires incremental lexer.");
        case JSON_NESTING_TOO_DEEP:
            return (_("JSON nested too deep, maximum permitted depth is 6400."));
        case JSON_ESCAPING_INVALID:
            json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
            break;
        case JSON_ESCAPING_REQUIRED:
            jsonapi_appendStringInfo(lex->errormsg,
                                     _("Character with value 0x%02x must be escaped."),
                                     (unsigned char) *(lex->token_terminator));
            break;
        case JSON_EXPECTED_END:
            json_token_error(lex, "Expected end of input, but found \"%.*s\".");
            break;
        case JSON_EXPECTED_ARRAY_FIRST:
            json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_ARRAY_NEXT:
            json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_COLON:
            json_token_error(lex, "Expected \":\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_JSON:
            json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
            break;
        case JSON_EXPECTED_MORE:
            return _("The input string ended unexpectedly.");
        case JSON_EXPECTED_OBJECT_FIRST:
            json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_OBJECT_NEXT:
            json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_STRING:
            json_token_error(lex, "Expected string, but found \"%.*s\".");
            break;
        case JSON_INVALID_TOKEN:
            json_token_error(lex, "Token \"%.*s\" is invalid.");
            break;
        case JSON_OUT_OF_MEMORY:
            /* should have been handled above; use the error path */
            break;
        case JSON_UNICODE_CODE_POINT_ZERO:
            return _("\\u0000 cannot be converted to text.");
        case JSON_UNICODE_ESCAPE_FORMAT:
            return _("\"\\u\" must be followed by four hexadecimal digits.");
        case JSON_UNICODE_HIGH_ESCAPE:
            /* note: this case is only reachable in frontend not backend */
            return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
        case JSON_UNICODE_UNTRANSLATABLE:

            /*
             * Note: this case is only reachable in backend and not frontend.
             * #ifdef it away so the frontend doesn't try to link against
             * backend functionality.
             */
#ifndef FRONTEND
            return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
                            GetDatabaseEncodingName());
#else
            Assert(false);
            break;
#endif
        case JSON_UNICODE_HIGH_SURROGATE:
            return _("Unicode high surrogate must not follow a high surrogate.");
        case JSON_UNICODE_LOW_SURROGATE:
            return _("Unicode low surrogate must follow a high surrogate.");
        case JSON_SEM_ACTION_FAILED:
            /* fall through to the error code after switch */
            break;
    }
#undef json_token_error

    /* Note that lex->errormsg can be NULL in shlib code. */
    if (lex->errormsg && lex->errormsg->len == 0)
    {
        /*
         * We don't use a default: case, so that the compiler will warn about
         * unhandled enum values. But this needs to be here anyway to cover
         * the possibility of an incorrect input.
         */
        jsonapi_appendStringInfo(lex->errormsg,
                                 "unexpected json parse error type: %d",
                                 (int) error);
    }

#ifdef JSONAPI_USE_PQEXPBUFFER
    if (PQExpBufferBroken(lex->errormsg))
        return _("out of memory while constructing error description");
#endif

    return lex->errormsg->data;
}