jsonpath scanner: reentrant scanner
[pgsql.git] / src / common / jsonapi.c
blob0e2a82ad7a1fa98945ceba8d64526c2449b1fdc9
1 /*-------------------------------------------------------------------------
3 * jsonapi.c
4 * JSON parser and lexer interfaces
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
9 * IDENTIFICATION
10 * src/common/jsonapi.c
12 *-------------------------------------------------------------------------
14 #ifndef FRONTEND
15 #include "postgres.h"
16 #else
17 #include "postgres_fe.h"
18 #endif
20 #include "common/jsonapi.h"
21 #include "mb/pg_wchar.h"
22 #include "port/pg_lfind.h"
24 #ifdef JSONAPI_USE_PQEXPBUFFER
25 #include "pqexpbuffer.h"
26 #else
27 #include "lib/stringinfo.h"
28 #include "miscadmin.h"
29 #endif
/*
 * By default, we will use palloc/pfree along with StringInfo.  In libpq,
 * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
 */
#ifdef JSONAPI_USE_PQEXPBUFFER

#define STRDUP(s) strdup(s)
#define ALLOC(size) malloc(size)
#define ALLOC0(size) calloc(1, size)
#define REALLOC realloc
#define FREE(s) free(s)

#define jsonapi_appendStringInfo			appendPQExpBuffer
#define jsonapi_appendBinaryStringInfo		appendBinaryPQExpBuffer
#define jsonapi_appendStringInfoChar		appendPQExpBufferChar
/* XXX should we add a macro version to PQExpBuffer? */
#define jsonapi_appendStringInfoCharMacro	appendPQExpBufferChar
#define jsonapi_makeStringInfo				createPQExpBuffer
#define jsonapi_initStringInfo				initPQExpBuffer
#define jsonapi_resetStringInfo				resetPQExpBuffer
#define jsonapi_termStringInfo				termPQExpBuffer
#define jsonapi_destroyStringInfo			destroyPQExpBuffer

#else							/* !JSONAPI_USE_PQEXPBUFFER */

#define STRDUP(s) pstrdup(s)
#define ALLOC(size) palloc(size)
#define ALLOC0(size) palloc0(size)
#define REALLOC repalloc

#ifdef FRONTEND
#define FREE pfree
#else
/*
 * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
 * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
 * argument to avoid future hair-pulling.
 */
#define FREE(s) do { \
	void	   *__v = (s); \
	if (__v) \
		pfree(__v); \
} while (0)
#endif

#define jsonapi_appendStringInfo			appendStringInfo
#define jsonapi_appendBinaryStringInfo		appendBinaryStringInfo
#define jsonapi_appendStringInfoChar		appendStringInfoChar
#define jsonapi_appendStringInfoCharMacro	appendStringInfoCharMacro
#define jsonapi_makeStringInfo				makeStringInfo
#define jsonapi_initStringInfo				initStringInfo
#define jsonapi_resetStringInfo				resetStringInfo
/* StringInfo has no term counterpart; freeing ->data is equivalent */
#define jsonapi_termStringInfo(s)			pfree((s)->data)
#define jsonapi_destroyStringInfo			destroyStringInfo

#endif							/* JSONAPI_USE_PQEXPBUFFER */
/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.
 */
typedef enum					/* contexts of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_STRING,			/* expecting a string (for a field name) */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA,	/* saw object ',', expecting next label */
	JSON_PARSE_END,				/* saw the end of a document, expect nothing */
} JsonParseContext;
/*
 * Setup for table-driven parser.
 * These enums need to be separate from the JsonTokenType and from each other
 * so we can have all of them on the prediction stack, which consists of
 * tokens, non-terminals, and semantic action markers.
 */

/* Non-terminals start at 32 (0x20) so IS_NT() can test that bit. */
enum JsonNonTerminal
{
	JSON_NT_JSON = 32,
	JSON_NT_ARRAY_ELEMENTS,
	JSON_NT_MORE_ARRAY_ELEMENTS,
	JSON_NT_KEY_PAIRS,
	JSON_NT_MORE_KEY_PAIRS,
};

/* Semantic action markers start at 64 (0x40) so IS_SEM() can test that bit. */
enum JsonParserSem
{
	JSON_SEM_OSTART = 64,
	JSON_SEM_OEND,
	JSON_SEM_ASTART,
	JSON_SEM_AEND,
	JSON_SEM_OFIELD_INIT,
	JSON_SEM_OFIELD_START,
	JSON_SEM_OFIELD_END,
	JSON_SEM_AELEM_START,
	JSON_SEM_AELEM_END,
	JSON_SEM_SCALAR_INIT,
	JSON_SEM_SCALAR_CALL,
};
/*
 * struct containing the 3 stacks used in non-recursive parsing,
 * and the token and value for scalars that need to be preserved
 * across calls.
 *
 * typedef appears in jsonapi.h
 */
struct JsonParserStack
{
	int			stack_size;		/* allocated entries in the stacks below */
	char	   *prediction;		/* LL(1) prediction stack (tokens/NTs/sems) */
	size_t		pred_index;		/* next free slot in prediction */
	/* these two are indexed by lex_level */
	char	  **fnames;			/* field name saved at each nesting level */
	bool	   *fnull;			/* is-null flag saved at each nesting level */
	JsonTokenType scalar_tok;	/* token type of pending scalar */
	char	   *scalar_val;		/* saved scalar value, owned by this struct */
};
/*
 * struct containing state used when there is a possible partial token at the
 * end of a json chunk when we are doing incremental parsing.
 *
 * typedef appears in jsonapi.h
 */
struct JsonIncrementalState
{
	bool		started;		/* has parsing begun? guards flag changes */
	bool		is_last_chunk;	/* true once the final chunk is supplied */
	bool		partial_completed;	/* partial_token was completed by new chunk */
	jsonapi_StrValType partial_token;	/* accumulator for a split token */
};
/*
 * constants and macros used in the nonrecursive parser
 */
#define JSON_NUM_TERMINALS 13
#define JSON_NUM_NONTERMINALS 5
#define JSON_NT_OFFSET JSON_NT_JSON
/*
 * for indexing the table; fully parenthesized so the expansion is safe in
 * any expression context, not just array subscripts
 */
#define OFS(NT) ((NT) - JSON_NT_OFFSET)
/* classify items we get off the stack */
#define IS_SEM(x) ((x) & 0x40)
#define IS_NT(x) ((x) & 0x20)
/*
 * These productions are stored in reverse order right to left so that when
 * they are pushed on the stack what we expect next is at the top of the stack.
 */
static char JSON_PROD_EPSILON[] = {0};	/* epsilon - an empty production */

/* JSON -> string */
static char JSON_PROD_SCALAR_STRING[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_STRING, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> number */
static char JSON_PROD_SCALAR_NUMBER[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NUMBER, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> 'true' */
static char JSON_PROD_SCALAR_TRUE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_TRUE, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> 'false' */
static char JSON_PROD_SCALAR_FALSE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_FALSE, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> 'null' */
static char JSON_PROD_SCALAR_NULL[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NULL, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> '{' KEY_PAIRS '}' */
static char JSON_PROD_OBJECT[] = {JSON_SEM_OEND, JSON_TOKEN_OBJECT_END, JSON_NT_KEY_PAIRS, JSON_TOKEN_OBJECT_START, JSON_SEM_OSTART, 0};

/* JSON -> '[' ARRAY_ELEMENTS ']' */
static char JSON_PROD_ARRAY[] = {JSON_SEM_AEND, JSON_TOKEN_ARRAY_END, JSON_NT_ARRAY_ELEMENTS, JSON_TOKEN_ARRAY_START, JSON_SEM_ASTART, 0};

/* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
static char JSON_PROD_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, 0};

/* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
static char JSON_PROD_MORE_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, JSON_TOKEN_COMMA, 0};

/* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
static char JSON_PROD_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, 0};

/* MORE_KEY_PAIRS -> ',' string ':' JSON MORE_KEY_PAIRS */
static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, JSON_TOKEN_COMMA, 0};
/*
 * Note: there are also epsilon productions for ARRAY_ELEMENTS,
 * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS
 * They are all the same as none require any semantic actions.
 */

/*
 * Table connecting the productions with their director sets of
 * terminal symbols.
 * Any combination not specified here represents an error.
 */

typedef struct
{
	size_t		len;			/* production length, excluding the 0 sentinel */
	char	   *prod;			/* pointer to the (reversed) production */
} td_entry;

#define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }

/* indexed by [non-terminal offset][lookahead terminal]; NULL prod = error */
static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] =
{
	/* JSON */
	[OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING),
	[OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER),
	[OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE),
	[OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE),
	[OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL),
	[OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY),
	[OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT),
	/* ARRAY_ELEMENTS */
	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
	/* MORE_ARRAY_ELEMENTS */
	[OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS),
	[OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
	/* KEY_PAIRS */
	[OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS),
	[OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
	/* MORE_KEY_PAIRS */
	[OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS),
	[OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
};

/* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
static char JSON_PROD_GOAL[] = {JSON_TOKEN_END, JSON_NT_JSON, 0};
/* forward declarations for lexer/parser internals defined later in the file */
static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
												 bool *num_err, size_t *total_len);
static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
static bool allocate_incremental_state(JsonLexContext *lex);
static inline void set_fname(JsonLexContext *lex, char *fname);

/* the null action object used for pure validation */
const JsonSemAction nullSemAction =
{
	NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL
};

/*
 * sentinels used for out-of-memory conditions; returning these lets shlib
 * callers defer OOM detection to time of use instead of failing immediately
 */
static JsonLexContext failed_oom;
static JsonIncrementalState failed_inc_oom;
/* Parser support routines */

/*
 * lex_peek
 *
 * what is the current look_ahead token?
 */
static inline JsonTokenType
lex_peek(JsonLexContext *lex)
{
	return lex->token_type;
}
311 * lex_expect
313 * move the lexer to the next token if the current look_ahead token matches
314 * the parameter token. Otherwise, report an error.
316 static inline JsonParseErrorType
317 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
319 if (lex_peek(lex) == token)
320 return json_lex(lex);
321 else
322 return report_parse_error(ctx, lex);
/* chars to consider as part of an alphanumeric token */
/* (IS_HIGHBIT_SET admits any multibyte-encoded character as well) */
#define JSON_ALPHANUMERIC_CHAR(c) \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
334 * Utility function to check if a string is a valid JSON number.
336 * str is of length len, and need not be null-terminated.
338 bool
339 IsValidJsonNumber(const char *str, size_t len)
341 bool numeric_error;
342 size_t total_len;
343 JsonLexContext dummy_lex = {0};
345 if (len <= 0)
346 return false;
349 * json_lex_number expects a leading '-' to have been eaten already.
351 * having to cast away the constness of str is ugly, but there's not much
352 * easy alternative.
354 if (*str == '-')
356 dummy_lex.input = str + 1;
357 dummy_lex.input_length = len - 1;
359 else
361 dummy_lex.input = str;
362 dummy_lex.input_length = len;
365 dummy_lex.token_start = dummy_lex.input;
367 json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
369 return (!numeric_error) && (total_len == dummy_lex.input_length);
/*
 * makeJsonLexContextCstringLen
 *		Initialize the given JsonLexContext object, or create one
 *
 * If a valid 'lex' pointer is given, it is initialized.  This can
 * be used for stack-allocated structs, saving overhead.  If NULL is
 * given, a new struct is allocated.
 *
 * If need_escapes is true, ->strval stores the unescaped lexemes.
 * Unescaping is expensive, so only request it when necessary.
 *
 * If need_escapes is true or lex was given as NULL, then caller is
 * responsible for freeing the returned struct, either by calling
 * freeJsonLexContext() or (in backend environment) via memory context
 * cleanup.
 *
 * In shlib code, any out-of-memory failures will be deferred to time
 * of use; this function is guaranteed to return a valid JsonLexContext.
 */
JsonLexContext *
makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json,
							 size_t len, int encoding, bool need_escapes)
{
	if (lex == NULL)
	{
		lex = ALLOC0(sizeof(JsonLexContext));
		if (!lex)
			return &failed_oom;	/* sentinel; caller sees OOM at time of use */
		lex->flags |= JSONLEX_FREE_STRUCT;
	}
	else
		memset(lex, 0, sizeof(JsonLexContext));

	lex->errormsg = NULL;
	lex->input = lex->token_terminator = lex->line_start = json;
	lex->line_number = 1;
	lex->input_length = len;
	lex->input_encoding = encoding;
	lex->need_escapes = need_escapes;
	if (need_escapes)
	{
		/*
		 * This call can fail in shlib code. We defer error handling to time
		 * of use (json_lex_string()) since we might not need to parse any
		 * strings anyway.
		 */
		lex->strval = jsonapi_makeStringInfo();
		lex->flags |= JSONLEX_FREE_STRVAL;
	}

	return lex;
}
/*
 * Allocates the internal bookkeeping structures for incremental parsing. This
 * can only fail in-band with shlib code.
 */
#define JS_STACK_CHUNK_SIZE 64
#define JS_MAX_PROD_LEN 10		/* more than we need */
#define JSON_TD_MAX_STACK 6400	/* hard coded for now - this is a REALLY high
								 * number */
static bool
allocate_incremental_state(JsonLexContext *lex)
{
	void	   *pstack,
			   *prediction,
			   *fnames,
			   *fnull;

	/* in backend code these allocations throw on OOM; in shlib they can fail */
	lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
	pstack = ALLOC0(sizeof(JsonParserStack));
	prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
	fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
	fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));

#ifdef JSONAPI_USE_PQEXPBUFFER
	if (!lex->inc_state
		|| !pstack
		|| !prediction
		|| !fnames
		|| !fnull)
	{
		/* FREE() is NULL-safe, so release whatever subset succeeded */
		FREE(lex->inc_state);
		FREE(pstack);
		FREE(prediction);
		FREE(fnames);
		FREE(fnull);

		lex->inc_state = &failed_inc_oom;
		return false;
	}
#endif

	jsonapi_initStringInfo(&(lex->inc_state->partial_token));
	lex->pstack = pstack;
	lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
	lex->pstack->prediction = prediction;
	lex->pstack->fnames = fnames;
	lex->pstack->fnull = fnull;

	/*
	 * fnames between 0 and lex_level must always be defined so that
	 * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
	 * the rest.
	 */
	Assert(lex->lex_level == 0);
	lex->pstack->fnames[0] = NULL;

	lex->incremental = true;
	return true;
}
/*
 * makeJsonLexContextIncremental
 *
 * Similar to above but set up for use in incremental parsing. That means we
 * need explicit stacks for predictions, field names and null indicators, but
 * we don't need the input, that will be handed in bit by bit to the
 * parse routine. We also need an accumulator for partial tokens in case
 * the boundary between chunks happens to fall in the middle of a token.
 *
 * In shlib code, any out-of-memory failures will be deferred to time of use;
 * this function is guaranteed to return a valid JsonLexContext.
 */
JsonLexContext *
makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
							  bool need_escapes)
{
	if (lex == NULL)
	{
		lex = ALLOC0(sizeof(JsonLexContext));
		if (!lex)
			return &failed_oom;

		lex->flags |= JSONLEX_FREE_STRUCT;
	}
	else
		memset(lex, 0, sizeof(JsonLexContext));

	lex->line_number = 1;
	lex->input_encoding = encoding;

	if (!allocate_incremental_state(lex))
	{
		if (lex->flags & JSONLEX_FREE_STRUCT)
		{
			FREE(lex);
			return &failed_oom;
		}

		/* lex->inc_state tracks the OOM failure; we can return here. */
		return lex;
	}

	lex->need_escapes = need_escapes;
	if (need_escapes)
	{
		/*
		 * This call can fail in shlib code. We defer error handling to time
		 * of use (json_lex_string()) since we might not need to parse any
		 * strings anyway.
		 */
		lex->strval = jsonapi_makeStringInfo();
		lex->flags |= JSONLEX_FREE_STRVAL;
	}

	return lex;
}
542 void
543 setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
545 if (lex->incremental && lex->inc_state->started)
548 * Switching this flag after parsing has already started is a
549 * programming error.
551 Assert(false);
552 return;
555 if (owned_by_context)
556 lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
557 else
558 lex->flags &= ~JSONLEX_CTX_OWNS_TOKENS;
/*
 * Increase the parser's nesting depth by one, growing the incremental
 * parser's stacks if needed.  Returns false only on OOM in shlib code
 * (backend REALLOC throws instead).  Note that on a partial-growth failure
 * the already-reallocated stacks are kept, so the context stays consistent.
 */
static inline bool
inc_lex_level(JsonLexContext *lex)
{
	if (lex->incremental && (lex->lex_level + 1) >= lex->pstack->stack_size)
	{
		size_t		new_stack_size;
		char	   *new_prediction;
		char	  **new_fnames;
		bool	   *new_fnull;

		new_stack_size = lex->pstack->stack_size + JS_STACK_CHUNK_SIZE;

		new_prediction = REALLOC(lex->pstack->prediction,
								 new_stack_size * JS_MAX_PROD_LEN);
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_prediction)
			return false;
#endif
		lex->pstack->prediction = new_prediction;

		new_fnames = REALLOC(lex->pstack->fnames,
							 new_stack_size * sizeof(char *));
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_fnames)
			return false;
#endif
		lex->pstack->fnames = new_fnames;

		new_fnull = REALLOC(lex->pstack->fnull, new_stack_size * sizeof(bool));
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_fnull)
			return false;
#endif
		lex->pstack->fnull = new_fnull;

		/* only bump the recorded size once every stack has grown */
		lex->pstack->stack_size = new_stack_size;
	}

	lex->lex_level += 1;

	if (lex->incremental)
	{
		/*
		 * Ensure freeJsonLexContext() remains safe even if no fname is
		 * assigned at this level.
		 */
		lex->pstack->fnames[lex->lex_level] = NULL;
	}

	return true;
}
/*
 * Decrease the parser's nesting depth by one, releasing the current level's
 * saved field name first (when the context owns tokens).
 */
static inline void
dec_lex_level(JsonLexContext *lex)
{
	set_fname(lex, NULL);		/* free the current level's fname, if needed */
	lex->lex_level -= 1;
}
620 static inline void
621 push_prediction(JsonParserStack *pstack, td_entry entry)
623 memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
624 pstack->pred_index += entry.len;
627 static inline char
628 pop_prediction(JsonParserStack *pstack)
630 Assert(pstack->pred_index > 0);
631 return pstack->prediction[--pstack->pred_index];
634 static inline char
635 next_prediction(JsonParserStack *pstack)
637 Assert(pstack->pred_index > 0);
638 return pstack->prediction[pstack->pred_index - 1];
641 static inline bool
642 have_prediction(JsonParserStack *pstack)
644 return pstack->pred_index > 0;
647 static inline void
648 set_fname(JsonLexContext *lex, char *fname)
650 if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
653 * Don't leak prior fnames. If one hasn't been assigned yet,
654 * inc_lex_level ensured that it's NULL (and therefore safe to free).
656 FREE(lex->pstack->fnames[lex->lex_level]);
659 lex->pstack->fnames[lex->lex_level] = fname;
/* Fetch the field name saved at the current nesting level (may be NULL). */
static inline char *
get_fname(JsonLexContext *lex)
{
	return lex->pstack->fnames[lex->lex_level];
}
/* Record whether the current level's value is a JSON null. */
static inline void
set_fnull(JsonLexContext *lex, bool fnull)
{
	lex->pstack->fnull[lex->lex_level] = fnull;
}
/* Fetch the is-null flag saved at the current nesting level. */
static inline bool
get_fnull(JsonLexContext *lex)
{
	return lex->pstack->fnull[lex->lex_level];
}
/*
 * Free memory in a JsonLexContext.
 *
 * There's no need for this if a *lex pointer was given when the object was
 * made, need_escapes was false, and json_errdetail() was not called; or if (in
 * backend environment) a memory context delete/reset is imminent.
 */
void
freeJsonLexContext(JsonLexContext *lex)
{
	static const JsonLexContext empty = {0};

	/* the OOM sentinel is static and must never be freed */
	if (!lex || lex == &failed_oom)
		return;

	if (lex->flags & JSONLEX_FREE_STRVAL)
		jsonapi_destroyStringInfo(lex->strval);

	if (lex->errormsg)
		jsonapi_destroyStringInfo(lex->errormsg);

	if (lex->incremental)
	{
		jsonapi_termStringInfo(&lex->inc_state->partial_token);
		FREE(lex->inc_state);
		FREE(lex->pstack->prediction);

		if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
		{
			int			i;

			/* Clean up any tokens that were left behind. */
			for (i = 0; i <= lex->lex_level; i++)
				FREE(lex->pstack->fnames[i]);
		}

		FREE(lex->pstack->fnames);
		FREE(lex->pstack->fnull);
		FREE(lex->pstack->scalar_val);
		FREE(lex->pstack);
	}

	if (lex->flags & JSONLEX_FREE_STRUCT)
		FREE(lex);
	else
		*lex = empty;			/* reset caller-owned struct to a clean state */
}
/*
 * pg_parse_json
 *
 * Publicly visible entry point for the JSON parser.
 *
 * lex is a lexing context, set up for the json to be processed by calling
 * makeJsonLexContext(). sem is a structure of function pointers to semantic
 * action routines to be called at appropriate spots during parsing, and a
 * pointer to a state object to be passed to those routines.
 *
 * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
 * JSON parser. This is a useful way to validate that it's doing the right
 * thing at least for non-incremental cases. If this is on we expect to see
 * regression diffs relating to error messages about stack depth, but no
 * other differences.
 */
JsonParseErrorType
pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
{
#ifdef FORCE_JSON_PSTACK
	/*
	 * We don't need partial token processing, there is only one chunk. But we
	 * still need to init the partial token string so that freeJsonLexContext
	 * works, so perform the full incremental initialization.
	 */
	if (!allocate_incremental_state(lex))
		return JSON_OUT_OF_MEMORY;

	return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);

#else

	JsonTokenType tok;
	JsonParseErrorType result;

	/* reject the OOM sentinel and lexers set up for incremental parsing */
	if (lex == &failed_oom)
		return JSON_OUT_OF_MEMORY;
	if (lex->incremental)
		return JSON_INVALID_LEXER_TYPE;

	/* get the initial token */
	result = json_lex(lex);
	if (result != JSON_SUCCESS)
		return result;

	tok = lex_peek(lex);

	/* parse by recursive descent */
	switch (tok)
	{
		case JSON_TOKEN_OBJECT_START:
			result = parse_object(lex, sem);
			break;
		case JSON_TOKEN_ARRAY_START:
			result = parse_array(lex, sem);
			break;
		default:
			result = parse_scalar(lex, sem);	/* json can be a bare scalar */
	}

	/* trailing garbage after the document is an error */
	if (result == JSON_SUCCESS)
		result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);

	return result;
#endif
}
/*
 * json_count_array_elements
 *
 * Returns number of array elements in lex context at start of array token
 * until end of array token at same nesting level.
 *
 * Designed to be called from array_start routines.
 */
JsonParseErrorType
json_count_array_elements(JsonLexContext *lex, int *elements)
{
	JsonLexContext copylex;
	int			count;
	JsonParseErrorType result;

	if (lex == &failed_oom)
		return JSON_OUT_OF_MEMORY;

	/*
	 * It's safe to do this with a shallow copy because the lexical routines
	 * don't scribble on the input. They do scribble on the other pointers
	 * etc, so doing this with a copy makes that safe.
	 */
	memcpy(&copylex, lex, sizeof(JsonLexContext));
	copylex.need_escapes = false;	/* not interested in values here */
	copylex.lex_level++;

	count = 0;
	result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
						JSON_TOKEN_ARRAY_START);
	if (result != JSON_SUCCESS)
		return result;
	if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
	{
		while (1)
		{
			count++;
			result = parse_array_element(&copylex, &nullSemAction);
			if (result != JSON_SUCCESS)
				return result;
			if (copylex.token_type != JSON_TOKEN_COMMA)
				break;
			result = json_lex(&copylex);
			if (result != JSON_SUCCESS)
				return result;
		}
	}
	result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
						JSON_TOKEN_ARRAY_END);
	if (result != JSON_SUCCESS)
		return result;

	/* only set the output once the whole array scanned cleanly */
	*elements = count;
	return JSON_SUCCESS;
}
852 * pg_parse_json_incremental
854 * Routine for incremental parsing of json. This uses the non-recursive top
855 * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
856 * the Recursive Descent pattern used above, so we only use it for incremental
857 * parsing of JSON.
859 * The lexing context needs to be set up by a call to
860 * makeJsonLexContextIncremental(). sem is a structure of function pointers
861 * to semantic action routines, which should function exactly as those used
862 * in the recursive descent parser.
864 * This routine can be called repeatedly with chunks of JSON. On the final
865 * chunk is_last must be set to true. len is the length of the json chunk,
866 * which does not need to be null terminated.
868 JsonParseErrorType
869 pg_parse_json_incremental(JsonLexContext *lex,
870 const JsonSemAction *sem,
871 const char *json,
872 size_t len,
873 bool is_last)
875 JsonTokenType tok;
876 JsonParseErrorType result;
877 JsonParseContext ctx = JSON_PARSE_VALUE;
878 JsonParserStack *pstack = lex->pstack;
880 if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
881 return JSON_OUT_OF_MEMORY;
882 if (!lex->incremental)
883 return JSON_INVALID_LEXER_TYPE;
885 lex->input = lex->token_terminator = lex->line_start = json;
886 lex->input_length = len;
887 lex->inc_state->is_last_chunk = is_last;
888 lex->inc_state->started = true;
890 /* get the initial token */
891 result = json_lex(lex);
892 if (result != JSON_SUCCESS)
893 return result;
895 tok = lex_peek(lex);
897 /* use prediction stack for incremental parsing */
899 if (!have_prediction(pstack))
901 td_entry goal = TD_ENTRY(JSON_PROD_GOAL);
903 push_prediction(pstack, goal);
906 while (have_prediction(pstack))
908 char top = pop_prediction(pstack);
909 td_entry entry;
912 * these first two branches are the guts of the Table Driven method
914 if (top == tok)
917 * tok can only be a terminal symbol, so top must be too. the
918 * token matches the top of the stack, so get the next token.
920 if (tok < JSON_TOKEN_END)
922 result = json_lex(lex);
923 if (result != JSON_SUCCESS)
924 return result;
925 tok = lex_peek(lex);
928 else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
931 * the token is in the director set for a production of the
932 * non-terminal at the top of the stack, so push the reversed RHS
933 * of the production onto the stack.
935 push_prediction(pstack, entry);
937 else if (IS_SEM(top))
940 * top is a semantic action marker, so take action accordingly.
941 * It's important to have these markers in the prediction stack
942 * before any token they might need so we don't advance the token
943 * prematurely. Note in a couple of cases we need to do something
944 * both before and after the token.
946 switch (top)
948 case JSON_SEM_OSTART:
950 json_struct_action ostart = sem->object_start;
952 if (lex->lex_level >= JSON_TD_MAX_STACK)
953 return JSON_NESTING_TOO_DEEP;
955 if (ostart != NULL)
957 result = (*ostart) (sem->semstate);
958 if (result != JSON_SUCCESS)
959 return result;
962 if (!inc_lex_level(lex))
963 return JSON_OUT_OF_MEMORY;
965 break;
966 case JSON_SEM_OEND:
968 json_struct_action oend = sem->object_end;
970 dec_lex_level(lex);
971 if (oend != NULL)
973 result = (*oend) (sem->semstate);
974 if (result != JSON_SUCCESS)
975 return result;
978 break;
979 case JSON_SEM_ASTART:
981 json_struct_action astart = sem->array_start;
983 if (lex->lex_level >= JSON_TD_MAX_STACK)
984 return JSON_NESTING_TOO_DEEP;
986 if (astart != NULL)
988 result = (*astart) (sem->semstate);
989 if (result != JSON_SUCCESS)
990 return result;
993 if (!inc_lex_level(lex))
994 return JSON_OUT_OF_MEMORY;
996 break;
997 case JSON_SEM_AEND:
999 json_struct_action aend = sem->array_end;
1001 dec_lex_level(lex);
1002 if (aend != NULL)
1004 result = (*aend) (sem->semstate);
1005 if (result != JSON_SUCCESS)
1006 return result;
1009 break;
1010 case JSON_SEM_OFIELD_INIT:
1013 * all we do here is save out the field name. We have
1014 * to wait to get past the ':' to see if the next
1015 * value is null so we can call the semantic routine
1017 char *fname = NULL;
1018 json_ofield_action ostart = sem->object_field_start;
1019 json_ofield_action oend = sem->object_field_end;
1021 if ((ostart != NULL || oend != NULL) && lex->need_escapes)
1023 fname = STRDUP(lex->strval->data);
1024 if (fname == NULL)
1025 return JSON_OUT_OF_MEMORY;
1027 set_fname(lex, fname);
1029 break;
1030 case JSON_SEM_OFIELD_START:
1033 * the current token should be the first token of the
1034 * value
1036 bool isnull = tok == JSON_TOKEN_NULL;
1037 json_ofield_action ostart = sem->object_field_start;
1039 set_fnull(lex, isnull);
1041 if (ostart != NULL)
1043 char *fname = get_fname(lex);
1045 result = (*ostart) (sem->semstate, fname, isnull);
1046 if (result != JSON_SUCCESS)
1047 return result;
1050 break;
1051 case JSON_SEM_OFIELD_END:
1053 json_ofield_action oend = sem->object_field_end;
1055 if (oend != NULL)
1057 char *fname = get_fname(lex);
1058 bool isnull = get_fnull(lex);
1060 result = (*oend) (sem->semstate, fname, isnull);
1061 if (result != JSON_SUCCESS)
1062 return result;
1065 break;
1066 case JSON_SEM_AELEM_START:
1068 json_aelem_action astart = sem->array_element_start;
1069 bool isnull = tok == JSON_TOKEN_NULL;
1071 set_fnull(lex, isnull);
1073 if (astart != NULL)
1075 result = (*astart) (sem->semstate, isnull);
1076 if (result != JSON_SUCCESS)
1077 return result;
1080 break;
1081 case JSON_SEM_AELEM_END:
1083 json_aelem_action aend = sem->array_element_end;
1085 if (aend != NULL)
1087 bool isnull = get_fnull(lex);
1089 result = (*aend) (sem->semstate, isnull);
1090 if (result != JSON_SUCCESS)
1091 return result;
1094 break;
1095 case JSON_SEM_SCALAR_INIT:
1097 json_scalar_action sfunc = sem->scalar;
1099 pstack->scalar_val = NULL;
1101 if (sfunc != NULL)
1104 * extract the de-escaped string value, or the raw
1105 * lexeme
1108 * XXX copied from RD parser but looks like a
1109 * buglet
1111 if (tok == JSON_TOKEN_STRING)
1113 if (lex->need_escapes)
1115 pstack->scalar_val = STRDUP(lex->strval->data);
1116 if (pstack->scalar_val == NULL)
1117 return JSON_OUT_OF_MEMORY;
1120 else
1122 ptrdiff_t tlen = (lex->token_terminator - lex->token_start);
1124 pstack->scalar_val = ALLOC(tlen + 1);
1125 if (pstack->scalar_val == NULL)
1126 return JSON_OUT_OF_MEMORY;
1128 memcpy(pstack->scalar_val, lex->token_start, tlen);
1129 pstack->scalar_val[tlen] = '\0';
1131 pstack->scalar_tok = tok;
1134 break;
1135 case JSON_SEM_SCALAR_CALL:
1138 * We'd like to be able to get rid of this business of
1139 * two bits of scalar action, but we can't. It breaks
1140 * certain semantic actions which expect that when
1141 * called the lexer has consumed the item. See for
1142 * example get_scalar() in jsonfuncs.c.
1144 json_scalar_action sfunc = sem->scalar;
1146 if (sfunc != NULL)
1148 result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
1151 * Either ownership of the token passed to the
1152 * callback, or we need to free it now. Either
1153 * way, clear our pointer to it so it doesn't get
1154 * freed in the future.
1156 if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1157 FREE(pstack->scalar_val);
1158 pstack->scalar_val = NULL;
1160 if (result != JSON_SUCCESS)
1161 return result;
1164 break;
1165 default:
1166 /* should not happen */
1167 break;
1170 else
1173 * The token didn't match the stack top if it's a terminal nor a
1174 * production for the stack top if it's a non-terminal.
1176 * Various cases here are Asserted to be not possible, as the
1177 * token would not appear at the top of the prediction stack
1178 * unless the lookahead matched.
1180 switch (top)
1182 case JSON_TOKEN_STRING:
1183 if (next_prediction(pstack) == JSON_TOKEN_COLON)
1184 ctx = JSON_PARSE_STRING;
1185 else
1187 Assert(false);
1188 ctx = JSON_PARSE_VALUE;
1190 break;
1191 case JSON_TOKEN_NUMBER:
1192 case JSON_TOKEN_TRUE:
1193 case JSON_TOKEN_FALSE:
1194 case JSON_TOKEN_NULL:
1195 case JSON_TOKEN_ARRAY_START:
1196 case JSON_TOKEN_OBJECT_START:
1197 Assert(false);
1198 ctx = JSON_PARSE_VALUE;
1199 break;
1200 case JSON_TOKEN_ARRAY_END:
1201 Assert(false);
1202 ctx = JSON_PARSE_ARRAY_NEXT;
1203 break;
1204 case JSON_TOKEN_OBJECT_END:
1205 Assert(false);
1206 ctx = JSON_PARSE_OBJECT_NEXT;
1207 break;
1208 case JSON_TOKEN_COMMA:
1209 Assert(false);
1210 if (next_prediction(pstack) == JSON_TOKEN_STRING)
1211 ctx = JSON_PARSE_OBJECT_NEXT;
1212 else
1213 ctx = JSON_PARSE_ARRAY_NEXT;
1214 break;
1215 case JSON_TOKEN_COLON:
1216 ctx = JSON_PARSE_OBJECT_LABEL;
1217 break;
1218 case JSON_TOKEN_END:
1219 ctx = JSON_PARSE_END;
1220 break;
1221 case JSON_NT_MORE_ARRAY_ELEMENTS:
1222 ctx = JSON_PARSE_ARRAY_NEXT;
1223 break;
1224 case JSON_NT_ARRAY_ELEMENTS:
1225 ctx = JSON_PARSE_ARRAY_START;
1226 break;
1227 case JSON_NT_MORE_KEY_PAIRS:
1228 ctx = JSON_PARSE_OBJECT_NEXT;
1229 break;
1230 case JSON_NT_KEY_PAIRS:
1231 ctx = JSON_PARSE_OBJECT_START;
1232 break;
1233 default:
1234 ctx = JSON_PARSE_VALUE;
1236 return report_parse_error(ctx, lex);
1240 return JSON_SUCCESS;
1244 * Recursive Descent parse routines. There is one for each structural
1245 * element in a json document:
1246 * - scalar (string, number, true, false, null)
1247 * - array ( [ ] )
1248 * - array element
1249 * - object ( { } )
1250 * - object field
1252 static inline JsonParseErrorType
1253 parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
1255 char *val = NULL;
1256 json_scalar_action sfunc = sem->scalar;
1257 JsonTokenType tok = lex_peek(lex);
1258 JsonParseErrorType result;
1260 /* a scalar must be a string, a number, true, false, or null */
1261 if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
1262 tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
1263 tok != JSON_TOKEN_NULL)
1264 return report_parse_error(JSON_PARSE_VALUE, lex);
1266 /* if no semantic function, just consume the token */
1267 if (sfunc == NULL)
1268 return json_lex(lex);
1270 /* extract the de-escaped string value, or the raw lexeme */
1271 if (lex_peek(lex) == JSON_TOKEN_STRING)
1273 if (lex->need_escapes)
1275 val = STRDUP(lex->strval->data);
1276 if (val == NULL)
1277 return JSON_OUT_OF_MEMORY;
1280 else
1282 int len = (lex->token_terminator - lex->token_start);
1284 val = ALLOC(len + 1);
1285 if (val == NULL)
1286 return JSON_OUT_OF_MEMORY;
1288 memcpy(val, lex->token_start, len);
1289 val[len] = '\0';
1292 /* consume the token */
1293 result = json_lex(lex);
1294 if (result != JSON_SUCCESS)
1296 FREE(val);
1297 return result;
1300 /* invoke the callback, which may take ownership of val */
1301 result = (*sfunc) (sem->semstate, val, tok);
1303 if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1304 FREE(val);
1306 return result;
1309 static JsonParseErrorType
1310 parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
1313 * An object field is "fieldname" : value where value can be a scalar,
1314 * object or array. Note: in user-facing docs and error messages, we
1315 * generally call a field name a "key".
1318 char *fname = NULL;
1319 json_ofield_action ostart = sem->object_field_start;
1320 json_ofield_action oend = sem->object_field_end;
1321 bool isnull;
1322 JsonTokenType tok;
1323 JsonParseErrorType result;
1325 if (lex_peek(lex) != JSON_TOKEN_STRING)
1326 return report_parse_error(JSON_PARSE_STRING, lex);
1327 if ((ostart != NULL || oend != NULL) && lex->need_escapes)
1329 fname = STRDUP(lex->strval->data);
1330 if (fname == NULL)
1331 return JSON_OUT_OF_MEMORY;
1333 result = json_lex(lex);
1334 if (result != JSON_SUCCESS)
1336 FREE(fname);
1337 return result;
1340 result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
1341 if (result != JSON_SUCCESS)
1343 FREE(fname);
1344 return result;
1347 tok = lex_peek(lex);
1348 isnull = tok == JSON_TOKEN_NULL;
1350 if (ostart != NULL)
1352 result = (*ostart) (sem->semstate, fname, isnull);
1353 if (result != JSON_SUCCESS)
1354 goto ofield_cleanup;
1357 switch (tok)
1359 case JSON_TOKEN_OBJECT_START:
1360 result = parse_object(lex, sem);
1361 break;
1362 case JSON_TOKEN_ARRAY_START:
1363 result = parse_array(lex, sem);
1364 break;
1365 default:
1366 result = parse_scalar(lex, sem);
1368 if (result != JSON_SUCCESS)
1369 goto ofield_cleanup;
1371 if (oend != NULL)
1373 result = (*oend) (sem->semstate, fname, isnull);
1374 if (result != JSON_SUCCESS)
1375 goto ofield_cleanup;
1378 ofield_cleanup:
1379 if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1380 FREE(fname);
1381 return result;
1384 static JsonParseErrorType
1385 parse_object(JsonLexContext *lex, const JsonSemAction *sem)
1388 * an object is a possibly empty sequence of object fields, separated by
1389 * commas and surrounded by curly braces.
1391 json_struct_action ostart = sem->object_start;
1392 json_struct_action oend = sem->object_end;
1393 JsonTokenType tok;
1394 JsonParseErrorType result;
1396 #ifndef FRONTEND
1399 * TODO: clients need some way to put a bound on stack growth. Parse level
1400 * limits maybe?
1402 check_stack_depth();
1403 #endif
1405 if (ostart != NULL)
1407 result = (*ostart) (sem->semstate);
1408 if (result != JSON_SUCCESS)
1409 return result;
1413 * Data inside an object is at a higher nesting level than the object
1414 * itself. Note that we increment this after we call the semantic routine
1415 * for the object start and restore it before we call the routine for the
1416 * object end.
1418 lex->lex_level++;
1420 Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
1421 result = json_lex(lex);
1422 if (result != JSON_SUCCESS)
1423 return result;
1425 tok = lex_peek(lex);
1426 switch (tok)
1428 case JSON_TOKEN_STRING:
1429 result = parse_object_field(lex, sem);
1430 while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
1432 result = json_lex(lex);
1433 if (result != JSON_SUCCESS)
1434 break;
1435 result = parse_object_field(lex, sem);
1437 break;
1438 case JSON_TOKEN_OBJECT_END:
1439 break;
1440 default:
1441 /* case of an invalid initial token inside the object */
1442 result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
1444 if (result != JSON_SUCCESS)
1445 return result;
1447 result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
1448 if (result != JSON_SUCCESS)
1449 return result;
1451 lex->lex_level--;
1453 if (oend != NULL)
1455 result = (*oend) (sem->semstate);
1456 if (result != JSON_SUCCESS)
1457 return result;
1460 return JSON_SUCCESS;
1463 static JsonParseErrorType
1464 parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
1466 json_aelem_action astart = sem->array_element_start;
1467 json_aelem_action aend = sem->array_element_end;
1468 JsonTokenType tok = lex_peek(lex);
1469 JsonParseErrorType result;
1470 bool isnull;
1472 isnull = tok == JSON_TOKEN_NULL;
1474 if (astart != NULL)
1476 result = (*astart) (sem->semstate, isnull);
1477 if (result != JSON_SUCCESS)
1478 return result;
1481 /* an array element is any object, array or scalar */
1482 switch (tok)
1484 case JSON_TOKEN_OBJECT_START:
1485 result = parse_object(lex, sem);
1486 break;
1487 case JSON_TOKEN_ARRAY_START:
1488 result = parse_array(lex, sem);
1489 break;
1490 default:
1491 result = parse_scalar(lex, sem);
1494 if (result != JSON_SUCCESS)
1495 return result;
1497 if (aend != NULL)
1499 result = (*aend) (sem->semstate, isnull);
1500 if (result != JSON_SUCCESS)
1501 return result;
1504 return JSON_SUCCESS;
1507 static JsonParseErrorType
1508 parse_array(JsonLexContext *lex, const JsonSemAction *sem)
1511 * an array is a possibly empty sequence of array elements, separated by
1512 * commas and surrounded by square brackets.
1514 json_struct_action astart = sem->array_start;
1515 json_struct_action aend = sem->array_end;
1516 JsonParseErrorType result;
1518 #ifndef FRONTEND
1519 check_stack_depth();
1520 #endif
1522 if (astart != NULL)
1524 result = (*astart) (sem->semstate);
1525 if (result != JSON_SUCCESS)
1526 return result;
1530 * Data inside an array is at a higher nesting level than the array
1531 * itself. Note that we increment this after we call the semantic routine
1532 * for the array start and restore it before we call the routine for the
1533 * array end.
1535 lex->lex_level++;
1537 result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
1538 if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
1540 result = parse_array_element(lex, sem);
1542 while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
1544 result = json_lex(lex);
1545 if (result != JSON_SUCCESS)
1546 break;
1547 result = parse_array_element(lex, sem);
1550 if (result != JSON_SUCCESS)
1551 return result;
1553 result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
1554 if (result != JSON_SUCCESS)
1555 return result;
1557 lex->lex_level--;
1559 if (aend != NULL)
1561 result = (*aend) (sem->semstate);
1562 if (result != JSON_SUCCESS)
1563 return result;
1566 return JSON_SUCCESS;
/*
 * Lex one token from the input stream.
 *
 * When doing incremental parsing, we can reach the end of the input string
 * without having (or knowing we have) a complete token.  If it's not the
 * final chunk of input, the partial token is then saved to the lex
 * structure's ptok StringInfo.  On subsequent calls input is appended to this
 * buffer until we have something that we think is a complete token,
 * which is then lexed using a recursive call to json_lex.  Processing then
 * continues as normal on subsequent calls.
 *
 * Note that when doing incremental processing, the lex.prev_token_terminator
 * should not be relied on.  It could point into a previous input chunk or
 * worse.
 */
JsonParseErrorType
json_lex(JsonLexContext *lex)
{
	const char *s;
	const char *const end = lex->input + lex->input_length;
	JsonParseErrorType result;

	/* Bail out if this context (or its inc_state) is an OOM placeholder. */
	if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
		return JSON_OUT_OF_MEMORY;

	if (lex->incremental)
	{
		if (lex->inc_state->partial_completed)
		{
			/*
			 * We just lexed a completed partial token on the last call, so
			 * reset everything
			 */
			jsonapi_resetStringInfo(&(lex->inc_state->partial_token));
			lex->token_terminator = lex->input;
			lex->inc_state->partial_completed = false;
		}

#ifdef JSONAPI_USE_PQEXPBUFFER
		/* Make sure our partial token buffer is valid before using it below. */
		if (PQExpBufferDataBroken(lex->inc_state->partial_token))
			return JSON_OUT_OF_MEMORY;
#endif
	}

	s = lex->token_terminator;

	if (lex->incremental && lex->inc_state->partial_token.len)
	{
		/*
		 * We have a partial token. Extend it and if completed lex it by a
		 * recursive call
		 */
		jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
		size_t		added = 0;		/* bytes consumed from this chunk */
		bool		tok_done = false;	/* true once the token looks complete */
		JsonLexContext dummy_lex = {0};
		JsonParseErrorType partial_result;

		if (ptok->data[0] == '"')
		{
			/*
			 * It's a string. Accumulate characters until we reach an
			 * unescaped '"'.
			 */
			int			escapes = 0;

			for (int i = ptok->len - 1; i > 0; i--)
			{
				/* count the trailing backslashes on the partial token */
				if (ptok->data[i] == '\\')
					escapes++;
				else
					break;
			}

			for (size_t i = 0; i < lex->input_length; i++)
			{
				char		c = lex->input[i];

				jsonapi_appendStringInfoCharMacro(ptok, c);
				added++;
				/* a '"' preceded by an even number of backslashes ends it */
				if (c == '"' && escapes % 2 == 0)
				{
					tok_done = true;
					break;
				}
				if (c == '\\')
					escapes++;
				else
					escapes = 0;
			}
		}
		else
		{
			/* not a string */
			char		c = ptok->data[0];

			if (c == '-' || (c >= '0' && c <= '9'))
			{
				/* for numbers look for possible numeric continuations */

				bool		numend = false;

				for (size_t i = 0; i < lex->input_length && !numend; i++)
				{
					char		cc = lex->input[i];

					switch (cc)
					{
						case '+':
						case '-':
						case 'e':
						case 'E':
						case '0':
						case '1':
						case '2':
						case '3':
						case '4':
						case '5':
						case '6':
						case '7':
						case '8':
						case '9':
							jsonapi_appendStringInfoCharMacro(ptok, cc);
							added++;
							break;
						default:
							numend = true;
					}
				}
			}

			/*
			 * Add any remaining alphanumeric chars. This takes care of the
			 * {null, false, true} literals as well as any trailing
			 * alphanumeric junk on non-string tokens.
			 */
			for (size_t i = added; i < lex->input_length; i++)
			{
				char		cc = lex->input[i];

				if (JSON_ALPHANUMERIC_CHAR(cc))
				{
					jsonapi_appendStringInfoCharMacro(ptok, cc);
					added++;
				}
				else
				{
					tok_done = true;
					break;
				}
			}
			if (added == lex->input_length &&
				lex->inc_state->is_last_chunk)
			{
				tok_done = true;
			}
		}

		if (!tok_done)
		{
			/* We should have consumed the whole chunk in this case. */
			Assert(added == lex->input_length);

			if (!lex->inc_state->is_last_chunk)
				return JSON_INCOMPLETE;

			/* json_errdetail() needs access to the accumulated token. */
			lex->token_start = ptok->data;
			lex->token_terminator = ptok->data + ptok->len;
			return JSON_INVALID_TOKEN;
		}

		/*
		 * Everything up to lex->input[added] has been added to the partial
		 * token, so move the input past it.
		 */
		lex->input += added;
		lex->input_length -= added;

		/*
		 * Lex the assembled token with a throwaway, non-incremental context;
		 * that guarantees the recursive call below cannot recurse again.
		 */
		dummy_lex.input = dummy_lex.token_terminator =
			dummy_lex.line_start = ptok->data;
		dummy_lex.line_number = lex->line_number;
		dummy_lex.input_length = ptok->len;
		dummy_lex.input_encoding = lex->input_encoding;
		dummy_lex.incremental = false;
		dummy_lex.need_escapes = lex->need_escapes;
		dummy_lex.strval = lex->strval;

		partial_result = json_lex(&dummy_lex);

		/*
		 * We either have a complete token or an error. In either case we need
		 * to point to the partial token data for the semantic or error
		 * routines. If it's not an error we'll readjust on the next call to
		 * json_lex.
		 */
		lex->token_type = dummy_lex.token_type;
		lex->line_number = dummy_lex.line_number;

		/*
		 * We know the prev_token_terminator must be back in some previous
		 * piece of input, so we just make it NULL.
		 */
		lex->prev_token_terminator = NULL;

		/*
		 * Normally token_start would be ptok->data, but it could be later,
		 * see json_lex_string's handling of invalid escapes.
		 */
		lex->token_start = dummy_lex.token_start;
		lex->token_terminator = dummy_lex.token_terminator;
		if (partial_result == JSON_SUCCESS)
		{
			/* make sure we've used all the input */
			if (lex->token_terminator - lex->token_start != ptok->len)
			{
				Assert(false);
				return JSON_INVALID_TOKEN;
			}

			lex->inc_state->partial_completed = true;
		}
		return partial_result;
		/* end of partial token processing */
	}

	/* Skip leading whitespace. */
	while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
	{
		if (*s++ == '\n')
		{
			++lex->line_number;
			lex->line_start = s;
		}
	}
	lex->token_start = s;

	/* Determine token type. */
	if (s >= end)
	{
		lex->token_start = NULL;
		lex->prev_token_terminator = lex->token_terminator;
		lex->token_terminator = s;
		lex->token_type = JSON_TOKEN_END;
	}
	else
	{
		switch (*s)
		{
				/* Single-character token, some kind of punctuation mark. */
			case '{':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_OBJECT_START;
				break;
			case '}':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_OBJECT_END;
				break;
			case '[':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_ARRAY_START;
				break;
			case ']':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_ARRAY_END;
				break;
			case ',':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_COMMA;
				break;
			case ':':
				lex->prev_token_terminator = lex->token_terminator;
				lex->token_terminator = s + 1;
				lex->token_type = JSON_TOKEN_COLON;
				break;
			case '"':
				/* string */
				result = json_lex_string(lex);
				if (result != JSON_SUCCESS)
					return result;
				lex->token_type = JSON_TOKEN_STRING;
				break;
			case '-':
				/* Negative number. */
				result = json_lex_number(lex, s + 1, NULL, NULL);
				if (result != JSON_SUCCESS)
					return result;
				lex->token_type = JSON_TOKEN_NUMBER;
				break;
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				/* Positive number. */
				result = json_lex_number(lex, s, NULL, NULL);
				if (result != JSON_SUCCESS)
					return result;
				lex->token_type = JSON_TOKEN_NUMBER;
				break;
			default:
				{
					const char *p;

					/*
					 * We're not dealing with a string, number, legal
					 * punctuation mark, or end of string. The only legal
					 * tokens we might find here are true, false, and null,
					 * but for error reporting purposes we scan until we see a
					 * non-alphanumeric character. That way, we can report
					 * the whole word as an unexpected token, rather than just
					 * some unintuitive prefix thereof.
					 */
					for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
						 /* skip */ ;

					/*
					 * We got some sort of unexpected punctuation or an
					 * otherwise unexpected character, so just complain about
					 * that one character.
					 */
					if (p == s)
					{
						lex->prev_token_terminator = lex->token_terminator;
						lex->token_terminator = s + 1;
						return JSON_INVALID_TOKEN;
					}

					/*
					 * The word runs to the end of a non-final chunk; stash it
					 * as a partial token and ask for more input.
					 */
					if (lex->incremental && !lex->inc_state->is_last_chunk &&
						p == lex->input + lex->input_length)
					{
						jsonapi_appendBinaryStringInfo(&(lex->inc_state->partial_token), s, end - s);
						return JSON_INCOMPLETE;
					}

					/*
					 * We've got a real alphanumeric token here. If it
					 * happens to be true, false, or null, all is well. If
					 * not, error out.
					 */
					lex->prev_token_terminator = lex->token_terminator;
					lex->token_terminator = p;
					if (p - s == 4)
					{
						if (memcmp(s, "true", 4) == 0)
							lex->token_type = JSON_TOKEN_TRUE;
						else if (memcmp(s, "null", 4) == 0)
							lex->token_type = JSON_TOKEN_NULL;
						else
							return JSON_INVALID_TOKEN;
					}
					else if (p - s == 5 && memcmp(s, "false", 5) == 0)
						lex->token_type = JSON_TOKEN_FALSE;
					else
						return JSON_INVALID_TOKEN;
				}
		}						/* end of switch */
	}

	if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
		return JSON_INCOMPLETE;
	else
		return JSON_SUCCESS;
}
/*
 * The next token in the input stream is known to be a string; lex it.
 *
 * If lex->strval isn't NULL, fill it with the decoded string.
 * Set lex->token_terminator to the end of the decoded input, and in
 * success cases, transfer its previous value to lex->prev_token_terminator.
 * Return JSON_SUCCESS or an error code.
 *
 * Note: be careful that all error exits advance lex->token_terminator
 * to the point after the character we detected the error on.
 */
static inline JsonParseErrorType
json_lex_string(JsonLexContext *lex)
{
	const char *s;
	const char *const end = lex->input + lex->input_length;
	int			hi_surrogate = -1;	/* pending first half of a UTF-16 pair */

	/* Convenience macros for error exits */
#define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
	do { \
		if (lex->incremental && !lex->inc_state->is_last_chunk) \
		{ \
			jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
										   lex->token_start, \
										   end - lex->token_start); \
			return JSON_INCOMPLETE; \
		} \
		lex->token_terminator = s; \
		return code; \
	} while (0)
#define FAIL_AT_CHAR_END(code) \
	do { \
		const char *term = s + pg_encoding_mblen(lex->input_encoding, s); \
		lex->token_terminator = (term <= end) ? term : end; \
		return code; \
	} while (0)

	if (lex->need_escapes)
	{
#ifdef JSONAPI_USE_PQEXPBUFFER
		/* make sure initialization succeeded */
		if (lex->strval == NULL)
			return JSON_OUT_OF_MEMORY;
#endif
		jsonapi_resetStringInfo(lex->strval);
	}

	Assert(lex->input_length > 0);
	s = lex->token_start;
	for (;;)
	{
		s++;
		/* Premature end of the string. */
		if (s >= end)
			FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
		else if (*s == '"')
			break;
		else if (*s == '\\')
		{
			/* OK, we have an escape character. */
			s++;
			if (s >= end)
				FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
			else if (*s == 'u')
			{
				int			i;
				int			ch = 0;

				/* Accumulate exactly four hex digits into ch. */
				for (i = 1; i <= 4; i++)
				{
					s++;
					if (s >= end)
						FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
					else if (*s >= '0' && *s <= '9')
						ch = (ch * 16) + (*s - '0');
					else if (*s >= 'a' && *s <= 'f')
						ch = (ch * 16) + (*s - 'a') + 10;
					else if (*s >= 'A' && *s <= 'F')
						ch = (ch * 16) + (*s - 'A') + 10;
					else
						FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
				}
				if (lex->need_escapes)
				{
					/*
					 * Combine surrogate pairs.
					 */
					if (is_utf16_surrogate_first(ch))
					{
						if (hi_surrogate != -1)
							FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
						hi_surrogate = ch;
						continue;
					}
					else if (is_utf16_surrogate_second(ch))
					{
						if (hi_surrogate == -1)
							FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
						ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
						hi_surrogate = -1;
					}

					/* a lone high surrogate must be followed by a low one */
					if (hi_surrogate != -1)
						FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

					/*
					 * Reject invalid cases. We can't have a value above
					 * 0xFFFF here (since we only accepted 4 hex digits
					 * above), so no need to test for out-of-range chars.
					 */
					if (ch == 0)
					{
						/* We can't allow this, since our TEXT type doesn't */
						FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
					}

					/*
					 * Add the represented character to lex->strval. In the
					 * backend, we can let pg_unicode_to_server_noerror()
					 * handle any required character set conversion; in
					 * frontend, we can only deal with trivial conversions.
					 */
#ifndef FRONTEND
					{
						char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];

						if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
							FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
						appendStringInfoString(lex->strval, cbuf);
					}
#else
					if (lex->input_encoding == PG_UTF8)
					{
						/* OK, we can map the code point to UTF8 easily */
						char		utf8str[5];
						int			utf8len;

						unicode_to_utf8(ch, (unsigned char *) utf8str);
						utf8len = pg_utf_mblen((unsigned char *) utf8str);
						jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
					}
					else if (ch <= 0x007f)
					{
						/* The ASCII range is the same in all encodings */
						jsonapi_appendStringInfoChar(lex->strval, (char) ch);
					}
					else
						FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
#endif							/* FRONTEND */
				}
			}
			else if (lex->need_escapes)
			{
				/* a \u escape must complete any pending surrogate pair */
				if (hi_surrogate != -1)
					FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

				switch (*s)
				{
					case '"':
					case '\\':
					case '/':
						jsonapi_appendStringInfoChar(lex->strval, *s);
						break;
					case 'b':
						jsonapi_appendStringInfoChar(lex->strval, '\b');
						break;
					case 'f':
						jsonapi_appendStringInfoChar(lex->strval, '\f');
						break;
					case 'n':
						jsonapi_appendStringInfoChar(lex->strval, '\n');
						break;
					case 'r':
						jsonapi_appendStringInfoChar(lex->strval, '\r');
						break;
					case 't':
						jsonapi_appendStringInfoChar(lex->strval, '\t');
						break;
					default:

						/*
						 * Not a valid string escape, so signal error. We
						 * adjust token_start so that just the escape sequence
						 * is reported, not the whole string.
						 */
						lex->token_start = s;
						FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
				}
			}
			else if (strchr("\"\\/bfnrt", *s) == NULL)
			{
				/*
				 * Simpler processing if we're not bothered about de-escaping
				 *
				 * It's very tempting to remove the strchr() call here and
				 * replace it with a switch statement, but testing so far has
				 * shown it's not a performance win.
				 */
				lex->token_start = s;
				FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
			}
		}
		else
		{
			const char *p = s;

			if (hi_surrogate != -1)
				FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);

			/*
			 * Skip to the first byte that requires special handling, so we
			 * can batch calls to jsonapi_appendBinaryStringInfo.
			 */
			while (p < end - sizeof(Vector8) &&
				   !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
				   !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
				   !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
				p += sizeof(Vector8);

			for (; p < end; p++)
			{
				if (*p == '\\' || *p == '"')
					break;
				else if ((unsigned char) *p <= 31)
				{
					/* Per RFC4627, these characters MUST be escaped. */

					/*
					 * Since *p isn't printable, exclude it from the context
					 * string
					 */
					lex->token_terminator = p;
					return JSON_ESCAPING_REQUIRED;
				}
			}

			if (lex->need_escapes)
				jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);

			/*
			 * s will be incremented at the top of the loop, so set it to just
			 * behind our lookahead position
			 */
			s = p - 1;
		}
	}

	/* a trailing unmatched high surrogate is an error */
	if (hi_surrogate != -1)
	{
		lex->token_terminator = s + 1;
		return JSON_UNICODE_LOW_SURROGATE;
	}

#ifdef JSONAPI_USE_PQEXPBUFFER
	if (lex->need_escapes && PQExpBufferBroken(lex->strval))
		return JSON_OUT_OF_MEMORY;
#endif

	/* Hooray, we found the end of the string! */
	lex->prev_token_terminator = lex->token_terminator;
	lex->token_terminator = s + 1;
	return JSON_SUCCESS;
}

#undef FAIL_OR_INCOMPLETE_AT_CHAR_START
#undef FAIL_AT_CHAR_END
2216 * The next token in the input stream is known to be a number; lex it.
2218 * In JSON, a number consists of four parts:
2220 * (1) An optional minus sign ('-').
2222 * (2) Either a single '0', or a string of one or more digits that does not
2223 * begin with a '0'.
2225 * (3) An optional decimal part, consisting of a period ('.') followed by
2226 * one or more digits. (Note: While this part can be omitted
2227 * completely, it's not OK to have only the decimal point without
2228 * any digits afterwards.)
2230 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
2231 * followed by '+' or '-', followed by one or more digits. (Note:
2232 * As with the decimal part, if 'e' or 'E' is present, it must be
2233 * followed by at least one digit.)
2235 * The 's' argument to this function points to the ostensible beginning
2236 * of part 2 - i.e. the character after any optional minus sign, or the
2237 * first character of the string if there is none.
2239 * If num_err is not NULL, we return an error flag to *num_err rather than
2240 * raising an error for a badly-formed number. Also, if total_len is not NULL
2241 * the distance from lex->input to the token end+1 is returned to *total_len.
2243 static inline JsonParseErrorType
2244 json_lex_number(JsonLexContext *lex, const char *s,
2245 bool *num_err, size_t *total_len)
2247 bool error = false;
2248 int len = s - lex->input;
2250 /* Part (1): leading sign indicator. */
2251 /* Caller already did this for us; so do nothing. */
2253 /* Part (2): parse main digit string. */
2254 if (len < lex->input_length && *s == '0')
2256 s++;
2257 len++;
2259 else if (len < lex->input_length && *s >= '1' && *s <= '9')
2263 s++;
2264 len++;
2265 } while (len < lex->input_length && *s >= '0' && *s <= '9');
2267 else
2268 error = true;
2270 /* Part (3): parse optional decimal portion. */
2271 if (len < lex->input_length && *s == '.')
2273 s++;
2274 len++;
2275 if (len == lex->input_length || *s < '0' || *s > '9')
2276 error = true;
2277 else
2281 s++;
2282 len++;
2283 } while (len < lex->input_length && *s >= '0' && *s <= '9');
2287 /* Part (4): parse optional exponent. */
2288 if (len < lex->input_length && (*s == 'e' || *s == 'E'))
2290 s++;
2291 len++;
2292 if (len < lex->input_length && (*s == '+' || *s == '-'))
2294 s++;
2295 len++;
2297 if (len == lex->input_length || *s < '0' || *s > '9')
2298 error = true;
2299 else
2303 s++;
2304 len++;
2305 } while (len < lex->input_length && *s >= '0' && *s <= '9');
2310 * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
2311 * here should be considered part of the token for error-reporting
2312 * purposes.
2314 for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
2315 error = true;
2317 if (total_len != NULL)
2318 *total_len = len;
2320 if (lex->incremental && !lex->inc_state->is_last_chunk &&
2321 len >= lex->input_length)
2323 jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token,
2324 lex->token_start, s - lex->token_start);
2325 if (num_err != NULL)
2326 *num_err = error;
2328 return JSON_INCOMPLETE;
2330 else if (num_err != NULL)
2332 /* let the caller handle any error */
2333 *num_err = error;
2335 else
2337 /* return token endpoint */
2338 lex->prev_token_terminator = lex->token_terminator;
2339 lex->token_terminator = s;
2340 /* handle error if any */
2341 if (error)
2342 return JSON_INVALID_TOKEN;
2345 return JSON_SUCCESS;
2349 * Report a parse error.
2351 * lex->token_start and lex->token_terminator must identify the current token.
2353 static JsonParseErrorType
2354 report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
2356 /* Handle case where the input ended prematurely. */
2357 if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
2358 return JSON_EXPECTED_MORE;
2360 /* Otherwise choose the error type based on the parsing context. */
2361 switch (ctx)
2363 case JSON_PARSE_END:
2364 return JSON_EXPECTED_END;
2365 case JSON_PARSE_VALUE:
2366 return JSON_EXPECTED_JSON;
2367 case JSON_PARSE_STRING:
2368 return JSON_EXPECTED_STRING;
2369 case JSON_PARSE_ARRAY_START:
2370 return JSON_EXPECTED_ARRAY_FIRST;
2371 case JSON_PARSE_ARRAY_NEXT:
2372 return JSON_EXPECTED_ARRAY_NEXT;
2373 case JSON_PARSE_OBJECT_START:
2374 return JSON_EXPECTED_OBJECT_FIRST;
2375 case JSON_PARSE_OBJECT_LABEL:
2376 return JSON_EXPECTED_COLON;
2377 case JSON_PARSE_OBJECT_NEXT:
2378 return JSON_EXPECTED_OBJECT_NEXT;
2379 case JSON_PARSE_OBJECT_COMMA:
2380 return JSON_EXPECTED_STRING;
2384 * We don't use a default: case, so that the compiler will warn about
2385 * unhandled enum values.
2387 Assert(false);
2388 return JSON_SUCCESS; /* silence stupider compilers */
/*
 * Construct an (already translated) detail message for a JSON error.
 *
 * The returned pointer should not be freed, the allocation is either static
 * or owned by the JsonLexContext.
 */
char *
json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
	/*
	 * OOM errors (and the static failed_oom sentinel context) are answered
	 * with a static string, since building a message could itself fail.
	 */
	if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
		/* Short circuit. Allocating anything for this case is unhelpful. */
		return _("out of memory");
	/* Reuse the context's message buffer if present, else create one. */
	if (lex->errormsg)
		jsonapi_resetStringInfo(lex->errormsg);
	else
		lex->errormsg = jsonapi_makeStringInfo();
	/*
	 * A helper for error messages that should print the current token. The
	 * format must contain exactly one %.*s specifier.
	 */
#define json_token_error(lex, format) \
	jsonapi_appendStringInfo((lex)->errormsg, _(format), \
							 (int) ((lex)->token_terminator - (lex)->token_start), \
							 (lex)->token_start);
	/*
	 * Cases either return a static translated string directly, or append
	 * into lex->errormsg and break to the common return path below.
	 */
	switch (error)
		case JSON_INCOMPLETE:
		case JSON_SUCCESS:
			/* fall through to the error code after switch */
			break;
		case JSON_INVALID_LEXER_TYPE:
			if (lex->incremental)
				return _("Recursive descent parser cannot use incremental lexer.");
			else
				return _("Incremental parser requires incremental lexer.");
		case JSON_NESTING_TOO_DEEP:
			return (_("JSON nested too deep, maximum permitted depth is 6400."));
		case JSON_ESCAPING_INVALID:
			json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
			break;
		case JSON_ESCAPING_REQUIRED:
			/* token_terminator points at the offending character here */
			jsonapi_appendStringInfo(lex->errormsg,
									 _("Character with value 0x%02x must be escaped."),
									 (unsigned char) *(lex->token_terminator));
			break;
		case JSON_EXPECTED_END:
			json_token_error(lex, "Expected end of input, but found \"%.*s\".");
			break;
		case JSON_EXPECTED_ARRAY_FIRST:
			json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_ARRAY_NEXT:
			json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_COLON:
			json_token_error(lex, "Expected \":\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_JSON:
			json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
			break;
		case JSON_EXPECTED_MORE:
			return _("The input string ended unexpectedly.");
		case JSON_EXPECTED_OBJECT_FIRST:
			json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_OBJECT_NEXT:
			json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
			break;
		case JSON_EXPECTED_STRING:
			json_token_error(lex, "Expected string, but found \"%.*s\".");
			break;
		case JSON_INVALID_TOKEN:
			json_token_error(lex, "Token \"%.*s\" is invalid.");
			break;
		case JSON_OUT_OF_MEMORY:
			/* should have been handled above; use the error path */
			break;
		case JSON_UNICODE_CODE_POINT_ZERO:
			return _("\\u0000 cannot be converted to text.");
		case JSON_UNICODE_ESCAPE_FORMAT:
			return _("\"\\u\" must be followed by four hexadecimal digits.");
		case JSON_UNICODE_HIGH_ESCAPE:
			/* note: this case is only reachable in frontend not backend */
			return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
		case JSON_UNICODE_UNTRANSLATABLE:
			/*
			 * Note: this case is only reachable in backend and not frontend.
			 * #ifdef it away so the frontend doesn't try to link against
			 * backend functionality.
			 */
#ifndef FRONTEND
			return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
							GetDatabaseEncodingName());
#else
			Assert(false);
			break;
#endif
		case JSON_UNICODE_HIGH_SURROGATE:
			return _("Unicode high surrogate must not follow a high surrogate.");
		case JSON_UNICODE_LOW_SURROGATE:
			return _("Unicode low surrogate must follow a high surrogate.");
		case JSON_SEM_ACTION_FAILED:
			/* fall through to the error code after switch */
			break;
#undef json_token_error
	/* Note that lex->errormsg can be NULL in shlib code. */
	if (lex->errormsg && lex->errormsg->len == 0)
		/*
		 * We don't use a default: case, so that the compiler will warn about
		 * unhandled enum values. But this needs to be here anyway to cover
		 * the possibility of an incorrect input.
		 */
		jsonapi_appendStringInfo(lex->errormsg,
								 "unexpected json parse error type: %d",
								 (int) error);
#ifdef JSONAPI_USE_PQEXPBUFFER
	/* PQExpBuffer allocation can fail; report that rather than garbage. */
	if (PQExpBufferBroken(lex->errormsg))
		return _("out of memory while constructing error description");
#endif
	return lex->errormsg->data;