Clear padding of PgStat_HashKey when handling pgstats entries
[pgsql.git] / src / backend / utils / adt / json.c
blob058aade2af496efb03c21ffcadd1a8c439c7edd9
1 /*-------------------------------------------------------------------------
3 * json.c
4 * JSON data type support.
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
9 * IDENTIFICATION
10 * src/backend/utils/adt/json.c
12 *-------------------------------------------------------------------------
14 #include "postgres.h"
16 #include "catalog/pg_proc.h"
17 #include "catalog/pg_type.h"
18 #include "common/hashfn.h"
19 #include "funcapi.h"
20 #include "libpq/pqformat.h"
21 #include "miscadmin.h"
22 #include "port/simd.h"
23 #include "utils/array.h"
24 #include "utils/builtins.h"
25 #include "utils/date.h"
26 #include "utils/datetime.h"
27 #include "utils/fmgroids.h"
28 #include "utils/json.h"
29 #include "utils/jsonfuncs.h"
30 #include "utils/lsyscache.h"
31 #include "utils/typcache.h"
35 * Support for fast key uniqueness checking.
37 * We maintain a hash table of used keys in JSON objects for fast detection
38 * of duplicates.
/*
 * Shared handle for key uniqueness checking: a dynahash table whose entries
 * are the key names seen so far.
 */
typedef struct HTAB *JsonUniqueCheckState;
/*
 * Entry of the JsonUniqueCheckState hash table.  The whole struct doubles as
 * the lookup key; two entries match only if object_id, key_len and the first
 * key_len bytes of key all agree.
 */
typedef struct JsonUniqueHashEntry
{
	const char *key;			/* key text */
	int			key_len;		/* number of bytes of "key" to hash/compare */
	int			object_id;		/* separates keys belonging to different
								 * objects */
} JsonUniqueHashEntry;
/*
 * One level of the stack used for key uniqueness checking while parsing
 * JSON; levels are chained through "parent".
 */
typedef struct JsonUniqueStackEntry
{
	struct JsonUniqueStackEntry *parent;	/* enclosing stack level */
	int			object_id;		/* object id associated with this level */
} JsonUniqueStackEntry;
58 /* Context struct for key uniqueness check during JSON parsing */
59 typedef struct JsonUniqueParsingState
61 JsonLexContext *lex;
62 JsonUniqueCheckState check;
63 JsonUniqueStackEntry *stack;
64 int id_counter;
65 bool unique;
66 } JsonUniqueParsingState;
68 /* Context struct for key uniqueness check during JSON building */
69 typedef struct JsonUniqueBuilderState
71 JsonUniqueCheckState check; /* unique check */
72 StringInfoData skipped_keys; /* skipped keys with NULL values */
73 MemoryContext mcxt; /* context for saving skipped keys */
74 } JsonUniqueBuilderState;
77 /* State struct for JSON aggregation */
78 typedef struct JsonAggState
80 StringInfo str;
81 JsonTypeCategory key_category;
82 Oid key_output_func;
83 JsonTypeCategory val_category;
84 Oid val_output_func;
85 JsonUniqueBuilderState unique_check;
86 } JsonAggState;
88 static void composite_to_json(Datum composite, StringInfo result,
89 bool use_line_feeds);
90 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
91 Datum *vals, bool *nulls, int *valcount,
92 JsonTypeCategory tcategory, Oid outfuncoid,
93 bool use_line_feeds);
94 static void array_to_json_internal(Datum array, StringInfo result,
95 bool use_line_feeds);
96 static void datum_to_json_internal(Datum val, bool is_null, StringInfo result,
97 JsonTypeCategory tcategory, Oid outfuncoid,
98 bool key_scalar);
99 static void add_json(Datum val, bool is_null, StringInfo result,
100 Oid val_type, bool key_scalar);
101 static text *catenate_stringinfo_string(StringInfo buffer, const char *addon);
104 * Input.
106 Datum
107 json_in(PG_FUNCTION_ARGS)
109 char *json = PG_GETARG_CSTRING(0);
110 text *result = cstring_to_text(json);
111 JsonLexContext lex;
113 /* validate it */
114 makeJsonLexContext(&lex, result, false);
115 if (!pg_parse_json_or_errsave(&lex, &nullSemAction, fcinfo->context))
116 PG_RETURN_NULL();
118 /* Internal representation is the same as text */
119 PG_RETURN_TEXT_P(result);
123 * Output.
125 Datum
126 json_out(PG_FUNCTION_ARGS)
128 /* we needn't detoast because text_to_cstring will handle that */
129 Datum txt = PG_GETARG_DATUM(0);
131 PG_RETURN_CSTRING(TextDatumGetCString(txt));
135 * Binary send.
137 Datum
138 json_send(PG_FUNCTION_ARGS)
140 text *t = PG_GETARG_TEXT_PP(0);
141 StringInfoData buf;
143 pq_begintypsend(&buf);
144 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
145 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
149 * Binary receive.
151 Datum
152 json_recv(PG_FUNCTION_ARGS)
154 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
155 char *str;
156 int nbytes;
157 JsonLexContext lex;
159 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
161 /* Validate it. */
162 makeJsonLexContextCstringLen(&lex, str, nbytes, GetDatabaseEncoding(),
163 false);
164 pg_parse_json_or_ereport(&lex, &nullSemAction);
166 PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes));
170 * Turn a Datum into JSON text, appending the string to "result".
172 * tcategory and outfuncoid are from a previous call to json_categorize_type,
173 * except that if is_null is true then they can be invalid.
175 * If key_scalar is true, the value is being printed as a key, so insist
176 * it's of an acceptable type, and force it to be quoted.
178 static void
179 datum_to_json_internal(Datum val, bool is_null, StringInfo result,
180 JsonTypeCategory tcategory, Oid outfuncoid,
181 bool key_scalar)
183 char *outputstr;
184 text *jsontext;
186 check_stack_depth();
188 /* callers are expected to ensure that null keys are not passed in */
189 Assert(!(key_scalar && is_null));
191 if (is_null)
193 appendBinaryStringInfo(result, "null", strlen("null"));
194 return;
197 if (key_scalar &&
198 (tcategory == JSONTYPE_ARRAY ||
199 tcategory == JSONTYPE_COMPOSITE ||
200 tcategory == JSONTYPE_JSON ||
201 tcategory == JSONTYPE_CAST))
202 ereport(ERROR,
203 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
204 errmsg("key value must be scalar, not array, composite, or json")));
206 switch (tcategory)
208 case JSONTYPE_ARRAY:
209 array_to_json_internal(val, result, false);
210 break;
211 case JSONTYPE_COMPOSITE:
212 composite_to_json(val, result, false);
213 break;
214 case JSONTYPE_BOOL:
215 if (key_scalar)
216 appendStringInfoChar(result, '"');
217 if (DatumGetBool(val))
218 appendBinaryStringInfo(result, "true", strlen("true"));
219 else
220 appendBinaryStringInfo(result, "false", strlen("false"));
221 if (key_scalar)
222 appendStringInfoChar(result, '"');
223 break;
224 case JSONTYPE_NUMERIC:
225 outputstr = OidOutputFunctionCall(outfuncoid, val);
228 * Don't quote a non-key if it's a valid JSON number (i.e., not
229 * "Infinity", "-Infinity", or "NaN"). Since we know this is a
230 * numeric data type's output, we simplify and open-code the
231 * validation for better performance.
233 if (!key_scalar &&
234 ((*outputstr >= '0' && *outputstr <= '9') ||
235 (*outputstr == '-' &&
236 (outputstr[1] >= '0' && outputstr[1] <= '9'))))
237 appendStringInfoString(result, outputstr);
238 else
240 appendStringInfoChar(result, '"');
241 appendStringInfoString(result, outputstr);
242 appendStringInfoChar(result, '"');
244 pfree(outputstr);
245 break;
246 case JSONTYPE_DATE:
248 char buf[MAXDATELEN + 1];
250 JsonEncodeDateTime(buf, val, DATEOID, NULL);
251 appendStringInfoChar(result, '"');
252 appendStringInfoString(result, buf);
253 appendStringInfoChar(result, '"');
255 break;
256 case JSONTYPE_TIMESTAMP:
258 char buf[MAXDATELEN + 1];
260 JsonEncodeDateTime(buf, val, TIMESTAMPOID, NULL);
261 appendStringInfoChar(result, '"');
262 appendStringInfoString(result, buf);
263 appendStringInfoChar(result, '"');
265 break;
266 case JSONTYPE_TIMESTAMPTZ:
268 char buf[MAXDATELEN + 1];
270 JsonEncodeDateTime(buf, val, TIMESTAMPTZOID, NULL);
271 appendStringInfoChar(result, '"');
272 appendStringInfoString(result, buf);
273 appendStringInfoChar(result, '"');
275 break;
276 case JSONTYPE_JSON:
277 /* JSON and JSONB output will already be escaped */
278 outputstr = OidOutputFunctionCall(outfuncoid, val);
279 appendStringInfoString(result, outputstr);
280 pfree(outputstr);
281 break;
282 case JSONTYPE_CAST:
283 /* outfuncoid refers to a cast function, not an output function */
284 jsontext = DatumGetTextPP(OidFunctionCall1(outfuncoid, val));
285 appendBinaryStringInfo(result, VARDATA_ANY(jsontext),
286 VARSIZE_ANY_EXHDR(jsontext));
287 pfree(jsontext);
288 break;
289 default:
290 /* special-case text types to save useless palloc/memcpy cycles */
291 if (outfuncoid == F_TEXTOUT || outfuncoid == F_VARCHAROUT ||
292 outfuncoid == F_BPCHAROUT)
293 escape_json_text(result, (text *) DatumGetPointer(val));
294 else
296 outputstr = OidOutputFunctionCall(outfuncoid, val);
297 escape_json(result, outputstr);
298 pfree(outputstr);
300 break;
305 * Encode 'value' of datetime type 'typid' into JSON string in ISO format using
306 * optionally preallocated buffer 'buf'. Optional 'tzp' determines time-zone
307 * offset (in seconds) in which we want to show timestamptz.
309 char *
310 JsonEncodeDateTime(char *buf, Datum value, Oid typid, const int *tzp)
312 if (!buf)
313 buf = palloc(MAXDATELEN + 1);
315 switch (typid)
317 case DATEOID:
319 DateADT date;
320 struct pg_tm tm;
322 date = DatumGetDateADT(value);
324 /* Same as date_out(), but forcing DateStyle */
325 if (DATE_NOT_FINITE(date))
326 EncodeSpecialDate(date, buf);
327 else
329 j2date(date + POSTGRES_EPOCH_JDATE,
330 &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
331 EncodeDateOnly(&tm, USE_XSD_DATES, buf);
334 break;
335 case TIMEOID:
337 TimeADT time = DatumGetTimeADT(value);
338 struct pg_tm tt,
339 *tm = &tt;
340 fsec_t fsec;
342 /* Same as time_out(), but forcing DateStyle */
343 time2tm(time, tm, &fsec);
344 EncodeTimeOnly(tm, fsec, false, 0, USE_XSD_DATES, buf);
346 break;
347 case TIMETZOID:
349 TimeTzADT *time = DatumGetTimeTzADTP(value);
350 struct pg_tm tt,
351 *tm = &tt;
352 fsec_t fsec;
353 int tz;
355 /* Same as timetz_out(), but forcing DateStyle */
356 timetz2tm(time, tm, &fsec, &tz);
357 EncodeTimeOnly(tm, fsec, true, tz, USE_XSD_DATES, buf);
359 break;
360 case TIMESTAMPOID:
362 Timestamp timestamp;
363 struct pg_tm tm;
364 fsec_t fsec;
366 timestamp = DatumGetTimestamp(value);
367 /* Same as timestamp_out(), but forcing DateStyle */
368 if (TIMESTAMP_NOT_FINITE(timestamp))
369 EncodeSpecialTimestamp(timestamp, buf);
370 else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
371 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
372 else
373 ereport(ERROR,
374 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
375 errmsg("timestamp out of range")));
377 break;
378 case TIMESTAMPTZOID:
380 TimestampTz timestamp;
381 struct pg_tm tm;
382 int tz;
383 fsec_t fsec;
384 const char *tzn = NULL;
386 timestamp = DatumGetTimestampTz(value);
389 * If a time zone is specified, we apply the time-zone shift,
390 * convert timestamptz to pg_tm as if it were without a time
391 * zone, and then use the specified time zone for converting
392 * the timestamp into a string.
394 if (tzp)
396 tz = *tzp;
397 timestamp -= (TimestampTz) tz * USECS_PER_SEC;
400 /* Same as timestamptz_out(), but forcing DateStyle */
401 if (TIMESTAMP_NOT_FINITE(timestamp))
402 EncodeSpecialTimestamp(timestamp, buf);
403 else if (timestamp2tm(timestamp, tzp ? NULL : &tz, &tm, &fsec,
404 tzp ? NULL : &tzn, NULL) == 0)
406 if (tzp)
407 tm.tm_isdst = 1; /* set time-zone presence flag */
409 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
411 else
412 ereport(ERROR,
413 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
414 errmsg("timestamp out of range")));
416 break;
417 default:
418 elog(ERROR, "unknown jsonb value datetime type oid %u", typid);
419 return NULL;
422 return buf;
426 * Process a single dimension of an array.
427 * If it's the innermost dimension, output the values, otherwise call
428 * ourselves recursively to process the next dimension.
430 static void
431 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
432 bool *nulls, int *valcount, JsonTypeCategory tcategory,
433 Oid outfuncoid, bool use_line_feeds)
435 int i;
436 const char *sep;
438 Assert(dim < ndims);
440 sep = use_line_feeds ? ",\n " : ",";
442 appendStringInfoChar(result, '[');
444 for (i = 1; i <= dims[dim]; i++)
446 if (i > 1)
447 appendStringInfoString(result, sep);
449 if (dim + 1 == ndims)
451 datum_to_json_internal(vals[*valcount], nulls[*valcount],
452 result, tcategory,
453 outfuncoid, false);
454 (*valcount)++;
456 else
459 * Do we want line feeds on inner dimensions of arrays? For now
460 * we'll say no.
462 array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
463 valcount, tcategory, outfuncoid, false);
467 appendStringInfoChar(result, ']');
471 * Turn an array into JSON.
473 static void
474 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
476 ArrayType *v = DatumGetArrayTypeP(array);
477 Oid element_type = ARR_ELEMTYPE(v);
478 int *dim;
479 int ndim;
480 int nitems;
481 int count = 0;
482 Datum *elements;
483 bool *nulls;
484 int16 typlen;
485 bool typbyval;
486 char typalign;
487 JsonTypeCategory tcategory;
488 Oid outfuncoid;
490 ndim = ARR_NDIM(v);
491 dim = ARR_DIMS(v);
492 nitems = ArrayGetNItems(ndim, dim);
494 if (nitems <= 0)
496 appendStringInfoString(result, "[]");
497 return;
500 get_typlenbyvalalign(element_type,
501 &typlen, &typbyval, &typalign);
503 json_categorize_type(element_type, false,
504 &tcategory, &outfuncoid);
506 deconstruct_array(v, element_type, typlen, typbyval,
507 typalign, &elements, &nulls,
508 &nitems);
510 array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
511 outfuncoid, use_line_feeds);
513 pfree(elements);
514 pfree(nulls);
518 * Turn a composite / record into JSON.
520 static void
521 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
523 HeapTupleHeader td;
524 Oid tupType;
525 int32 tupTypmod;
526 TupleDesc tupdesc;
527 HeapTupleData tmptup,
528 *tuple;
529 int i;
530 bool needsep = false;
531 const char *sep;
532 int seplen;
535 * We can avoid expensive strlen() calls by precalculating the separator
536 * length.
538 sep = use_line_feeds ? ",\n " : ",";
539 seplen = use_line_feeds ? strlen(",\n ") : strlen(",");
541 td = DatumGetHeapTupleHeader(composite);
543 /* Extract rowtype info and find a tupdesc */
544 tupType = HeapTupleHeaderGetTypeId(td);
545 tupTypmod = HeapTupleHeaderGetTypMod(td);
546 tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
548 /* Build a temporary HeapTuple control structure */
549 tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
550 tmptup.t_data = td;
551 tuple = &tmptup;
553 appendStringInfoChar(result, '{');
555 for (i = 0; i < tupdesc->natts; i++)
557 Datum val;
558 bool isnull;
559 char *attname;
560 JsonTypeCategory tcategory;
561 Oid outfuncoid;
562 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
564 if (att->attisdropped)
565 continue;
567 if (needsep)
568 appendBinaryStringInfo(result, sep, seplen);
569 needsep = true;
571 attname = NameStr(att->attname);
572 escape_json(result, attname);
573 appendStringInfoChar(result, ':');
575 val = heap_getattr(tuple, i + 1, tupdesc, &isnull);
577 if (isnull)
579 tcategory = JSONTYPE_NULL;
580 outfuncoid = InvalidOid;
582 else
583 json_categorize_type(att->atttypid, false, &tcategory,
584 &outfuncoid);
586 datum_to_json_internal(val, isnull, result, tcategory, outfuncoid,
587 false);
590 appendStringInfoChar(result, '}');
591 ReleaseTupleDesc(tupdesc);
595 * Append JSON text for "val" to "result".
597 * This is just a thin wrapper around datum_to_json. If the same type will be
598 * printed many times, avoid using this; better to do the json_categorize_type
599 * lookups only once.
601 static void
602 add_json(Datum val, bool is_null, StringInfo result,
603 Oid val_type, bool key_scalar)
605 JsonTypeCategory tcategory;
606 Oid outfuncoid;
608 if (val_type == InvalidOid)
609 ereport(ERROR,
610 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
611 errmsg("could not determine input data type")));
613 if (is_null)
615 tcategory = JSONTYPE_NULL;
616 outfuncoid = InvalidOid;
618 else
619 json_categorize_type(val_type, false,
620 &tcategory, &outfuncoid);
622 datum_to_json_internal(val, is_null, result, tcategory, outfuncoid,
623 key_scalar);
627 * SQL function array_to_json(row)
629 Datum
630 array_to_json(PG_FUNCTION_ARGS)
632 Datum array = PG_GETARG_DATUM(0);
633 StringInfo result;
635 result = makeStringInfo();
637 array_to_json_internal(array, result, false);
639 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
643 * SQL function array_to_json(row, prettybool)
645 Datum
646 array_to_json_pretty(PG_FUNCTION_ARGS)
648 Datum array = PG_GETARG_DATUM(0);
649 bool use_line_feeds = PG_GETARG_BOOL(1);
650 StringInfo result;
652 result = makeStringInfo();
654 array_to_json_internal(array, result, use_line_feeds);
656 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
660 * SQL function row_to_json(row)
662 Datum
663 row_to_json(PG_FUNCTION_ARGS)
665 Datum array = PG_GETARG_DATUM(0);
666 StringInfo result;
668 result = makeStringInfo();
670 composite_to_json(array, result, false);
672 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
676 * SQL function row_to_json(row, prettybool)
678 Datum
679 row_to_json_pretty(PG_FUNCTION_ARGS)
681 Datum array = PG_GETARG_DATUM(0);
682 bool use_line_feeds = PG_GETARG_BOOL(1);
683 StringInfo result;
685 result = makeStringInfo();
687 composite_to_json(array, result, use_line_feeds);
689 PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
693 * Is the given type immutable when coming out of a JSON context?
695 * At present, datetimes are all considered mutable, because they
696 * depend on timezone. XXX we should also drill down into objects
697 * and arrays, but do not.
699 bool
700 to_json_is_immutable(Oid typoid)
702 JsonTypeCategory tcategory;
703 Oid outfuncoid;
705 json_categorize_type(typoid, false, &tcategory, &outfuncoid);
707 switch (tcategory)
709 case JSONTYPE_BOOL:
710 case JSONTYPE_JSON:
711 case JSONTYPE_JSONB:
712 case JSONTYPE_NULL:
713 return true;
715 case JSONTYPE_DATE:
716 case JSONTYPE_TIMESTAMP:
717 case JSONTYPE_TIMESTAMPTZ:
718 return false;
720 case JSONTYPE_ARRAY:
721 return false; /* TODO recurse into elements */
723 case JSONTYPE_COMPOSITE:
724 return false; /* TODO recurse into fields */
726 case JSONTYPE_NUMERIC:
727 case JSONTYPE_CAST:
728 case JSONTYPE_OTHER:
729 return func_volatile(outfuncoid) == PROVOLATILE_IMMUTABLE;
732 return false; /* not reached */
736 * SQL function to_json(anyvalue)
738 Datum
739 to_json(PG_FUNCTION_ARGS)
741 Datum val = PG_GETARG_DATUM(0);
742 Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
743 JsonTypeCategory tcategory;
744 Oid outfuncoid;
746 if (val_type == InvalidOid)
747 ereport(ERROR,
748 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
749 errmsg("could not determine input data type")));
751 json_categorize_type(val_type, false,
752 &tcategory, &outfuncoid);
754 PG_RETURN_DATUM(datum_to_json(val, tcategory, outfuncoid));
758 * Turn a Datum into JSON text.
760 * tcategory and outfuncoid are from a previous call to json_categorize_type.
762 Datum
763 datum_to_json(Datum val, JsonTypeCategory tcategory, Oid outfuncoid)
765 StringInfo result = makeStringInfo();
767 datum_to_json_internal(val, false, result, tcategory, outfuncoid,
768 false);
770 return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
774 * json_agg transition function
776 * aggregate input column as a json array value.
778 static Datum
779 json_agg_transfn_worker(FunctionCallInfo fcinfo, bool absent_on_null)
781 MemoryContext aggcontext,
782 oldcontext;
783 JsonAggState *state;
784 Datum val;
786 if (!AggCheckCallContext(fcinfo, &aggcontext))
788 /* cannot be called directly because of internal-type argument */
789 elog(ERROR, "json_agg_transfn called in non-aggregate context");
792 if (PG_ARGISNULL(0))
794 Oid arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
796 if (arg_type == InvalidOid)
797 ereport(ERROR,
798 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
799 errmsg("could not determine input data type")));
802 * Make this state object in a context where it will persist for the
803 * duration of the aggregate call. MemoryContextSwitchTo is only
804 * needed the first time, as the StringInfo routines make sure they
805 * use the right context to enlarge the object if necessary.
807 oldcontext = MemoryContextSwitchTo(aggcontext);
808 state = (JsonAggState *) palloc(sizeof(JsonAggState));
809 state->str = makeStringInfo();
810 MemoryContextSwitchTo(oldcontext);
812 appendStringInfoChar(state->str, '[');
813 json_categorize_type(arg_type, false, &state->val_category,
814 &state->val_output_func);
816 else
818 state = (JsonAggState *) PG_GETARG_POINTER(0);
821 if (absent_on_null && PG_ARGISNULL(1))
822 PG_RETURN_POINTER(state);
824 if (state->str->len > 1)
825 appendStringInfoString(state->str, ", ");
827 /* fast path for NULLs */
828 if (PG_ARGISNULL(1))
830 datum_to_json_internal((Datum) 0, true, state->str, JSONTYPE_NULL,
831 InvalidOid, false);
832 PG_RETURN_POINTER(state);
835 val = PG_GETARG_DATUM(1);
837 /* add some whitespace if structured type and not first item */
838 if (!PG_ARGISNULL(0) && state->str->len > 1 &&
839 (state->val_category == JSONTYPE_ARRAY ||
840 state->val_category == JSONTYPE_COMPOSITE))
842 appendStringInfoString(state->str, "\n ");
845 datum_to_json_internal(val, false, state->str, state->val_category,
846 state->val_output_func, false);
849 * The transition type for json_agg() is declared to be "internal", which
850 * is a pass-by-value type the same size as a pointer. So we can safely
851 * pass the JsonAggState pointer through nodeAgg.c's machinations.
853 PG_RETURN_POINTER(state);
858 * json_agg aggregate function
860 Datum
861 json_agg_transfn(PG_FUNCTION_ARGS)
863 return json_agg_transfn_worker(fcinfo, false);
867 * json_agg_strict aggregate function
869 Datum
870 json_agg_strict_transfn(PG_FUNCTION_ARGS)
872 return json_agg_transfn_worker(fcinfo, true);
876 * json_agg final function
878 Datum
879 json_agg_finalfn(PG_FUNCTION_ARGS)
881 JsonAggState *state;
883 /* cannot be called directly because of internal-type argument */
884 Assert(AggCheckCallContext(fcinfo, NULL));
886 state = PG_ARGISNULL(0) ?
887 NULL :
888 (JsonAggState *) PG_GETARG_POINTER(0);
890 /* NULL result for no rows in, as is standard with aggregates */
891 if (state == NULL)
892 PG_RETURN_NULL();
894 /* Else return state with appropriate array terminator added */
895 PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]"));
898 /* Functions implementing hash table for key uniqueness check */
899 static uint32
900 json_unique_hash(const void *key, Size keysize)
902 const JsonUniqueHashEntry *entry = (JsonUniqueHashEntry *) key;
903 uint32 hash = hash_bytes_uint32(entry->object_id);
905 hash ^= hash_bytes((const unsigned char *) entry->key, entry->key_len);
907 return DatumGetUInt32(hash);
910 static int
911 json_unique_hash_match(const void *key1, const void *key2, Size keysize)
913 const JsonUniqueHashEntry *entry1 = (const JsonUniqueHashEntry *) key1;
914 const JsonUniqueHashEntry *entry2 = (const JsonUniqueHashEntry *) key2;
916 if (entry1->object_id != entry2->object_id)
917 return entry1->object_id > entry2->object_id ? 1 : -1;
919 if (entry1->key_len != entry2->key_len)
920 return entry1->key_len > entry2->key_len ? 1 : -1;
922 return strncmp(entry1->key, entry2->key, entry1->key_len);
926 * Uniqueness detection support.
928 * In order to detect uniqueness during building or parsing of a JSON
929 * object, we maintain a hash table of key names already seen.
931 static void
932 json_unique_check_init(JsonUniqueCheckState *cxt)
934 HASHCTL ctl;
936 memset(&ctl, 0, sizeof(ctl));
937 ctl.keysize = sizeof(JsonUniqueHashEntry);
938 ctl.entrysize = sizeof(JsonUniqueHashEntry);
939 ctl.hcxt = CurrentMemoryContext;
940 ctl.hash = json_unique_hash;
941 ctl.match = json_unique_hash_match;
943 *cxt = hash_create("json object hashtable",
945 &ctl,
946 HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE);
949 static void
950 json_unique_builder_init(JsonUniqueBuilderState *cxt)
952 json_unique_check_init(&cxt->check);
953 cxt->mcxt = CurrentMemoryContext;
954 cxt->skipped_keys.data = NULL;
957 static bool
958 json_unique_check_key(JsonUniqueCheckState *cxt, const char *key, int object_id)
960 JsonUniqueHashEntry entry;
961 bool found;
963 entry.key = key;
964 entry.key_len = strlen(key);
965 entry.object_id = object_id;
967 (void) hash_search(*cxt, &entry, HASH_ENTER, &found);
969 return !found;
973 * On-demand initialization of a throwaway StringInfo. This is used to
974 * read a key name that we don't need to store in the output object, for
975 * duplicate key detection when the value is NULL.
977 static StringInfo
978 json_unique_builder_get_throwawaybuf(JsonUniqueBuilderState *cxt)
980 StringInfo out = &cxt->skipped_keys;
982 if (!out->data)
984 MemoryContext oldcxt = MemoryContextSwitchTo(cxt->mcxt);
986 initStringInfo(out);
987 MemoryContextSwitchTo(oldcxt);
989 else
990 /* Just reset the string to empty */
991 out->len = 0;
993 return out;
997 * json_object_agg transition function.
999 * aggregate two input columns as a single json object value.
1001 static Datum
1002 json_object_agg_transfn_worker(FunctionCallInfo fcinfo,
1003 bool absent_on_null, bool unique_keys)
1005 MemoryContext aggcontext,
1006 oldcontext;
1007 JsonAggState *state;
1008 StringInfo out;
1009 Datum arg;
1010 bool skip;
1011 int key_offset;
1013 if (!AggCheckCallContext(fcinfo, &aggcontext))
1015 /* cannot be called directly because of internal-type argument */
1016 elog(ERROR, "json_object_agg_transfn called in non-aggregate context");
1019 if (PG_ARGISNULL(0))
1021 Oid arg_type;
1024 * Make the StringInfo in a context where it will persist for the
1025 * duration of the aggregate call. Switching context is only needed
1026 * for this initial step, as the StringInfo and dynahash routines make
1027 * sure they use the right context to enlarge the object if necessary.
1029 oldcontext = MemoryContextSwitchTo(aggcontext);
1030 state = (JsonAggState *) palloc(sizeof(JsonAggState));
1031 state->str = makeStringInfo();
1032 if (unique_keys)
1033 json_unique_builder_init(&state->unique_check);
1034 else
1035 memset(&state->unique_check, 0, sizeof(state->unique_check));
1036 MemoryContextSwitchTo(oldcontext);
1038 arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
1040 if (arg_type == InvalidOid)
1041 ereport(ERROR,
1042 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1043 errmsg("could not determine data type for argument %d", 1)));
1045 json_categorize_type(arg_type, false, &state->key_category,
1046 &state->key_output_func);
1048 arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2);
1050 if (arg_type == InvalidOid)
1051 ereport(ERROR,
1052 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1053 errmsg("could not determine data type for argument %d", 2)));
1055 json_categorize_type(arg_type, false, &state->val_category,
1056 &state->val_output_func);
1058 appendStringInfoString(state->str, "{ ");
1060 else
1062 state = (JsonAggState *) PG_GETARG_POINTER(0);
1066 * Note: since json_object_agg() is declared as taking type "any", the
1067 * parser will not do any type conversion on unknown-type literals (that
1068 * is, undecorated strings or NULLs). Such values will arrive here as
1069 * type UNKNOWN, which fortunately does not matter to us, since
1070 * unknownout() works fine.
1073 if (PG_ARGISNULL(1))
1074 ereport(ERROR,
1075 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1076 errmsg("null value not allowed for object key")));
1078 /* Skip null values if absent_on_null */
1079 skip = absent_on_null && PG_ARGISNULL(2);
1081 if (skip)
1084 * We got a NULL value and we're not storing those; if we're not
1085 * testing key uniqueness, we're done. If we are, use the throwaway
1086 * buffer to store the key name so that we can check it.
1088 if (!unique_keys)
1089 PG_RETURN_POINTER(state);
1091 out = json_unique_builder_get_throwawaybuf(&state->unique_check);
1093 else
1095 out = state->str;
1098 * Append comma delimiter only if we have already output some fields
1099 * after the initial string "{ ".
1101 if (out->len > 2)
1102 appendStringInfoString(out, ", ");
1105 arg = PG_GETARG_DATUM(1);
1107 key_offset = out->len;
1109 datum_to_json_internal(arg, false, out, state->key_category,
1110 state->key_output_func, true);
1112 if (unique_keys)
1115 * Copy the key first, instead of pointing into the buffer. It will be
1116 * added to the hash table, but the buffer may get reallocated as
1117 * we're appending more data to it. That would invalidate pointers to
1118 * keys in the current buffer.
1120 const char *key = MemoryContextStrdup(aggcontext,
1121 &out->data[key_offset]);
1123 if (!json_unique_check_key(&state->unique_check.check, key, 0))
1124 ereport(ERROR,
1125 errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
1126 errmsg("duplicate JSON object key value: %s", key));
1128 if (skip)
1129 PG_RETURN_POINTER(state);
1132 appendStringInfoString(state->str, " : ");
1134 if (PG_ARGISNULL(2))
1135 arg = (Datum) 0;
1136 else
1137 arg = PG_GETARG_DATUM(2);
1139 datum_to_json_internal(arg, PG_ARGISNULL(2), state->str,
1140 state->val_category,
1141 state->val_output_func, false);
1143 PG_RETURN_POINTER(state);
1147 * json_object_agg aggregate function
1149 Datum
1150 json_object_agg_transfn(PG_FUNCTION_ARGS)
1152 return json_object_agg_transfn_worker(fcinfo, false, false);
1156 * json_object_agg_strict aggregate function
1158 Datum
1159 json_object_agg_strict_transfn(PG_FUNCTION_ARGS)
1161 return json_object_agg_transfn_worker(fcinfo, true, false);
1165 * json_object_agg_unique aggregate function
1167 Datum
1168 json_object_agg_unique_transfn(PG_FUNCTION_ARGS)
1170 return json_object_agg_transfn_worker(fcinfo, false, true);
1174 * json_object_agg_unique_strict aggregate function
1176 Datum
1177 json_object_agg_unique_strict_transfn(PG_FUNCTION_ARGS)
1179 return json_object_agg_transfn_worker(fcinfo, true, true);
1183 * json_object_agg final function.
1185 Datum
1186 json_object_agg_finalfn(PG_FUNCTION_ARGS)
1188 JsonAggState *state;
1190 /* cannot be called directly because of internal-type argument */
1191 Assert(AggCheckCallContext(fcinfo, NULL));
1193 state = PG_ARGISNULL(0) ? NULL : (JsonAggState *) PG_GETARG_POINTER(0);
1195 /* NULL result for no rows in, as is standard with aggregates */
1196 if (state == NULL)
1197 PG_RETURN_NULL();
1199 /* Else return state with appropriate object terminator added */
1200 PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, " }"));
1204 * Helper function for aggregates: return given StringInfo's contents plus
1205 * specified trailing string, as a text datum. We need this because aggregate
1206 * final functions are not allowed to modify the aggregate state.
1208 static text *
1209 catenate_stringinfo_string(StringInfo buffer, const char *addon)
1211 /* custom version of cstring_to_text_with_len */
1212 int buflen = buffer->len;
1213 int addlen = strlen(addon);
1214 text *result = (text *) palloc(buflen + addlen + VARHDRSZ);
1216 SET_VARSIZE(result, buflen + addlen + VARHDRSZ);
1217 memcpy(VARDATA(result), buffer->data, buflen);
1218 memcpy(VARDATA(result) + buflen, addon, addlen);
1220 return result;
1223 Datum
1224 json_build_object_worker(int nargs, const Datum *args, const bool *nulls, const Oid *types,
1225 bool absent_on_null, bool unique_keys)
1227 int i;
1228 const char *sep = "";
1229 StringInfo result;
1230 JsonUniqueBuilderState unique_check;
1232 if (nargs % 2 != 0)
1233 ereport(ERROR,
1234 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1235 errmsg("argument list must have even number of elements"),
1236 /* translator: %s is a SQL function name */
1237 errhint("The arguments of %s must consist of alternating keys and values.",
1238 "json_build_object()")));
1240 result = makeStringInfo();
1242 appendStringInfoChar(result, '{');
1244 if (unique_keys)
1245 json_unique_builder_init(&unique_check);
1247 for (i = 0; i < nargs; i += 2)
1249 StringInfo out;
1250 bool skip;
1251 int key_offset;
1253 /* Skip null values if absent_on_null */
1254 skip = absent_on_null && nulls[i + 1];
1256 if (skip)
1258 /* If key uniqueness check is needed we must save skipped keys */
1259 if (!unique_keys)
1260 continue;
1262 out = json_unique_builder_get_throwawaybuf(&unique_check);
1264 else
1266 appendStringInfoString(result, sep);
1267 sep = ", ";
1268 out = result;
1271 /* process key */
1272 if (nulls[i])
1273 ereport(ERROR,
1274 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1275 errmsg("null value not allowed for object key")));
1277 /* save key offset before appending it */
1278 key_offset = out->len;
1280 add_json(args[i], false, out, types[i], true);
1282 if (unique_keys)
1285 * check key uniqueness after key appending
1287 * Copy the key first, instead of pointing into the buffer. It
1288 * will be added to the hash table, but the buffer may get
1289 * reallocated as we're appending more data to it. That would
1290 * invalidate pointers to keys in the current buffer.
1292 const char *key = pstrdup(&out->data[key_offset]);
1294 if (!json_unique_check_key(&unique_check.check, key, 0))
1295 ereport(ERROR,
1296 errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
1297 errmsg("duplicate JSON object key value: %s", key));
1299 if (skip)
1300 continue;
1303 appendStringInfoString(result, " : ");
1305 /* process value */
1306 add_json(args[i + 1], nulls[i + 1], result, types[i + 1], false);
1309 appendStringInfoChar(result, '}');
1311 return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
1315 * SQL function json_build_object(variadic "any")
1317 Datum
1318 json_build_object(PG_FUNCTION_ARGS)
1320 Datum *args;
1321 bool *nulls;
1322 Oid *types;
1324 /* build argument values to build the object */
1325 int nargs = extract_variadic_args(fcinfo, 0, true,
1326 &args, &types, &nulls);
1328 if (nargs < 0)
1329 PG_RETURN_NULL();
1331 PG_RETURN_DATUM(json_build_object_worker(nargs, args, nulls, types, false, false));
1335 * degenerate case of json_build_object where it gets 0 arguments.
1337 Datum
1338 json_build_object_noargs(PG_FUNCTION_ARGS)
1340 PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
1343 Datum
1344 json_build_array_worker(int nargs, const Datum *args, const bool *nulls, const Oid *types,
1345 bool absent_on_null)
1347 int i;
1348 const char *sep = "";
1349 StringInfo result;
1351 result = makeStringInfo();
1353 appendStringInfoChar(result, '[');
1355 for (i = 0; i < nargs; i++)
1357 if (absent_on_null && nulls[i])
1358 continue;
1360 appendStringInfoString(result, sep);
1361 sep = ", ";
1362 add_json(args[i], nulls[i], result, types[i], false);
1365 appendStringInfoChar(result, ']');
1367 return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
1371 * SQL function json_build_array(variadic "any")
1373 Datum
1374 json_build_array(PG_FUNCTION_ARGS)
1376 Datum *args;
1377 bool *nulls;
1378 Oid *types;
1380 /* build argument values to build the object */
1381 int nargs = extract_variadic_args(fcinfo, 0, true,
1382 &args, &types, &nulls);
1384 if (nargs < 0)
1385 PG_RETURN_NULL();
1387 PG_RETURN_DATUM(json_build_array_worker(nargs, args, nulls, types, false));
1391 * degenerate case of json_build_array where it gets 0 arguments.
1393 Datum
1394 json_build_array_noargs(PG_FUNCTION_ARGS)
1396 PG_RETURN_TEXT_P(cstring_to_text_with_len("[]", 2));
1400 * SQL function json_object(text[])
1402 * take a one or two dimensional array of text as key/value pairs
1403 * for a json object.
1405 Datum
1406 json_object(PG_FUNCTION_ARGS)
1408 ArrayType *in_array = PG_GETARG_ARRAYTYPE_P(0);
1409 int ndims = ARR_NDIM(in_array);
1410 StringInfoData result;
1411 Datum *in_datums;
1412 bool *in_nulls;
1413 int in_count,
1414 count,
1416 text *rval;
1418 switch (ndims)
1420 case 0:
1421 PG_RETURN_DATUM(CStringGetTextDatum("{}"));
1422 break;
1424 case 1:
1425 if ((ARR_DIMS(in_array)[0]) % 2)
1426 ereport(ERROR,
1427 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1428 errmsg("array must have even number of elements")));
1429 break;
1431 case 2:
1432 if ((ARR_DIMS(in_array)[1]) != 2)
1433 ereport(ERROR,
1434 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1435 errmsg("array must have two columns")));
1436 break;
1438 default:
1439 ereport(ERROR,
1440 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1441 errmsg("wrong number of array subscripts")));
1444 deconstruct_array_builtin(in_array, TEXTOID, &in_datums, &in_nulls, &in_count);
1446 count = in_count / 2;
1448 initStringInfo(&result);
1450 appendStringInfoChar(&result, '{');
1452 for (i = 0; i < count; ++i)
1454 if (in_nulls[i * 2])
1455 ereport(ERROR,
1456 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1457 errmsg("null value not allowed for object key")));
1459 if (i > 0)
1460 appendStringInfoString(&result, ", ");
1461 escape_json_text(&result, (text *) DatumGetPointer(in_datums[i * 2]));
1462 appendStringInfoString(&result, " : ");
1463 if (in_nulls[i * 2 + 1])
1464 appendStringInfoString(&result, "null");
1465 else
1467 escape_json_text(&result,
1468 (text *) DatumGetPointer(in_datums[i * 2 + 1]));
1472 appendStringInfoChar(&result, '}');
1474 pfree(in_datums);
1475 pfree(in_nulls);
1477 rval = cstring_to_text_with_len(result.data, result.len);
1478 pfree(result.data);
1480 PG_RETURN_TEXT_P(rval);
1484 * SQL function json_object(text[], text[])
1486 * take separate key and value arrays of text to construct a json object
1487 * pairwise.
1489 Datum
1490 json_object_two_arg(PG_FUNCTION_ARGS)
1492 ArrayType *key_array = PG_GETARG_ARRAYTYPE_P(0);
1493 ArrayType *val_array = PG_GETARG_ARRAYTYPE_P(1);
1494 int nkdims = ARR_NDIM(key_array);
1495 int nvdims = ARR_NDIM(val_array);
1496 StringInfoData result;
1497 Datum *key_datums,
1498 *val_datums;
1499 bool *key_nulls,
1500 *val_nulls;
1501 int key_count,
1502 val_count,
1504 text *rval;
1506 if (nkdims > 1 || nkdims != nvdims)
1507 ereport(ERROR,
1508 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1509 errmsg("wrong number of array subscripts")));
1511 if (nkdims == 0)
1512 PG_RETURN_DATUM(CStringGetTextDatum("{}"));
1514 deconstruct_array_builtin(key_array, TEXTOID, &key_datums, &key_nulls, &key_count);
1515 deconstruct_array_builtin(val_array, TEXTOID, &val_datums, &val_nulls, &val_count);
1517 if (key_count != val_count)
1518 ereport(ERROR,
1519 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1520 errmsg("mismatched array dimensions")));
1522 initStringInfo(&result);
1524 appendStringInfoChar(&result, '{');
1526 for (i = 0; i < key_count; ++i)
1528 if (key_nulls[i])
1529 ereport(ERROR,
1530 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
1531 errmsg("null value not allowed for object key")));
1533 if (i > 0)
1534 appendStringInfoString(&result, ", ");
1535 escape_json_text(&result, (text *) DatumGetPointer(key_datums[i]));
1536 appendStringInfoString(&result, " : ");
1537 if (val_nulls[i])
1538 appendStringInfoString(&result, "null");
1539 else
1540 escape_json_text(&result,
1541 (text *) DatumGetPointer(val_datums[i]));
1544 appendStringInfoChar(&result, '}');
1546 pfree(key_datums);
1547 pfree(key_nulls);
1548 pfree(val_datums);
1549 pfree(val_nulls);
1551 rval = cstring_to_text_with_len(result.data, result.len);
1552 pfree(result.data);
1554 PG_RETURN_TEXT_P(rval);
1558 * escape_json_char
1559 * Inline helper function for escape_json* functions
1561 static pg_attribute_always_inline void
1562 escape_json_char(StringInfo buf, char c)
1564 switch (c)
1566 case '\b':
1567 appendStringInfoString(buf, "\\b");
1568 break;
1569 case '\f':
1570 appendStringInfoString(buf, "\\f");
1571 break;
1572 case '\n':
1573 appendStringInfoString(buf, "\\n");
1574 break;
1575 case '\r':
1576 appendStringInfoString(buf, "\\r");
1577 break;
1578 case '\t':
1579 appendStringInfoString(buf, "\\t");
1580 break;
1581 case '"':
1582 appendStringInfoString(buf, "\\\"");
1583 break;
1584 case '\\':
1585 appendStringInfoString(buf, "\\\\");
1586 break;
1587 default:
1588 if ((unsigned char) c < ' ')
1589 appendStringInfo(buf, "\\u%04x", (int) c);
1590 else
1591 appendStringInfoCharMacro(buf, c);
1592 break;
1597 * escape_json
1598 * Produce a JSON string literal, properly escaping the NUL-terminated
1599 * cstring.
1601 void
1602 escape_json(StringInfo buf, const char *str)
1604 appendStringInfoCharMacro(buf, '"');
1606 for (; *str != '\0'; str++)
1607 escape_json_char(buf, *str);
1609 appendStringInfoCharMacro(buf, '"');
1613 * Define the number of bytes that escape_json_with_len will look ahead in the
1614 * input string before flushing the input string to the destination buffer.
1615 * Looking ahead too far could result in cachelines being evicted that will
1616 * need to be reloaded in order to perform the appendBinaryStringInfo call.
1617 * Smaller values will result in a larger number of calls to
1618 * appendBinaryStringInfo and introduce additional function call overhead.
1619 * Values larger than the size of L1d cache will likely result in worse
1620 * performance.
1622 #define ESCAPE_JSON_FLUSH_AFTER 512
1625 * escape_json_with_len
1626 * Produce a JSON string literal, properly escaping the possibly not
1627 * NUL-terminated characters in 'str'. 'len' defines the number of bytes
1628 * from 'str' to process.
1630 void
1631 escape_json_with_len(StringInfo buf, const char *str, int len)
1633 int vlen;
1635 Assert(len >= 0);
1638 * Since we know the minimum length we'll need to append, let's just
1639 * enlarge the buffer now rather than incrementally making more space when
1640 * we run out. Add two extra bytes for the enclosing quotes.
1642 enlargeStringInfo(buf, len + 2);
1645 * Figure out how many bytes to process using SIMD. Round 'len' down to
1646 * the previous multiple of sizeof(Vector8), assuming that's a power-of-2.
1648 vlen = len & (int) (~(sizeof(Vector8) - 1));
1650 appendStringInfoCharMacro(buf, '"');
1652 for (int i = 0, copypos = 0;;)
1655 * To speed this up, try searching sizeof(Vector8) bytes at once for
1656 * special characters that we need to escape. When we find one, we
1657 * fall out of the Vector8 loop and copy the portion we've vector
1658 * searched and then we process sizeof(Vector8) bytes one byte at a
1659 * time. Once done, come back and try doing vector searching again.
1660 * We'll also process any remaining bytes at the tail end of the
1661 * string byte-by-byte. This optimization assumes that most chunks of
1662 * sizeof(Vector8) bytes won't contain any special characters.
1664 for (; i < vlen; i += sizeof(Vector8))
1666 Vector8 chunk;
1668 vector8_load(&chunk, (const uint8 *) &str[i]);
1671 * Break on anything less than ' ' or if we find a '"' or '\\'.
1672 * Those need special handling. That's done in the per-byte loop.
1674 if (vector8_has_le(chunk, (unsigned char) 0x1F) ||
1675 vector8_has(chunk, (unsigned char) '"') ||
1676 vector8_has(chunk, (unsigned char) '\\'))
1677 break;
1679 #ifdef ESCAPE_JSON_FLUSH_AFTER
1682 * Flush what's been checked so far out to the destination buffer
1683 * every so often to avoid having to re-read cachelines when
1684 * escaping large strings.
1686 if (i - copypos >= ESCAPE_JSON_FLUSH_AFTER)
1688 appendBinaryStringInfo(buf, &str[copypos], i - copypos);
1689 copypos = i;
1691 #endif
1695 * Write to the destination up to the point that we've vector searched
1696 * so far. Do this only when switching into per-byte mode rather than
1697 * once every sizeof(Vector8) bytes.
1699 if (copypos < i)
1701 appendBinaryStringInfo(buf, &str[copypos], i - copypos);
1702 copypos = i;
1706 * Per-byte loop for Vector8s containing special chars and for
1707 * processing the tail of the string.
1709 for (int b = 0; b < sizeof(Vector8); b++)
1711 /* check if we've finished */
1712 if (i == len)
1713 goto done;
1715 Assert(i < len);
1717 escape_json_char(buf, str[i++]);
1720 copypos = i;
1721 /* We're not done yet. Try the vector search again. */
1724 done:
1725 appendStringInfoCharMacro(buf, '"');
1729 * escape_json_text
1730 * Append 'txt' onto 'buf' and escape using escape_json_with_len.
1732 * This is more efficient than calling text_to_cstring and appending the
1733 * result as that could require an additional palloc and memcpy.
1735 void
1736 escape_json_text(StringInfo buf, const text *txt)
1738 /* must cast away the const, unfortunately */
1739 text *tunpacked = pg_detoast_datum_packed(unconstify(text *, txt));
1740 int len = VARSIZE_ANY_EXHDR(tunpacked);
1741 char *str;
1743 str = VARDATA_ANY(tunpacked);
1745 escape_json_with_len(buf, str, len);
1747 /* pfree any detoasted values */
1748 if (tunpacked != txt)
1749 pfree(tunpacked);
1752 /* Semantic actions for key uniqueness check */
1753 static JsonParseErrorType
1754 json_unique_object_start(void *_state)
1756 JsonUniqueParsingState *state = _state;
1757 JsonUniqueStackEntry *entry;
1759 if (!state->unique)
1760 return JSON_SUCCESS;
1762 /* push object entry to stack */
1763 entry = palloc(sizeof(*entry));
1764 entry->object_id = state->id_counter++;
1765 entry->parent = state->stack;
1766 state->stack = entry;
1768 return JSON_SUCCESS;
1771 static JsonParseErrorType
1772 json_unique_object_end(void *_state)
1774 JsonUniqueParsingState *state = _state;
1775 JsonUniqueStackEntry *entry;
1777 if (!state->unique)
1778 return JSON_SUCCESS;
1780 entry = state->stack;
1781 state->stack = entry->parent; /* pop object from stack */
1782 pfree(entry);
1783 return JSON_SUCCESS;
1786 static JsonParseErrorType
1787 json_unique_object_field_start(void *_state, char *field, bool isnull)
1789 JsonUniqueParsingState *state = _state;
1790 JsonUniqueStackEntry *entry;
1792 if (!state->unique)
1793 return JSON_SUCCESS;
1795 /* find key collision in the current object */
1796 if (json_unique_check_key(&state->check, field, state->stack->object_id))
1797 return JSON_SUCCESS;
1799 state->unique = false;
1801 /* pop all objects entries */
1802 while ((entry = state->stack))
1804 state->stack = entry->parent;
1805 pfree(entry);
1807 return JSON_SUCCESS;
1810 /* Validate JSON text and additionally check key uniqueness */
1811 bool
1812 json_validate(text *json, bool check_unique_keys, bool throw_error)
1814 JsonLexContext lex;
1815 JsonSemAction uniqueSemAction = {0};
1816 JsonUniqueParsingState state;
1817 JsonParseErrorType result;
1819 makeJsonLexContext(&lex, json, check_unique_keys);
1821 if (check_unique_keys)
1823 state.lex = &lex;
1824 state.stack = NULL;
1825 state.id_counter = 0;
1826 state.unique = true;
1827 json_unique_check_init(&state.check);
1829 uniqueSemAction.semstate = &state;
1830 uniqueSemAction.object_start = json_unique_object_start;
1831 uniqueSemAction.object_field_start = json_unique_object_field_start;
1832 uniqueSemAction.object_end = json_unique_object_end;
1835 result = pg_parse_json(&lex, check_unique_keys ? &uniqueSemAction : &nullSemAction);
1837 if (result != JSON_SUCCESS)
1839 if (throw_error)
1840 json_errsave_error(result, &lex, NULL);
1842 return false; /* invalid json */
1845 if (check_unique_keys && !state.unique)
1847 if (throw_error)
1848 ereport(ERROR,
1849 (errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
1850 errmsg("duplicate JSON object key value")));
1852 return false; /* not unique keys */
1855 if (check_unique_keys)
1856 freeJsonLexContext(&lex);
1858 return true; /* ok */
1862 * SQL function json_typeof(json) -> text
1864 * Returns the type of the outermost JSON value as TEXT. Possible types are
1865 * "object", "array", "string", "number", "boolean", and "null".
1867 * Performs a single call to json_lex() to get the first token of the supplied
1868 * value. This initial token uniquely determines the value's type. As our
1869 * input must already have been validated by json_in() or json_recv(), the
1870 * initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END,
1871 * JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END.
1873 Datum
1874 json_typeof(PG_FUNCTION_ARGS)
1876 text *json = PG_GETARG_TEXT_PP(0);
1877 JsonLexContext lex;
1878 char *type;
1879 JsonParseErrorType result;
1881 /* Lex exactly one token from the input and check its type. */
1882 makeJsonLexContext(&lex, json, false);
1883 result = json_lex(&lex);
1884 if (result != JSON_SUCCESS)
1885 json_errsave_error(result, &lex, NULL);
1887 switch (lex.token_type)
1889 case JSON_TOKEN_OBJECT_START:
1890 type = "object";
1891 break;
1892 case JSON_TOKEN_ARRAY_START:
1893 type = "array";
1894 break;
1895 case JSON_TOKEN_STRING:
1896 type = "string";
1897 break;
1898 case JSON_TOKEN_NUMBER:
1899 type = "number";
1900 break;
1901 case JSON_TOKEN_TRUE:
1902 case JSON_TOKEN_FALSE:
1903 type = "boolean";
1904 break;
1905 case JSON_TOKEN_NULL:
1906 type = "null";
1907 break;
1908 default:
1909 elog(ERROR, "unexpected json token: %d", lex.token_type);
1912 PG_RETURN_TEXT_P(cstring_to_text(type));