1 /*-------------------------------------------------------------------------
4 * implementation of radix tree (compressed trie) over text
6 * In a text_ops SPGiST index, inner tuples can have a prefix which is the
7 * common prefix of all strings indexed under that tuple. The node labels
8 * represent the next byte of the string(s) after the prefix. Assuming we
9 * always use the longest possible prefix, we will get more than one node
10 * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
12 * To reconstruct the indexed string for any index entry, concatenate the
13 * inner-tuple prefixes and node labels starting at the root and working
14 * down to the leaf entry, then append the datum in the leaf entry.
15 * (While descending the tree, "level" is the number of bytes reconstructed
18 * However, there are two special cases for node labels: -1 indicates that
19 * there are no more bytes after the prefix-so-far, and -2 indicates that we
20 * had to split an existing allTheSame tuple (in such a case we have to create
21 * a node label that doesn't correspond to any string byte). In either case,
22 * the node label does not contribute anything to the reconstructed string.
24 * Previously, we used a node label of zero for both special cases, but
25 * this was problematic because one can't tell whether a string ending at
26 * the current level can be pushed down into such a child node. For
27 * backwards compatibility, we still support such node labels for reading;
28 * but no new entries will ever be pushed down into a zero-labeled child.
29 * No new entries ever get pushed into a -2-labeled child, either.
32 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
33 * Portions Copyright (c) 1994, Regents of the University of California
36 * src/backend/access/spgist/spgtextproc.c
38 *-------------------------------------------------------------------------
42 #include "access/spgist.h"
43 #include "catalog/pg_type.h"
44 #include "common/int.h"
45 #include "mb/pg_wchar.h"
46 #include "utils/datum.h"
47 #include "utils/fmgrprotos.h"
48 #include "utils/pg_locale.h"
49 #include "utils/varlena.h"
54 * In the worst case, an inner tuple in a text radix tree could have as many
55 * as 258 nodes (one for each possible byte value, plus the two special
56 * cases). Each node can take 16 bytes on MAXALIGN=8 machines. The inner
57 * tuple must fit on an index page of size BLCKSZ. Rather than assuming we
58 * know the exact amount of overhead imposed by page headers, tuple headers,
59 * etc, we leave 100 bytes for that (the actual overhead should be no more
60 * than 56 bytes at this writing, so there is slop in this number).
61 * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
62 * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
63 * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
64 * tuple size exceeds maximum" if there are too many distinct next-byte values
65 * at a given place in the tree. Since use of nonstandard block sizes appears
66 * to be negligible in the field, we just live with that fact for now,
67 * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
70 #define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
73 * Strategy for collation aware operator on text is equal to btree strategy
76 * Current collation aware strategies and their corresponding btree strategies:
77 * 11 BTLessStrategyNumber
78 * 12 BTLessEqualStrategyNumber
79 * 14 BTGreaterEqualStrategyNumber
80 * 15 BTGreaterStrategyNumber
82 #define SPG_STRATEGY_ADDITION (10)
83 #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
84 && (s) != RTPrefixStrategyNumber)
86 /* Struct for sorting values in picksplit */
87 typedef struct spgNodePtr
96 spg_text_config(PG_FUNCTION_ARGS
)
98 /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
99 spgConfigOut
*cfg
= (spgConfigOut
*) PG_GETARG_POINTER(1);
101 cfg
->prefixType
= TEXTOID
;
102 cfg
->labelType
= INT2OID
;
103 cfg
->canReturnData
= true;
104 cfg
->longValuesOK
= true; /* suffixing will shorten long values */
109 * Form a text datum from the given not-necessarily-null-terminated string,
110 * using short varlena header format if possible
113 formTextDatum(const char *data
, int datalen
)
117 p
= (char *) palloc(datalen
+ VARHDRSZ
);
119 if (datalen
+ VARHDRSZ_SHORT
<= VARATT_SHORT_MAX
)
121 SET_VARSIZE_SHORT(p
, datalen
+ VARHDRSZ_SHORT
);
123 memcpy(p
+ VARHDRSZ_SHORT
, data
, datalen
);
127 SET_VARSIZE(p
, datalen
+ VARHDRSZ
);
128 memcpy(p
+ VARHDRSZ
, data
, datalen
);
131 return PointerGetDatum(p
);
135 * Find the length of the common prefix of a and b
138 commonPrefix(const char *a
, const char *b
, int lena
, int lenb
)
142 while (i
< lena
&& i
< lenb
&& *a
== *b
)
153 * Binary search an array of int16 datums for a match to c
155 * On success, *i gets the match location; on failure, it gets where to insert
158 searchChar(Datum
*nodeLabels
, int nNodes
, int16 c
, int *i
)
163 while (StopLow
< StopHigh
)
165 int StopMiddle
= (StopLow
+ StopHigh
) >> 1;
166 int16 middle
= DatumGetInt16(nodeLabels
[StopMiddle
]);
169 StopHigh
= StopMiddle
;
171 StopLow
= StopMiddle
+ 1;
184 spg_text_choose(PG_FUNCTION_ARGS
)
186 spgChooseIn
*in
= (spgChooseIn
*) PG_GETARG_POINTER(0);
187 spgChooseOut
*out
= (spgChooseOut
*) PG_GETARG_POINTER(1);
188 text
*inText
= DatumGetTextPP(in
->datum
);
189 char *inStr
= VARDATA_ANY(inText
);
190 int inSize
= VARSIZE_ANY_EXHDR(inText
);
191 char *prefixStr
= NULL
;
197 /* Check for prefix match, set nodeChar to first byte after prefix */
200 text
*prefixText
= DatumGetTextPP(in
->prefixDatum
);
202 prefixStr
= VARDATA_ANY(prefixText
);
203 prefixSize
= VARSIZE_ANY_EXHDR(prefixText
);
205 commonLen
= commonPrefix(inStr
+ in
->level
,
210 if (commonLen
== prefixSize
)
212 if (inSize
- in
->level
> commonLen
)
213 nodeChar
= *(unsigned char *) (inStr
+ in
->level
+ commonLen
);
219 /* Must split tuple because incoming value doesn't match prefix */
220 out
->resultType
= spgSplitTuple
;
224 out
->result
.splitTuple
.prefixHasPrefix
= false;
228 out
->result
.splitTuple
.prefixHasPrefix
= true;
229 out
->result
.splitTuple
.prefixPrefixDatum
=
230 formTextDatum(prefixStr
, commonLen
);
232 out
->result
.splitTuple
.prefixNNodes
= 1;
233 out
->result
.splitTuple
.prefixNodeLabels
=
234 (Datum
*) palloc(sizeof(Datum
));
235 out
->result
.splitTuple
.prefixNodeLabels
[0] =
236 Int16GetDatum(*(unsigned char *) (prefixStr
+ commonLen
));
238 out
->result
.splitTuple
.childNodeN
= 0;
240 if (prefixSize
- commonLen
== 1)
242 out
->result
.splitTuple
.postfixHasPrefix
= false;
246 out
->result
.splitTuple
.postfixHasPrefix
= true;
247 out
->result
.splitTuple
.postfixPrefixDatum
=
248 formTextDatum(prefixStr
+ commonLen
+ 1,
249 prefixSize
- commonLen
- 1);
255 else if (inSize
> in
->level
)
257 nodeChar
= *(unsigned char *) (inStr
+ in
->level
);
264 /* Look up nodeChar in the node label array */
265 if (searchChar(in
->nodeLabels
, in
->nNodes
, nodeChar
, &i
))
268 * Descend to existing node. (If in->allTheSame, the core code will
269 * ignore our nodeN specification here, but that's OK. We still have
270 * to provide the correct levelAdd and restDatum values, and those are
271 * the same regardless of which node gets chosen by core.)
275 out
->resultType
= spgMatchNode
;
276 out
->result
.matchNode
.nodeN
= i
;
277 levelAdd
= commonLen
;
280 out
->result
.matchNode
.levelAdd
= levelAdd
;
281 if (inSize
- in
->level
- levelAdd
> 0)
282 out
->result
.matchNode
.restDatum
=
283 formTextDatum(inStr
+ in
->level
+ levelAdd
,
284 inSize
- in
->level
- levelAdd
);
286 out
->result
.matchNode
.restDatum
=
287 formTextDatum(NULL
, 0);
289 else if (in
->allTheSame
)
292 * Can't use AddNode action, so split the tuple. The upper tuple has
293 * the same prefix as before and uses a dummy node label -2 for the
294 * lower tuple. The lower tuple has no prefix and the same node
295 * labels as the original tuple.
297 * Note: it might seem tempting to shorten the upper tuple's prefix,
298 * if it has one, then use its last byte as label for the lower tuple.
299 * But that doesn't win since we know the incoming value matches the
300 * whole prefix: we'd just end up splitting the lower tuple again.
302 out
->resultType
= spgSplitTuple
;
303 out
->result
.splitTuple
.prefixHasPrefix
= in
->hasPrefix
;
304 out
->result
.splitTuple
.prefixPrefixDatum
= in
->prefixDatum
;
305 out
->result
.splitTuple
.prefixNNodes
= 1;
306 out
->result
.splitTuple
.prefixNodeLabels
= (Datum
*) palloc(sizeof(Datum
));
307 out
->result
.splitTuple
.prefixNodeLabels
[0] = Int16GetDatum(-2);
308 out
->result
.splitTuple
.childNodeN
= 0;
309 out
->result
.splitTuple
.postfixHasPrefix
= false;
313 /* Add a node for the not-previously-seen nodeChar value */
314 out
->resultType
= spgAddNode
;
315 out
->result
.addNode
.nodeLabel
= Int16GetDatum(nodeChar
);
316 out
->result
.addNode
.nodeN
= i
;
322 /* qsort comparator to sort spgNodePtr structs by "c" */
324 cmpNodePtr(const void *a
, const void *b
)
326 const spgNodePtr
*aa
= (const spgNodePtr
*) a
;
327 const spgNodePtr
*bb
= (const spgNodePtr
*) b
;
329 return pg_cmp_s16(aa
->c
, bb
->c
);
333 spg_text_picksplit(PG_FUNCTION_ARGS
)
335 spgPickSplitIn
*in
= (spgPickSplitIn
*) PG_GETARG_POINTER(0);
336 spgPickSplitOut
*out
= (spgPickSplitOut
*) PG_GETARG_POINTER(1);
337 text
*text0
= DatumGetTextPP(in
->datums
[0]);
342 /* Identify longest common prefix, if any */
343 commonLen
= VARSIZE_ANY_EXHDR(text0
);
344 for (i
= 1; i
< in
->nTuples
&& commonLen
> 0; i
++)
346 text
*texti
= DatumGetTextPP(in
->datums
[i
]);
347 int tmp
= commonPrefix(VARDATA_ANY(text0
),
349 VARSIZE_ANY_EXHDR(text0
),
350 VARSIZE_ANY_EXHDR(texti
));
357 * Limit the prefix length, if necessary, to ensure that the resulting
358 * inner tuple will fit on a page.
360 commonLen
= Min(commonLen
, SPGIST_MAX_PREFIX_LENGTH
);
362 /* Set node prefix to be that string, if it's not empty */
365 out
->hasPrefix
= false;
369 out
->hasPrefix
= true;
370 out
->prefixDatum
= formTextDatum(VARDATA_ANY(text0
), commonLen
);
373 /* Extract the node label (first non-common byte) from each value */
374 nodes
= (spgNodePtr
*) palloc(sizeof(spgNodePtr
) * in
->nTuples
);
376 for (i
= 0; i
< in
->nTuples
; i
++)
378 text
*texti
= DatumGetTextPP(in
->datums
[i
]);
380 if (commonLen
< VARSIZE_ANY_EXHDR(texti
))
381 nodes
[i
].c
= *(unsigned char *) (VARDATA_ANY(texti
) + commonLen
);
383 nodes
[i
].c
= -1; /* use -1 if string is all common */
385 nodes
[i
].d
= in
->datums
[i
];
389 * Sort by label values so that we can group the values into nodes. This
390 * also ensures that the nodes are ordered by label value, allowing the
391 * use of binary search in searchChar.
393 qsort(nodes
, in
->nTuples
, sizeof(*nodes
), cmpNodePtr
);
395 /* And emit results */
397 out
->nodeLabels
= (Datum
*) palloc(sizeof(Datum
) * in
->nTuples
);
398 out
->mapTuplesToNodes
= (int *) palloc(sizeof(int) * in
->nTuples
);
399 out
->leafTupleDatums
= (Datum
*) palloc(sizeof(Datum
) * in
->nTuples
);
401 for (i
= 0; i
< in
->nTuples
; i
++)
403 text
*texti
= DatumGetTextPP(nodes
[i
].d
);
406 if (i
== 0 || nodes
[i
].c
!= nodes
[i
- 1].c
)
408 out
->nodeLabels
[out
->nNodes
] = Int16GetDatum(nodes
[i
].c
);
412 if (commonLen
< VARSIZE_ANY_EXHDR(texti
))
413 leafD
= formTextDatum(VARDATA_ANY(texti
) + commonLen
+ 1,
414 VARSIZE_ANY_EXHDR(texti
) - commonLen
- 1);
416 leafD
= formTextDatum(NULL
, 0);
418 out
->leafTupleDatums
[nodes
[i
].i
] = leafD
;
419 out
->mapTuplesToNodes
[nodes
[i
].i
] = out
->nNodes
- 1;
426 spg_text_inner_consistent(PG_FUNCTION_ARGS
)
428 spgInnerConsistentIn
*in
= (spgInnerConsistentIn
*) PG_GETARG_POINTER(0);
429 spgInnerConsistentOut
*out
= (spgInnerConsistentOut
*) PG_GETARG_POINTER(1);
430 bool collate_is_c
= pg_newlocale_from_collation(PG_GET_COLLATION())->collate_is_c
;
431 text
*reconstructedValue
;
434 text
*prefixText
= NULL
;
439 * Reconstruct values represented at this tuple, including parent data,
440 * prefix of this tuple if any, and the node label if it's non-dummy.
441 * in->level should be the length of the previously reconstructed value,
442 * and the number of bytes added here is prefixSize or prefixSize + 1.
444 * Note: we assume that in->reconstructedValue isn't toasted and doesn't
445 * have a short varlena header. This is okay because it must have been
446 * created by a previous invocation of this routine, and we always emit
447 * long-format reconstructed values.
449 reconstructedValue
= (text
*) DatumGetPointer(in
->reconstructedValue
);
450 Assert(reconstructedValue
== NULL
? in
->level
== 0 :
451 VARSIZE_ANY_EXHDR(reconstructedValue
) == in
->level
);
453 maxReconstrLen
= in
->level
+ 1;
456 prefixText
= DatumGetTextPP(in
->prefixDatum
);
457 prefixSize
= VARSIZE_ANY_EXHDR(prefixText
);
458 maxReconstrLen
+= prefixSize
;
461 reconstrText
= palloc(VARHDRSZ
+ maxReconstrLen
);
462 SET_VARSIZE(reconstrText
, VARHDRSZ
+ maxReconstrLen
);
465 memcpy(VARDATA(reconstrText
),
466 VARDATA(reconstructedValue
),
469 memcpy(((char *) VARDATA(reconstrText
)) + in
->level
,
470 VARDATA_ANY(prefixText
),
472 /* last byte of reconstrText will be filled in below */
475 * Scan the child nodes. For each one, complete the reconstructed value
476 * and see if it's consistent with the query. If so, emit an entry into
479 out
->nodeNumbers
= (int *) palloc(sizeof(int) * in
->nNodes
);
480 out
->levelAdds
= (int *) palloc(sizeof(int) * in
->nNodes
);
481 out
->reconstructedValues
= (Datum
*) palloc(sizeof(Datum
) * in
->nNodes
);
484 for (i
= 0; i
< in
->nNodes
; i
++)
486 int16 nodeChar
= DatumGetInt16(in
->nodeLabels
[i
]);
491 /* If nodeChar is a dummy value, don't include it in data */
493 thisLen
= maxReconstrLen
- 1;
496 ((unsigned char *) VARDATA(reconstrText
))[maxReconstrLen
- 1] = nodeChar
;
497 thisLen
= maxReconstrLen
;
500 for (j
= 0; j
< in
->nkeys
; j
++)
502 StrategyNumber strategy
= in
->scankeys
[j
].sk_strategy
;
508 * If it's a collation-aware operator, but the collation is C, we
509 * can treat it as non-collation-aware. With non-C collation we
510 * need to traverse whole tree :-( so there's no point in making
511 * any check here. (Note also that our reconstructed value may
512 * well end with a partial multibyte character, so that applying
513 * any encoding-sensitive test to it would be risky anyhow.)
515 if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy
))
518 strategy
-= SPG_STRATEGY_ADDITION
;
523 inText
= DatumGetTextPP(in
->scankeys
[j
].sk_argument
);
524 inSize
= VARSIZE_ANY_EXHDR(inText
);
526 r
= memcmp(VARDATA(reconstrText
), VARDATA_ANY(inText
),
527 Min(inSize
, thisLen
));
531 case BTLessStrategyNumber
:
532 case BTLessEqualStrategyNumber
:
536 case BTEqualStrategyNumber
:
537 if (r
!= 0 || inSize
< thisLen
)
540 case BTGreaterEqualStrategyNumber
:
541 case BTGreaterStrategyNumber
:
545 case RTPrefixStrategyNumber
:
550 elog(ERROR
, "unrecognized strategy number: %d",
551 in
->scankeys
[j
].sk_strategy
);
556 break; /* no need to consider remaining conditions */
561 out
->nodeNumbers
[out
->nNodes
] = i
;
562 out
->levelAdds
[out
->nNodes
] = thisLen
- in
->level
;
563 SET_VARSIZE(reconstrText
, VARHDRSZ
+ thisLen
);
564 out
->reconstructedValues
[out
->nNodes
] =
565 datumCopy(PointerGetDatum(reconstrText
), false, -1);
574 spg_text_leaf_consistent(PG_FUNCTION_ARGS
)
576 spgLeafConsistentIn
*in
= (spgLeafConsistentIn
*) PG_GETARG_POINTER(0);
577 spgLeafConsistentOut
*out
= (spgLeafConsistentOut
*) PG_GETARG_POINTER(1);
578 int level
= in
->level
;
580 *reconstrValue
= NULL
;
586 /* all tests are exact */
587 out
->recheck
= false;
589 leafValue
= DatumGetTextPP(in
->leafDatum
);
591 /* As above, in->reconstructedValue isn't toasted or short. */
592 if (DatumGetPointer(in
->reconstructedValue
))
593 reconstrValue
= (text
*) DatumGetPointer(in
->reconstructedValue
);
595 Assert(reconstrValue
== NULL
? level
== 0 :
596 VARSIZE_ANY_EXHDR(reconstrValue
) == level
);
598 /* Reconstruct the full string represented by this leaf tuple */
599 fullLen
= level
+ VARSIZE_ANY_EXHDR(leafValue
);
600 if (VARSIZE_ANY_EXHDR(leafValue
) == 0 && level
> 0)
602 fullValue
= VARDATA(reconstrValue
);
603 out
->leafValue
= PointerGetDatum(reconstrValue
);
607 text
*fullText
= palloc(VARHDRSZ
+ fullLen
);
609 SET_VARSIZE(fullText
, VARHDRSZ
+ fullLen
);
610 fullValue
= VARDATA(fullText
);
612 memcpy(fullValue
, VARDATA(reconstrValue
), level
);
613 if (VARSIZE_ANY_EXHDR(leafValue
) > 0)
614 memcpy(fullValue
+ level
, VARDATA_ANY(leafValue
),
615 VARSIZE_ANY_EXHDR(leafValue
));
616 out
->leafValue
= PointerGetDatum(fullText
);
619 /* Perform the required comparison(s) */
621 for (j
= 0; j
< in
->nkeys
; j
++)
623 StrategyNumber strategy
= in
->scankeys
[j
].sk_strategy
;
624 text
*query
= DatumGetTextPP(in
->scankeys
[j
].sk_argument
);
625 int queryLen
= VARSIZE_ANY_EXHDR(query
);
628 if (strategy
== RTPrefixStrategyNumber
)
631 * if level >= length of query then reconstrValue must begin with
632 * query (prefix) string, so we don't need to check it again.
634 res
= (level
>= queryLen
) ||
635 DatumGetBool(DirectFunctionCall2Coll(text_starts_with
,
638 PointerGetDatum(query
)));
640 if (!res
) /* no need to consider remaining conditions */
646 if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy
))
648 /* Collation-aware comparison */
649 strategy
-= SPG_STRATEGY_ADDITION
;
651 /* If asserts enabled, verify encoding of reconstructed string */
652 Assert(pg_verifymbstr(fullValue
, fullLen
, false));
654 r
= varstr_cmp(fullValue
, fullLen
,
655 VARDATA_ANY(query
), queryLen
,
660 /* Non-collation-aware comparison */
661 r
= memcmp(fullValue
, VARDATA_ANY(query
), Min(queryLen
, fullLen
));
665 if (queryLen
> fullLen
)
667 else if (queryLen
< fullLen
)
674 case BTLessStrategyNumber
:
677 case BTLessEqualStrategyNumber
:
680 case BTEqualStrategyNumber
:
683 case BTGreaterEqualStrategyNumber
:
686 case BTGreaterStrategyNumber
:
690 elog(ERROR
, "unrecognized strategy number: %d",
691 in
->scankeys
[j
].sk_strategy
);
697 break; /* no need to consider remaining conditions */