src/backend/access/spgist/spgtextproc.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * spgtextproc.c
   4  *        implementation of radix tree (compressed trie) over text
   5  *
   6  * In a text_ops SPGiST index, inner tuples can have a prefix which is the
   7  * common prefix of all strings indexed under that tuple.  The node labels
   8  * represent the next byte of the string(s) after the prefix.  Assuming we
   9  * always use the longest possible prefix, we will get more than one node
  10  * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
  11  *
  12  * To reconstruct the indexed string for any index entry, concatenate the
  13  * inner-tuple prefixes and node labels starting at the root and working
  14  * down to the leaf entry, then append the datum in the leaf entry.
  15  * (While descending the tree, "level" is the number of bytes reconstructed
  16  * so far.)
  17  *
  18  * However, there are two special cases for node labels: -1 indicates that
  19  * there are no more bytes after the prefix-so-far, and -2 indicates that we
  20  * had to split an existing allTheSame tuple (in such a case we have to create
  21  * a node label that doesn't correspond to any string byte).  In either case,
  22  * the node label does not contribute anything to the reconstructed string.
  23  *
  24  * Previously, we used a node label of zero for both special cases, but
  25  * this was problematic because one can't tell whether a string ending at
  26  * the current level can be pushed down into such a child node.  For
  27  * backwards compatibility, we still support such node labels for reading;
  28  * but no new entries will ever be pushed down into a zero-labeled child.
  29  * No new entries ever get pushed into a -2-labeled child, either.
  30  *
  31  *
  32  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  33  * Portions Copyright (c) 1994, Regents of the University of California
  34  *
  35  * IDENTIFICATION
  36  *                      src/backend/access/spgist/spgtextproc.c
  37  *
  38  *-------------------------------------------------------------------------
  39  */
  40 #include "postgres.h"
  41
  42 #include "access/spgist.h"
  43 #include "catalog/pg_type.h"
  44 #include "common/int.h"
  45 #include "mb/pg_wchar.h"
  46 #include "utils/datum.h"
  47 #include "utils/fmgrprotos.h"
  48 #include "utils/pg_locale.h"
  49 #include "utils/varlena.h"
  50 #include "varatt.h"
  51
  52
  53 /*
  54  * In the worst case, an inner tuple in a text radix tree could have as many
  55  * as 258 nodes (one for each possible byte value, plus the two special
  56  * cases).  Each node can take 16 bytes on MAXALIGN=8 machines.  The inner
  57  * tuple must fit on an index page of size BLCKSZ.  Rather than assuming we
  58  * know the exact amount of overhead imposed by page headers, tuple headers,
  59  * etc, we leave 100 bytes for that (the actual overhead should be no more
  60  * than 56 bytes at this writing, so there is slop in this number).
  61  * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
  62  * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
  63  * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
  64  * tuple size exceeds maximum" if there are too many distinct next-byte values
  65  * at a given place in the tree.  Since use of nonstandard block sizes appears
  66  * to be negligible in the field, we just live with that fact for now,
  67  * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
  68  * than default.
  69  */
  70 #define SPGIST_MAX_PREFIX_LENGTH        Max((int) (BLCKSZ - 258 * 16 - 100), 32)
  71
  72 /*
  73  * Strategy for collation aware operator on text is equal to btree strategy
  74  * plus value of 10.
  75  *
  76  * Current collation aware strategies and their corresponding btree strategies:
  77  * 11 BTLessStrategyNumber
  78  * 12 BTLessEqualStrategyNumber
  79  * 14 BTGreaterEqualStrategyNumber
  80  * 15 BTGreaterStrategyNumber
  81  */
  82 #define SPG_STRATEGY_ADDITION   (10)
  83 #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
  84                                                                                  && (s) != RTPrefixStrategyNumber)
  85
  86 /* Struct for sorting values in picksplit */
  87 typedef struct spgNodePtr
  88 {
  89         Datum           d;
  90         int                     i;
  91         int16           c;
  92 } spgNodePtr;
  93
  94
  95 Datum
  96 spg_text_config(PG_FUNCTION_ARGS)
  97 {
  98         /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
  99         spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
 100
 101         cfg->prefixType = TEXTOID;
 102         cfg->labelType = INT2OID;
 103         cfg->canReturnData = true;
 104         cfg->longValuesOK = true;       /* suffixing will shorten long values */
 105         PG_RETURN_VOID();
 106 }
 107
 108 /*
 109  * Form a text datum from the given not-necessarily-null-terminated string,
 110  * using short varlena header format if possible
 111  */
 112 static Datum
 113 formTextDatum(const char *data, int datalen)
 114 {
 115         char       *p;
 116
 117         p = (char *) palloc(datalen + VARHDRSZ);
 118
 119         if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
 120         {
 121                 SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
 122                 if (datalen)
 123                         memcpy(p + VARHDRSZ_SHORT, data, datalen);
 124         }
 125         else
 126         {
 127                 SET_VARSIZE(p, datalen + VARHDRSZ);
 128                 memcpy(p + VARHDRSZ, data, datalen);
 129         }
 130
 131         return PointerGetDatum(p);
 132 }
 133
 134 /*
 135  * Find the length of the common prefix of a and b
 136  */
 137 static int
 138 commonPrefix(const char *a, const char *b, int lena, int lenb)
 139 {
 140         int                     i = 0;
 141
 142         while (i < lena && i < lenb && *a == *b)
 143         {
 144                 a++;
 145                 b++;
 146                 i++;
 147         }
 148
 149         return i;
 150 }
 151
 152 /*
 153  * Binary search an array of int16 datums for a match to c
 154  *
 155  * On success, *i gets the match location; on failure, it gets where to insert
 156  */
 157 static bool
 158 searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
 159 {
 160         int                     StopLow = 0,
 161                                 StopHigh = nNodes;
 162
 163         while (StopLow < StopHigh)
 164         {
 165                 int                     StopMiddle = (StopLow + StopHigh) >> 1;
 166                 int16           middle = DatumGetInt16(nodeLabels[StopMiddle]);
 167
 168                 if (c < middle)
 169                         StopHigh = StopMiddle;
 170                 else if (c > middle)
 171                         StopLow = StopMiddle + 1;
 172                 else
 173                 {
 174                         *i = StopMiddle;
 175                         return true;
 176                 }
 177         }
 178
 179         *i = StopHigh;
 180         return false;
 181 }
 182
 183 Datum
 184 spg_text_choose(PG_FUNCTION_ARGS)
 185 {
 186         spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
 187         spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
 188         text       *inText = DatumGetTextPP(in->datum);
 189         char       *inStr = VARDATA_ANY(inText);
 190         int                     inSize = VARSIZE_ANY_EXHDR(inText);
 191         char       *prefixStr = NULL;
 192         int                     prefixSize = 0;
 193         int                     commonLen = 0;
 194         int16           nodeChar = 0;
 195         int                     i = 0;
 196
 197         /* Check for prefix match, set nodeChar to first byte after prefix */
 198         if (in->hasPrefix)
 199         {
 200                 text       *prefixText = DatumGetTextPP(in->prefixDatum);
 201
 202                 prefixStr = VARDATA_ANY(prefixText);
 203                 prefixSize = VARSIZE_ANY_EXHDR(prefixText);
 204
 205                 commonLen = commonPrefix(inStr + in->level,
 206                                                                  prefixStr,
 207                                                                  inSize - in->level,
 208                                                                  prefixSize);
 209
 210                 if (commonLen == prefixSize)
 211                 {
 212                         if (inSize - in->level > commonLen)
 213                                 nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
 214                         else
 215                                 nodeChar = -1;
 216                 }
 217                 else
 218                 {
 219                         /* Must split tuple because incoming value doesn't match prefix */
 220                         out->resultType = spgSplitTuple;
 221
 222                         if (commonLen == 0)
 223                         {
 224                                 out->result.splitTuple.prefixHasPrefix = false;
 225                         }
 226                         else
 227                         {
 228                                 out->result.splitTuple.prefixHasPrefix = true;
 229                                 out->result.splitTuple.prefixPrefixDatum =
 230                                         formTextDatum(prefixStr, commonLen);
 231                         }
 232                         out->result.splitTuple.prefixNNodes = 1;
 233                         out->result.splitTuple.prefixNodeLabels =
 234                                 (Datum *) palloc(sizeof(Datum));
 235                         out->result.splitTuple.prefixNodeLabels[0] =
 236                                 Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
 237
 238                         out->result.splitTuple.childNodeN = 0;
 239
 240                         if (prefixSize - commonLen == 1)
 241                         {
 242                                 out->result.splitTuple.postfixHasPrefix = false;
 243                         }
 244                         else
 245                         {
 246                                 out->result.splitTuple.postfixHasPrefix = true;
 247                                 out->result.splitTuple.postfixPrefixDatum =
 248                                         formTextDatum(prefixStr + commonLen + 1,
 249                                                                   prefixSize - commonLen - 1);
 250                         }
 251
 252                         PG_RETURN_VOID();
 253                 }
 254         }
 255         else if (inSize > in->level)
 256         {
 257                 nodeChar = *(unsigned char *) (inStr + in->level);
 258         }
 259         else
 260         {
 261                 nodeChar = -1;
 262         }
 263
 264         /* Look up nodeChar in the node label array */
 265         if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
 266         {
 267                 /*
 268                  * Descend to existing node.  (If in->allTheSame, the core code will
 269                  * ignore our nodeN specification here, but that's OK.  We still have
 270                  * to provide the correct levelAdd and restDatum values, and those are
 271                  * the same regardless of which node gets chosen by core.)
 272                  */
 273                 int                     levelAdd;
 274
 275                 out->resultType = spgMatchNode;
 276                 out->result.matchNode.nodeN = i;
 277                 levelAdd = commonLen;
 278                 if (nodeChar >= 0)
 279                         levelAdd++;
 280                 out->result.matchNode.levelAdd = levelAdd;
 281                 if (inSize - in->level - levelAdd > 0)
 282                         out->result.matchNode.restDatum =
 283                                 formTextDatum(inStr + in->level + levelAdd,
 284                                                           inSize - in->level - levelAdd);
 285                 else
 286                         out->result.matchNode.restDatum =
 287                                 formTextDatum(NULL, 0);
 288         }
 289         else if (in->allTheSame)
 290         {
 291                 /*
 292                  * Can't use AddNode action, so split the tuple.  The upper tuple has
 293                  * the same prefix as before and uses a dummy node label -2 for the
 294                  * lower tuple.  The lower tuple has no prefix and the same node
 295                  * labels as the original tuple.
 296                  *
 297                  * Note: it might seem tempting to shorten the upper tuple's prefix,
 298                  * if it has one, then use its last byte as label for the lower tuple.
 299                  * But that doesn't win since we know the incoming value matches the
 300                  * whole prefix: we'd just end up splitting the lower tuple again.
 301                  */
 302                 out->resultType = spgSplitTuple;
 303                 out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
 304                 out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
 305                 out->result.splitTuple.prefixNNodes = 1;
 306                 out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
 307                 out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
 308                 out->result.splitTuple.childNodeN = 0;
 309                 out->result.splitTuple.postfixHasPrefix = false;
 310         }
 311         else
 312         {
 313                 /* Add a node for the not-previously-seen nodeChar value */
 314                 out->resultType = spgAddNode;
 315                 out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
 316                 out->result.addNode.nodeN = i;
 317         }
 318
 319         PG_RETURN_VOID();
 320 }
 321
 322 /* qsort comparator to sort spgNodePtr structs by "c" */
 323 static int
 324 cmpNodePtr(const void *a, const void *b)
 325 {
 326         const spgNodePtr *aa = (const spgNodePtr *) a;
 327         const spgNodePtr *bb = (const spgNodePtr *) b;
 328
 329         return pg_cmp_s16(aa->c, bb->c);
 330 }
 331
 332 Datum
 333 spg_text_picksplit(PG_FUNCTION_ARGS)
 334 {
 335         spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
 336         spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
 337         text       *text0 = DatumGetTextPP(in->datums[0]);
 338         int                     i,
 339                                 commonLen;
 340         spgNodePtr *nodes;
 341
 342         /* Identify longest common prefix, if any */
 343         commonLen = VARSIZE_ANY_EXHDR(text0);
 344         for (i = 1; i < in->nTuples && commonLen > 0; i++)
 345         {
 346                 text       *texti = DatumGetTextPP(in->datums[i]);
 347                 int                     tmp = commonPrefix(VARDATA_ANY(text0),
 348                                                                            VARDATA_ANY(texti),
 349                                                                            VARSIZE_ANY_EXHDR(text0),
 350                                                                            VARSIZE_ANY_EXHDR(texti));
 351
 352                 if (tmp < commonLen)
 353                         commonLen = tmp;
 354         }
 355
 356         /*
 357          * Limit the prefix length, if necessary, to ensure that the resulting
 358          * inner tuple will fit on a page.
 359          */
 360         commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
 361
 362         /* Set node prefix to be that string, if it's not empty */
 363         if (commonLen == 0)
 364         {
 365                 out->hasPrefix = false;
 366         }
 367         else
 368         {
 369                 out->hasPrefix = true;
 370                 out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
 371         }
 372
 373         /* Extract the node label (first non-common byte) from each value */
 374         nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples);
 375
 376         for (i = 0; i < in->nTuples; i++)
 377         {
 378                 text       *texti = DatumGetTextPP(in->datums[i]);
 379
 380                 if (commonLen < VARSIZE_ANY_EXHDR(texti))
 381                         nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
 382                 else
 383                         nodes[i].c = -1;        /* use -1 if string is all common */
 384                 nodes[i].i = i;
 385                 nodes[i].d = in->datums[i];
 386         }
 387
 388         /*
 389          * Sort by label values so that we can group the values into nodes.  This
 390          * also ensures that the nodes are ordered by label value, allowing the
 391          * use of binary search in searchChar.
 392          */
 393         qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
 394
 395         /* And emit results */
 396         out->nNodes = 0;
 397         out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples);
 398         out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
 399         out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);
 400
 401         for (i = 0; i < in->nTuples; i++)
 402         {
 403                 text       *texti = DatumGetTextPP(nodes[i].d);
 404                 Datum           leafD;
 405
 406                 if (i == 0 || nodes[i].c != nodes[i - 1].c)
 407                 {
 408                         out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
 409                         out->nNodes++;
 410                 }
 411
 412                 if (commonLen < VARSIZE_ANY_EXHDR(texti))
 413                         leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
 414                                                                   VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
 415                 else
 416                         leafD = formTextDatum(NULL, 0);
 417
 418                 out->leafTupleDatums[nodes[i].i] = leafD;
 419                 out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
 420         }
 421
 422         PG_RETURN_VOID();
 423 }
 424
 425 Datum
 426 spg_text_inner_consistent(PG_FUNCTION_ARGS)
 427 {
 428         spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
 429         spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
 430         bool            collate_is_c = pg_newlocale_from_collation(PG_GET_COLLATION())->collate_is_c;
 431         text       *reconstructedValue;
 432         text       *reconstrText;
 433         int                     maxReconstrLen;
 434         text       *prefixText = NULL;
 435         int                     prefixSize = 0;
 436         int                     i;
 437
 438         /*
 439          * Reconstruct values represented at this tuple, including parent data,
 440          * prefix of this tuple if any, and the node label if it's non-dummy.
 441          * in->level should be the length of the previously reconstructed value,
 442          * and the number of bytes added here is prefixSize or prefixSize + 1.
 443          *
 444          * Note: we assume that in->reconstructedValue isn't toasted and doesn't
 445          * have a short varlena header.  This is okay because it must have been
 446          * created by a previous invocation of this routine, and we always emit
 447          * long-format reconstructed values.
 448          */
 449         reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
 450         Assert(reconstructedValue == NULL ? in->level == 0 :
 451                    VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
 452
 453         maxReconstrLen = in->level + 1;
 454         if (in->hasPrefix)
 455         {
 456                 prefixText = DatumGetTextPP(in->prefixDatum);
 457                 prefixSize = VARSIZE_ANY_EXHDR(prefixText);
 458                 maxReconstrLen += prefixSize;
 459         }
 460
 461         reconstrText = palloc(VARHDRSZ + maxReconstrLen);
 462         SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
 463
 464         if (in->level)
 465                 memcpy(VARDATA(reconstrText),
 466                            VARDATA(reconstructedValue),
 467                            in->level);
 468         if (prefixSize)
 469                 memcpy(((char *) VARDATA(reconstrText)) + in->level,
 470                            VARDATA_ANY(prefixText),
 471                            prefixSize);
 472         /* last byte of reconstrText will be filled in below */
 473
 474         /*
 475          * Scan the child nodes.  For each one, complete the reconstructed value
 476          * and see if it's consistent with the query.  If so, emit an entry into
 477          * the output arrays.
 478          */
 479         out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
 480         out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
 481         out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
 482         out->nNodes = 0;
 483
 484         for (i = 0; i < in->nNodes; i++)
 485         {
 486                 int16           nodeChar = DatumGetInt16(in->nodeLabels[i]);
 487                 int                     thisLen;
 488                 bool            res = true;
 489                 int                     j;
 490
 491                 /* If nodeChar is a dummy value, don't include it in data */
 492                 if (nodeChar <= 0)
 493                         thisLen = maxReconstrLen - 1;
 494                 else
 495                 {
 496                         ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
 497                         thisLen = maxReconstrLen;
 498                 }
 499
 500                 for (j = 0; j < in->nkeys; j++)
 501                 {
 502                         StrategyNumber strategy = in->scankeys[j].sk_strategy;
 503                         text       *inText;
 504                         int                     inSize;
 505                         int                     r;
 506
 507                         /*
 508                          * If it's a collation-aware operator, but the collation is C, we
 509                          * can treat it as non-collation-aware.  With non-C collation we
 510                          * need to traverse whole tree :-( so there's no point in making
 511                          * any check here.  (Note also that our reconstructed value may
 512                          * well end with a partial multibyte character, so that applying
 513                          * any encoding-sensitive test to it would be risky anyhow.)
 514                          */
 515                         if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
 516                         {
 517                                 if (collate_is_c)
 518                                         strategy -= SPG_STRATEGY_ADDITION;
 519                                 else
 520                                         continue;
 521                         }
 522
 523                         inText = DatumGetTextPP(in->scankeys[j].sk_argument);
 524                         inSize = VARSIZE_ANY_EXHDR(inText);
 525
 526                         r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
 527                                            Min(inSize, thisLen));
 528
 529                         switch (strategy)
 530                         {
 531                                 case BTLessStrategyNumber:
 532                                 case BTLessEqualStrategyNumber:
 533                                         if (r > 0)
 534                                                 res = false;
 535                                         break;
 536                                 case BTEqualStrategyNumber:
 537                                         if (r != 0 || inSize < thisLen)
 538                                                 res = false;
 539                                         break;
 540                                 case BTGreaterEqualStrategyNumber:
 541                                 case BTGreaterStrategyNumber:
 542                                         if (r < 0)
 543                                                 res = false;
 544                                         break;
 545                                 case RTPrefixStrategyNumber:
 546                                         if (r != 0)
 547                                                 res = false;
 548                                         break;
 549                                 default:
 550                                         elog(ERROR, "unrecognized strategy number: %d",
 551                                                  in->scankeys[j].sk_strategy);
 552                                         break;
 553                         }
 554
 555                         if (!res)
 556                                 break;                  /* no need to consider remaining conditions */
 557                 }
 558
 559                 if (res)
 560                 {
 561                         out->nodeNumbers[out->nNodes] = i;
 562                         out->levelAdds[out->nNodes] = thisLen - in->level;
 563                         SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
 564                         out->reconstructedValues[out->nNodes] =
 565                                 datumCopy(PointerGetDatum(reconstrText), false, -1);
 566                         out->nNodes++;
 567                 }
 568         }
 569
 570         PG_RETURN_VOID();
 571 }
 572
 573 Datum
 574 spg_text_leaf_consistent(PG_FUNCTION_ARGS)
 575 {
 576         spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
 577         spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
 578         int                     level = in->level;
 579         text       *leafValue,
 580                            *reconstrValue = NULL;
 581         char       *fullValue;
 582         int                     fullLen;
 583         bool            res;
 584         int                     j;
 585
 586         /* all tests are exact */
 587         out->recheck = false;
 588
 589         leafValue = DatumGetTextPP(in->leafDatum);
 590
 591         /* As above, in->reconstructedValue isn't toasted or short. */
 592         if (DatumGetPointer(in->reconstructedValue))
 593                 reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
 594
 595         Assert(reconstrValue == NULL ? level == 0 :
 596                    VARSIZE_ANY_EXHDR(reconstrValue) == level);
 597
 598         /* Reconstruct the full string represented by this leaf tuple */
 599         fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
 600         if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
 601         {
 602                 fullValue = VARDATA(reconstrValue);
 603                 out->leafValue = PointerGetDatum(reconstrValue);
 604         }
 605         else
 606         {
 607                 text       *fullText = palloc(VARHDRSZ + fullLen);
 608
 609                 SET_VARSIZE(fullText, VARHDRSZ + fullLen);
 610                 fullValue = VARDATA(fullText);
 611                 if (level)
 612                         memcpy(fullValue, VARDATA(reconstrValue), level);
 613                 if (VARSIZE_ANY_EXHDR(leafValue) > 0)
 614                         memcpy(fullValue + level, VARDATA_ANY(leafValue),
 615                                    VARSIZE_ANY_EXHDR(leafValue));
 616                 out->leafValue = PointerGetDatum(fullText);
 617         }
 618
 619         /* Perform the required comparison(s) */
 620         res = true;
 621         for (j = 0; j < in->nkeys; j++)
 622         {
 623                 StrategyNumber strategy = in->scankeys[j].sk_strategy;
 624                 text       *query = DatumGetTextPP(in->scankeys[j].sk_argument);
 625                 int                     queryLen = VARSIZE_ANY_EXHDR(query);
 626                 int                     r;
 627
 628                 if (strategy == RTPrefixStrategyNumber)
 629                 {
 630                         /*
 631                          * if level >= length of query then reconstrValue must begin with
 632                          * query (prefix) string, so we don't need to check it again.
 633                          */
 634                         res = (level >= queryLen) ||
 635                                 DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
 636                                                                                                          PG_GET_COLLATION(),
 637                                                                                                          out->leafValue,
 638                                                                                                          PointerGetDatum(query)));
 639
 640                         if (!res)                       /* no need to consider remaining conditions */
 641                                 break;
 642
 643                         continue;
 644                 }
 645
 646                 if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
 647                 {
 648                         /* Collation-aware comparison */
 649                         strategy -= SPG_STRATEGY_ADDITION;
 650
 651                         /* If asserts enabled, verify encoding of reconstructed string */
 652                         Assert(pg_verifymbstr(fullValue, fullLen, false));
 653
 654                         r = varstr_cmp(fullValue, fullLen,
 655                                                    VARDATA_ANY(query), queryLen,
 656                                                    PG_GET_COLLATION());
 657                 }
 658                 else
 659                 {
 660                         /* Non-collation-aware comparison */
 661                         r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
 662
 663                         if (r == 0)
 664                         {
 665                                 if (queryLen > fullLen)
 666                                         r = -1;
 667                                 else if (queryLen < fullLen)
 668                                         r = 1;
 669                         }
 670                 }
 671
 672                 switch (strategy)
 673                 {
 674                         case BTLessStrategyNumber:
 675                                 res = (r < 0);
 676                                 break;
 677                         case BTLessEqualStrategyNumber:
 678                                 res = (r <= 0);
 679                                 break;
 680                         case BTEqualStrategyNumber:
 681                                 res = (r == 0);
 682                                 break;
 683                         case BTGreaterEqualStrategyNumber:
 684                                 res = (r >= 0);
 685                                 break;
 686                         case BTGreaterStrategyNumber:
 687                                 res = (r > 0);
 688                                 break;
 689                         default:
 690                                 elog(ERROR, "unrecognized strategy number: %d",
 691                                          in->scankeys[j].sk_strategy);
 692                                 res = false;
 693                                 break;
 694                 }
 695
 696                 if (!res)
 697                         break;                          /* no need to consider remaining conditions */
 698         }
 699
 700         PG_RETURN_BOOL(res);
 701 }