Cache NO ACTION foreign keys separately from RESTRICT foreign keys
[pgsql.git] / src / backend / utils / adt / like_support.c
blob8fdc677371f4d32d0088579c18c45108e277bff0
1 /*-------------------------------------------------------------------------
3 * like_support.c
4 * Planner support functions for LIKE, regex, and related operators.
6 * These routines handle special optimization of operators that can be
7 * used with index scans even though they are not known to the executor's
8 * indexscan machinery. The key idea is that these operators allow us
9 * to derive approximate indexscan qual clauses, such that any tuples
10 * that pass the operator clause itself must also satisfy the simpler
11 * indexscan condition(s). Then we can use the indexscan machinery
12 * to avoid scanning as much of the table as we'd otherwise have to,
13 * while applying the original operator as a qpqual condition to ensure
14 * we deliver only the tuples we want. (In essence, we're using a regular
15 * index as if it were a lossy index.)
17 * An example of what we're doing is
18 * textfield LIKE 'abc%def'
19 * from which we can generate the indexscanable conditions
20 * textfield >= 'abc' AND textfield < 'abd'
21 * which allow efficient scanning of an index on textfield.
22 * (In reality, character set and collation issues make the transformation
23 * from LIKE to indexscan limits rather harder than one might think ...
24 * but that's the basic idea.)
26 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
30 * IDENTIFICATION
31 * src/backend/utils/adt/like_support.c
33 *-------------------------------------------------------------------------
35 #include "postgres.h"
37 #include <math.h>
39 #include "access/htup_details.h"
40 #include "catalog/pg_collation.h"
41 #include "catalog/pg_operator.h"
42 #include "catalog/pg_opfamily.h"
43 #include "catalog/pg_statistic.h"
44 #include "catalog/pg_type.h"
45 #include "mb/pg_wchar.h"
46 #include "miscadmin.h"
47 #include "nodes/makefuncs.h"
48 #include "nodes/nodeFuncs.h"
49 #include "nodes/supportnodes.h"
50 #include "utils/builtins.h"
51 #include "utils/datum.h"
52 #include "utils/lsyscache.h"
53 #include "utils/pg_locale.h"
54 #include "utils/selfuncs.h"
55 #include "utils/varlena.h"
58 typedef enum
60 Pattern_Type_Like,
61 Pattern_Type_Like_IC,
62 Pattern_Type_Regex,
63 Pattern_Type_Regex_IC,
64 Pattern_Type_Prefix,
65 } Pattern_Type;
67 typedef enum
69 Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact,
70 } Pattern_Prefix_Status;
72 static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
73 static List *match_pattern_prefix(Node *leftop,
74 Node *rightop,
75 Pattern_Type ptype,
76 Oid expr_coll,
77 Oid opfamily,
78 Oid indexcollation);
79 static double patternsel_common(PlannerInfo *root,
80 Oid oprid,
81 Oid opfuncid,
82 List *args,
83 int varRelid,
84 Oid collation,
85 Pattern_Type ptype,
86 bool negate);
87 static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
88 Pattern_Type ptype,
89 Oid collation,
90 Const **prefix,
91 Selectivity *rest_selec);
92 static Selectivity prefix_selectivity(PlannerInfo *root,
93 VariableStatData *vardata,
94 Oid eqopr, Oid ltopr, Oid geopr,
95 Oid collation,
96 Const *prefixcon);
97 static Selectivity like_selectivity(const char *patt, int pattlen,
98 bool case_insensitive);
99 static Selectivity regex_selectivity(const char *patt, int pattlen,
100 bool case_insensitive,
101 int fixed_prefix_len);
102 static int pattern_char_isalpha(char c, bool is_multibyte,
103 pg_locale_t locale);
104 static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
105 Oid collation);
106 static Datum string_to_datum(const char *str, Oid datatype);
107 static Const *string_to_const(const char *str, Oid datatype);
108 static Const *string_to_bytea_const(const char *str, size_t str_len);
112 * Planner support functions for LIKE, regex, and related operators
114 Datum
115 textlike_support(PG_FUNCTION_ARGS)
117 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
119 PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
122 Datum
123 texticlike_support(PG_FUNCTION_ARGS)
125 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
127 PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
130 Datum
131 textregexeq_support(PG_FUNCTION_ARGS)
133 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
135 PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
138 Datum
139 texticregexeq_support(PG_FUNCTION_ARGS)
141 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
143 PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
146 Datum
147 text_starts_with_support(PG_FUNCTION_ARGS)
149 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
151 PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
154 /* Common code for the above */
155 static Node *
156 like_regex_support(Node *rawreq, Pattern_Type ptype)
158 Node *ret = NULL;
160 if (IsA(rawreq, SupportRequestSelectivity))
163 * Make a selectivity estimate for a function call, just as we'd do if
164 * the call was via the corresponding operator.
166 SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
167 Selectivity s1;
169 if (req->is_join)
172 * For the moment we just punt. If patternjoinsel is ever
173 * improved to do better, this should be made to call it.
175 s1 = DEFAULT_MATCH_SEL;
177 else
179 /* Share code with operator restriction selectivity functions */
180 s1 = patternsel_common(req->root,
181 InvalidOid,
182 req->funcid,
183 req->args,
184 req->varRelid,
185 req->inputcollid,
186 ptype,
187 false);
189 req->selectivity = s1;
190 ret = (Node *) req;
192 else if (IsA(rawreq, SupportRequestIndexCondition))
194 /* Try to convert operator/function call to index conditions */
195 SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
198 * Currently we have no "reverse" match operators with the pattern on
199 * the left, so we only need consider cases with the indexkey on the
200 * left.
202 if (req->indexarg != 0)
203 return NULL;
205 if (is_opclause(req->node))
207 OpExpr *clause = (OpExpr *) req->node;
209 Assert(list_length(clause->args) == 2);
210 ret = (Node *)
211 match_pattern_prefix((Node *) linitial(clause->args),
212 (Node *) lsecond(clause->args),
213 ptype,
214 clause->inputcollid,
215 req->opfamily,
216 req->indexcollation);
218 else if (is_funcclause(req->node)) /* be paranoid */
220 FuncExpr *clause = (FuncExpr *) req->node;
222 Assert(list_length(clause->args) == 2);
223 ret = (Node *)
224 match_pattern_prefix((Node *) linitial(clause->args),
225 (Node *) lsecond(clause->args),
226 ptype,
227 clause->inputcollid,
228 req->opfamily,
229 req->indexcollation);
233 return ret;
237 * match_pattern_prefix
238 * Try to generate an indexqual for a LIKE or regex operator.
240 static List *
241 match_pattern_prefix(Node *leftop,
242 Node *rightop,
243 Pattern_Type ptype,
244 Oid expr_coll,
245 Oid opfamily,
246 Oid indexcollation)
248 List *result;
249 Const *patt;
250 Const *prefix;
251 Pattern_Prefix_Status pstatus;
252 Oid ldatatype;
253 Oid rdatatype;
254 Oid eqopr;
255 Oid ltopr;
256 Oid geopr;
257 Oid preopr = InvalidOid;
258 bool collation_aware;
259 Expr *expr;
260 FmgrInfo ltproc;
261 Const *greaterstr;
264 * Can't do anything with a non-constant or NULL pattern argument.
266 * Note that since we restrict ourselves to cases with a hard constant on
267 * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
268 * about verifying that.
270 if (!IsA(rightop, Const) ||
271 ((Const *) rightop)->constisnull)
272 return NIL;
273 patt = (Const *) rightop;
276 * Try to extract a fixed prefix from the pattern.
278 pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
279 &prefix, NULL);
281 /* fail if no fixed prefix */
282 if (pstatus == Pattern_Prefix_None)
283 return NIL;
286 * Identify the operators we want to use, based on the type of the
287 * left-hand argument. Usually these are just the type's regular
288 * comparison operators, but if we are considering one of the semi-legacy
289 * "pattern" opclasses, use the "pattern" operators instead. Those are
290 * not collation-sensitive but always use C collation, as we want. The
291 * selected operators also determine the needed type of the prefix
292 * constant.
294 ldatatype = exprType(leftop);
295 switch (ldatatype)
297 case TEXTOID:
298 if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
300 eqopr = TextEqualOperator;
301 ltopr = TextPatternLessOperator;
302 geopr = TextPatternGreaterEqualOperator;
303 collation_aware = false;
305 else if (opfamily == TEXT_SPGIST_FAM_OID)
307 eqopr = TextEqualOperator;
308 ltopr = TextPatternLessOperator;
309 geopr = TextPatternGreaterEqualOperator;
310 /* This opfamily has direct support for prefixing */
311 preopr = TextPrefixOperator;
312 collation_aware = false;
314 else
316 eqopr = TextEqualOperator;
317 ltopr = TextLessOperator;
318 geopr = TextGreaterEqualOperator;
319 collation_aware = true;
321 rdatatype = TEXTOID;
322 break;
323 case NAMEOID:
326 * Note that here, we need the RHS type to be text, so that the
327 * comparison value isn't improperly truncated to NAMEDATALEN.
329 eqopr = NameEqualTextOperator;
330 ltopr = NameLessTextOperator;
331 geopr = NameGreaterEqualTextOperator;
332 collation_aware = true;
333 rdatatype = TEXTOID;
334 break;
335 case BPCHAROID:
336 if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
338 eqopr = BpcharEqualOperator;
339 ltopr = BpcharPatternLessOperator;
340 geopr = BpcharPatternGreaterEqualOperator;
341 collation_aware = false;
343 else
345 eqopr = BpcharEqualOperator;
346 ltopr = BpcharLessOperator;
347 geopr = BpcharGreaterEqualOperator;
348 collation_aware = true;
350 rdatatype = BPCHAROID;
351 break;
352 case BYTEAOID:
353 eqopr = ByteaEqualOperator;
354 ltopr = ByteaLessOperator;
355 geopr = ByteaGreaterEqualOperator;
356 collation_aware = false;
357 rdatatype = BYTEAOID;
358 break;
359 default:
360 /* Can't get here unless we're attached to the wrong operator */
361 return NIL;
365 * If necessary, coerce the prefix constant to the right type. The given
366 * prefix constant is either text or bytea type, therefore the only case
367 * where we need to do anything is when converting text to bpchar. Those
368 * two types are binary-compatible, so relabeling the Const node is
369 * sufficient.
371 if (prefix->consttype != rdatatype)
373 Assert(prefix->consttype == TEXTOID &&
374 rdatatype == BPCHAROID);
375 prefix->consttype = rdatatype;
379 * If we found an exact-match pattern, generate an "=" indexqual.
381 * Here and below, check to see whether the desired operator is actually
382 * supported by the index opclass, and fail quietly if not. This allows
383 * us to not be concerned with specific opclasses (except for the legacy
384 * "pattern" cases); any index that correctly implements the operators
385 * will work.
387 if (pstatus == Pattern_Prefix_Exact)
389 if (!op_in_opfamily(eqopr, opfamily))
390 return NIL;
391 if (indexcollation != expr_coll)
392 return NIL;
393 expr = make_opclause(eqopr, BOOLOID, false,
394 (Expr *) leftop, (Expr *) prefix,
395 InvalidOid, indexcollation);
396 result = list_make1(expr);
397 return result;
401 * Anything other than Pattern_Prefix_Exact is not supported if the
402 * expression collation is nondeterministic. The optimized equality or
403 * prefix tests use bytewise comparisons, which is not consistent with
404 * nondeterministic collations.
406 * expr_coll is not set for a non-collation-aware data type such as bytea.
408 if (expr_coll && !get_collation_isdeterministic(expr_coll))
409 return NIL;
412 * Otherwise, we have a nonempty required prefix of the values. Some
413 * opclasses support prefix checks directly, otherwise we'll try to
414 * generate a range constraint.
416 if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
418 expr = make_opclause(preopr, BOOLOID, false,
419 (Expr *) leftop, (Expr *) prefix,
420 InvalidOid, indexcollation);
421 result = list_make1(expr);
422 return result;
426 * Since we need a range constraint, it's only going to work reliably if
427 * the index is collation-insensitive or has "C" collation. Note that
428 * here we are looking at the index's collation, not the expression's
429 * collation -- this test is *not* dependent on the LIKE/regex operator's
430 * collation.
432 if (collation_aware &&
433 !pg_newlocale_from_collation(indexcollation)->collate_is_c)
434 return NIL;
437 * We can always say "x >= prefix".
439 if (!op_in_opfamily(geopr, opfamily))
440 return NIL;
441 expr = make_opclause(geopr, BOOLOID, false,
442 (Expr *) leftop, (Expr *) prefix,
443 InvalidOid, indexcollation);
444 result = list_make1(expr);
446 /*-------
447 * If we can create a string larger than the prefix, we can say
448 * "x < greaterstr". NB: we rely on make_greater_string() to generate
449 * a guaranteed-greater string, not just a probably-greater string.
450 * In general this is only guaranteed in C locale, so we'd better be
451 * using a C-locale index collation.
452 *-------
454 if (!op_in_opfamily(ltopr, opfamily))
455 return result;
456 fmgr_info(get_opcode(ltopr), &ltproc);
457 greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
458 if (greaterstr)
460 expr = make_opclause(ltopr, BOOLOID, false,
461 (Expr *) leftop, (Expr *) greaterstr,
462 InvalidOid, indexcollation);
463 result = lappend(result, expr);
466 return result;
471 * patternsel_common - generic code for pattern-match restriction selectivity.
473 * To support using this from either the operator or function paths, caller
474 * may pass either operator OID or underlying function OID; we look up the
475 * latter from the former if needed. (We could just have patternsel() call
476 * get_opcode(), but the work would be wasted if we don't have a need to
477 * compare a fixed prefix to the pg_statistic data.)
479 * Note that oprid and/or opfuncid should be for the positive-match operator
480 * even when negate is true.
482 static double
483 patternsel_common(PlannerInfo *root,
484 Oid oprid,
485 Oid opfuncid,
486 List *args,
487 int varRelid,
488 Oid collation,
489 Pattern_Type ptype,
490 bool negate)
492 VariableStatData vardata;
493 Node *other;
494 bool varonleft;
495 Datum constval;
496 Oid consttype;
497 Oid vartype;
498 Oid rdatatype;
499 Oid eqopr;
500 Oid ltopr;
501 Oid geopr;
502 Pattern_Prefix_Status pstatus;
503 Const *patt;
504 Const *prefix = NULL;
505 Selectivity rest_selec = 0;
506 double nullfrac = 0.0;
507 double result;
510 * Initialize result to the appropriate default estimate depending on
511 * whether it's a match or not-match operator.
513 if (negate)
514 result = 1.0 - DEFAULT_MATCH_SEL;
515 else
516 result = DEFAULT_MATCH_SEL;
519 * If expression is not variable op constant, then punt and return the
520 * default estimate.
522 if (!get_restriction_variable(root, args, varRelid,
523 &vardata, &other, &varonleft))
524 return result;
525 if (!varonleft || !IsA(other, Const))
527 ReleaseVariableStats(vardata);
528 return result;
532 * If the constant is NULL, assume operator is strict and return zero, ie,
533 * operator will never return TRUE. (It's zero even for a negator op.)
535 if (((Const *) other)->constisnull)
537 ReleaseVariableStats(vardata);
538 return 0.0;
540 constval = ((Const *) other)->constvalue;
541 consttype = ((Const *) other)->consttype;
544 * The right-hand const is type text or bytea for all supported operators.
545 * We do not expect to see binary-compatible types here, since
546 * const-folding should have relabeled the const to exactly match the
547 * operator's declared type.
549 if (consttype != TEXTOID && consttype != BYTEAOID)
551 ReleaseVariableStats(vardata);
552 return result;
556 * Similarly, the exposed type of the left-hand side should be one of
557 * those we know. (Do not look at vardata.atttype, which might be
558 * something binary-compatible but different.) We can use it to identify
559 * the comparison operators and the required type of the comparison
560 * constant, much as in match_pattern_prefix().
562 vartype = vardata.vartype;
564 switch (vartype)
566 case TEXTOID:
567 eqopr = TextEqualOperator;
568 ltopr = TextLessOperator;
569 geopr = TextGreaterEqualOperator;
570 rdatatype = TEXTOID;
571 break;
572 case NAMEOID:
575 * Note that here, we need the RHS type to be text, so that the
576 * comparison value isn't improperly truncated to NAMEDATALEN.
578 eqopr = NameEqualTextOperator;
579 ltopr = NameLessTextOperator;
580 geopr = NameGreaterEqualTextOperator;
581 rdatatype = TEXTOID;
582 break;
583 case BPCHAROID:
584 eqopr = BpcharEqualOperator;
585 ltopr = BpcharLessOperator;
586 geopr = BpcharGreaterEqualOperator;
587 rdatatype = BPCHAROID;
588 break;
589 case BYTEAOID:
590 eqopr = ByteaEqualOperator;
591 ltopr = ByteaLessOperator;
592 geopr = ByteaGreaterEqualOperator;
593 rdatatype = BYTEAOID;
594 break;
595 default:
596 /* Can't get here unless we're attached to the wrong operator */
597 ReleaseVariableStats(vardata);
598 return result;
602 * Grab the nullfrac for use below.
604 if (HeapTupleIsValid(vardata.statsTuple))
606 Form_pg_statistic stats;
608 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
609 nullfrac = stats->stanullfrac;
613 * Pull out any fixed prefix implied by the pattern, and estimate the
614 * fractional selectivity of the remainder of the pattern. Unlike many
615 * other selectivity estimators, we use the pattern operator's actual
616 * collation for this step. This is not because we expect the collation
617 * to make a big difference in the selectivity estimate (it seldom would),
618 * but because we want to be sure we cache compiled regexps under the
619 * right cache key, so that they can be re-used at runtime.
621 patt = (Const *) other;
622 pstatus = pattern_fixed_prefix(patt, ptype, collation,
623 &prefix, &rest_selec);
626 * If necessary, coerce the prefix constant to the right type. The only
627 * case where we need to do anything is when converting text to bpchar.
628 * Those two types are binary-compatible, so relabeling the Const node is
629 * sufficient.
631 if (prefix && prefix->consttype != rdatatype)
633 Assert(prefix->consttype == TEXTOID &&
634 rdatatype == BPCHAROID);
635 prefix->consttype = rdatatype;
638 if (pstatus == Pattern_Prefix_Exact)
641 * Pattern specifies an exact match, so estimate as for '='
643 result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
644 false, true, false);
646 else
649 * Not exact-match pattern. If we have a sufficiently large
650 * histogram, estimate selectivity for the histogram part of the
651 * population by counting matches in the histogram. If not, estimate
652 * selectivity of the fixed prefix and remainder of pattern
653 * separately, then combine the two to get an estimate of the
654 * selectivity for the part of the column population represented by
655 * the histogram. (For small histograms, we combine these
656 * approaches.)
658 * We then add up data for any most-common-values values; these are
659 * not in the histogram population, and we can get exact answers for
660 * them by applying the pattern operator, so there's no reason to
661 * approximate. (If the MCVs cover a significant part of the total
662 * population, this gives us a big leg up in accuracy.)
664 Selectivity selec;
665 int hist_size;
666 FmgrInfo opproc;
667 double mcv_selec,
668 sumcommon;
670 /* Try to use the histogram entries to get selectivity */
671 if (!OidIsValid(opfuncid))
672 opfuncid = get_opcode(oprid);
673 fmgr_info(opfuncid, &opproc);
675 selec = histogram_selectivity(&vardata, &opproc, collation,
676 constval, true,
677 10, 1, &hist_size);
679 /* If not at least 100 entries, use the heuristic method */
680 if (hist_size < 100)
682 Selectivity heursel;
683 Selectivity prefixsel;
685 if (pstatus == Pattern_Prefix_Partial)
686 prefixsel = prefix_selectivity(root, &vardata,
687 eqopr, ltopr, geopr,
688 collation,
689 prefix);
690 else
691 prefixsel = 1.0;
692 heursel = prefixsel * rest_selec;
694 if (selec < 0) /* fewer than 10 histogram entries? */
695 selec = heursel;
696 else
699 * For histogram sizes from 10 to 100, we combine the
700 * histogram and heuristic selectivities, putting increasingly
701 * more trust in the histogram for larger sizes.
703 double hist_weight = hist_size / 100.0;
705 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
709 /* In any case, don't believe extremely small or large estimates. */
710 if (selec < 0.0001)
711 selec = 0.0001;
712 else if (selec > 0.9999)
713 selec = 0.9999;
716 * If we have most-common-values info, add up the fractions of the MCV
717 * entries that satisfy MCV OP PATTERN. These fractions contribute
718 * directly to the result selectivity. Also add up the total fraction
719 * represented by MCV entries.
721 mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
722 constval, true,
723 &sumcommon);
726 * Now merge the results from the MCV and histogram calculations,
727 * realizing that the histogram covers only the non-null values that
728 * are not listed in MCV.
730 selec *= 1.0 - nullfrac - sumcommon;
731 selec += mcv_selec;
732 result = selec;
735 /* now adjust if we wanted not-match rather than match */
736 if (negate)
737 result = 1.0 - result - nullfrac;
739 /* result should be in range, but make sure... */
740 CLAMP_PROBABILITY(result);
742 if (prefix)
744 pfree(DatumGetPointer(prefix->constvalue));
745 pfree(prefix);
748 ReleaseVariableStats(vardata);
750 return result;
754 * Fix impedance mismatch between SQL-callable functions and patternsel_common
756 static double
757 patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
759 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
760 Oid operator = PG_GETARG_OID(1);
761 List *args = (List *) PG_GETARG_POINTER(2);
762 int varRelid = PG_GETARG_INT32(3);
763 Oid collation = PG_GET_COLLATION();
766 * If this is for a NOT LIKE or similar operator, get the corresponding
767 * positive-match operator and work with that.
769 if (negate)
771 operator = get_negator(operator);
772 if (!OidIsValid(operator))
773 elog(ERROR, "patternsel called for operator without a negator");
776 return patternsel_common(root,
777 operator,
778 InvalidOid,
779 args,
780 varRelid,
781 collation,
782 ptype,
783 negate);
787 * regexeqsel - Selectivity of regular-expression pattern match.
789 Datum
790 regexeqsel(PG_FUNCTION_ARGS)
792 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
796 * icregexeqsel - Selectivity of case-insensitive regex match.
798 Datum
799 icregexeqsel(PG_FUNCTION_ARGS)
801 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
805 * likesel - Selectivity of LIKE pattern match.
807 Datum
808 likesel(PG_FUNCTION_ARGS)
810 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
814 * prefixsel - selectivity of prefix operator
816 Datum
817 prefixsel(PG_FUNCTION_ARGS)
819 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
824 * iclikesel - Selectivity of ILIKE pattern match.
826 Datum
827 iclikesel(PG_FUNCTION_ARGS)
829 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
833 * regexnesel - Selectivity of regular-expression pattern non-match.
835 Datum
836 regexnesel(PG_FUNCTION_ARGS)
838 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
842 * icregexnesel - Selectivity of case-insensitive regex non-match.
844 Datum
845 icregexnesel(PG_FUNCTION_ARGS)
847 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
851 * nlikesel - Selectivity of LIKE pattern non-match.
853 Datum
854 nlikesel(PG_FUNCTION_ARGS)
856 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
860 * icnlikesel - Selectivity of ILIKE pattern non-match.
862 Datum
863 icnlikesel(PG_FUNCTION_ARGS)
865 PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
869 * patternjoinsel - Generic code for pattern-match join selectivity.
871 static double
872 patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
874 /* For the moment we just punt. */
875 return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
879 * regexeqjoinsel - Join selectivity of regular-expression pattern match.
881 Datum
882 regexeqjoinsel(PG_FUNCTION_ARGS)
884 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
888 * icregexeqjoinsel - Join selectivity of case-insensitive regex match.
890 Datum
891 icregexeqjoinsel(PG_FUNCTION_ARGS)
893 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
897 * likejoinsel - Join selectivity of LIKE pattern match.
899 Datum
900 likejoinsel(PG_FUNCTION_ARGS)
902 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
906 * prefixjoinsel - Join selectivity of prefix operator
908 Datum
909 prefixjoinsel(PG_FUNCTION_ARGS)
911 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
915 * iclikejoinsel - Join selectivity of ILIKE pattern match.
917 Datum
918 iclikejoinsel(PG_FUNCTION_ARGS)
920 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
924 * regexnejoinsel - Join selectivity of regex non-match.
926 Datum
927 regexnejoinsel(PG_FUNCTION_ARGS)
929 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
933 * icregexnejoinsel - Join selectivity of case-insensitive regex non-match.
935 Datum
936 icregexnejoinsel(PG_FUNCTION_ARGS)
938 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
942 * nlikejoinsel - Join selectivity of LIKE pattern non-match.
944 Datum
945 nlikejoinsel(PG_FUNCTION_ARGS)
947 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
951 * icnlikejoinsel - Join selectivity of ILIKE pattern non-match.
953 Datum
954 icnlikejoinsel(PG_FUNCTION_ARGS)
956 PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
960 /*-------------------------------------------------------------------------
962 * Pattern analysis functions
964 * These routines support analysis of LIKE and regular-expression patterns
965 * by the planner/optimizer. It's important that they agree with the
966 * regular-expression code in backend/regex/ and the LIKE code in
967 * backend/utils/adt/like.c. Also, the computation of the fixed prefix
968 * must be conservative: if we report a string longer than the true fixed
969 * prefix, the query may produce actually wrong answers, rather than just
970 * getting a bad selectivity estimate!
972 *-------------------------------------------------------------------------
976 * Extract the fixed prefix, if any, for a pattern.
978 * *prefix is set to a palloc'd prefix string (in the form of a Const node),
979 * or to NULL if no fixed prefix exists for the pattern.
980 * If rest_selec is not NULL, *rest_selec is set to an estimate of the
981 * selectivity of the remainder of the pattern (without any fixed prefix).
982 * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
984 * The return value distinguishes no fixed prefix, a partial prefix,
985 * or an exact-match-only pattern.
988 static Pattern_Prefix_Status
989 like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
990 Const **prefix_const, Selectivity *rest_selec)
992 char *match;
993 char *patt;
994 int pattlen;
995 Oid typeid = patt_const->consttype;
996 int pos,
997 match_pos;
998 bool is_multibyte = (pg_database_encoding_max_length() > 1);
999 pg_locale_t locale = 0;
1001 /* the right-hand const is type text or bytea */
1002 Assert(typeid == BYTEAOID || typeid == TEXTOID);
1004 if (case_insensitive)
1006 if (typeid == BYTEAOID)
1007 ereport(ERROR,
1008 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1009 errmsg("case insensitive matching not supported on type bytea")));
1011 if (!OidIsValid(collation))
1014 * This typically means that the parser could not resolve a
1015 * conflict of implicit collations, so report it that way.
1017 ereport(ERROR,
1018 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1019 errmsg("could not determine which collation to use for ILIKE"),
1020 errhint("Use the COLLATE clause to set the collation explicitly.")));
1023 locale = pg_newlocale_from_collation(collation);
1026 if (typeid != BYTEAOID)
1028 patt = TextDatumGetCString(patt_const->constvalue);
1029 pattlen = strlen(patt);
1031 else
1033 bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
1035 pattlen = VARSIZE_ANY_EXHDR(bstr);
1036 patt = (char *) palloc(pattlen);
1037 memcpy(patt, VARDATA_ANY(bstr), pattlen);
1038 Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
1041 match = palloc(pattlen + 1);
1042 match_pos = 0;
1043 for (pos = 0; pos < pattlen; pos++)
1045 /* % and _ are wildcard characters in LIKE */
1046 if (patt[pos] == '%' ||
1047 patt[pos] == '_')
1048 break;
1050 /* Backslash escapes the next character */
1051 if (patt[pos] == '\\')
1053 pos++;
1054 if (pos >= pattlen)
1055 break;
1058 /* Stop if case-varying character (it's sort of a wildcard) */
1059 if (case_insensitive &&
1060 pattern_char_isalpha(patt[pos], is_multibyte, locale))
1061 break;
1063 match[match_pos++] = patt[pos];
1066 match[match_pos] = '\0';
1068 if (typeid != BYTEAOID)
1069 *prefix_const = string_to_const(match, typeid);
1070 else
1071 *prefix_const = string_to_bytea_const(match, match_pos);
1073 if (rest_selec != NULL)
1074 *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
1075 case_insensitive);
1077 pfree(patt);
1078 pfree(match);
1080 /* in LIKE, an empty pattern is an exact match! */
1081 if (pos == pattlen)
1082 return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
1084 if (match_pos > 0)
1085 return Pattern_Prefix_Partial;
1087 return Pattern_Prefix_None;
1090 static Pattern_Prefix_Status
1091 regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
1092 Const **prefix_const, Selectivity *rest_selec)
1094 Oid typeid = patt_const->consttype;
1095 char *prefix;
1096 bool exact;
1099 * Should be unnecessary, there are no bytea regex operators defined. As
1100 * such, it should be noted that the rest of this function has *not* been
1101 * made safe for binary (possibly NULL containing) strings.
1103 if (typeid == BYTEAOID)
1104 ereport(ERROR,
1105 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1106 errmsg("regular-expression matching not supported on type bytea")));
1108 /* Use the regexp machinery to extract the prefix, if any */
1109 prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
1110 case_insensitive, collation,
1111 &exact);
1113 if (prefix == NULL)
1115 *prefix_const = NULL;
1117 if (rest_selec != NULL)
1119 char *patt = TextDatumGetCString(patt_const->constvalue);
1121 *rest_selec = regex_selectivity(patt, strlen(patt),
1122 case_insensitive,
1124 pfree(patt);
1127 return Pattern_Prefix_None;
1130 *prefix_const = string_to_const(prefix, typeid);
1132 if (rest_selec != NULL)
1134 if (exact)
1136 /* Exact match, so there's no additional selectivity */
1137 *rest_selec = 1.0;
1139 else
1141 char *patt = TextDatumGetCString(patt_const->constvalue);
1143 *rest_selec = regex_selectivity(patt, strlen(patt),
1144 case_insensitive,
1145 strlen(prefix));
1146 pfree(patt);
1150 pfree(prefix);
1152 if (exact)
1153 return Pattern_Prefix_Exact; /* pattern specifies exact match */
1154 else
1155 return Pattern_Prefix_Partial;
1158 static Pattern_Prefix_Status
1159 pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
1160 Const **prefix, Selectivity *rest_selec)
1162 Pattern_Prefix_Status result;
1164 switch (ptype)
1166 case Pattern_Type_Like:
1167 result = like_fixed_prefix(patt, false, collation,
1168 prefix, rest_selec);
1169 break;
1170 case Pattern_Type_Like_IC:
1171 result = like_fixed_prefix(patt, true, collation,
1172 prefix, rest_selec);
1173 break;
1174 case Pattern_Type_Regex:
1175 result = regex_fixed_prefix(patt, false, collation,
1176 prefix, rest_selec);
1177 break;
1178 case Pattern_Type_Regex_IC:
1179 result = regex_fixed_prefix(patt, true, collation,
1180 prefix, rest_selec);
1181 break;
1182 case Pattern_Type_Prefix:
1183 /* Prefix type work is trivial. */
1184 result = Pattern_Prefix_Partial;
1185 *prefix = makeConst(patt->consttype,
1186 patt->consttypmod,
1187 patt->constcollid,
1188 patt->constlen,
1189 datumCopy(patt->constvalue,
1190 patt->constbyval,
1191 patt->constlen),
1192 patt->constisnull,
1193 patt->constbyval);
1194 if (rest_selec != NULL)
1195 *rest_selec = 1.0; /* all */
1196 break;
1197 default:
1198 elog(ERROR, "unrecognized ptype: %d", (int) ptype);
1199 result = Pattern_Prefix_None; /* keep compiler quiet */
1200 break;
1202 return result;
1206 * Estimate the selectivity of a fixed prefix for a pattern match.
1208 * A fixed prefix "foo" is estimated as the selectivity of the expression
1209 * "variable >= 'foo' AND variable < 'fop'".
1211 * The selectivity estimate is with respect to the portion of the column
1212 * population represented by the histogram --- the caller must fold this
1213 * together with info about MCVs and NULLs.
1215 * We use the given comparison operators and collation to do the estimation.
1216 * The given variable and Const must be of the associated datatype(s).
1218 * XXX Note: we make use of the upper bound to estimate operator selectivity
1219 * even if the locale is such that we cannot rely on the upper-bound string.
1220 * The selectivity only needs to be approximately right anyway, so it seems
1221 * more useful to use the upper-bound code than not.
1223 static Selectivity
1224 prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
1225 Oid eqopr, Oid ltopr, Oid geopr,
1226 Oid collation,
1227 Const *prefixcon)
1229 Selectivity prefixsel;
1230 FmgrInfo opproc;
1231 Const *greaterstrcon;
1232 Selectivity eq_sel;
1234 /* Estimate the selectivity of "x >= prefix" */
1235 fmgr_info(get_opcode(geopr), &opproc);
1237 prefixsel = ineq_histogram_selectivity(root, vardata,
1238 geopr, &opproc, true, true,
1239 collation,
1240 prefixcon->constvalue,
1241 prefixcon->consttype);
1243 if (prefixsel < 0.0)
1245 /* No histogram is present ... return a suitable default estimate */
1246 return DEFAULT_MATCH_SEL;
1250 * If we can create a string larger than the prefix, say "x < greaterstr".
1252 fmgr_info(get_opcode(ltopr), &opproc);
1253 greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
1254 if (greaterstrcon)
1256 Selectivity topsel;
1258 topsel = ineq_histogram_selectivity(root, vardata,
1259 ltopr, &opproc, false, false,
1260 collation,
1261 greaterstrcon->constvalue,
1262 greaterstrcon->consttype);
1264 /* ineq_histogram_selectivity worked before, it shouldn't fail now */
1265 Assert(topsel >= 0.0);
1268 * Merge the two selectivities in the same way as for a range query
1269 * (see clauselist_selectivity()). Note that we don't need to worry
1270 * about double-exclusion of nulls, since ineq_histogram_selectivity
1271 * doesn't count those anyway.
1273 prefixsel = topsel + prefixsel - 1.0;
1277 * If the prefix is long then the two bounding values might be too close
1278 * together for the histogram to distinguish them usefully, resulting in a
1279 * zero estimate (plus or minus roundoff error). To avoid returning a
1280 * ridiculously small estimate, compute the estimated selectivity for
1281 * "variable = 'foo'", and clamp to that. (Obviously, the resultant
1282 * estimate should be at least that.)
1284 * We apply this even if we couldn't make a greater string. That case
1285 * suggests that the prefix is near the maximum possible, and thus
1286 * probably off the end of the histogram, and thus we probably got a very
1287 * small estimate from the >= condition; so we still need to clamp.
1289 eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
1290 false, true, false);
1292 prefixsel = Max(prefixsel, eq_sel);
1294 return prefixsel;
1299 * Estimate the selectivity of a pattern of the specified type.
1300 * Note that any fixed prefix of the pattern will have been removed already,
1301 * so actually we may be looking at just a fragment of the pattern.
1303 * For now, we use a very simplistic approach: fixed characters reduce the
1304 * selectivity a good deal, character ranges reduce it a little,
1305 * wildcards (such as % for LIKE or .* for regex) increase it.
1308 #define FIXED_CHAR_SEL 0.20 /* about 1/5 */
1309 #define CHAR_RANGE_SEL 0.25
1310 #define ANY_CHAR_SEL 0.9 /* not 1, since it won't match end-of-string */
1311 #define FULL_WILDCARD_SEL 5.0
1312 #define PARTIAL_WILDCARD_SEL 2.0
1314 static Selectivity
1315 like_selectivity(const char *patt, int pattlen, bool case_insensitive)
1317 Selectivity sel = 1.0;
1318 int pos;
1320 /* Skip any leading wildcard; it's already factored into initial sel */
1321 for (pos = 0; pos < pattlen; pos++)
1323 if (patt[pos] != '%' && patt[pos] != '_')
1324 break;
1327 for (; pos < pattlen; pos++)
1329 /* % and _ are wildcard characters in LIKE */
1330 if (patt[pos] == '%')
1331 sel *= FULL_WILDCARD_SEL;
1332 else if (patt[pos] == '_')
1333 sel *= ANY_CHAR_SEL;
1334 else if (patt[pos] == '\\')
1336 /* Backslash quotes the next character */
1337 pos++;
1338 if (pos >= pattlen)
1339 break;
1340 sel *= FIXED_CHAR_SEL;
1342 else
1343 sel *= FIXED_CHAR_SEL;
1345 /* Could get sel > 1 if multiple wildcards */
1346 if (sel > 1.0)
1347 sel = 1.0;
1348 return sel;
1351 static Selectivity
1352 regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
1354 Selectivity sel = 1.0;
1355 int paren_depth = 0;
1356 int paren_pos = 0; /* dummy init to keep compiler quiet */
1357 int pos;
1359 /* since this function recurses, it could be driven to stack overflow */
1360 check_stack_depth();
1362 for (pos = 0; pos < pattlen; pos++)
1364 if (patt[pos] == '(')
1366 if (paren_depth == 0)
1367 paren_pos = pos; /* remember start of parenthesized item */
1368 paren_depth++;
1370 else if (patt[pos] == ')' && paren_depth > 0)
1372 paren_depth--;
1373 if (paren_depth == 0)
1374 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
1375 pos - (paren_pos + 1),
1376 case_insensitive);
1378 else if (patt[pos] == '|' && paren_depth == 0)
1381 * If unquoted | is present at paren level 0 in pattern, we have
1382 * multiple alternatives; sum their probabilities.
1384 sel += regex_selectivity_sub(patt + (pos + 1),
1385 pattlen - (pos + 1),
1386 case_insensitive);
1387 break; /* rest of pattern is now processed */
1389 else if (patt[pos] == '[')
1391 bool negclass = false;
1393 if (patt[++pos] == '^')
1395 negclass = true;
1396 pos++;
1398 if (patt[pos] == ']') /* ']' at start of class is not special */
1399 pos++;
1400 while (pos < pattlen && patt[pos] != ']')
1401 pos++;
1402 if (paren_depth == 0)
1403 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
1405 else if (patt[pos] == '.')
1407 if (paren_depth == 0)
1408 sel *= ANY_CHAR_SEL;
1410 else if (patt[pos] == '*' ||
1411 patt[pos] == '?' ||
1412 patt[pos] == '+')
1414 /* Ought to be smarter about quantifiers... */
1415 if (paren_depth == 0)
1416 sel *= PARTIAL_WILDCARD_SEL;
1418 else if (patt[pos] == '{')
1420 while (pos < pattlen && patt[pos] != '}')
1421 pos++;
1422 if (paren_depth == 0)
1423 sel *= PARTIAL_WILDCARD_SEL;
1425 else if (patt[pos] == '\\')
1427 /* backslash quotes the next character */
1428 pos++;
1429 if (pos >= pattlen)
1430 break;
1431 if (paren_depth == 0)
1432 sel *= FIXED_CHAR_SEL;
1434 else
1436 if (paren_depth == 0)
1437 sel *= FIXED_CHAR_SEL;
1440 /* Could get sel > 1 if multiple wildcards */
1441 if (sel > 1.0)
1442 sel = 1.0;
1443 return sel;
1446 static Selectivity
1447 regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
1448 int fixed_prefix_len)
1450 Selectivity sel;
1452 /* If patt doesn't end with $, consider it to have a trailing wildcard */
1453 if (pattlen > 0 && patt[pattlen - 1] == '$' &&
1454 (pattlen == 1 || patt[pattlen - 2] != '\\'))
1456 /* has trailing $ */
1457 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
1459 else
1461 /* no trailing $ */
1462 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
1463 sel *= FULL_WILDCARD_SEL;
1467 * If there's a fixed prefix, discount its selectivity. We have to be
1468 * careful here since a very long prefix could result in pow's result
1469 * underflowing to zero (in which case "sel" probably has as well).
1471 if (fixed_prefix_len > 0)
1473 double prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
1475 if (prefixsel > 0.0)
1476 sel /= prefixsel;
1479 /* Make sure result stays in range */
1480 CLAMP_PROBABILITY(sel);
1481 return sel;
1485 * Check whether char is a letter (and, hence, subject to case-folding)
1487 * In multibyte character sets or with ICU, we can't use isalpha, and it does
1488 * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
1489 * Instead, just assume any non-ASCII char is potentially case-varying, and
1490 * hard-wire knowledge of which ASCII chars are letters.
1492 static int
1493 pattern_char_isalpha(char c, bool is_multibyte,
1494 pg_locale_t locale)
1496 if (locale->ctype_is_c)
1497 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1498 else if (is_multibyte && IS_HIGHBIT_SET(c))
1499 return true;
1500 else if (locale->provider != COLLPROVIDER_LIBC)
1501 return IS_HIGHBIT_SET(c) ||
1502 (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1503 else
1504 return isalpha_l((unsigned char) c, locale->info.lt);
1509 * For bytea, the increment function need only increment the current byte
1510 * (there are no multibyte characters to worry about).
1512 static bool
1513 byte_increment(unsigned char *ptr, int len)
1515 if (*ptr >= 255)
1516 return false;
1517 (*ptr)++;
1518 return true;
1522 * Try to generate a string greater than the given string or any
1523 * string it is a prefix of. If successful, return a palloc'd string
1524 * in the form of a Const node; else return NULL.
1526 * The caller must provide the appropriate "less than" comparison function
1527 * for testing the strings, along with the collation to use.
1529 * The key requirement here is that given a prefix string, say "foo",
1530 * we must be able to generate another string "fop" that is greater than
1531 * all strings "foobar" starting with "foo". We can test that we have
1532 * generated a string greater than the prefix string, but in non-C collations
1533 * that is not a bulletproof guarantee that an extension of the string might
1534 * not sort after it; an example is that "foo " is less than "foo!", but it
1535 * is not clear that a "dictionary" sort ordering will consider "foo!" less
1536 * than "foo bar". CAUTION: Therefore, this function should be used only for
1537 * estimation purposes when working in a non-C collation.
1539 * To try to catch most cases where an extended string might otherwise sort
1540 * before the result value, we determine which of the strings "Z", "z", "y",
1541 * and "9" is seen as largest by the collation, and append that to the given
1542 * prefix before trying to find a string that compares as larger.
1544 * To search for a greater string, we repeatedly "increment" the rightmost
1545 * character, using an encoding-specific character incrementer function.
1546 * When it's no longer possible to increment the last character, we truncate
1547 * off that character and start incrementing the next-to-rightmost.
1548 * For example, if "z" were the last character in the sort order, then we
1549 * could produce "foo" as a string greater than "fonz".
1551 * This could be rather slow in the worst case, but in most cases we
1552 * won't have to try more than one or two strings before succeeding.
1554 * Note that it's important for the character incrementer not to be too anal
1555 * about producing every possible character code, since in some cases the only
1556 * way to get a larger string is to increment a previous character position.
1557 * So we don't want to spend too much time trying every possible character
1558 * code at the last position. A good rule of thumb is to be sure that we
1559 * don't try more than 256*K values for a K-byte character (and definitely
1560 * not 256^K, which is what an exhaustive search would approach).
1562 static Const *
1563 make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
1565 Oid datatype = str_const->consttype;
1566 char *workstr;
1567 int len;
1568 Datum cmpstr;
1569 char *cmptxt = NULL;
1570 mbcharacter_incrementer charinc;
1573 * Get a modifiable copy of the prefix string in C-string format, and set
1574 * up the string we will compare to as a Datum. In C locale this can just
1575 * be the given prefix string, otherwise we need to add a suffix. Type
1576 * BYTEA sorts bytewise so it never needs a suffix either.
1578 if (datatype == BYTEAOID)
1580 bytea *bstr = DatumGetByteaPP(str_const->constvalue);
1582 len = VARSIZE_ANY_EXHDR(bstr);
1583 workstr = (char *) palloc(len);
1584 memcpy(workstr, VARDATA_ANY(bstr), len);
1585 Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
1586 cmpstr = str_const->constvalue;
1588 else
1590 if (datatype == NAMEOID)
1591 workstr = DatumGetCString(DirectFunctionCall1(nameout,
1592 str_const->constvalue));
1593 else
1594 workstr = TextDatumGetCString(str_const->constvalue);
1595 len = strlen(workstr);
1596 if (len == 0 || pg_newlocale_from_collation(collation)->collate_is_c)
1597 cmpstr = str_const->constvalue;
1598 else
1600 /* If first time through, determine the suffix to use */
1601 static char suffixchar = 0;
1602 static Oid suffixcollation = 0;
1604 if (!suffixchar || suffixcollation != collation)
1606 char *best;
1608 best = "Z";
1609 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
1610 best = "z";
1611 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
1612 best = "y";
1613 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
1614 best = "9";
1615 suffixchar = *best;
1616 suffixcollation = collation;
1619 /* And build the string to compare to */
1620 if (datatype == NAMEOID)
1622 cmptxt = palloc(len + 2);
1623 memcpy(cmptxt, workstr, len);
1624 cmptxt[len] = suffixchar;
1625 cmptxt[len + 1] = '\0';
1626 cmpstr = PointerGetDatum(cmptxt);
1628 else
1630 cmptxt = palloc(VARHDRSZ + len + 1);
1631 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
1632 memcpy(VARDATA(cmptxt), workstr, len);
1633 *(VARDATA(cmptxt) + len) = suffixchar;
1634 cmpstr = PointerGetDatum(cmptxt);
1639 /* Select appropriate character-incrementer function */
1640 if (datatype == BYTEAOID)
1641 charinc = byte_increment;
1642 else
1643 charinc = pg_database_encoding_character_incrementer();
1645 /* And search ... */
1646 while (len > 0)
1648 int charlen;
1649 unsigned char *lastchar;
1651 /* Identify the last character --- for bytea, just the last byte */
1652 if (datatype == BYTEAOID)
1653 charlen = 1;
1654 else
1655 charlen = len - pg_mbcliplen(workstr, len, len - 1);
1656 lastchar = (unsigned char *) (workstr + len - charlen);
1659 * Try to generate a larger string by incrementing the last character
1660 * (for BYTEA, we treat each byte as a character).
1662 * Note: the incrementer function is expected to return true if it's
1663 * generated a valid-per-the-encoding new character, otherwise false.
1664 * The contents of the character on false return are unspecified.
1666 while (charinc(lastchar, charlen))
1668 Const *workstr_const;
1670 if (datatype == BYTEAOID)
1671 workstr_const = string_to_bytea_const(workstr, len);
1672 else
1673 workstr_const = string_to_const(workstr, datatype);
1675 if (DatumGetBool(FunctionCall2Coll(ltproc,
1676 collation,
1677 cmpstr,
1678 workstr_const->constvalue)))
1680 /* Successfully made a string larger than cmpstr */
1681 if (cmptxt)
1682 pfree(cmptxt);
1683 pfree(workstr);
1684 return workstr_const;
1687 /* No good, release unusable value and try again */
1688 pfree(DatumGetPointer(workstr_const->constvalue));
1689 pfree(workstr_const);
1693 * No luck here, so truncate off the last character and try to
1694 * increment the next one.
1696 len -= charlen;
1697 workstr[len] = '\0';
1700 /* Failed... */
1701 if (cmptxt)
1702 pfree(cmptxt);
1703 pfree(workstr);
1705 return NULL;
1709 * Generate a Datum of the appropriate type from a C string.
1710 * Note that all of the supported types are pass-by-ref, so the
1711 * returned value should be pfree'd if no longer needed.
1713 static Datum
1714 string_to_datum(const char *str, Oid datatype)
1716 Assert(str != NULL);
1719 * We cheat a little by assuming that CStringGetTextDatum() will do for
1720 * bpchar and varchar constants too...
1722 if (datatype == NAMEOID)
1723 return DirectFunctionCall1(namein, CStringGetDatum(str));
1724 else if (datatype == BYTEAOID)
1725 return DirectFunctionCall1(byteain, CStringGetDatum(str));
1726 else
1727 return CStringGetTextDatum(str);
1731 * Generate a Const node of the appropriate type from a C string.
1733 static Const *
1734 string_to_const(const char *str, Oid datatype)
1736 Datum conval = string_to_datum(str, datatype);
1737 Oid collation;
1738 int constlen;
1741 * We only need to support a few datatypes here, so hard-wire properties
1742 * instead of incurring the expense of catalog lookups.
1744 switch (datatype)
1746 case TEXTOID:
1747 case VARCHAROID:
1748 case BPCHAROID:
1749 collation = DEFAULT_COLLATION_OID;
1750 constlen = -1;
1751 break;
1753 case NAMEOID:
1754 collation = C_COLLATION_OID;
1755 constlen = NAMEDATALEN;
1756 break;
1758 case BYTEAOID:
1759 collation = InvalidOid;
1760 constlen = -1;
1761 break;
1763 default:
1764 elog(ERROR, "unexpected datatype in string_to_const: %u",
1765 datatype);
1766 return NULL;
1769 return makeConst(datatype, -1, collation, constlen,
1770 conval, false, false);
1774 * Generate a Const node of bytea type from a binary C string and a length.
1776 static Const *
1777 string_to_bytea_const(const char *str, size_t str_len)
1779 bytea *bstr = palloc(VARHDRSZ + str_len);
1780 Datum conval;
1782 memcpy(VARDATA(bstr), str, str_len);
1783 SET_VARSIZE(bstr, VARHDRSZ + str_len);
1784 conval = PointerGetDatum(bstr);
1786 return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);