/*-------------------------------------------------------------------------
 *
 * Functions for selectivity estimation of intarray operators
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * contrib/intarray/_int_selfuncs.c
 *
 *-------------------------------------------------------------------------
 */
18 #include "access/htup_details.h"
19 #include "catalog/pg_operator.h"
20 #include "catalog/pg_statistic.h"
21 #include "catalog/pg_type.h"
22 #include "miscadmin.h"
23 #include "utils/builtins.h"
24 #include "utils/lsyscache.h"
25 #include "utils/selfuncs.h"
26 #include "utils/syscache.h"
/* Function-manager registration for every SQL-callable function in this file. */
PG_FUNCTION_INFO_V1(_int_overlap_sel);
PG_FUNCTION_INFO_V1(_int_contains_sel);
PG_FUNCTION_INFO_V1(_int_contained_sel);
PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
PG_FUNCTION_INFO_V1(_int_contains_joinsel);
PG_FUNCTION_INFO_V1(_int_contained_joinsel);
PG_FUNCTION_INFO_V1(_int_matchsel);
37 static Selectivity
int_query_opr_selec(ITEM
*item
, Datum
*values
, float4
*freqs
,
38 int nmncelems
, float4 minfreq
);
39 static int compare_val_int4(const void *a
, const void *b
);
/*
 * Wrappers around the default array selectivity estimation functions.
 *
 * The default array selectivity operators for the @>, && and <@ operators
 * work fine for integer arrays. However, if we tried to just use arraycontsel
 * and arraycontjoinsel directly as the cost estimator functions for our
 * operators, they would not work as intended, because they look at the
 * operator's OID. Our operators behave exactly like the built-in anyarray
 * versions, but we must tell the cost estimator functions which built-in
 * operators they correspond to. These wrappers just replace the operator
 * OID with the corresponding built-in operator's OID, and call the built-in
 * function.
 */
56 _int_overlap_sel(PG_FUNCTION_ARGS
)
58 PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel
,
60 ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP
),
66 _int_contains_sel(PG_FUNCTION_ARGS
)
68 PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel
,
70 ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP
),
76 _int_contained_sel(PG_FUNCTION_ARGS
)
78 PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel
,
80 ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP
),
86 _int_overlap_joinsel(PG_FUNCTION_ARGS
)
88 PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel
,
90 ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP
),
97 _int_contains_joinsel(PG_FUNCTION_ARGS
)
99 PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel
,
101 ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP
),
104 PG_GETARG_DATUM(4)));
108 _int_contained_joinsel(PG_FUNCTION_ARGS
)
110 PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel
,
112 ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP
),
115 PG_GETARG_DATUM(4)));
120 * _int_matchsel -- restriction selectivity function for intarray @@ query_int
123 _int_matchsel(PG_FUNCTION_ARGS
)
125 PlannerInfo
*root
= (PlannerInfo
*) PG_GETARG_POINTER(0);
127 List
*args
= (List
*) PG_GETARG_POINTER(2);
128 int varRelid
= PG_GETARG_INT32(3);
129 VariableStatData vardata
;
134 Datum
*mcelems
= NULL
;
135 float4
*mcefreqs
= NULL
;
137 float4 minfreq
= 0.0;
138 float4 nullfrac
= 0.0;
142 * If expression is not "variable @@ something" or "something @@ variable"
143 * then punt and return a default estimate.
145 if (!get_restriction_variable(root
, args
, varRelid
,
146 &vardata
, &other
, &varonleft
))
147 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL
);
150 * Variable should be int[]. We don't support cases where variable is
153 if (vardata
.vartype
!= INT4ARRAYOID
)
154 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL
);
157 * Can't do anything useful if the something is not a constant, either.
159 if (!IsA(other
, Const
))
161 ReleaseVariableStats(vardata
);
162 PG_RETURN_FLOAT8(DEFAULT_EQ_SEL
);
166 * The "@@" operator is strict, so we can cope with NULL right away.
168 if (((Const
*) other
)->constisnull
)
170 ReleaseVariableStats(vardata
);
171 PG_RETURN_FLOAT8(0.0);
174 /* The caller made sure the const is a query, so get it now */
175 query
= DatumGetQueryTypeP(((Const
*) other
)->constvalue
);
177 /* Empty query matches nothing */
178 if (query
->size
== 0)
180 ReleaseVariableStats(vardata
);
181 return (Selectivity
) 0.0;
185 * Get the statistics for the intarray column.
187 * We're interested in the Most-Common-Elements list, and the NULL
190 if (HeapTupleIsValid(vardata
.statsTuple
))
192 Form_pg_statistic stats
;
194 stats
= (Form_pg_statistic
) GETSTRUCT(vardata
.statsTuple
);
195 nullfrac
= stats
->stanullfrac
;
198 * For an int4 array, the default array type analyze function will
199 * collect a Most Common Elements list, which is an array of int4s.
201 if (get_attstatsslot(&sslot
, vardata
.statsTuple
,
202 STATISTIC_KIND_MCELEM
, InvalidOid
,
203 ATTSTATSSLOT_VALUES
| ATTSTATSSLOT_NUMBERS
))
205 Assert(sslot
.valuetype
== INT4OID
);
208 * There should be three more Numbers than Values, because the
209 * last three (for intarray) cells are taken for minimal, maximal
210 * and nulls frequency. Punt if not.
212 if (sslot
.nnumbers
== sslot
.nvalues
+ 3)
214 /* Grab the lowest frequency. */
215 minfreq
= sslot
.numbers
[sslot
.nnumbers
- (sslot
.nnumbers
- sslot
.nvalues
)];
217 mcelems
= sslot
.values
;
218 mcefreqs
= sslot
.numbers
;
219 nmcelems
= sslot
.nvalues
;
224 memset(&sslot
, 0, sizeof(sslot
));
226 /* Process the logical expression in the query, using the stats */
227 selec
= int_query_opr_selec(GETQUERY(query
) + query
->size
- 1,
228 mcelems
, mcefreqs
, nmcelems
, minfreq
);
230 /* MCE stats count only non-null rows, so adjust for null rows. */
231 selec
*= (1.0 - nullfrac
);
233 free_attstatsslot(&sslot
);
234 ReleaseVariableStats(vardata
);
236 CLAMP_PROBABILITY(selec
);
238 PG_RETURN_FLOAT8((float8
) selec
);
242 * Estimate selectivity of single intquery operator
245 int_query_opr_selec(ITEM
*item
, Datum
*mcelems
, float4
*mcefreqs
,
246 int nmcelems
, float4 minfreq
)
250 /* since this function recurses, it could be driven to stack overflow */
253 if (item
->type
== VAL
)
258 return (Selectivity
) DEFAULT_EQ_SEL
;
260 searchres
= (Datum
*) bsearch(&item
->val
, mcelems
, nmcelems
,
261 sizeof(Datum
), compare_val_int4
);
265 * The element is in MCELEM. Return precise selectivity (or at
266 * least as precise as ANALYZE could find out).
268 selec
= mcefreqs
[searchres
- mcelems
];
273 * The element is not in MCELEM. Punt, but assume that the
274 * selectivity cannot be more than minfreq / 2.
276 selec
= Min(DEFAULT_EQ_SEL
, minfreq
/ 2);
279 else if (item
->type
== OPR
)
281 /* Current query node is an operator */
285 s1
= int_query_opr_selec(item
- 1, mcelems
, mcefreqs
, nmcelems
,
294 s2
= int_query_opr_selec(item
+ item
->left
, mcelems
, mcefreqs
,
300 s2
= int_query_opr_selec(item
+ item
->left
, mcelems
, mcefreqs
,
302 selec
= s1
+ s2
- s1
* s2
;
306 elog(ERROR
, "unrecognized operator: %d", item
->val
);
307 selec
= 0; /* keep compiler quiet */
313 elog(ERROR
, "unrecognized int query item type: %u", item
->type
);
314 selec
= 0; /* keep compiler quiet */
317 /* Clamp intermediate results to stay sane despite roundoff error */
318 CLAMP_PROBABILITY(selec
);
324 * Comparison function for binary search in mcelem array.
327 compare_val_int4(const void *a
, const void *b
)
329 int32 key
= *(int32
*) a
;
330 const Datum
*t
= (const Datum
*) b
;
332 return key
- DatumGetInt32(*t
);