Harmonize more parameter names in bulk.
[pgsql.git] / src / backend / statistics / dependencies.c
blobbf698c1fc3f0e8e332e78631ec3f8f7bce84da65
1 /*-------------------------------------------------------------------------
3 * dependencies.c
4 * POSTGRES functional dependencies
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
9 * IDENTIFICATION
10 * src/backend/statistics/dependencies.c
12 *-------------------------------------------------------------------------
14 #include "postgres.h"
16 #include "access/htup_details.h"
17 #include "access/sysattr.h"
18 #include "catalog/pg_operator.h"
19 #include "catalog/pg_statistic_ext.h"
20 #include "catalog/pg_statistic_ext_data.h"
21 #include "lib/stringinfo.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/nodes.h"
24 #include "nodes/pathnodes.h"
25 #include "optimizer/clauses.h"
26 #include "optimizer/optimizer.h"
27 #include "parser/parsetree.h"
28 #include "statistics/extended_stats_internal.h"
29 #include "statistics/statistics.h"
30 #include "utils/bytea.h"
31 #include "utils/fmgroids.h"
32 #include "utils/fmgrprotos.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/selfuncs.h"
36 #include "utils/syscache.h"
37 #include "utils/typcache.h"
39 /* size of the struct header fields (magic, type, ndeps) */
40 #define SizeOfHeader (3 * sizeof(uint32))
42 /* size of a serialized dependency (degree, natts, atts) */
43 #define SizeOfItem(natts) \
44 (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
46 /* minimal size of a dependency (with two attributes) */
47 #define MinSizeOfItem SizeOfItem(2)
49 /* minimal size of dependencies, when all deps are minimal */
50 #define MinSizeOfItems(ndeps) \
51 (SizeOfHeader + (ndeps) * MinSizeOfItem)
54 * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
55 * k-permutations of n elements, except that the order does not matter for the
56 * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
58 typedef struct DependencyGeneratorData
60 int k; /* size of the dependency */
61 int n; /* number of possible attributes */
62 int current; /* next dependency to return (index) */
63 AttrNumber ndependencies; /* number of dependencies generated */
64 AttrNumber *dependencies; /* array of pre-generated dependencies */
65 } DependencyGeneratorData;
67 typedef DependencyGeneratorData *DependencyGenerator;
69 static void generate_dependencies_recurse(DependencyGenerator state,
70 int index, AttrNumber start, AttrNumber *current);
71 static void generate_dependencies(DependencyGenerator state);
72 static DependencyGenerator DependencyGenerator_init(int n, int k);
73 static void DependencyGenerator_free(DependencyGenerator state);
74 static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
75 static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
76 static bool dependency_is_fully_matched(MVDependency *dependency,
77 Bitmapset *attnums);
78 static bool dependency_is_compatible_clause(Node *clause, Index relid,
79 AttrNumber *attnum);
80 static bool dependency_is_compatible_expression(Node *clause, Index relid,
81 List *statlist, Node **expr);
82 static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
83 int ndependencies, Bitmapset *attnums);
84 static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
85 int varRelid, JoinType jointype,
86 SpecialJoinInfo *sjinfo,
87 MVDependency **dependencies,
88 int ndependencies,
89 AttrNumber *list_attnums,
90 Bitmapset **estimatedclauses);
92 static void
93 generate_dependencies_recurse(DependencyGenerator state, int index,
94 AttrNumber start, AttrNumber *current)
97 * The generator handles the first (k-1) elements differently from the
98 * last element.
100 if (index < (state->k - 1))
102 AttrNumber i;
105 * The first (k-1) values have to be in ascending order, which we
106 * generate recursively.
109 for (i = start; i < state->n; i++)
111 current[index] = i;
112 generate_dependencies_recurse(state, (index + 1), (i + 1), current);
115 else
117 int i;
120 * the last element is the implied value, which does not respect the
121 * ascending order. We just need to check that the value is not in the
122 * first (k-1) elements.
125 for (i = 0; i < state->n; i++)
127 int j;
128 bool match = false;
130 current[index] = i;
132 for (j = 0; j < index; j++)
134 if (current[j] == i)
136 match = true;
137 break;
142 * If the value is not found in the first part of the dependency,
143 * we're done.
145 if (!match)
147 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
148 state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
149 memcpy(&state->dependencies[(state->k * state->ndependencies)],
150 current, state->k * sizeof(AttrNumber));
151 state->ndependencies++;
157 /* generate all dependencies (k-permutations of n elements) */
158 static void
159 generate_dependencies(DependencyGenerator state)
161 AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
163 generate_dependencies_recurse(state, 0, 0, current);
165 pfree(current);
169 * initialize the DependencyGenerator of variations, and prebuild the variations
171 * This pre-builds all the variations. We could also generate them in
172 * DependencyGenerator_next(), but this seems simpler.
174 static DependencyGenerator
175 DependencyGenerator_init(int n, int k)
177 DependencyGenerator state;
179 Assert((n >= k) && (k > 0));
181 /* allocate the DependencyGenerator state */
182 state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
183 state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
185 state->ndependencies = 0;
186 state->current = 0;
187 state->k = k;
188 state->n = n;
190 /* now actually pre-generate all the variations */
191 generate_dependencies(state);
193 return state;
196 /* free the DependencyGenerator state */
197 static void
198 DependencyGenerator_free(DependencyGenerator state)
200 pfree(state->dependencies);
201 pfree(state);
204 /* generate next combination */
205 static AttrNumber *
206 DependencyGenerator_next(DependencyGenerator state)
208 if (state->current == state->ndependencies)
209 return NULL;
211 return &state->dependencies[state->k * state->current++];
216 * validates functional dependency on the data
218 * An actual work horse of detecting functional dependencies. Given a variation
219 * of k attributes, it checks that the first (k-1) are sufficient to determine
220 * the last one.
222 static double
223 dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
225 int i,
226 nitems;
227 MultiSortSupport mss;
228 SortItem *items;
229 AttrNumber *attnums_dep;
231 /* counters valid within a group */
232 int group_size = 0;
233 int n_violations = 0;
235 /* total number of rows supporting (consistent with) the dependency */
236 int n_supporting_rows = 0;
238 /* Make sure we have at least two input attributes. */
239 Assert(k >= 2);
241 /* sort info for all attributes columns */
242 mss = multi_sort_init(k);
245 * Translate the array of indexes to regular attnums for the dependency
246 * (we will need this to identify the columns in StatsBuildData).
248 attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
249 for (i = 0; i < k; i++)
250 attnums_dep[i] = data->attnums[dependency[i]];
253 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
255 * (a) sort the data lexicographically
257 * (b) split the data into groups by first (k-1) columns
259 * (c) for each group count different values in the last column
261 * We use the column data types' default sort operators and collations;
262 * perhaps at some point it'd be worth using column-specific collations?
265 /* prepare the sort function for the dimensions */
266 for (i = 0; i < k; i++)
268 VacAttrStats *colstat = data->stats[dependency[i]];
269 TypeCacheEntry *type;
271 type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
272 if (type->lt_opr == InvalidOid) /* shouldn't happen */
273 elog(ERROR, "cache lookup failed for ordering operator for type %u",
274 colstat->attrtypid);
276 /* prepare the sort function for this dimension */
277 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
281 * build an array of SortItem(s) sorted using the multi-sort support
283 * XXX This relies on all stats entries pointing to the same tuple
284 * descriptor. For now that assumption holds, but it might change in the
285 * future for example if we support statistics on multiple tables.
287 items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
290 * Walk through the sorted array, split it into rows according to the
291 * first (k-1) columns. If there's a single value in the last column, we
292 * count the group as 'supporting' the functional dependency. Otherwise we
293 * count it as contradicting.
296 /* start with the first row forming a group */
297 group_size = 1;
299 /* loop 1 beyond the end of the array so that we count the final group */
300 for (i = 1; i <= nitems; i++)
303 * Check if the group ended, which may be either because we processed
304 * all the items (i==nitems), or because the i-th item is not equal to
305 * the preceding one.
307 if (i == nitems ||
308 multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
311 * If no violations were found in the group then track the rows of
312 * the group as supporting the functional dependency.
314 if (n_violations == 0)
315 n_supporting_rows += group_size;
317 /* Reset counters for the new group */
318 n_violations = 0;
319 group_size = 1;
320 continue;
322 /* first columns match, but the last one does not (so contradicting) */
323 else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
324 n_violations++;
326 group_size++;
329 /* Compute the 'degree of validity' as (supporting/total). */
330 return (n_supporting_rows * 1.0 / data->numrows);
334 * detects functional dependencies between groups of columns
336 * Generates all possible subsets of columns (variations) and computes
337 * the degree of validity for each one. For example when creating statistics
338 * on three columns (a,b,c) there are 9 possible dependencies
340 * two columns three columns
341 * ----------- -------------
342 * (a) -> b (a,b) -> c
343 * (a) -> c (a,c) -> b
344 * (b) -> a (b,c) -> a
345 * (b) -> c
346 * (c) -> a
347 * (c) -> b
349 MVDependencies *
350 statext_dependencies_build(StatsBuildData *data)
352 int i,
355 /* result */
356 MVDependencies *dependencies = NULL;
357 MemoryContext cxt;
359 Assert(data->nattnums >= 2);
361 /* tracks memory allocated by dependency_degree calls */
362 cxt = AllocSetContextCreate(CurrentMemoryContext,
363 "dependency_degree cxt",
364 ALLOCSET_DEFAULT_SIZES);
367 * We'll try build functional dependencies starting from the smallest ones
368 * covering just 2 columns, to the largest ones, covering all columns
369 * included in the statistics object. We start from the smallest ones
370 * because we want to be able to skip already implied ones.
372 for (k = 2; k <= data->nattnums; k++)
374 AttrNumber *dependency; /* array with k elements */
376 /* prepare a DependencyGenerator of variation */
377 DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
379 /* generate all possible variations of k values (out of n) */
380 while ((dependency = DependencyGenerator_next(DependencyGenerator)))
382 double degree;
383 MVDependency *d;
384 MemoryContext oldcxt;
386 /* release memory used by dependency degree calculation */
387 oldcxt = MemoryContextSwitchTo(cxt);
389 /* compute how valid the dependency seems */
390 degree = dependency_degree(data, k, dependency);
392 MemoryContextSwitchTo(oldcxt);
393 MemoryContextReset(cxt);
396 * if the dependency seems entirely invalid, don't store it
398 if (degree == 0.0)
399 continue;
401 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
402 + k * sizeof(AttrNumber));
404 /* copy the dependency (and keep the indexes into stxkeys) */
405 d->degree = degree;
406 d->nattributes = k;
407 for (i = 0; i < k; i++)
408 d->attributes[i] = data->attnums[dependency[i]];
410 /* initialize the list of dependencies */
411 if (dependencies == NULL)
413 dependencies
414 = (MVDependencies *) palloc0(sizeof(MVDependencies));
416 dependencies->magic = STATS_DEPS_MAGIC;
417 dependencies->type = STATS_DEPS_TYPE_BASIC;
418 dependencies->ndeps = 0;
421 dependencies->ndeps++;
422 dependencies = (MVDependencies *) repalloc(dependencies,
423 offsetof(MVDependencies, deps)
424 + dependencies->ndeps * sizeof(MVDependency *));
426 dependencies->deps[dependencies->ndeps - 1] = d;
430 * we're done with variations of k elements, so free the
431 * DependencyGenerator
433 DependencyGenerator_free(DependencyGenerator);
436 MemoryContextDelete(cxt);
438 return dependencies;
443 * Serialize list of dependencies into a bytea value.
445 bytea *
446 statext_dependencies_serialize(MVDependencies *dependencies)
448 int i;
449 bytea *output;
450 char *tmp;
451 Size len;
453 /* we need to store ndeps, with a number of attributes for each one */
454 len = VARHDRSZ + SizeOfHeader;
456 /* and also include space for the actual attribute numbers and degrees */
457 for (i = 0; i < dependencies->ndeps; i++)
458 len += SizeOfItem(dependencies->deps[i]->nattributes);
460 output = (bytea *) palloc0(len);
461 SET_VARSIZE(output, len);
463 tmp = VARDATA(output);
465 /* Store the base struct values (magic, type, ndeps) */
466 memcpy(tmp, &dependencies->magic, sizeof(uint32));
467 tmp += sizeof(uint32);
468 memcpy(tmp, &dependencies->type, sizeof(uint32));
469 tmp += sizeof(uint32);
470 memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
471 tmp += sizeof(uint32);
473 /* store number of attributes and attribute numbers for each dependency */
474 for (i = 0; i < dependencies->ndeps; i++)
476 MVDependency *d = dependencies->deps[i];
478 memcpy(tmp, &d->degree, sizeof(double));
479 tmp += sizeof(double);
481 memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
482 tmp += sizeof(AttrNumber);
484 memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
485 tmp += sizeof(AttrNumber) * d->nattributes;
487 /* protect against overflow */
488 Assert(tmp <= ((char *) output + len));
491 /* make sure we've produced exactly the right amount of data */
492 Assert(tmp == ((char *) output + len));
494 return output;
498 * Reads serialized dependencies into MVDependencies structure.
500 MVDependencies *
501 statext_dependencies_deserialize(bytea *data)
503 int i;
504 Size min_expected_size;
505 MVDependencies *dependencies;
506 char *tmp;
508 if (data == NULL)
509 return NULL;
511 if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
512 elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
513 VARSIZE_ANY_EXHDR(data), SizeOfHeader);
515 /* read the MVDependencies header */
516 dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
518 /* initialize pointer to the data part (skip the varlena header) */
519 tmp = VARDATA_ANY(data);
521 /* read the header fields and perform basic sanity checks */
522 memcpy(&dependencies->magic, tmp, sizeof(uint32));
523 tmp += sizeof(uint32);
524 memcpy(&dependencies->type, tmp, sizeof(uint32));
525 tmp += sizeof(uint32);
526 memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
527 tmp += sizeof(uint32);
529 if (dependencies->magic != STATS_DEPS_MAGIC)
530 elog(ERROR, "invalid dependency magic %d (expected %d)",
531 dependencies->magic, STATS_DEPS_MAGIC);
533 if (dependencies->type != STATS_DEPS_TYPE_BASIC)
534 elog(ERROR, "invalid dependency type %d (expected %d)",
535 dependencies->type, STATS_DEPS_TYPE_BASIC);
537 if (dependencies->ndeps == 0)
538 elog(ERROR, "invalid zero-length item array in MVDependencies");
540 /* what minimum bytea size do we expect for those parameters */
541 min_expected_size = SizeOfItem(dependencies->ndeps);
543 if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
544 elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
545 VARSIZE_ANY_EXHDR(data), min_expected_size);
547 /* allocate space for the MCV items */
548 dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
549 + (dependencies->ndeps * sizeof(MVDependency *)));
551 for (i = 0; i < dependencies->ndeps; i++)
553 double degree;
554 AttrNumber k;
555 MVDependency *d;
557 /* degree of validity */
558 memcpy(&degree, tmp, sizeof(double));
559 tmp += sizeof(double);
561 /* number of attributes */
562 memcpy(&k, tmp, sizeof(AttrNumber));
563 tmp += sizeof(AttrNumber);
565 /* is the number of attributes valid? */
566 Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
568 /* now that we know the number of attributes, allocate the dependency */
569 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
570 + (k * sizeof(AttrNumber)));
572 d->degree = degree;
573 d->nattributes = k;
575 /* copy attribute numbers */
576 memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
577 tmp += sizeof(AttrNumber) * d->nattributes;
579 dependencies->deps[i] = d;
581 /* still within the bytea */
582 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
585 /* we should have consumed the whole bytea exactly */
586 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
588 return dependencies;
592 * dependency_is_fully_matched
593 * checks that a functional dependency is fully matched given clauses on
594 * attributes (assuming the clauses are suitable equality clauses)
596 static bool
597 dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
599 int j;
602 * Check that the dependency actually is fully covered by clauses. We have
603 * to translate all attribute numbers, as those are referenced
605 for (j = 0; j < dependency->nattributes; j++)
607 int attnum = dependency->attributes[j];
609 if (!bms_is_member(attnum, attnums))
610 return false;
613 return true;
617 * statext_dependencies_load
618 * Load the functional dependencies for the indicated pg_statistic_ext tuple
620 MVDependencies *
621 statext_dependencies_load(Oid mvoid, bool inh)
623 MVDependencies *result;
624 bool isnull;
625 Datum deps;
626 HeapTuple htup;
628 htup = SearchSysCache2(STATEXTDATASTXOID,
629 ObjectIdGetDatum(mvoid),
630 BoolGetDatum(inh));
631 if (!HeapTupleIsValid(htup))
632 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
634 deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
635 Anum_pg_statistic_ext_data_stxddependencies, &isnull);
636 if (isnull)
637 elog(ERROR,
638 "requested statistics kind \"%c\" is not yet built for statistics object %u",
639 STATS_EXT_DEPENDENCIES, mvoid);
641 result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
643 ReleaseSysCache(htup);
645 return result;
649 * pg_dependencies_in - input routine for type pg_dependencies.
651 * pg_dependencies is real enough to be a table column, but it has no operations
652 * of its own, and disallows input too
654 Datum
655 pg_dependencies_in(PG_FUNCTION_ARGS)
658 * pg_node_list stores the data in binary form and parsing text input is
659 * not needed, so disallow this.
661 ereport(ERROR,
662 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
663 errmsg("cannot accept a value of type %s", "pg_dependencies")));
665 PG_RETURN_VOID(); /* keep compiler quiet */
669 * pg_dependencies - output routine for type pg_dependencies.
671 Datum
672 pg_dependencies_out(PG_FUNCTION_ARGS)
674 bytea *data = PG_GETARG_BYTEA_PP(0);
675 MVDependencies *dependencies = statext_dependencies_deserialize(data);
676 int i,
678 StringInfoData str;
680 initStringInfo(&str);
681 appendStringInfoChar(&str, '{');
683 for (i = 0; i < dependencies->ndeps; i++)
685 MVDependency *dependency = dependencies->deps[i];
687 if (i > 0)
688 appendStringInfoString(&str, ", ");
690 appendStringInfoChar(&str, '"');
691 for (j = 0; j < dependency->nattributes; j++)
693 if (j == dependency->nattributes - 1)
694 appendStringInfoString(&str, " => ");
695 else if (j > 0)
696 appendStringInfoString(&str, ", ");
698 appendStringInfo(&str, "%d", dependency->attributes[j]);
700 appendStringInfo(&str, "\": %f", dependency->degree);
703 appendStringInfoChar(&str, '}');
705 PG_RETURN_CSTRING(str.data);
709 * pg_dependencies_recv - binary input routine for type pg_dependencies.
711 Datum
712 pg_dependencies_recv(PG_FUNCTION_ARGS)
714 ereport(ERROR,
715 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
716 errmsg("cannot accept a value of type %s", "pg_dependencies")));
718 PG_RETURN_VOID(); /* keep compiler quiet */
722 * pg_dependencies_send - binary output routine for type pg_dependencies.
724 * Functional dependencies are serialized in a bytea value (although the type
725 * is named differently), so let's just send that.
727 Datum
728 pg_dependencies_send(PG_FUNCTION_ARGS)
730 return byteasend(fcinfo);
734 * dependency_is_compatible_clause
735 * Determines if the clause is compatible with functional dependencies
737 * Only clauses that have the form of equality to a pseudoconstant, or can be
738 * interpreted that way, are currently accepted. Furthermore the variable
739 * part of the clause must be a simple Var belonging to the specified
740 * relation, whose attribute number we return in *attnum on success.
742 static bool
743 dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
745 Var *var;
746 Node *clause_expr;
748 if (IsA(clause, RestrictInfo))
750 RestrictInfo *rinfo = (RestrictInfo *) clause;
752 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
753 if (rinfo->pseudoconstant)
754 return false;
756 /* Clauses referencing multiple, or no, varnos are incompatible */
757 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
758 return false;
760 clause = (Node *) rinfo->clause;
763 if (is_opclause(clause))
765 /* If it's an opclause, check for Var = Const or Const = Var. */
766 OpExpr *expr = (OpExpr *) clause;
768 /* Only expressions with two arguments are candidates. */
769 if (list_length(expr->args) != 2)
770 return false;
772 /* Make sure non-selected argument is a pseudoconstant. */
773 if (is_pseudo_constant_clause(lsecond(expr->args)))
774 clause_expr = linitial(expr->args);
775 else if (is_pseudo_constant_clause(linitial(expr->args)))
776 clause_expr = lsecond(expr->args);
777 else
778 return false;
781 * If it's not an "=" operator, just ignore the clause, as it's not
782 * compatible with functional dependencies.
784 * This uses the function for estimating selectivity, not the operator
785 * directly (a bit awkward, but well ...).
787 * XXX this is pretty dubious; probably it'd be better to check btree
788 * or hash opclass membership, so as not to be fooled by custom
789 * selectivity functions, and to be more consistent with decisions
790 * elsewhere in the planner.
792 if (get_oprrest(expr->opno) != F_EQSEL)
793 return false;
795 /* OK to proceed with checking "var" */
797 else if (IsA(clause, ScalarArrayOpExpr))
799 /* If it's an scalar array operator, check for Var IN Const. */
800 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
803 * Reject ALL() variant, we only care about ANY/IN.
805 * XXX Maybe we should check if all the values are the same, and allow
806 * ALL in that case? Doesn't seem very practical, though.
808 if (!expr->useOr)
809 return false;
811 /* Only expressions with two arguments are candidates. */
812 if (list_length(expr->args) != 2)
813 return false;
816 * We know it's always (Var IN Const), so we assume the var is the
817 * first argument, and pseudoconstant is the second one.
819 if (!is_pseudo_constant_clause(lsecond(expr->args)))
820 return false;
822 clause_expr = linitial(expr->args);
825 * If it's not an "=" operator, just ignore the clause, as it's not
826 * compatible with functional dependencies. The operator is identified
827 * simply by looking at which function it uses to estimate
828 * selectivity. That's a bit strange, but it's what other similar
829 * places do.
831 if (get_oprrest(expr->opno) != F_EQSEL)
832 return false;
834 /* OK to proceed with checking "var" */
836 else if (is_orclause(clause))
838 BoolExpr *bool_expr = (BoolExpr *) clause;
839 ListCell *lc;
841 /* start with no attribute number */
842 *attnum = InvalidAttrNumber;
844 foreach(lc, bool_expr->args)
846 AttrNumber clause_attnum;
849 * Had we found incompatible clause in the arguments, treat the
850 * whole clause as incompatible.
852 if (!dependency_is_compatible_clause((Node *) lfirst(lc),
853 relid, &clause_attnum))
854 return false;
856 if (*attnum == InvalidAttrNumber)
857 *attnum = clause_attnum;
859 /* ensure all the variables are the same (same attnum) */
860 if (*attnum != clause_attnum)
861 return false;
864 /* the Var is already checked by the recursive call */
865 return true;
867 else if (is_notclause(clause))
870 * "NOT x" can be interpreted as "x = false", so get the argument and
871 * proceed with seeing if it's a suitable Var.
873 clause_expr = (Node *) get_notclausearg(clause);
875 else
878 * A boolean expression "x" can be interpreted as "x = true", so
879 * proceed with seeing if it's a suitable Var.
881 clause_expr = (Node *) clause;
885 * We may ignore any RelabelType node above the operand. (There won't be
886 * more than one, since eval_const_expressions has been applied already.)
888 if (IsA(clause_expr, RelabelType))
889 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
891 /* We only support plain Vars for now */
892 if (!IsA(clause_expr, Var))
893 return false;
895 /* OK, we know we have a Var */
896 var = (Var *) clause_expr;
898 /* Ensure Var is from the correct relation */
899 if (var->varno != relid)
900 return false;
902 /* We also better ensure the Var is from the current level */
903 if (var->varlevelsup != 0)
904 return false;
906 /* Also ignore system attributes (we don't allow stats on those) */
907 if (!AttrNumberIsForUserDefinedAttr(var->varattno))
908 return false;
910 *attnum = var->varattno;
911 return true;
915 * find_strongest_dependency
916 * find the strongest dependency on the attributes
918 * When applying functional dependencies, we start with the strongest
919 * dependencies. That is, we select the dependency that:
921 * (a) has all attributes covered by equality clauses
923 * (b) has the most attributes
925 * (c) has the highest degree of validity
927 * This guarantees that we eliminate the most redundant conditions first
928 * (see the comment in dependencies_clauselist_selectivity).
930 static MVDependency *
931 find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
932 Bitmapset *attnums)
934 int i,
936 MVDependency *strongest = NULL;
938 /* number of attnums in clauses */
939 int nattnums = bms_num_members(attnums);
942 * Iterate over the MVDependency items and find the strongest one from the
943 * fully-matched dependencies. We do the cheap checks first, before
944 * matching it against the attnums.
946 for (i = 0; i < ndependencies; i++)
948 for (j = 0; j < dependencies[i]->ndeps; j++)
950 MVDependency *dependency = dependencies[i]->deps[j];
953 * Skip dependencies referencing more attributes than available
954 * clauses, as those can't be fully matched.
956 if (dependency->nattributes > nattnums)
957 continue;
959 if (strongest)
961 /* skip dependencies on fewer attributes than the strongest. */
962 if (dependency->nattributes < strongest->nattributes)
963 continue;
965 /* also skip weaker dependencies when attribute count matches */
966 if (strongest->nattributes == dependency->nattributes &&
967 strongest->degree > dependency->degree)
968 continue;
972 * this dependency is stronger, but we must still check that it's
973 * fully matched to these attnums. We perform this check last as
974 * it's slightly more expensive than the previous checks.
976 if (dependency_is_fully_matched(dependency, attnums))
977 strongest = dependency; /* save new best match */
981 return strongest;
985 * clauselist_apply_dependencies
986 * Apply the specified functional dependencies to a list of clauses and
987 * return the estimated selectivity of the clauses that are compatible
988 * with any of the given dependencies.
990 * This will estimate all not-already-estimated clauses that are compatible
991 * with functional dependencies, and which have an attribute mentioned by any
992 * of the given dependencies (either as an implying or implied attribute).
994 * Given (lists of) clauses on attributes (a,b) and a functional dependency
995 * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
996 * using the formula
998 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1000 * where 'f' is the degree of dependency. This reflects the fact that we
1001 * expect a fraction f of all rows to be consistent with the dependency
1002 * (a=>b), and so have a selectivity of P(a), while the remaining rows are
1003 * treated as independent.
1005 * In practice, we use a slightly modified version of this formula, which uses
1006 * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
1007 * should obviously not exceed either column's individual selectivity. I.e.,
1008 * we actually combine selectivities using the formula
1010 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1012 * This can make quite a difference if the specific values matching the
1013 * clauses are not consistent with the functional dependency.
1015 static Selectivity
1016 clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
1017 int varRelid, JoinType jointype,
1018 SpecialJoinInfo *sjinfo,
1019 MVDependency **dependencies, int ndependencies,
1020 AttrNumber *list_attnums,
1021 Bitmapset **estimatedclauses)
1023 Bitmapset *attnums;
1024 int i;
1025 int j;
1026 int nattrs;
1027 Selectivity *attr_sel;
1028 int attidx;
1029 int listidx;
1030 ListCell *l;
1031 Selectivity s1;
1034 * Extract the attnums of all implying and implied attributes from all the
1035 * given dependencies. Each of these attributes is expected to have at
1036 * least 1 not-already-estimated compatible clause that we will estimate
1037 * here.
1039 attnums = NULL;
1040 for (i = 0; i < ndependencies; i++)
1042 for (j = 0; j < dependencies[i]->nattributes; j++)
1044 AttrNumber attnum = dependencies[i]->attributes[j];
1046 attnums = bms_add_member(attnums, attnum);
1051 * Compute per-column selectivity estimates for each of these attributes,
1052 * and mark all the corresponding clauses as estimated.
1054 nattrs = bms_num_members(attnums);
1055 attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
1057 attidx = 0;
1058 i = -1;
1059 while ((i = bms_next_member(attnums, i)) >= 0)
1061 List *attr_clauses = NIL;
1062 Selectivity simple_sel;
1064 listidx = -1;
1065 foreach(l, clauses)
1067 Node *clause = (Node *) lfirst(l);
1069 listidx++;
1070 if (list_attnums[listidx] == i)
1072 attr_clauses = lappend(attr_clauses, clause);
1073 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1077 simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
1078 jointype, sjinfo, false);
1079 attr_sel[attidx++] = simple_sel;
1083 * Now combine these selectivities using the dependency information. For
1084 * chains of dependencies such as a -> b -> c, the b -> c dependency will
1085 * come before the a -> b dependency in the array, so we traverse the
1086 * array backwards to ensure such chains are computed in the right order.
1088 * As explained above, pairs of selectivities are combined using the
1089 * formula
1091 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1093 * to ensure that the combined selectivity is never greater than either
1094 * individual selectivity.
1096 * Where multiple dependencies apply (e.g., a -> b -> c), we use
1097 * conditional probabilities to compute the overall result as follows:
1099 * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1101 * so we replace the selectivities of all implied attributes with
1102 * conditional probabilities, that are conditional on all their implying
1103 * attributes. The selectivities of all other non-implied attributes are
1104 * left as they are.
1106 for (i = ndependencies - 1; i >= 0; i--)
1108 MVDependency *dependency = dependencies[i];
1109 AttrNumber attnum;
1110 Selectivity s2;
1111 double f;
1113 /* Selectivity of all the implying attributes */
1114 s1 = 1.0;
1115 for (j = 0; j < dependency->nattributes - 1; j++)
1117 attnum = dependency->attributes[j];
1118 attidx = bms_member_index(attnums, attnum);
1119 s1 *= attr_sel[attidx];
1122 /* Original selectivity of the implied attribute */
1123 attnum = dependency->attributes[j];
1124 attidx = bms_member_index(attnums, attnum);
1125 s2 = attr_sel[attidx];
1128 * Replace s2 with the conditional probability s2 given s1, computed
1129 * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1131 * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1133 * where P(a) = s1, the selectivity of the implying attributes, and
1134 * P(b) = s2, the selectivity of the implied attribute.
1136 f = dependency->degree;
1138 if (s1 <= s2)
1139 attr_sel[attidx] = f + (1 - f) * s2;
1140 else
1141 attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1145 * The overall selectivity of all the clauses on all these attributes is
1146 * then the product of all the original (non-implied) probabilities and
1147 * the new conditional (implied) probabilities.
1149 s1 = 1.0;
1150 for (i = 0; i < nattrs; i++)
1151 s1 *= attr_sel[i];
1153 CLAMP_PROBABILITY(s1);
1155 pfree(attr_sel);
1156 bms_free(attnums);
1158 return s1;
1162 * dependency_is_compatible_expression
1163 * Determines if the expression is compatible with functional dependencies
1165 * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1166 * expression is a simple Var. OTOH we check that there's at least one
1167 * statistics object matching the expression.
1169 static bool
1170 dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1172 List *vars;
1173 ListCell *lc,
1174 *lc2;
1175 Node *clause_expr;
1177 if (IsA(clause, RestrictInfo))
1179 RestrictInfo *rinfo = (RestrictInfo *) clause;
1181 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1182 if (rinfo->pseudoconstant)
1183 return false;
1185 /* Clauses referencing multiple, or no, varnos are incompatible */
1186 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1187 return false;
1189 clause = (Node *) rinfo->clause;
1192 if (is_opclause(clause))
1194 /* If it's an opclause, check for Var = Const or Const = Var. */
1195 OpExpr *expr = (OpExpr *) clause;
1197 /* Only expressions with two arguments are candidates. */
1198 if (list_length(expr->args) != 2)
1199 return false;
1201 /* Make sure non-selected argument is a pseudoconstant. */
1202 if (is_pseudo_constant_clause(lsecond(expr->args)))
1203 clause_expr = linitial(expr->args);
1204 else if (is_pseudo_constant_clause(linitial(expr->args)))
1205 clause_expr = lsecond(expr->args);
1206 else
1207 return false;
1210 * If it's not an "=" operator, just ignore the clause, as it's not
1211 * compatible with functional dependencies.
1213 * This uses the function for estimating selectivity, not the operator
1214 * directly (a bit awkward, but well ...).
1216 * XXX this is pretty dubious; probably it'd be better to check btree
1217 * or hash opclass membership, so as not to be fooled by custom
1218 * selectivity functions, and to be more consistent with decisions
1219 * elsewhere in the planner.
1221 if (get_oprrest(expr->opno) != F_EQSEL)
1222 return false;
1224 /* OK to proceed with checking "var" */
1226 else if (IsA(clause, ScalarArrayOpExpr))
1228 /* If it's an scalar array operator, check for Var IN Const. */
1229 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1232 * Reject ALL() variant, we only care about ANY/IN.
1234 * FIXME Maybe we should check if all the values are the same, and
1235 * allow ALL in that case? Doesn't seem very practical, though.
1237 if (!expr->useOr)
1238 return false;
1240 /* Only expressions with two arguments are candidates. */
1241 if (list_length(expr->args) != 2)
1242 return false;
1245 * We know it's always (Var IN Const), so we assume the var is the
1246 * first argument, and pseudoconstant is the second one.
1248 if (!is_pseudo_constant_clause(lsecond(expr->args)))
1249 return false;
1251 clause_expr = linitial(expr->args);
1254 * If it's not an "=" operator, just ignore the clause, as it's not
1255 * compatible with functional dependencies. The operator is identified
1256 * simply by looking at which function it uses to estimate
1257 * selectivity. That's a bit strange, but it's what other similar
1258 * places do.
1260 if (get_oprrest(expr->opno) != F_EQSEL)
1261 return false;
1263 /* OK to proceed with checking "var" */
1265 else if (is_orclause(clause))
1267 BoolExpr *bool_expr = (BoolExpr *) clause;
1269 /* start with no expression (we'll use the first match) */
1270 *expr = NULL;
1272 foreach(lc, bool_expr->args)
1274 Node *or_expr = NULL;
1277 * Had we found incompatible expression in the arguments, treat
1278 * the whole expression as incompatible.
1280 if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1281 statlist, &or_expr))
1282 return false;
1284 if (*expr == NULL)
1285 *expr = or_expr;
1287 /* ensure all the expressions are the same */
1288 if (!equal(or_expr, *expr))
1289 return false;
1292 /* the expression is already checked by the recursive call */
1293 return true;
1295 else if (is_notclause(clause))
1298 * "NOT x" can be interpreted as "x = false", so get the argument and
1299 * proceed with seeing if it's a suitable Var.
1301 clause_expr = (Node *) get_notclausearg(clause);
1303 else
1306 * A boolean expression "x" can be interpreted as "x = true", so
1307 * proceed with seeing if it's a suitable Var.
1309 clause_expr = (Node *) clause;
1313 * We may ignore any RelabelType node above the operand. (There won't be
1314 * more than one, since eval_const_expressions has been applied already.)
1316 if (IsA(clause_expr, RelabelType))
1317 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1319 vars = pull_var_clause(clause_expr, 0);
1321 foreach(lc, vars)
1323 Var *var = (Var *) lfirst(lc);
1325 /* Ensure Var is from the correct relation */
1326 if (var->varno != relid)
1327 return false;
1329 /* We also better ensure the Var is from the current level */
1330 if (var->varlevelsup != 0)
1331 return false;
1333 /* Also ignore system attributes (we don't allow stats on those) */
1334 if (!AttrNumberIsForUserDefinedAttr(var->varattno))
1335 return false;
1339 * Check if we actually have a matching statistics for the expression.
1341 * XXX Maybe this is an overkill. We'll eliminate the expressions later.
1343 foreach(lc, statlist)
1345 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1347 /* ignore stats without dependencies */
1348 if (info->kind != STATS_EXT_DEPENDENCIES)
1349 continue;
1351 foreach(lc2, info->exprs)
1353 Node *stat_expr = (Node *) lfirst(lc2);
1355 if (equal(clause_expr, stat_expr))
1357 *expr = stat_expr;
1358 return true;
1363 return false;
1367 * dependencies_clauselist_selectivity
1368 * Return the estimated selectivity of (a subset of) the given clauses
1369 * using functional dependency statistics, or 1.0 if no useful functional
1370 * dependency statistic exists.
1372 * 'estimatedclauses' is an input/output argument that gets a bit set
1373 * corresponding to the (zero-based) list index of each clause that is included
1374 * in the estimated selectivity.
1376 * Given equality clauses on attributes (a,b) we find the strongest dependency
1377 * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1378 * dependency, we then combine the per-clause selectivities using the formula
1380 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1382 * where 'f' is the degree of the dependency. (Actually we use a slightly
1383 * modified version of this formula -- see clauselist_apply_dependencies()).
1385 * With clauses on more than two attributes, the dependencies are applied
1386 * recursively, starting with the widest/strongest dependencies. For example
1387 * P(a,b,c) is first split like this:
1389 * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1391 * assuming (a,b=>c) is the strongest dependency.
1393 Selectivity
1394 dependencies_clauselist_selectivity(PlannerInfo *root,
1395 List *clauses,
1396 int varRelid,
1397 JoinType jointype,
1398 SpecialJoinInfo *sjinfo,
1399 RelOptInfo *rel,
1400 Bitmapset **estimatedclauses)
1402 Selectivity s1 = 1.0;
1403 ListCell *l;
1404 Bitmapset *clauses_attnums = NULL;
1405 AttrNumber *list_attnums;
1406 int listidx;
1407 MVDependencies **func_dependencies;
1408 int nfunc_dependencies;
1409 int total_ndeps;
1410 MVDependency **dependencies;
1411 int ndependencies;
1412 int i;
1413 AttrNumber attnum_offset;
1414 RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1416 /* unique expressions */
1417 Node **unique_exprs;
1418 int unique_exprs_cnt;
1420 /* check if there's any stats that might be useful for us. */
1421 if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1422 return 1.0;
1424 list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
1425 list_length(clauses));
1428 * We allocate space as if every clause was a unique expression, although
1429 * that's probably overkill. Some will be simple column references that
1430 * we'll translate to attnums, and there might be duplicates. But it's
1431 * easier and cheaper to just do one allocation than repalloc later.
1433 unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
1434 unique_exprs_cnt = 0;
1437 * Pre-process the clauses list to extract the attnums seen in each item.
1438 * We need to determine if there's any clauses which will be useful for
1439 * dependency selectivity estimations. Along the way we'll record all of
1440 * the attnums for each clause in a list which we'll reference later so we
1441 * don't need to repeat the same work again. We'll also keep track of all
1442 * attnums seen.
1444 * We also skip clauses that we already estimated using different types of
1445 * statistics (we treat them as incompatible).
1447 * To handle expressions, we assign them negative attnums, as if it was a
1448 * system attribute (this is fine, as we only allow extended stats on user
1449 * attributes). And then we offset everything by the number of
1450 * expressions, so that we can store the values in a bitmapset.
1452 listidx = 0;
1453 foreach(l, clauses)
1455 Node *clause = (Node *) lfirst(l);
1456 AttrNumber attnum;
1457 Node *expr = NULL;
1459 /* ignore clause by default */
1460 list_attnums[listidx] = InvalidAttrNumber;
1462 if (!bms_is_member(listidx, *estimatedclauses))
1465 * If it's a simple column reference, just extract the attnum. If
1466 * it's an expression, assign a negative attnum as if it was a
1467 * system attribute.
1469 if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1471 list_attnums[listidx] = attnum;
1473 else if (dependency_is_compatible_expression(clause, rel->relid,
1474 rel->statlist,
1475 &expr))
1477 /* special attnum assigned to this expression */
1478 attnum = InvalidAttrNumber;
1480 Assert(expr != NULL);
1482 /* If the expression is duplicate, use the same attnum. */
1483 for (i = 0; i < unique_exprs_cnt; i++)
1485 if (equal(unique_exprs[i], expr))
1487 /* negative attribute number to expression */
1488 attnum = -(i + 1);
1489 break;
1493 /* not found in the list, so add it */
1494 if (attnum == InvalidAttrNumber)
1496 unique_exprs[unique_exprs_cnt++] = expr;
1498 /* after incrementing the value, to get -1, -2, ... */
1499 attnum = (-unique_exprs_cnt);
1502 /* remember which attnum was assigned to this clause */
1503 list_attnums[listidx] = attnum;
1507 listidx++;
1510 Assert(listidx == list_length(clauses));
1513 * How much we need to offset the attnums? If there are no expressions,
1514 * then no offset is needed. Otherwise we need to offset enough for the
1515 * lowest value (-unique_exprs_cnt) to become 1.
1517 if (unique_exprs_cnt > 0)
1518 attnum_offset = (unique_exprs_cnt + 1);
1519 else
1520 attnum_offset = 0;
1523 * Now that we know how many expressions there are, we can offset the
1524 * values just enough to build the bitmapset.
1526 for (i = 0; i < list_length(clauses); i++)
1528 AttrNumber attnum;
1530 /* ignore incompatible or already estimated clauses */
1531 if (list_attnums[i] == InvalidAttrNumber)
1532 continue;
1534 /* make sure the attnum is in the expected range */
1535 Assert(list_attnums[i] >= (-unique_exprs_cnt));
1536 Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1538 /* make sure the attnum is positive (valid AttrNumber) */
1539 attnum = list_attnums[i] + attnum_offset;
1542 * Either it's a regular attribute, or it's an expression, in which
1543 * case we must not have seen it before (expressions are unique).
1545 * XXX Check whether it's a regular attribute has to be done using the
1546 * original attnum, while the second check has to use the value with
1547 * an offset.
1549 Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1550 !bms_is_member(attnum, clauses_attnums));
1553 * Remember the offset attnum, both for attributes and expressions.
1554 * We'll pass list_attnums to clauselist_apply_dependencies, which
1555 * uses it to identify clauses in a bitmap. We could also pass the
1556 * offset, but this is more convenient.
1558 list_attnums[i] = attnum;
1560 clauses_attnums = bms_add_member(clauses_attnums, attnum);
1564 * If there's not at least two distinct attnums and expressions, then
1565 * reject the whole list of clauses. We must return 1.0 so the calling
1566 * function's selectivity is unaffected.
1568 if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1570 bms_free(clauses_attnums);
1571 pfree(list_attnums);
1572 return 1.0;
1576 * Load all functional dependencies matching at least two parameters. We
1577 * can simply consider all dependencies at once, without having to search
1578 * for the best statistics object.
1580 * To not waste cycles and memory, we deserialize dependencies only for
1581 * statistics that match at least two attributes. The array is allocated
1582 * with the assumption that all objects match - we could grow the array to
1583 * make it just the right size, but it's likely wasteful anyway thanks to
1584 * moving the freed chunks to freelists etc.
1586 func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1587 list_length(rel->statlist));
1588 nfunc_dependencies = 0;
1589 total_ndeps = 0;
1591 foreach(l, rel->statlist)
1593 StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
1594 int nmatched;
1595 int nexprs;
1596 int k;
1597 MVDependencies *deps;
1599 /* skip statistics that are not of the correct type */
1600 if (stat->kind != STATS_EXT_DEPENDENCIES)
1601 continue;
1603 /* skip statistics with mismatching stxdinherit value */
1604 if (stat->inherit != rte->inh)
1605 continue;
1608 * Count matching attributes - we have to undo the attnum offsets. The
1609 * input attribute numbers are not offset (expressions are not
1610 * included in stat->keys, so it's not necessary). But we need to
1611 * offset it before checking against clauses_attnums.
1613 nmatched = 0;
1614 k = -1;
1615 while ((k = bms_next_member(stat->keys, k)) >= 0)
1617 AttrNumber attnum = (AttrNumber) k;
1619 /* skip expressions */
1620 if (!AttrNumberIsForUserDefinedAttr(attnum))
1621 continue;
1623 /* apply the same offset as above */
1624 attnum += attnum_offset;
1626 if (bms_is_member(attnum, clauses_attnums))
1627 nmatched++;
1630 /* count matching expressions */
1631 nexprs = 0;
1632 for (i = 0; i < unique_exprs_cnt; i++)
1634 ListCell *lc;
1636 foreach(lc, stat->exprs)
1638 Node *stat_expr = (Node *) lfirst(lc);
1640 /* try to match it */
1641 if (equal(stat_expr, unique_exprs[i]))
1642 nexprs++;
1647 * Skip objects matching fewer than two attributes/expressions from
1648 * clauses.
1650 if (nmatched + nexprs < 2)
1651 continue;
1653 deps = statext_dependencies_load(stat->statOid, rte->inh);
1656 * The expressions may be represented by different attnums in the
1657 * stats, we need to remap them to be consistent with the clauses.
1658 * That will make the later steps (e.g. picking the strongest item and
1659 * so on) much simpler and cheaper, because it won't need to care
1660 * about the offset at all.
1662 * When we're at it, we can ignore dependencies that are not fully
1663 * matched by clauses (i.e. referencing attributes or expressions that
1664 * are not in the clauses).
1666 * We have to do this for all statistics, as long as there are any
1667 * expressions - we need to shift the attnums in all dependencies.
1669 * XXX Maybe we should do this always, because it also eliminates some
1670 * of the dependencies early. It might be cheaper than having to walk
1671 * the longer list in find_strongest_dependency later, especially as
1672 * we need to do that repeatedly?
1674 * XXX We have to do this even when there are no expressions in
1675 * clauses, otherwise find_strongest_dependency may fail for stats
1676 * with expressions (due to lookup of negative value in bitmap). So we
1677 * need to at least filter out those dependencies. Maybe we could do
1678 * it in a cheaper way (if there are no expr clauses, we can just
1679 * discard all negative attnums without any lookups).
1681 if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1683 int ndeps = 0;
1685 for (i = 0; i < deps->ndeps; i++)
1687 bool skip = false;
1688 MVDependency *dep = deps->deps[i];
1689 int j;
1691 for (j = 0; j < dep->nattributes; j++)
1693 int idx;
1694 Node *expr;
1695 int k;
1696 AttrNumber unique_attnum = InvalidAttrNumber;
1697 AttrNumber attnum;
1699 /* undo the per-statistics offset */
1700 attnum = dep->attributes[j];
1703 * For regular attributes we can simply check if it
1704 * matches any clause. If there's no matching clause, we
1705 * can just ignore it. We need to offset the attnum
1706 * though.
1708 if (AttrNumberIsForUserDefinedAttr(attnum))
1710 dep->attributes[j] = attnum + attnum_offset;
1712 if (!bms_is_member(dep->attributes[j], clauses_attnums))
1714 skip = true;
1715 break;
1718 continue;
1722 * the attnum should be a valid system attnum (-1, -2,
1723 * ...)
1725 Assert(AttributeNumberIsValid(attnum));
1728 * For expressions, we need to do two translations. First
1729 * we have to translate the negative attnum to index in
1730 * the list of expressions (in the statistics object).
1731 * Then we need to see if there's a matching clause. The
1732 * index of the unique expression determines the attnum
1733 * (and we offset it).
1735 idx = -(1 + attnum);
1737 /* Is the expression index is valid? */
1738 Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1740 expr = (Node *) list_nth(stat->exprs, idx);
1742 /* try to find the expression in the unique list */
1743 for (k = 0; k < unique_exprs_cnt; k++)
1746 * found a matching unique expression, use the attnum
1747 * (derived from index of the unique expression)
1749 if (equal(unique_exprs[k], expr))
1751 unique_attnum = -(k + 1) + attnum_offset;
1752 break;
1757 * Found no matching expression, so we can simply skip
1758 * this dependency, because there's no chance it will be
1759 * fully covered.
1761 if (unique_attnum == InvalidAttrNumber)
1763 skip = true;
1764 break;
1767 /* otherwise remap it to the new attnum */
1768 dep->attributes[j] = unique_attnum;
1771 /* if found a matching dependency, keep it */
1772 if (!skip)
1774 /* maybe we've skipped something earlier, so move it */
1775 if (ndeps != i)
1776 deps->deps[ndeps] = deps->deps[i];
1778 ndeps++;
1782 deps->ndeps = ndeps;
1786 * It's possible we've removed all dependencies, in which case we
1787 * don't bother adding it to the list.
1789 if (deps->ndeps > 0)
1791 func_dependencies[nfunc_dependencies] = deps;
1792 total_ndeps += deps->ndeps;
1793 nfunc_dependencies++;
1797 /* if no matching stats could be found then we've nothing to do */
1798 if (nfunc_dependencies == 0)
1800 pfree(func_dependencies);
1801 bms_free(clauses_attnums);
1802 pfree(list_attnums);
1803 pfree(unique_exprs);
1804 return 1.0;
1808 * Work out which dependencies we can apply, starting with the
1809 * widest/strongest ones, and proceeding to smaller/weaker ones.
1811 dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1812 total_ndeps);
1813 ndependencies = 0;
1815 while (true)
1817 MVDependency *dependency;
1818 AttrNumber attnum;
1820 /* the widest/strongest dependency, fully matched by clauses */
1821 dependency = find_strongest_dependency(func_dependencies,
1822 nfunc_dependencies,
1823 clauses_attnums);
1824 if (!dependency)
1825 break;
1827 dependencies[ndependencies++] = dependency;
1829 /* Ignore dependencies using this implied attribute in later loops */
1830 attnum = dependency->attributes[dependency->nattributes - 1];
1831 clauses_attnums = bms_del_member(clauses_attnums, attnum);
1835 * If we found applicable dependencies, use them to estimate all
1836 * compatible clauses on attributes that they refer to.
1838 if (ndependencies != 0)
1839 s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1840 sjinfo, dependencies, ndependencies,
1841 list_attnums, estimatedclauses);
1843 /* free deserialized functional dependencies (and then the array) */
1844 for (i = 0; i < nfunc_dependencies; i++)
1845 pfree(func_dependencies[i]);
1847 pfree(dependencies);
1848 pfree(func_dependencies);
1849 bms_free(clauses_attnums);
1850 pfree(list_attnums);
1851 pfree(unique_exprs);
1853 return s1;