1 /*-------------------------------------------------------------------------
4 * POSTGRES functional dependencies
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/statistics/dependencies.c
12 *-------------------------------------------------------------------------
16 #include "access/htup_details.h"
17 #include "access/sysattr.h"
18 #include "catalog/pg_operator.h"
19 #include "catalog/pg_statistic_ext.h"
20 #include "catalog/pg_statistic_ext_data.h"
21 #include "lib/stringinfo.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/nodes.h"
24 #include "nodes/pathnodes.h"
25 #include "optimizer/clauses.h"
26 #include "optimizer/optimizer.h"
27 #include "parser/parsetree.h"
28 #include "statistics/extended_stats_internal.h"
29 #include "statistics/statistics.h"
30 #include "utils/bytea.h"
31 #include "utils/fmgroids.h"
32 #include "utils/fmgrprotos.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/selfuncs.h"
36 #include "utils/syscache.h"
37 #include "utils/typcache.h"
39 /* size of the struct header fields (magic, type, ndeps) */
40 #define SizeOfHeader (3 * sizeof(uint32))
42 /* size of a serialized dependency (degree, natts, atts) */
43 #define SizeOfItem(natts) \
44 (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
46 /* minimal size of a dependency (with two attributes) */
47 #define MinSizeOfItem SizeOfItem(2)
49 /* minimal size of dependencies, when all deps are minimal */
50 #define MinSizeOfItems(ndeps) \
51 (SizeOfHeader + (ndeps) * MinSizeOfItem)
54 * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
55 * k-permutations of n elements, except that the order does not matter for the
56 * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
58 typedef struct DependencyGeneratorData
60 int k
; /* size of the dependency */
61 int n
; /* number of possible attributes */
62 int current
; /* next dependency to return (index) */
63 AttrNumber ndependencies
; /* number of dependencies generated */
64 AttrNumber
*dependencies
; /* array of pre-generated dependencies */
65 } DependencyGeneratorData
;
67 typedef DependencyGeneratorData
*DependencyGenerator
;
69 static void generate_dependencies_recurse(DependencyGenerator state
,
70 int index
, AttrNumber start
, AttrNumber
*current
);
71 static void generate_dependencies(DependencyGenerator state
);
72 static DependencyGenerator
DependencyGenerator_init(int n
, int k
);
73 static void DependencyGenerator_free(DependencyGenerator state
);
74 static AttrNumber
*DependencyGenerator_next(DependencyGenerator state
);
75 static double dependency_degree(StatsBuildData
*data
, int k
, AttrNumber
*dependency
);
76 static bool dependency_is_fully_matched(MVDependency
*dependency
,
78 static bool dependency_is_compatible_clause(Node
*clause
, Index relid
,
80 static bool dependency_is_compatible_expression(Node
*clause
, Index relid
,
81 List
*statlist
, Node
**expr
);
82 static MVDependency
*find_strongest_dependency(MVDependencies
**dependencies
,
83 int ndependencies
, Bitmapset
*attnums
);
84 static Selectivity
clauselist_apply_dependencies(PlannerInfo
*root
, List
*clauses
,
85 int varRelid
, JoinType jointype
,
86 SpecialJoinInfo
*sjinfo
,
87 MVDependency
**dependencies
,
89 AttrNumber
*list_attnums
,
90 Bitmapset
**estimatedclauses
);
93 generate_dependencies_recurse(DependencyGenerator state
, int index
,
94 AttrNumber start
, AttrNumber
*current
)
97 * The generator handles the first (k-1) elements differently from the
100 if (index
< (state
->k
- 1))
105 * The first (k-1) values have to be in ascending order, which we
106 * generate recursively.
109 for (i
= start
; i
< state
->n
; i
++)
112 generate_dependencies_recurse(state
, (index
+ 1), (i
+ 1), current
);
120 * the last element is the implied value, which does not respect the
121 * ascending order. We just need to check that the value is not in the
122 * first (k-1) elements.
125 for (i
= 0; i
< state
->n
; i
++)
132 for (j
= 0; j
< index
; j
++)
142 * If the value is not found in the first part of the dependency,
147 state
->dependencies
= (AttrNumber
*) repalloc(state
->dependencies
,
148 state
->k
* (state
->ndependencies
+ 1) * sizeof(AttrNumber
));
149 memcpy(&state
->dependencies
[(state
->k
* state
->ndependencies
)],
150 current
, state
->k
* sizeof(AttrNumber
));
151 state
->ndependencies
++;
157 /* generate all dependencies (k-permutations of n elements) */
159 generate_dependencies(DependencyGenerator state
)
161 AttrNumber
*current
= (AttrNumber
*) palloc0(sizeof(AttrNumber
) * state
->k
);
163 generate_dependencies_recurse(state
, 0, 0, current
);
169 * initialize the DependencyGenerator of variations, and prebuild the variations
171 * This pre-builds all the variations. We could also generate them in
172 * DependencyGenerator_next(), but this seems simpler.
174 static DependencyGenerator
175 DependencyGenerator_init(int n
, int k
)
177 DependencyGenerator state
;
179 Assert((n
>= k
) && (k
> 0));
181 /* allocate the DependencyGenerator state */
182 state
= (DependencyGenerator
) palloc0(sizeof(DependencyGeneratorData
));
183 state
->dependencies
= (AttrNumber
*) palloc(k
* sizeof(AttrNumber
));
185 state
->ndependencies
= 0;
190 /* now actually pre-generate all the variations */
191 generate_dependencies(state
);
196 /* free the DependencyGenerator state */
198 DependencyGenerator_free(DependencyGenerator state
)
200 pfree(state
->dependencies
);
204 /* generate next combination */
206 DependencyGenerator_next(DependencyGenerator state
)
208 if (state
->current
== state
->ndependencies
)
211 return &state
->dependencies
[state
->k
* state
->current
++];
216 * validates functional dependency on the data
218 * An actual work horse of detecting functional dependencies. Given a variation
219 * of k attributes, it checks that the first (k-1) are sufficient to determine
223 dependency_degree(StatsBuildData
*data
, int k
, AttrNumber
*dependency
)
227 MultiSortSupport mss
;
229 AttrNumber
*attnums_dep
;
231 /* counters valid within a group */
233 int n_violations
= 0;
235 /* total number of rows supporting (consistent with) the dependency */
236 int n_supporting_rows
= 0;
238 /* Make sure we have at least two input attributes. */
241 /* sort info for all attributes columns */
242 mss
= multi_sort_init(k
);
245 * Translate the array of indexes to regular attnums for the dependency
246 * (we will need this to identify the columns in StatsBuildData).
248 attnums_dep
= (AttrNumber
*) palloc(k
* sizeof(AttrNumber
));
249 for (i
= 0; i
< k
; i
++)
250 attnums_dep
[i
] = data
->attnums
[dependency
[i
]];
253 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
255 * (a) sort the data lexicographically
257 * (b) split the data into groups by first (k-1) columns
259 * (c) for each group count different values in the last column
261 * We use the column data types' default sort operators and collations;
262 * perhaps at some point it'd be worth using column-specific collations?
265 /* prepare the sort function for the dimensions */
266 for (i
= 0; i
< k
; i
++)
268 VacAttrStats
*colstat
= data
->stats
[dependency
[i
]];
269 TypeCacheEntry
*type
;
271 type
= lookup_type_cache(colstat
->attrtypid
, TYPECACHE_LT_OPR
);
272 if (type
->lt_opr
== InvalidOid
) /* shouldn't happen */
273 elog(ERROR
, "cache lookup failed for ordering operator for type %u",
276 /* prepare the sort function for this dimension */
277 multi_sort_add_dimension(mss
, i
, type
->lt_opr
, colstat
->attrcollid
);
281 * build an array of SortItem(s) sorted using the multi-sort support
283 * XXX This relies on all stats entries pointing to the same tuple
284 * descriptor. For now that assumption holds, but it might change in the
285 * future for example if we support statistics on multiple tables.
287 items
= build_sorted_items(data
, &nitems
, mss
, k
, attnums_dep
);
290 * Walk through the sorted array, split it into rows according to the
291 * first (k-1) columns. If there's a single value in the last column, we
292 * count the group as 'supporting' the functional dependency. Otherwise we
293 * count it as contradicting.
296 /* start with the first row forming a group */
299 /* loop 1 beyond the end of the array so that we count the final group */
300 for (i
= 1; i
<= nitems
; i
++)
303 * Check if the group ended, which may be either because we processed
304 * all the items (i==nitems), or because the i-th item is not equal to
308 multi_sort_compare_dims(0, k
- 2, &items
[i
- 1], &items
[i
], mss
) != 0)
311 * If no violations were found in the group then track the rows of
312 * the group as supporting the functional dependency.
314 if (n_violations
== 0)
315 n_supporting_rows
+= group_size
;
317 /* Reset counters for the new group */
322 /* first columns match, but the last one does not (so contradicting) */
323 else if (multi_sort_compare_dim(k
- 1, &items
[i
- 1], &items
[i
], mss
) != 0)
329 /* Compute the 'degree of validity' as (supporting/total). */
330 return (n_supporting_rows
* 1.0 / data
->numrows
);
334 * detects functional dependencies between groups of columns
336 * Generates all possible subsets of columns (variations) and computes
337 * the degree of validity for each one. For example when creating statistics
338 * on three columns (a,b,c) there are 9 possible dependencies
340 * two columns three columns
341 * ----------- -------------
342 * (a) -> b (a,b) -> c
343 * (a) -> c (a,c) -> b
344 * (b) -> a (b,c) -> a
350 statext_dependencies_build(StatsBuildData
*data
)
356 MVDependencies
*dependencies
= NULL
;
359 Assert(data
->nattnums
>= 2);
361 /* tracks memory allocated by dependency_degree calls */
362 cxt
= AllocSetContextCreate(CurrentMemoryContext
,
363 "dependency_degree cxt",
364 ALLOCSET_DEFAULT_SIZES
);
367 * We'll try build functional dependencies starting from the smallest ones
368 * covering just 2 columns, to the largest ones, covering all columns
369 * included in the statistics object. We start from the smallest ones
370 * because we want to be able to skip already implied ones.
372 for (k
= 2; k
<= data
->nattnums
; k
++)
374 AttrNumber
*dependency
; /* array with k elements */
376 /* prepare a DependencyGenerator of variation */
377 DependencyGenerator DependencyGenerator
= DependencyGenerator_init(data
->nattnums
, k
);
379 /* generate all possible variations of k values (out of n) */
380 while ((dependency
= DependencyGenerator_next(DependencyGenerator
)))
384 MemoryContext oldcxt
;
386 /* release memory used by dependency degree calculation */
387 oldcxt
= MemoryContextSwitchTo(cxt
);
389 /* compute how valid the dependency seems */
390 degree
= dependency_degree(data
, k
, dependency
);
392 MemoryContextSwitchTo(oldcxt
);
393 MemoryContextReset(cxt
);
396 * if the dependency seems entirely invalid, don't store it
401 d
= (MVDependency
*) palloc0(offsetof(MVDependency
, attributes
)
402 + k
* sizeof(AttrNumber
));
404 /* copy the dependency (and keep the indexes into stxkeys) */
407 for (i
= 0; i
< k
; i
++)
408 d
->attributes
[i
] = data
->attnums
[dependency
[i
]];
410 /* initialize the list of dependencies */
411 if (dependencies
== NULL
)
414 = (MVDependencies
*) palloc0(sizeof(MVDependencies
));
416 dependencies
->magic
= STATS_DEPS_MAGIC
;
417 dependencies
->type
= STATS_DEPS_TYPE_BASIC
;
418 dependencies
->ndeps
= 0;
421 dependencies
->ndeps
++;
422 dependencies
= (MVDependencies
*) repalloc(dependencies
,
423 offsetof(MVDependencies
, deps
)
424 + dependencies
->ndeps
* sizeof(MVDependency
*));
426 dependencies
->deps
[dependencies
->ndeps
- 1] = d
;
430 * we're done with variations of k elements, so free the
431 * DependencyGenerator
433 DependencyGenerator_free(DependencyGenerator
);
436 MemoryContextDelete(cxt
);
443 * Serialize list of dependencies into a bytea value.
446 statext_dependencies_serialize(MVDependencies
*dependencies
)
453 /* we need to store ndeps, with a number of attributes for each one */
454 len
= VARHDRSZ
+ SizeOfHeader
;
456 /* and also include space for the actual attribute numbers and degrees */
457 for (i
= 0; i
< dependencies
->ndeps
; i
++)
458 len
+= SizeOfItem(dependencies
->deps
[i
]->nattributes
);
460 output
= (bytea
*) palloc0(len
);
461 SET_VARSIZE(output
, len
);
463 tmp
= VARDATA(output
);
465 /* Store the base struct values (magic, type, ndeps) */
466 memcpy(tmp
, &dependencies
->magic
, sizeof(uint32
));
467 tmp
+= sizeof(uint32
);
468 memcpy(tmp
, &dependencies
->type
, sizeof(uint32
));
469 tmp
+= sizeof(uint32
);
470 memcpy(tmp
, &dependencies
->ndeps
, sizeof(uint32
));
471 tmp
+= sizeof(uint32
);
473 /* store number of attributes and attribute numbers for each dependency */
474 for (i
= 0; i
< dependencies
->ndeps
; i
++)
476 MVDependency
*d
= dependencies
->deps
[i
];
478 memcpy(tmp
, &d
->degree
, sizeof(double));
479 tmp
+= sizeof(double);
481 memcpy(tmp
, &d
->nattributes
, sizeof(AttrNumber
));
482 tmp
+= sizeof(AttrNumber
);
484 memcpy(tmp
, d
->attributes
, sizeof(AttrNumber
) * d
->nattributes
);
485 tmp
+= sizeof(AttrNumber
) * d
->nattributes
;
487 /* protect against overflow */
488 Assert(tmp
<= ((char *) output
+ len
));
491 /* make sure we've produced exactly the right amount of data */
492 Assert(tmp
== ((char *) output
+ len
));
498 * Reads serialized dependencies into MVDependencies structure.
501 statext_dependencies_deserialize(bytea
*data
)
504 Size min_expected_size
;
505 MVDependencies
*dependencies
;
511 if (VARSIZE_ANY_EXHDR(data
) < SizeOfHeader
)
512 elog(ERROR
, "invalid MVDependencies size %zd (expected at least %zd)",
513 VARSIZE_ANY_EXHDR(data
), SizeOfHeader
);
515 /* read the MVDependencies header */
516 dependencies
= (MVDependencies
*) palloc0(sizeof(MVDependencies
));
518 /* initialize pointer to the data part (skip the varlena header) */
519 tmp
= VARDATA_ANY(data
);
521 /* read the header fields and perform basic sanity checks */
522 memcpy(&dependencies
->magic
, tmp
, sizeof(uint32
));
523 tmp
+= sizeof(uint32
);
524 memcpy(&dependencies
->type
, tmp
, sizeof(uint32
));
525 tmp
+= sizeof(uint32
);
526 memcpy(&dependencies
->ndeps
, tmp
, sizeof(uint32
));
527 tmp
+= sizeof(uint32
);
529 if (dependencies
->magic
!= STATS_DEPS_MAGIC
)
530 elog(ERROR
, "invalid dependency magic %d (expected %d)",
531 dependencies
->magic
, STATS_DEPS_MAGIC
);
533 if (dependencies
->type
!= STATS_DEPS_TYPE_BASIC
)
534 elog(ERROR
, "invalid dependency type %d (expected %d)",
535 dependencies
->type
, STATS_DEPS_TYPE_BASIC
);
537 if (dependencies
->ndeps
== 0)
538 elog(ERROR
, "invalid zero-length item array in MVDependencies");
540 /* what minimum bytea size do we expect for those parameters */
541 min_expected_size
= SizeOfItem(dependencies
->ndeps
);
543 if (VARSIZE_ANY_EXHDR(data
) < min_expected_size
)
544 elog(ERROR
, "invalid dependencies size %zd (expected at least %zd)",
545 VARSIZE_ANY_EXHDR(data
), min_expected_size
);
547 /* allocate space for the MCV items */
548 dependencies
= repalloc(dependencies
, offsetof(MVDependencies
, deps
)
549 + (dependencies
->ndeps
* sizeof(MVDependency
*)));
551 for (i
= 0; i
< dependencies
->ndeps
; i
++)
557 /* degree of validity */
558 memcpy(°ree
, tmp
, sizeof(double));
559 tmp
+= sizeof(double);
561 /* number of attributes */
562 memcpy(&k
, tmp
, sizeof(AttrNumber
));
563 tmp
+= sizeof(AttrNumber
);
565 /* is the number of attributes valid? */
566 Assert((k
>= 2) && (k
<= STATS_MAX_DIMENSIONS
));
568 /* now that we know the number of attributes, allocate the dependency */
569 d
= (MVDependency
*) palloc0(offsetof(MVDependency
, attributes
)
570 + (k
* sizeof(AttrNumber
)));
575 /* copy attribute numbers */
576 memcpy(d
->attributes
, tmp
, sizeof(AttrNumber
) * d
->nattributes
);
577 tmp
+= sizeof(AttrNumber
) * d
->nattributes
;
579 dependencies
->deps
[i
] = d
;
581 /* still within the bytea */
582 Assert(tmp
<= ((char *) data
+ VARSIZE_ANY(data
)));
585 /* we should have consumed the whole bytea exactly */
586 Assert(tmp
== ((char *) data
+ VARSIZE_ANY(data
)));
592 * dependency_is_fully_matched
593 * checks that a functional dependency is fully matched given clauses on
594 * attributes (assuming the clauses are suitable equality clauses)
597 dependency_is_fully_matched(MVDependency
*dependency
, Bitmapset
*attnums
)
602 * Check that the dependency actually is fully covered by clauses. We have
603 * to translate all attribute numbers, as those are referenced
605 for (j
= 0; j
< dependency
->nattributes
; j
++)
607 int attnum
= dependency
->attributes
[j
];
609 if (!bms_is_member(attnum
, attnums
))
617 * statext_dependencies_load
618 * Load the functional dependencies for the indicated pg_statistic_ext tuple
621 statext_dependencies_load(Oid mvoid
, bool inh
)
623 MVDependencies
*result
;
628 htup
= SearchSysCache2(STATEXTDATASTXOID
,
629 ObjectIdGetDatum(mvoid
),
631 if (!HeapTupleIsValid(htup
))
632 elog(ERROR
, "cache lookup failed for statistics object %u", mvoid
);
634 deps
= SysCacheGetAttr(STATEXTDATASTXOID
, htup
,
635 Anum_pg_statistic_ext_data_stxddependencies
, &isnull
);
638 "requested statistics kind \"%c\" is not yet built for statistics object %u",
639 STATS_EXT_DEPENDENCIES
, mvoid
);
641 result
= statext_dependencies_deserialize(DatumGetByteaPP(deps
));
643 ReleaseSysCache(htup
);
649 * pg_dependencies_in - input routine for type pg_dependencies.
651 * pg_dependencies is real enough to be a table column, but it has no operations
652 * of its own, and disallows input too
655 pg_dependencies_in(PG_FUNCTION_ARGS
)
658 * pg_node_list stores the data in binary form and parsing text input is
659 * not needed, so disallow this.
662 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
663 errmsg("cannot accept a value of type %s", "pg_dependencies")));
665 PG_RETURN_VOID(); /* keep compiler quiet */
669 * pg_dependencies - output routine for type pg_dependencies.
672 pg_dependencies_out(PG_FUNCTION_ARGS
)
674 bytea
*data
= PG_GETARG_BYTEA_PP(0);
675 MVDependencies
*dependencies
= statext_dependencies_deserialize(data
);
680 initStringInfo(&str
);
681 appendStringInfoChar(&str
, '{');
683 for (i
= 0; i
< dependencies
->ndeps
; i
++)
685 MVDependency
*dependency
= dependencies
->deps
[i
];
688 appendStringInfoString(&str
, ", ");
690 appendStringInfoChar(&str
, '"');
691 for (j
= 0; j
< dependency
->nattributes
; j
++)
693 if (j
== dependency
->nattributes
- 1)
694 appendStringInfoString(&str
, " => ");
696 appendStringInfoString(&str
, ", ");
698 appendStringInfo(&str
, "%d", dependency
->attributes
[j
]);
700 appendStringInfo(&str
, "\": %f", dependency
->degree
);
703 appendStringInfoChar(&str
, '}');
705 PG_RETURN_CSTRING(str
.data
);
709 * pg_dependencies_recv - binary input routine for type pg_dependencies.
712 pg_dependencies_recv(PG_FUNCTION_ARGS
)
715 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
716 errmsg("cannot accept a value of type %s", "pg_dependencies")));
718 PG_RETURN_VOID(); /* keep compiler quiet */
722 * pg_dependencies_send - binary output routine for type pg_dependencies.
724 * Functional dependencies are serialized in a bytea value (although the type
725 * is named differently), so let's just send that.
728 pg_dependencies_send(PG_FUNCTION_ARGS
)
730 return byteasend(fcinfo
);
734 * dependency_is_compatible_clause
735 * Determines if the clause is compatible with functional dependencies
737 * Only clauses that have the form of equality to a pseudoconstant, or can be
738 * interpreted that way, are currently accepted. Furthermore the variable
739 * part of the clause must be a simple Var belonging to the specified
740 * relation, whose attribute number we return in *attnum on success.
743 dependency_is_compatible_clause(Node
*clause
, Index relid
, AttrNumber
*attnum
)
748 if (IsA(clause
, RestrictInfo
))
750 RestrictInfo
*rinfo
= (RestrictInfo
*) clause
;
752 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
753 if (rinfo
->pseudoconstant
)
756 /* Clauses referencing multiple, or no, varnos are incompatible */
757 if (bms_membership(rinfo
->clause_relids
) != BMS_SINGLETON
)
760 clause
= (Node
*) rinfo
->clause
;
763 if (is_opclause(clause
))
765 /* If it's an opclause, check for Var = Const or Const = Var. */
766 OpExpr
*expr
= (OpExpr
*) clause
;
768 /* Only expressions with two arguments are candidates. */
769 if (list_length(expr
->args
) != 2)
772 /* Make sure non-selected argument is a pseudoconstant. */
773 if (is_pseudo_constant_clause(lsecond(expr
->args
)))
774 clause_expr
= linitial(expr
->args
);
775 else if (is_pseudo_constant_clause(linitial(expr
->args
)))
776 clause_expr
= lsecond(expr
->args
);
781 * If it's not an "=" operator, just ignore the clause, as it's not
782 * compatible with functional dependencies.
784 * This uses the function for estimating selectivity, not the operator
785 * directly (a bit awkward, but well ...).
787 * XXX this is pretty dubious; probably it'd be better to check btree
788 * or hash opclass membership, so as not to be fooled by custom
789 * selectivity functions, and to be more consistent with decisions
790 * elsewhere in the planner.
792 if (get_oprrest(expr
->opno
) != F_EQSEL
)
795 /* OK to proceed with checking "var" */
797 else if (IsA(clause
, ScalarArrayOpExpr
))
799 /* If it's an scalar array operator, check for Var IN Const. */
800 ScalarArrayOpExpr
*expr
= (ScalarArrayOpExpr
*) clause
;
803 * Reject ALL() variant, we only care about ANY/IN.
805 * XXX Maybe we should check if all the values are the same, and allow
806 * ALL in that case? Doesn't seem very practical, though.
811 /* Only expressions with two arguments are candidates. */
812 if (list_length(expr
->args
) != 2)
816 * We know it's always (Var IN Const), so we assume the var is the
817 * first argument, and pseudoconstant is the second one.
819 if (!is_pseudo_constant_clause(lsecond(expr
->args
)))
822 clause_expr
= linitial(expr
->args
);
825 * If it's not an "=" operator, just ignore the clause, as it's not
826 * compatible with functional dependencies. The operator is identified
827 * simply by looking at which function it uses to estimate
828 * selectivity. That's a bit strange, but it's what other similar
831 if (get_oprrest(expr
->opno
) != F_EQSEL
)
834 /* OK to proceed with checking "var" */
836 else if (is_orclause(clause
))
838 BoolExpr
*bool_expr
= (BoolExpr
*) clause
;
841 /* start with no attribute number */
842 *attnum
= InvalidAttrNumber
;
844 foreach(lc
, bool_expr
->args
)
846 AttrNumber clause_attnum
;
849 * Had we found incompatible clause in the arguments, treat the
850 * whole clause as incompatible.
852 if (!dependency_is_compatible_clause((Node
*) lfirst(lc
),
853 relid
, &clause_attnum
))
856 if (*attnum
== InvalidAttrNumber
)
857 *attnum
= clause_attnum
;
859 /* ensure all the variables are the same (same attnum) */
860 if (*attnum
!= clause_attnum
)
864 /* the Var is already checked by the recursive call */
867 else if (is_notclause(clause
))
870 * "NOT x" can be interpreted as "x = false", so get the argument and
871 * proceed with seeing if it's a suitable Var.
873 clause_expr
= (Node
*) get_notclausearg(clause
);
878 * A boolean expression "x" can be interpreted as "x = true", so
879 * proceed with seeing if it's a suitable Var.
881 clause_expr
= (Node
*) clause
;
885 * We may ignore any RelabelType node above the operand. (There won't be
886 * more than one, since eval_const_expressions has been applied already.)
888 if (IsA(clause_expr
, RelabelType
))
889 clause_expr
= (Node
*) ((RelabelType
*) clause_expr
)->arg
;
891 /* We only support plain Vars for now */
892 if (!IsA(clause_expr
, Var
))
895 /* OK, we know we have a Var */
896 var
= (Var
*) clause_expr
;
898 /* Ensure Var is from the correct relation */
899 if (var
->varno
!= relid
)
902 /* We also better ensure the Var is from the current level */
903 if (var
->varlevelsup
!= 0)
906 /* Also ignore system attributes (we don't allow stats on those) */
907 if (!AttrNumberIsForUserDefinedAttr(var
->varattno
))
910 *attnum
= var
->varattno
;
915 * find_strongest_dependency
916 * find the strongest dependency on the attributes
918 * When applying functional dependencies, we start with the strongest
919 * dependencies. That is, we select the dependency that:
921 * (a) has all attributes covered by equality clauses
923 * (b) has the most attributes
925 * (c) has the highest degree of validity
927 * This guarantees that we eliminate the most redundant conditions first
928 * (see the comment in dependencies_clauselist_selectivity).
930 static MVDependency
*
931 find_strongest_dependency(MVDependencies
**dependencies
, int ndependencies
,
936 MVDependency
*strongest
= NULL
;
938 /* number of attnums in clauses */
939 int nattnums
= bms_num_members(attnums
);
942 * Iterate over the MVDependency items and find the strongest one from the
943 * fully-matched dependencies. We do the cheap checks first, before
944 * matching it against the attnums.
946 for (i
= 0; i
< ndependencies
; i
++)
948 for (j
= 0; j
< dependencies
[i
]->ndeps
; j
++)
950 MVDependency
*dependency
= dependencies
[i
]->deps
[j
];
953 * Skip dependencies referencing more attributes than available
954 * clauses, as those can't be fully matched.
956 if (dependency
->nattributes
> nattnums
)
961 /* skip dependencies on fewer attributes than the strongest. */
962 if (dependency
->nattributes
< strongest
->nattributes
)
965 /* also skip weaker dependencies when attribute count matches */
966 if (strongest
->nattributes
== dependency
->nattributes
&&
967 strongest
->degree
> dependency
->degree
)
972 * this dependency is stronger, but we must still check that it's
973 * fully matched to these attnums. We perform this check last as
974 * it's slightly more expensive than the previous checks.
976 if (dependency_is_fully_matched(dependency
, attnums
))
977 strongest
= dependency
; /* save new best match */
985 * clauselist_apply_dependencies
986 * Apply the specified functional dependencies to a list of clauses and
987 * return the estimated selectivity of the clauses that are compatible
988 * with any of the given dependencies.
990 * This will estimate all not-already-estimated clauses that are compatible
991 * with functional dependencies, and which have an attribute mentioned by any
992 * of the given dependencies (either as an implying or implied attribute).
994 * Given (lists of) clauses on attributes (a,b) and a functional dependency
995 * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
998 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1000 * where 'f' is the degree of dependency. This reflects the fact that we
1001 * expect a fraction f of all rows to be consistent with the dependency
1002 * (a=>b), and so have a selectivity of P(a), while the remaining rows are
1003 * treated as independent.
1005 * In practice, we use a slightly modified version of this formula, which uses
1006 * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
1007 * should obviously not exceed either column's individual selectivity. I.e.,
1008 * we actually combine selectivities using the formula
1010 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1012 * This can make quite a difference if the specific values matching the
1013 * clauses are not consistent with the functional dependency.
1016 clauselist_apply_dependencies(PlannerInfo
*root
, List
*clauses
,
1017 int varRelid
, JoinType jointype
,
1018 SpecialJoinInfo
*sjinfo
,
1019 MVDependency
**dependencies
, int ndependencies
,
1020 AttrNumber
*list_attnums
,
1021 Bitmapset
**estimatedclauses
)
1027 Selectivity
*attr_sel
;
1034 * Extract the attnums of all implying and implied attributes from all the
1035 * given dependencies. Each of these attributes is expected to have at
1036 * least 1 not-already-estimated compatible clause that we will estimate
1040 for (i
= 0; i
< ndependencies
; i
++)
1042 for (j
= 0; j
< dependencies
[i
]->nattributes
; j
++)
1044 AttrNumber attnum
= dependencies
[i
]->attributes
[j
];
1046 attnums
= bms_add_member(attnums
, attnum
);
1051 * Compute per-column selectivity estimates for each of these attributes,
1052 * and mark all the corresponding clauses as estimated.
1054 nattrs
= bms_num_members(attnums
);
1055 attr_sel
= (Selectivity
*) palloc(sizeof(Selectivity
) * nattrs
);
1059 while ((i
= bms_next_member(attnums
, i
)) >= 0)
1061 List
*attr_clauses
= NIL
;
1062 Selectivity simple_sel
;
1067 Node
*clause
= (Node
*) lfirst(l
);
1070 if (list_attnums
[listidx
] == i
)
1072 attr_clauses
= lappend(attr_clauses
, clause
);
1073 *estimatedclauses
= bms_add_member(*estimatedclauses
, listidx
);
1077 simple_sel
= clauselist_selectivity_ext(root
, attr_clauses
, varRelid
,
1078 jointype
, sjinfo
, false);
1079 attr_sel
[attidx
++] = simple_sel
;
1083 * Now combine these selectivities using the dependency information. For
1084 * chains of dependencies such as a -> b -> c, the b -> c dependency will
1085 * come before the a -> b dependency in the array, so we traverse the
1086 * array backwards to ensure such chains are computed in the right order.
1088 * As explained above, pairs of selectivities are combined using the
1091 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1093 * to ensure that the combined selectivity is never greater than either
1094 * individual selectivity.
1096 * Where multiple dependencies apply (e.g., a -> b -> c), we use
1097 * conditional probabilities to compute the overall result as follows:
1099 * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1101 * so we replace the selectivities of all implied attributes with
1102 * conditional probabilities, that are conditional on all their implying
1103 * attributes. The selectivities of all other non-implied attributes are
1106 for (i
= ndependencies
- 1; i
>= 0; i
--)
1108 MVDependency
*dependency
= dependencies
[i
];
1113 /* Selectivity of all the implying attributes */
1115 for (j
= 0; j
< dependency
->nattributes
- 1; j
++)
1117 attnum
= dependency
->attributes
[j
];
1118 attidx
= bms_member_index(attnums
, attnum
);
1119 s1
*= attr_sel
[attidx
];
1122 /* Original selectivity of the implied attribute */
1123 attnum
= dependency
->attributes
[j
];
1124 attidx
= bms_member_index(attnums
, attnum
);
1125 s2
= attr_sel
[attidx
];
1128 * Replace s2 with the conditional probability s2 given s1, computed
1129 * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1131 * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1133 * where P(a) = s1, the selectivity of the implying attributes, and
1134 * P(b) = s2, the selectivity of the implied attribute.
1136 f
= dependency
->degree
;
1139 attr_sel
[attidx
] = f
+ (1 - f
) * s2
;
1141 attr_sel
[attidx
] = f
* s2
/ s1
+ (1 - f
) * s2
;
1145 * The overall selectivity of all the clauses on all these attributes is
1146 * then the product of all the original (non-implied) probabilities and
1147 * the new conditional (implied) probabilities.
1150 for (i
= 0; i
< nattrs
; i
++)
1153 CLAMP_PROBABILITY(s1
);
1162 * dependency_is_compatible_expression
1163 * Determines if the expression is compatible with functional dependencies
1165 * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1166 * expression is a simple Var. OTOH we check that there's at least one
1167 * statistics object matching the expression.
1170 dependency_is_compatible_expression(Node
*clause
, Index relid
, List
*statlist
, Node
**expr
)
1177 if (IsA(clause
, RestrictInfo
))
1179 RestrictInfo
*rinfo
= (RestrictInfo
*) clause
;
1181 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1182 if (rinfo
->pseudoconstant
)
1185 /* Clauses referencing multiple, or no, varnos are incompatible */
1186 if (bms_membership(rinfo
->clause_relids
) != BMS_SINGLETON
)
1189 clause
= (Node
*) rinfo
->clause
;
1192 if (is_opclause(clause
))
1194 /* If it's an opclause, check for Var = Const or Const = Var. */
1195 OpExpr
*expr
= (OpExpr
*) clause
;
1197 /* Only expressions with two arguments are candidates. */
1198 if (list_length(expr
->args
) != 2)
1201 /* Make sure non-selected argument is a pseudoconstant. */
1202 if (is_pseudo_constant_clause(lsecond(expr
->args
)))
1203 clause_expr
= linitial(expr
->args
);
1204 else if (is_pseudo_constant_clause(linitial(expr
->args
)))
1205 clause_expr
= lsecond(expr
->args
);
1210 * If it's not an "=" operator, just ignore the clause, as it's not
1211 * compatible with functional dependencies.
1213 * This uses the function for estimating selectivity, not the operator
1214 * directly (a bit awkward, but well ...).
1216 * XXX this is pretty dubious; probably it'd be better to check btree
1217 * or hash opclass membership, so as not to be fooled by custom
1218 * selectivity functions, and to be more consistent with decisions
1219 * elsewhere in the planner.
1221 if (get_oprrest(expr
->opno
) != F_EQSEL
)
1224 /* OK to proceed with checking "var" */
1226 else if (IsA(clause
, ScalarArrayOpExpr
))
1228 /* If it's an scalar array operator, check for Var IN Const. */
1229 ScalarArrayOpExpr
*expr
= (ScalarArrayOpExpr
*) clause
;
1232 * Reject ALL() variant, we only care about ANY/IN.
1234 * FIXME Maybe we should check if all the values are the same, and
1235 * allow ALL in that case? Doesn't seem very practical, though.
1240 /* Only expressions with two arguments are candidates. */
1241 if (list_length(expr
->args
) != 2)
1245 * We know it's always (Var IN Const), so we assume the var is the
1246 * first argument, and pseudoconstant is the second one.
1248 if (!is_pseudo_constant_clause(lsecond(expr
->args
)))
1251 clause_expr
= linitial(expr
->args
);
1254 * If it's not an "=" operator, just ignore the clause, as it's not
1255 * compatible with functional dependencies. The operator is identified
1256 * simply by looking at which function it uses to estimate
1257 * selectivity. That's a bit strange, but it's what other similar
1260 if (get_oprrest(expr
->opno
) != F_EQSEL
)
1263 /* OK to proceed with checking "var" */
1265 else if (is_orclause(clause
))
1267 BoolExpr
*bool_expr
= (BoolExpr
*) clause
;
1269 /* start with no expression (we'll use the first match) */
1272 foreach(lc
, bool_expr
->args
)
1274 Node
*or_expr
= NULL
;
1277 * Had we found incompatible expression in the arguments, treat
1278 * the whole expression as incompatible.
1280 if (!dependency_is_compatible_expression((Node
*) lfirst(lc
), relid
,
1281 statlist
, &or_expr
))
1287 /* ensure all the expressions are the same */
1288 if (!equal(or_expr
, *expr
))
1292 /* the expression is already checked by the recursive call */
1295 else if (is_notclause(clause
))
1298 * "NOT x" can be interpreted as "x = false", so get the argument and
1299 * proceed with seeing if it's a suitable Var.
1301 clause_expr
= (Node
*) get_notclausearg(clause
);
1306 * A boolean expression "x" can be interpreted as "x = true", so
1307 * proceed with seeing if it's a suitable Var.
1309 clause_expr
= (Node
*) clause
;
1313 * We may ignore any RelabelType node above the operand. (There won't be
1314 * more than one, since eval_const_expressions has been applied already.)
1316 if (IsA(clause_expr
, RelabelType
))
1317 clause_expr
= (Node
*) ((RelabelType
*) clause_expr
)->arg
;
1319 vars
= pull_var_clause(clause_expr
, 0);
1323 Var
*var
= (Var
*) lfirst(lc
);
1325 /* Ensure Var is from the correct relation */
1326 if (var
->varno
!= relid
)
1329 /* We also better ensure the Var is from the current level */
1330 if (var
->varlevelsup
!= 0)
1333 /* Also ignore system attributes (we don't allow stats on those) */
1334 if (!AttrNumberIsForUserDefinedAttr(var
->varattno
))
1339 * Check if we actually have a matching statistics for the expression.
1341 * XXX Maybe this is an overkill. We'll eliminate the expressions later.
1343 foreach(lc
, statlist
)
1345 StatisticExtInfo
*info
= (StatisticExtInfo
*) lfirst(lc
);
1347 /* ignore stats without dependencies */
1348 if (info
->kind
!= STATS_EXT_DEPENDENCIES
)
1351 foreach(lc2
, info
->exprs
)
1353 Node
*stat_expr
= (Node
*) lfirst(lc2
);
1355 if (equal(clause_expr
, stat_expr
))
1367 * dependencies_clauselist_selectivity
1368 * Return the estimated selectivity of (a subset of) the given clauses
1369 * using functional dependency statistics, or 1.0 if no useful functional
1370 * dependency statistic exists.
1372 * 'estimatedclauses' is an input/output argument that gets a bit set
1373 * corresponding to the (zero-based) list index of each clause that is included
1374 * in the estimated selectivity.
1376 * Given equality clauses on attributes (a,b) we find the strongest dependency
1377 * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1378 * dependency, we then combine the per-clause selectivities using the formula
1380 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1382 * where 'f' is the degree of the dependency. (Actually we use a slightly
1383 * modified version of this formula -- see clauselist_apply_dependencies()).
1385 * With clauses on more than two attributes, the dependencies are applied
1386 * recursively, starting with the widest/strongest dependencies. For example
1387 * P(a,b,c) is first split like this:
1389 * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1391 * assuming (a,b=>c) is the strongest dependency.
1394 dependencies_clauselist_selectivity(PlannerInfo
*root
,
1398 SpecialJoinInfo
*sjinfo
,
1400 Bitmapset
**estimatedclauses
)
1402 Selectivity s1
= 1.0;
1404 Bitmapset
*clauses_attnums
= NULL
;
1405 AttrNumber
*list_attnums
;
1407 MVDependencies
**func_dependencies
;
1408 int nfunc_dependencies
;
1410 MVDependency
**dependencies
;
1413 AttrNumber attnum_offset
;
1414 RangeTblEntry
*rte
= planner_rt_fetch(rel
->relid
, root
);
1416 /* unique expressions */
1417 Node
**unique_exprs
;
1418 int unique_exprs_cnt
;
1420 /* check if there's any stats that might be useful for us. */
1421 if (!has_stats_of_kind(rel
->statlist
, STATS_EXT_DEPENDENCIES
))
1424 list_attnums
= (AttrNumber
*) palloc(sizeof(AttrNumber
) *
1425 list_length(clauses
));
1428 * We allocate space as if every clause was a unique expression, although
1429 * that's probably overkill. Some will be simple column references that
1430 * we'll translate to attnums, and there might be duplicates. But it's
1431 * easier and cheaper to just do one allocation than repalloc later.
1433 unique_exprs
= (Node
**) palloc(sizeof(Node
*) * list_length(clauses
));
1434 unique_exprs_cnt
= 0;
1437 * Pre-process the clauses list to extract the attnums seen in each item.
1438 * We need to determine if there's any clauses which will be useful for
1439 * dependency selectivity estimations. Along the way we'll record all of
1440 * the attnums for each clause in a list which we'll reference later so we
1441 * don't need to repeat the same work again. We'll also keep track of all
1444 * We also skip clauses that we already estimated using different types of
1445 * statistics (we treat them as incompatible).
1447 * To handle expressions, we assign them negative attnums, as if it was a
1448 * system attribute (this is fine, as we only allow extended stats on user
1449 * attributes). And then we offset everything by the number of
1450 * expressions, so that we can store the values in a bitmapset.
1455 Node
*clause
= (Node
*) lfirst(l
);
1459 /* ignore clause by default */
1460 list_attnums
[listidx
] = InvalidAttrNumber
;
1462 if (!bms_is_member(listidx
, *estimatedclauses
))
1465 * If it's a simple column reference, just extract the attnum. If
1466 * it's an expression, assign a negative attnum as if it was a
1469 if (dependency_is_compatible_clause(clause
, rel
->relid
, &attnum
))
1471 list_attnums
[listidx
] = attnum
;
1473 else if (dependency_is_compatible_expression(clause
, rel
->relid
,
1477 /* special attnum assigned to this expression */
1478 attnum
= InvalidAttrNumber
;
1480 Assert(expr
!= NULL
);
1482 /* If the expression is duplicate, use the same attnum. */
1483 for (i
= 0; i
< unique_exprs_cnt
; i
++)
1485 if (equal(unique_exprs
[i
], expr
))
1487 /* negative attribute number to expression */
1493 /* not found in the list, so add it */
1494 if (attnum
== InvalidAttrNumber
)
1496 unique_exprs
[unique_exprs_cnt
++] = expr
;
1498 /* after incrementing the value, to get -1, -2, ... */
1499 attnum
= (-unique_exprs_cnt
);
1502 /* remember which attnum was assigned to this clause */
1503 list_attnums
[listidx
] = attnum
;
1510 Assert(listidx
== list_length(clauses
));
1513 * How much we need to offset the attnums? If there are no expressions,
1514 * then no offset is needed. Otherwise we need to offset enough for the
1515 * lowest value (-unique_exprs_cnt) to become 1.
1517 if (unique_exprs_cnt
> 0)
1518 attnum_offset
= (unique_exprs_cnt
+ 1);
1523 * Now that we know how many expressions there are, we can offset the
1524 * values just enough to build the bitmapset.
1526 for (i
= 0; i
< list_length(clauses
); i
++)
1530 /* ignore incompatible or already estimated clauses */
1531 if (list_attnums
[i
] == InvalidAttrNumber
)
1534 /* make sure the attnum is in the expected range */
1535 Assert(list_attnums
[i
] >= (-unique_exprs_cnt
));
1536 Assert(list_attnums
[i
] <= MaxHeapAttributeNumber
);
1538 /* make sure the attnum is positive (valid AttrNumber) */
1539 attnum
= list_attnums
[i
] + attnum_offset
;
1542 * Either it's a regular attribute, or it's an expression, in which
1543 * case we must not have seen it before (expressions are unique).
1545 * XXX Check whether it's a regular attribute has to be done using the
1546 * original attnum, while the second check has to use the value with
1549 Assert(AttrNumberIsForUserDefinedAttr(list_attnums
[i
]) ||
1550 !bms_is_member(attnum
, clauses_attnums
));
1553 * Remember the offset attnum, both for attributes and expressions.
1554 * We'll pass list_attnums to clauselist_apply_dependencies, which
1555 * uses it to identify clauses in a bitmap. We could also pass the
1556 * offset, but this is more convenient.
1558 list_attnums
[i
] = attnum
;
1560 clauses_attnums
= bms_add_member(clauses_attnums
, attnum
);
1564 * If there's not at least two distinct attnums and expressions, then
1565 * reject the whole list of clauses. We must return 1.0 so the calling
1566 * function's selectivity is unaffected.
1568 if (bms_membership(clauses_attnums
) != BMS_MULTIPLE
)
1570 bms_free(clauses_attnums
);
1571 pfree(list_attnums
);
1576 * Load all functional dependencies matching at least two parameters. We
1577 * can simply consider all dependencies at once, without having to search
1578 * for the best statistics object.
1580 * To not waste cycles and memory, we deserialize dependencies only for
1581 * statistics that match at least two attributes. The array is allocated
1582 * with the assumption that all objects match - we could grow the array to
1583 * make it just the right size, but it's likely wasteful anyway thanks to
1584 * moving the freed chunks to freelists etc.
1586 func_dependencies
= (MVDependencies
**) palloc(sizeof(MVDependencies
*) *
1587 list_length(rel
->statlist
));
1588 nfunc_dependencies
= 0;
1591 foreach(l
, rel
->statlist
)
1593 StatisticExtInfo
*stat
= (StatisticExtInfo
*) lfirst(l
);
1597 MVDependencies
*deps
;
1599 /* skip statistics that are not of the correct type */
1600 if (stat
->kind
!= STATS_EXT_DEPENDENCIES
)
1603 /* skip statistics with mismatching stxdinherit value */
1604 if (stat
->inherit
!= rte
->inh
)
1608 * Count matching attributes - we have to undo the attnum offsets. The
1609 * input attribute numbers are not offset (expressions are not
1610 * included in stat->keys, so it's not necessary). But we need to
1611 * offset it before checking against clauses_attnums.
1615 while ((k
= bms_next_member(stat
->keys
, k
)) >= 0)
1617 AttrNumber attnum
= (AttrNumber
) k
;
1619 /* skip expressions */
1620 if (!AttrNumberIsForUserDefinedAttr(attnum
))
1623 /* apply the same offset as above */
1624 attnum
+= attnum_offset
;
1626 if (bms_is_member(attnum
, clauses_attnums
))
1630 /* count matching expressions */
1632 for (i
= 0; i
< unique_exprs_cnt
; i
++)
1636 foreach(lc
, stat
->exprs
)
1638 Node
*stat_expr
= (Node
*) lfirst(lc
);
1640 /* try to match it */
1641 if (equal(stat_expr
, unique_exprs
[i
]))
1647 * Skip objects matching fewer than two attributes/expressions from
1650 if (nmatched
+ nexprs
< 2)
1653 deps
= statext_dependencies_load(stat
->statOid
, rte
->inh
);
1656 * The expressions may be represented by different attnums in the
1657 * stats, we need to remap them to be consistent with the clauses.
1658 * That will make the later steps (e.g. picking the strongest item and
1659 * so on) much simpler and cheaper, because it won't need to care
1660 * about the offset at all.
1662 * When we're at it, we can ignore dependencies that are not fully
1663 * matched by clauses (i.e. referencing attributes or expressions that
1664 * are not in the clauses).
1666 * We have to do this for all statistics, as long as there are any
1667 * expressions - we need to shift the attnums in all dependencies.
1669 * XXX Maybe we should do this always, because it also eliminates some
1670 * of the dependencies early. It might be cheaper than having to walk
1671 * the longer list in find_strongest_dependency later, especially as
1672 * we need to do that repeatedly?
1674 * XXX We have to do this even when there are no expressions in
1675 * clauses, otherwise find_strongest_dependency may fail for stats
1676 * with expressions (due to lookup of negative value in bitmap). So we
1677 * need to at least filter out those dependencies. Maybe we could do
1678 * it in a cheaper way (if there are no expr clauses, we can just
1679 * discard all negative attnums without any lookups).
1681 if (unique_exprs_cnt
> 0 || stat
->exprs
!= NIL
)
1685 for (i
= 0; i
< deps
->ndeps
; i
++)
1688 MVDependency
*dep
= deps
->deps
[i
];
1691 for (j
= 0; j
< dep
->nattributes
; j
++)
1696 AttrNumber unique_attnum
= InvalidAttrNumber
;
1699 /* undo the per-statistics offset */
1700 attnum
= dep
->attributes
[j
];
1703 * For regular attributes we can simply check if it
1704 * matches any clause. If there's no matching clause, we
1705 * can just ignore it. We need to offset the attnum
1708 if (AttrNumberIsForUserDefinedAttr(attnum
))
1710 dep
->attributes
[j
] = attnum
+ attnum_offset
;
1712 if (!bms_is_member(dep
->attributes
[j
], clauses_attnums
))
1722 * the attnum should be a valid system attnum (-1, -2,
1725 Assert(AttributeNumberIsValid(attnum
));
1728 * For expressions, we need to do two translations. First
1729 * we have to translate the negative attnum to index in
1730 * the list of expressions (in the statistics object).
1731 * Then we need to see if there's a matching clause. The
1732 * index of the unique expression determines the attnum
1733 * (and we offset it).
1735 idx
= -(1 + attnum
);
1737 /* Is the expression index is valid? */
1738 Assert((idx
>= 0) && (idx
< list_length(stat
->exprs
)));
1740 expr
= (Node
*) list_nth(stat
->exprs
, idx
);
1742 /* try to find the expression in the unique list */
1743 for (k
= 0; k
< unique_exprs_cnt
; k
++)
1746 * found a matching unique expression, use the attnum
1747 * (derived from index of the unique expression)
1749 if (equal(unique_exprs
[k
], expr
))
1751 unique_attnum
= -(k
+ 1) + attnum_offset
;
1757 * Found no matching expression, so we can simply skip
1758 * this dependency, because there's no chance it will be
1761 if (unique_attnum
== InvalidAttrNumber
)
1767 /* otherwise remap it to the new attnum */
1768 dep
->attributes
[j
] = unique_attnum
;
1771 /* if found a matching dependency, keep it */
1774 /* maybe we've skipped something earlier, so move it */
1776 deps
->deps
[ndeps
] = deps
->deps
[i
];
1782 deps
->ndeps
= ndeps
;
1786 * It's possible we've removed all dependencies, in which case we
1787 * don't bother adding it to the list.
1789 if (deps
->ndeps
> 0)
1791 func_dependencies
[nfunc_dependencies
] = deps
;
1792 total_ndeps
+= deps
->ndeps
;
1793 nfunc_dependencies
++;
1797 /* if no matching stats could be found then we've nothing to do */
1798 if (nfunc_dependencies
== 0)
1800 pfree(func_dependencies
);
1801 bms_free(clauses_attnums
);
1802 pfree(list_attnums
);
1803 pfree(unique_exprs
);
1808 * Work out which dependencies we can apply, starting with the
1809 * widest/strongest ones, and proceeding to smaller/weaker ones.
1811 dependencies
= (MVDependency
**) palloc(sizeof(MVDependency
*) *
1817 MVDependency
*dependency
;
1820 /* the widest/strongest dependency, fully matched by clauses */
1821 dependency
= find_strongest_dependency(func_dependencies
,
1827 dependencies
[ndependencies
++] = dependency
;
1829 /* Ignore dependencies using this implied attribute in later loops */
1830 attnum
= dependency
->attributes
[dependency
->nattributes
- 1];
1831 clauses_attnums
= bms_del_member(clauses_attnums
, attnum
);
1835 * If we found applicable dependencies, use them to estimate all
1836 * compatible clauses on attributes that they refer to.
1838 if (ndependencies
!= 0)
1839 s1
= clauselist_apply_dependencies(root
, clauses
, varRelid
, jointype
,
1840 sjinfo
, dependencies
, ndependencies
,
1841 list_attnums
, estimatedclauses
);
1843 /* free deserialized functional dependencies (and then the array) */
1844 for (i
= 0; i
< nfunc_dependencies
; i
++)
1845 pfree(func_dependencies
[i
]);
1847 pfree(dependencies
);
1848 pfree(func_dependencies
);
1849 bms_free(clauses_attnums
);
1850 pfree(list_attnums
);
1851 pfree(unique_exprs
);