1 /*-------------------------------------------------------------------------
4 * POSTGRES multivariate MCV lists
7 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/statistics/mcv.c
13 *-------------------------------------------------------------------------
19 #include "access/htup_details.h"
20 #include "catalog/pg_statistic_ext.h"
21 #include "catalog/pg_statistic_ext_data.h"
24 #include "nodes/nodeFuncs.h"
25 #include "statistics/extended_stats_internal.h"
26 #include "statistics/statistics.h"
27 #include "utils/array.h"
28 #include "utils/builtins.h"
29 #include "utils/fmgrprotos.h"
30 #include "utils/lsyscache.h"
31 #include "utils/selfuncs.h"
32 #include "utils/syscache.h"
33 #include "utils/typcache.h"
36 * Computes size of a serialized MCV item, depending on the number of
37 * dimensions (columns) the statistic is defined on. The datum values are
38 * stored in a separate array (deduplicated, to minimize the size), and
39 * so the serialized items only store uint16 indexes into that array.
41 * Each serialized item stores (in this order):
43 * - indexes to values (ndim * sizeof(uint16))
44 * - null flags (ndim * sizeof(bool))
45 * - frequency (sizeof(double))
46 * - base_frequency (sizeof(double))
48 * There is no alignment padding within an MCV item.
49 * So in total each MCV item requires this many bytes:
51 * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
53 #define ITEM_SIZE(ndims) \
54 ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
57 * Used to compute size of serialized MCV list representation.
59 #define MinSizeOfMCVList \
60 (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
63 * Size of the serialized MCV list, excluding the space needed for
64 * deduplicated per-dimension values. The macro is meant to be used
65 * when it's not yet safe to access the serialized info about amount
66 * of data for each column.
68 #define SizeOfMCVList(ndims,nitems) \
69 ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
70 ((ndims) * sizeof(DimensionInfo)) + \
71 ((nitems) * ITEM_SIZE(ndims)))
73 static MultiSortSupport
build_mss(StatsBuildData
*data
);
75 static SortItem
*build_distinct_groups(int numrows
, SortItem
*items
,
76 MultiSortSupport mss
, int *ndistinct
);
78 static SortItem
**build_column_frequencies(SortItem
*groups
, int ngroups
,
79 MultiSortSupport mss
, int *ncounts
);
81 static int count_distinct_groups(int numrows
, SortItem
*items
,
82 MultiSortSupport mss
);
85 * Compute new value for bitmap item, considering whether it's used for
86 * clauses connected by AND/OR.
88 #define RESULT_MERGE(value, is_or, match) \
89 ((is_or) ? ((value) || (match)) : ((value) && (match)))
92 * When processing a list of clauses, the bitmap item may get set to a value
93 * such that additional clauses can't change it. For example, when processing
94 * a list of clauses connected to AND, as soon as the item gets set to 'false'
95 * then it'll remain like that. Similarly clauses connected by OR and 'true'.
97 * Returns true when the value in the bitmap can't change no matter how the
98 * remaining clauses are evaluated.
100 #define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value)))
103 * get_mincount_for_mcv_list
104 * Determine the minimum number of times a value needs to appear in
105 * the sample for it to be included in the MCV list.
107 * We want to keep only values that appear sufficiently often in the
108 * sample that it is reasonable to extrapolate their sample frequencies to
109 * the entire table. We do this by placing an upper bound on the relative
110 * standard error of the sample frequency, so that any estimates the
111 * planner generates from the MCV statistics can be expected to be
112 * reasonably accurate.
114 * Since we are sampling without replacement, the sample frequency of a
115 * particular value is described by a hypergeometric distribution. A
116 * common rule of thumb when estimating errors in this situation is to
117 * require at least 10 instances of the value in the sample, in which case
118 * the distribution can be approximated by a normal distribution, and
119 * standard error analysis techniques can be applied. Given a sample size
120 * of n, a population size of N, and a sample frequency of p=cnt/n, the
121 * standard error of the proportion p is given by
122 * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
123 * where the second term is the finite population correction. To get
124 * reasonably accurate planner estimates, we impose an upper bound on the
125 * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative
126 * error bound is fairly arbitrary, but has been found empirically to work
127 * well. Rearranging this formula gives a lower bound on the number of
128 * instances of the value seen:
129 * cnt > n*(N-n) / (N-n+0.04*n*(N-1))
130 * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
131 * case where n approaches 0 cannot happen in practice, since the sample
132 * size is at least 300. The case where n approaches N corresponds to
133 * sampling the whole table, in which case it is reasonable to keep
134 * the whole MCV list (have no lower bound), so it makes sense to apply
135 * this formula for all inputs, even though the above derivation is
136 * technically only valid when the right hand side is at least around 10.
138 * An alternative way to look at this formula is as follows -- assume that
139 * the number of instances of the value seen scales up to the entire
140 * table, so that the population count is K=N*cnt/n. Then the distribution
141 * in the sample is a hypergeometric distribution parameterised by N, n
142 * and K, and the bound above is mathematically equivalent to demanding
143 * that the standard deviation of that distribution is less than 20% of
144 * its mean. Thus the relative errors in any planner estimates produced
145 * from the MCV statistics are likely to be not too large.
148 get_mincount_for_mcv_list(int samplerows
, double totalrows
)
150 double n
= samplerows
;
151 double N
= totalrows
;
156 denom
= N
- n
+ 0.04 * n
* (N
- 1);
158 /* Guard against division by zero (possible if n = N = 1) */
162 return numer
/ denom
;
166 * Builds MCV list from the set of sampled rows.
168 * The algorithm is quite simple:
170 * (1) sort the data (default collation, '<' for the data type)
172 * (2) count distinct groups, decide how many to keep
174 * (3) build the MCV list using the threshold determined in (2)
176 * (4) remove rows represented by the MCV from the sample
180 statext_mcv_build(StatsBuildData
*data
, double totalrows
, int stattarget
)
190 MCVList
*mcvlist
= NULL
;
191 MultiSortSupport mss
;
193 /* comparator for all the columns */
194 mss
= build_mss(data
);
197 items
= build_sorted_items(data
, &nitems
, mss
,
198 data
->nattnums
, data
->attnums
);
203 /* for convenience */
204 numattrs
= data
->nattnums
;
205 numrows
= data
->numrows
;
207 /* transform the sorted rows into groups (sorted by frequency) */
208 groups
= build_distinct_groups(nitems
, items
, mss
, &ngroups
);
211 * The maximum number of MCV items to store, based on the statistics
212 * target we computed for the statistics object (from the target set for
213 * the object itself, attributes and the system default). In any case, we
214 * can't keep more groups than we have available.
217 if (nitems
> ngroups
)
221 * Decide how many items to keep in the MCV list. We can't use the same
222 * algorithm as per-column MCV lists, because that only considers the
223 * actual group frequency - but we're primarily interested in how the
224 * actual frequency differs from the base frequency (product of simple
225 * per-column frequencies, as if the columns were independent).
227 * Using the same algorithm might exclude items that are close to the
228 * "average" frequency of the sample. But that does not say whether the
229 * observed frequency is close to the base frequency or not. We also need
230 * to consider unexpectedly uncommon items (again, compared to the base
231 * frequency), and the single-column algorithm does not have to.
233 * We simply decide how many items to keep by computing the minimum count
234 * using get_mincount_for_mcv_list() and then keep all items that seem to
235 * be more common than that.
237 mincount
= get_mincount_for_mcv_list(numrows
, totalrows
);
240 * Walk the groups until we find the first group with a count below the
241 * mincount threshold (the index of that group is the number of groups we
244 for (i
= 0; i
< nitems
; i
++)
246 if (groups
[i
].count
< mincount
)
254 * At this point, we know the number of items for the MCV list. There
255 * might be none (for uniform distribution with many groups), and in that
256 * case, there will be no MCV list. Otherwise, construct the MCV list.
262 MultiSortSupport tmp
;
264 /* frequencies for values in each attribute */
268 /* used to search values */
269 tmp
= (MultiSortSupport
) palloc(offsetof(MultiSortSupportData
, ssup
)
270 + sizeof(SortSupportData
));
272 /* compute frequencies for values in each column */
273 nfreqs
= (int *) palloc0(sizeof(int) * numattrs
);
274 freqs
= build_column_frequencies(groups
, ngroups
, mss
, nfreqs
);
277 * Allocate the MCV list structure, set the global parameters.
279 mcvlist
= (MCVList
*) palloc0(offsetof(MCVList
, items
) +
280 sizeof(MCVItem
) * nitems
);
282 mcvlist
->magic
= STATS_MCV_MAGIC
;
283 mcvlist
->type
= STATS_MCV_TYPE_BASIC
;
284 mcvlist
->ndimensions
= numattrs
;
285 mcvlist
->nitems
= nitems
;
287 /* store info about data type OIDs */
288 for (i
= 0; i
< numattrs
; i
++)
289 mcvlist
->types
[i
] = data
->stats
[i
]->attrtypid
;
291 /* Copy the first chunk of groups into the result. */
292 for (i
= 0; i
< nitems
; i
++)
294 /* just point to the proper place in the list */
295 MCVItem
*item
= &mcvlist
->items
[i
];
297 item
->values
= (Datum
*) palloc(sizeof(Datum
) * numattrs
);
298 item
->isnull
= (bool *) palloc(sizeof(bool) * numattrs
);
300 /* copy values for the group */
301 memcpy(item
->values
, groups
[i
].values
, sizeof(Datum
) * numattrs
);
302 memcpy(item
->isnull
, groups
[i
].isnull
, sizeof(bool) * numattrs
);
304 /* groups should be sorted by frequency in descending order */
305 Assert((i
== 0) || (groups
[i
- 1].count
>= groups
[i
].count
));
307 /* group frequency */
308 item
->frequency
= (double) groups
[i
].count
/ numrows
;
310 /* base frequency, if the attributes were independent */
311 item
->base_frequency
= 1.0;
312 for (j
= 0; j
< numattrs
; j
++)
316 /* single dimension */
318 tmp
->ssup
[0] = mss
->ssup
[j
];
320 /* fill search key */
321 key
.values
= &groups
[i
].values
[j
];
322 key
.isnull
= &groups
[i
].isnull
[j
];
324 freq
= (SortItem
*) bsearch_arg(&key
, freqs
[j
], nfreqs
[j
],
326 multi_sort_compare
, tmp
);
328 item
->base_frequency
*= ((double) freq
->count
) / numrows
;
344 * Build a MultiSortSupport for the given StatsBuildData.
346 static MultiSortSupport
347 build_mss(StatsBuildData
*data
)
350 int numattrs
= data
->nattnums
;
352 /* Sort by multiple columns (using array of SortSupport) */
353 MultiSortSupport mss
= multi_sort_init(numattrs
);
355 /* prepare the sort functions for all the attributes */
356 for (i
= 0; i
< numattrs
; i
++)
358 VacAttrStats
*colstat
= data
->stats
[i
];
359 TypeCacheEntry
*type
;
361 type
= lookup_type_cache(colstat
->attrtypid
, TYPECACHE_LT_OPR
);
362 if (type
->lt_opr
== InvalidOid
) /* shouldn't happen */
363 elog(ERROR
, "cache lookup failed for ordering operator for type %u",
366 multi_sort_add_dimension(mss
, i
, type
->lt_opr
, colstat
->attrcollid
);
373 * count_distinct_groups
374 * Count distinct combinations of SortItems in the array.
376 * The array is assumed to be sorted according to the MultiSortSupport.
379 count_distinct_groups(int numrows
, SortItem
*items
, MultiSortSupport mss
)
385 for (i
= 1; i
< numrows
; i
++)
387 /* make sure the array really is sorted */
388 Assert(multi_sort_compare(&items
[i
], &items
[i
- 1], mss
) >= 0);
390 if (multi_sort_compare(&items
[i
], &items
[i
- 1], mss
) != 0)
398 * compare_sort_item_count
399 * Comparator for sorting items by count (frequencies) in descending
403 compare_sort_item_count(const void *a
, const void *b
, void *arg
)
405 SortItem
*ia
= (SortItem
*) a
;
406 SortItem
*ib
= (SortItem
*) b
;
408 if (ia
->count
== ib
->count
)
410 else if (ia
->count
> ib
->count
)
417 * build_distinct_groups
418 * Build an array of SortItems for distinct groups and counts matching
421 * The 'items' array is assumed to be sorted.
424 build_distinct_groups(int numrows
, SortItem
*items
, MultiSortSupport mss
,
429 int ngroups
= count_distinct_groups(numrows
, items
, mss
);
431 SortItem
*groups
= (SortItem
*) palloc(ngroups
* sizeof(SortItem
));
434 groups
[0] = items
[0];
437 for (i
= 1; i
< numrows
; i
++)
439 /* Assume sorted in ascending order. */
440 Assert(multi_sort_compare(&items
[i
], &items
[i
- 1], mss
) >= 0);
442 /* New distinct group detected. */
443 if (multi_sort_compare(&items
[i
], &items
[i
- 1], mss
) != 0)
445 groups
[++j
] = items
[i
];
452 /* ensure we filled the expected number of distinct groups */
453 Assert(j
+ 1 == ngroups
);
455 /* Sort the distinct groups by frequency (in descending order). */
456 qsort_interruptible(groups
, ngroups
, sizeof(SortItem
),
457 compare_sort_item_count
, NULL
);
459 *ndistinct
= ngroups
;
463 /* compare sort items (single dimension) */
465 sort_item_compare(const void *a
, const void *b
, void *arg
)
467 SortSupport ssup
= (SortSupport
) arg
;
468 SortItem
*ia
= (SortItem
*) a
;
469 SortItem
*ib
= (SortItem
*) b
;
471 return ApplySortComparator(ia
->values
[0], ia
->isnull
[0],
472 ib
->values
[0], ib
->isnull
[0],
477 * build_column_frequencies
478 * Compute frequencies of values in each column.
480 * This returns an array of SortItems for each attribute the MCV is built
481 * on, with a frequency (number of occurrences) for each value. This is
482 * then used to compute "base" frequency of MCV items.
484 * All the memory is allocated in a single chunk, so that a single pfree
485 * is enough to release it. We do not allocate space for values/isnull
486 * arrays in the SortItems, because we can simply point into the input
490 build_column_frequencies(SortItem
*groups
, int ngroups
,
491 MultiSortSupport mss
, int *ncounts
)
501 /* allocate arrays for all columns as a single chunk */
502 ptr
= palloc(MAXALIGN(sizeof(SortItem
*) * mss
->ndims
) +
503 mss
->ndims
* MAXALIGN(sizeof(SortItem
) * ngroups
));
505 /* initial array of pointers */
506 result
= (SortItem
**) ptr
;
507 ptr
+= MAXALIGN(sizeof(SortItem
*) * mss
->ndims
);
509 for (dim
= 0; dim
< mss
->ndims
; dim
++)
511 SortSupport ssup
= &mss
->ssup
[dim
];
513 /* array of values for a single column */
514 result
[dim
] = (SortItem
*) ptr
;
515 ptr
+= MAXALIGN(sizeof(SortItem
) * ngroups
);
517 /* extract data for the dimension */
518 for (i
= 0; i
< ngroups
; i
++)
520 /* point into the input groups */
521 result
[dim
][i
].values
= &groups
[i
].values
[dim
];
522 result
[dim
][i
].isnull
= &groups
[i
].isnull
[dim
];
523 result
[dim
][i
].count
= groups
[i
].count
;
526 /* sort the values, deduplicate */
527 qsort_interruptible(result
[dim
], ngroups
, sizeof(SortItem
),
528 sort_item_compare
, ssup
);
531 * Identify distinct values, compute frequency (there might be
532 * multiple MCV items containing this value, so we need to sum counts
536 for (i
= 1; i
< ngroups
; i
++)
538 if (sort_item_compare(&result
[dim
][i
- 1], &result
[dim
][i
], ssup
) == 0)
540 result
[dim
][ncounts
[dim
] - 1].count
+= result
[dim
][i
].count
;
544 result
[dim
][ncounts
[dim
]] = result
[dim
][i
];
555 * Load the MCV list for the indicated pg_statistic_ext_data tuple.
558 statext_mcv_load(Oid mvoid
, bool inh
)
563 HeapTuple htup
= SearchSysCache2(STATEXTDATASTXOID
,
564 ObjectIdGetDatum(mvoid
), BoolGetDatum(inh
));
566 if (!HeapTupleIsValid(htup
))
567 elog(ERROR
, "cache lookup failed for statistics object %u", mvoid
);
569 mcvlist
= SysCacheGetAttr(STATEXTDATASTXOID
, htup
,
570 Anum_pg_statistic_ext_data_stxdmcv
, &isnull
);
574 "requested statistics kind \"%c\" is not yet built for statistics object %u",
575 STATS_EXT_MCV
, mvoid
);
577 result
= statext_mcv_deserialize(DatumGetByteaP(mcvlist
));
579 ReleaseSysCache(htup
);
586 * statext_mcv_serialize
587 * Serialize MCV list into a pg_mcv_list value.
589 * The MCV items may include values of various data types, and it's reasonable
590 * to expect redundancy (values for a given attribute, repeated for multiple
591 * MCV list items). So we deduplicate the values into arrays, and then replace
592 * the values by indexes into those arrays.
594 * The overall structure of the serialized representation looks like this:
596 * +---------------+----------------+---------------------+-------+
597 * | header fields | dimension info | deduplicated values | items |
598 * +---------------+----------------+---------------------+-------+
600 * Where dimension info stores information about the type of the K-th
601 * attribute (e.g. typlen, typbyval and length of deduplicated values).
602 * Deduplicated values store deduplicated values for each attribute. And
603 * items store the actual MCV list items, with values replaced by indexes into
606 * When serializing the items, we use uint16 indexes. The number of MCV items
607 * is limited by the statistics target (which is capped to 10k at the moment).
608 * We might increase this to 65k and still fit into uint16, so there's a bit of
609 * slack. Furthermore, this limit is on the number of distinct values per column,
610 * and we usually have few of those (and various combinations of them for the
611 * those MCV list). So uint16 seems fine for now.
613 * We don't really expect the serialization to save as much space as for
614 * histograms, as we are not doing any bucket splits (which is the source
615 * of high redundancy in histograms).
617 * TODO: Consider packing boolean flags (NULL) for each item into a single char
618 * (or a longer type) instead of using an array of bool items.
621 statext_mcv_serialize(MCVList
*mcvlist
, VacAttrStats
**stats
)
625 int ndims
= mcvlist
->ndimensions
;
632 /* serialized items (indexes into arrays, etc.) */
635 char *endptr PG_USED_FOR_ASSERTS_ONLY
;
637 /* values per dimension (and number of non-NULL values) */
638 Datum
**values
= (Datum
**) palloc0(sizeof(Datum
*) * ndims
);
639 int *counts
= (int *) palloc0(sizeof(int) * ndims
);
642 * We'll include some rudimentary information about the attribute types
643 * (length, by-val flag), so that we don't have to look them up while
644 * deserializing the MCV list (we already have the type OID in the
645 * header). This is safe because when changing the type of the attribute
646 * the statistics gets dropped automatically. We need to store the info
647 * about the arrays of deduplicated values anyway.
649 info
= (DimensionInfo
*) palloc0(sizeof(DimensionInfo
) * ndims
);
651 /* sort support data for all attributes included in the MCV list */
652 ssup
= (SortSupport
) palloc0(sizeof(SortSupportData
) * ndims
);
654 /* collect and deduplicate values for each dimension (attribute) */
655 for (dim
= 0; dim
< ndims
; dim
++)
658 TypeCacheEntry
*typentry
;
661 * Lookup the LT operator (can't get it from stats extra_data, as we
662 * don't know how to interpret that - scalar vs. array etc.).
664 typentry
= lookup_type_cache(stats
[dim
]->attrtypid
, TYPECACHE_LT_OPR
);
666 /* copy important info about the data type (length, by-value) */
667 info
[dim
].typlen
= stats
[dim
]->attrtype
->typlen
;
668 info
[dim
].typbyval
= stats
[dim
]->attrtype
->typbyval
;
670 /* allocate space for values in the attribute and collect them */
671 values
[dim
] = (Datum
*) palloc0(sizeof(Datum
) * mcvlist
->nitems
);
673 for (i
= 0; i
< mcvlist
->nitems
; i
++)
675 /* skip NULL values - we don't need to deduplicate those */
676 if (mcvlist
->items
[i
].isnull
[dim
])
679 /* append the value at the end */
680 values
[dim
][counts
[dim
]] = mcvlist
->items
[i
].values
[dim
];
684 /* if there are just NULL values in this dimension, we're done */
685 if (counts
[dim
] == 0)
688 /* sort and deduplicate the data */
689 ssup
[dim
].ssup_cxt
= CurrentMemoryContext
;
690 ssup
[dim
].ssup_collation
= stats
[dim
]->attrcollid
;
691 ssup
[dim
].ssup_nulls_first
= false;
693 PrepareSortSupportFromOrderingOp(typentry
->lt_opr
, &ssup
[dim
]);
695 qsort_interruptible(values
[dim
], counts
[dim
], sizeof(Datum
),
696 compare_scalars_simple
, &ssup
[dim
]);
699 * Walk through the array and eliminate duplicate values, but keep the
700 * ordering (so that we can do a binary search later). We know there's
701 * at least one item as (counts[dim] != 0), so we can skip the first
704 ndistinct
= 1; /* number of distinct values */
705 for (i
= 1; i
< counts
[dim
]; i
++)
707 /* expect sorted array */
708 Assert(compare_datums_simple(values
[dim
][i
- 1], values
[dim
][i
], &ssup
[dim
]) <= 0);
710 /* if the value is the same as the previous one, we can skip it */
711 if (!compare_datums_simple(values
[dim
][i
- 1], values
[dim
][i
], &ssup
[dim
]))
714 values
[dim
][ndistinct
] = values
[dim
][i
];
718 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
719 Assert(ndistinct
<= PG_UINT16_MAX
);
722 * Store additional info about the attribute - number of deduplicated
723 * values, and also size of the serialized data. For fixed-length data
724 * types this is trivial to compute, for varwidth types we need to
725 * actually walk the array and sum the sizes.
727 info
[dim
].nvalues
= ndistinct
;
729 if (info
[dim
].typbyval
) /* by-value data types */
731 info
[dim
].nbytes
= info
[dim
].nvalues
* info
[dim
].typlen
;
734 * We copy the data into the MCV item during deserialization, so
735 * we don't need to allocate any extra space.
737 info
[dim
].nbytes_aligned
= 0;
739 else if (info
[dim
].typlen
> 0) /* fixed-length by-ref */
742 * We don't care about alignment in the serialized data, so we
743 * pack the data as much as possible. But we also track how much
744 * data will be needed after deserialization, and in that case we
745 * need to account for alignment of each item.
747 * Note: As the items are fixed-length, we could easily compute
748 * this during deserialization, but we do it here anyway.
750 info
[dim
].nbytes
= info
[dim
].nvalues
* info
[dim
].typlen
;
751 info
[dim
].nbytes_aligned
= info
[dim
].nvalues
* MAXALIGN(info
[dim
].typlen
);
753 else if (info
[dim
].typlen
== -1) /* varlena */
755 info
[dim
].nbytes
= 0;
756 info
[dim
].nbytes_aligned
= 0;
757 for (i
= 0; i
< info
[dim
].nvalues
; i
++)
762 * For varlena values, we detoast the values and store the
763 * length and data separately. We don't bother with alignment
764 * here, which means that during deserialization we need to
765 * copy the fields and only access the copies.
767 values
[dim
][i
] = PointerGetDatum(PG_DETOAST_DATUM(values
[dim
][i
]));
769 /* serialized length (uint32 length + data) */
770 len
= VARSIZE_ANY_EXHDR(values
[dim
][i
]);
771 info
[dim
].nbytes
+= sizeof(uint32
); /* length */
772 info
[dim
].nbytes
+= len
; /* value (no header) */
775 * During deserialization we'll build regular varlena values
776 * with full headers, and we need to align them properly.
778 info
[dim
].nbytes_aligned
+= MAXALIGN(VARHDRSZ
+ len
);
781 else if (info
[dim
].typlen
== -2) /* cstring */
783 info
[dim
].nbytes
= 0;
784 info
[dim
].nbytes_aligned
= 0;
785 for (i
= 0; i
< info
[dim
].nvalues
; i
++)
790 * cstring is handled similar to varlena - first we store the
791 * length as uint32 and then the data. We don't care about
792 * alignment, which means that during deserialization we need
793 * to copy the fields and only access the copies.
796 /* c-strings include terminator, so +1 byte */
797 len
= strlen(DatumGetCString(values
[dim
][i
])) + 1;
798 info
[dim
].nbytes
+= sizeof(uint32
); /* length */
799 info
[dim
].nbytes
+= len
; /* value */
801 /* space needed for properly aligned deserialized copies */
802 info
[dim
].nbytes_aligned
+= MAXALIGN(len
);
806 /* we know (count>0) so there must be some data */
807 Assert(info
[dim
].nbytes
> 0);
811 * Now we can finally compute how much space we'll actually need for the
812 * whole serialized MCV list (varlena header, MCV header, dimension info
813 * for each attribute, deduplicated values and items).
815 total_length
= (3 * sizeof(uint32
)) /* magic + type + nitems */
816 + sizeof(AttrNumber
) /* ndimensions */
817 + (ndims
* sizeof(Oid
)); /* attribute types */
820 total_length
+= ndims
* sizeof(DimensionInfo
);
822 /* add space for the arrays of deduplicated values */
823 for (i
= 0; i
< ndims
; i
++)
824 total_length
+= info
[i
].nbytes
;
827 * And finally account for the items (those are fixed-length, thanks to
828 * replacing values with uint16 indexes into the deduplicated arrays).
830 total_length
+= mcvlist
->nitems
* ITEM_SIZE(dim
);
833 * Allocate space for the whole serialized MCV list (we'll skip bytes, so
834 * we set them to zero to make the result more compressible).
836 raw
= (bytea
*) palloc0(VARHDRSZ
+ total_length
);
837 SET_VARSIZE(raw
, VARHDRSZ
+ total_length
);
840 endptr
= ptr
+ total_length
;
842 /* copy the MCV list header fields, one by one */
843 memcpy(ptr
, &mcvlist
->magic
, sizeof(uint32
));
844 ptr
+= sizeof(uint32
);
846 memcpy(ptr
, &mcvlist
->type
, sizeof(uint32
));
847 ptr
+= sizeof(uint32
);
849 memcpy(ptr
, &mcvlist
->nitems
, sizeof(uint32
));
850 ptr
+= sizeof(uint32
);
852 memcpy(ptr
, &mcvlist
->ndimensions
, sizeof(AttrNumber
));
853 ptr
+= sizeof(AttrNumber
);
855 memcpy(ptr
, mcvlist
->types
, sizeof(Oid
) * ndims
);
856 ptr
+= (sizeof(Oid
) * ndims
);
858 /* store information about the attributes (data amounts, ...) */
859 memcpy(ptr
, info
, sizeof(DimensionInfo
) * ndims
);
860 ptr
+= sizeof(DimensionInfo
) * ndims
;
862 /* Copy the deduplicated values for all attributes to the output. */
863 for (dim
= 0; dim
< ndims
; dim
++)
865 /* remember the starting point for Asserts later */
866 char *start PG_USED_FOR_ASSERTS_ONLY
= ptr
;
868 for (i
= 0; i
< info
[dim
].nvalues
; i
++)
870 Datum value
= values
[dim
][i
];
872 if (info
[dim
].typbyval
) /* passed by value */
877 * For byval types, we need to copy just the significant bytes
878 * - we can't use memcpy directly, as that assumes
879 * little-endian behavior. store_att_byval does almost what
880 * we need, but it requires a properly aligned buffer - the
881 * output buffer does not guarantee that. So we simply use a
882 * local Datum variable (which guarantees proper alignment),
883 * and then copy the value from it.
885 store_att_byval(&tmp
, value
, info
[dim
].typlen
);
887 memcpy(ptr
, &tmp
, info
[dim
].typlen
);
888 ptr
+= info
[dim
].typlen
;
890 else if (info
[dim
].typlen
> 0) /* passed by reference */
892 /* no special alignment needed, treated as char array */
893 memcpy(ptr
, DatumGetPointer(value
), info
[dim
].typlen
);
894 ptr
+= info
[dim
].typlen
;
896 else if (info
[dim
].typlen
== -1) /* varlena */
898 uint32 len
= VARSIZE_ANY_EXHDR(DatumGetPointer(value
));
900 /* copy the length */
901 memcpy(ptr
, &len
, sizeof(uint32
));
902 ptr
+= sizeof(uint32
);
904 /* data from the varlena value (without the header) */
905 memcpy(ptr
, VARDATA_ANY(DatumGetPointer(value
)), len
);
908 else if (info
[dim
].typlen
== -2) /* cstring */
910 uint32 len
= (uint32
) strlen(DatumGetCString(value
)) + 1;
912 /* copy the length */
913 memcpy(ptr
, &len
, sizeof(uint32
));
914 ptr
+= sizeof(uint32
);
917 memcpy(ptr
, DatumGetCString(value
), len
);
921 /* no underflows or overflows */
922 Assert((ptr
> start
) && ((ptr
- start
) <= info
[dim
].nbytes
));
925 /* we should get exactly nbytes of data for this dimension */
926 Assert((ptr
- start
) == info
[dim
].nbytes
);
929 /* Serialize the items, with uint16 indexes instead of the values. */
930 for (i
= 0; i
< mcvlist
->nitems
; i
++)
932 MCVItem
*mcvitem
= &mcvlist
->items
[i
];
934 /* don't write beyond the allocated space */
935 Assert(ptr
<= (endptr
- ITEM_SIZE(dim
)));
937 /* copy NULL and frequency flags into the serialized MCV */
938 memcpy(ptr
, mcvitem
->isnull
, sizeof(bool) * ndims
);
939 ptr
+= sizeof(bool) * ndims
;
941 memcpy(ptr
, &mcvitem
->frequency
, sizeof(double));
942 ptr
+= sizeof(double);
944 memcpy(ptr
, &mcvitem
->base_frequency
, sizeof(double));
945 ptr
+= sizeof(double);
947 /* store the indexes last */
948 for (dim
= 0; dim
< ndims
; dim
++)
953 /* do the lookup only for non-NULL values */
954 if (!mcvitem
->isnull
[dim
])
956 value
= (Datum
*) bsearch_arg(&mcvitem
->values
[dim
], values
[dim
],
957 info
[dim
].nvalues
, sizeof(Datum
),
958 compare_scalars_simple
, &ssup
[dim
]);
960 Assert(value
!= NULL
); /* serialization or deduplication
963 /* compute index within the deduplicated array */
964 index
= (uint16
) (value
- values
[dim
]);
966 /* check the index is within expected bounds */
967 Assert(index
< info
[dim
].nvalues
);
970 /* copy the index into the serialized MCV */
971 memcpy(ptr
, &index
, sizeof(uint16
));
972 ptr
+= sizeof(uint16
);
975 /* make sure we don't overflow the allocated value */
976 Assert(ptr
<= endptr
);
979 /* at this point we expect to match the total_length exactly */
980 Assert(ptr
== endptr
);
989 * statext_mcv_deserialize
990 * Reads serialized MCV list into MCVList structure.
992 * All the memory needed by the MCV list is allocated as a single chunk, so
993 * it's possible to simply pfree() it at once.
996 statext_mcv_deserialize(bytea
*data
)
1004 char *endptr PG_USED_FOR_ASSERTS_ONLY
;
1008 DimensionInfo
*info
= NULL
;
1010 /* local allocation buffer (used only for deserialization) */
1016 /* buffer used for the result */
1026 * We can't possibly deserialize a MCV list if there's not even a complete
1027 * header. We need an explicit formula here, because we serialize the
1028 * header fields one by one, so we need to ignore struct alignment.
1030 if (VARSIZE_ANY(data
) < MinSizeOfMCVList
)
1031 elog(ERROR
, "invalid MCV size %zu (expected at least %zu)",
1032 VARSIZE_ANY(data
), MinSizeOfMCVList
);
1034 /* read the MCV list header */
1035 mcvlist
= (MCVList
*) palloc0(offsetof(MCVList
, items
));
1037 /* pointer to the data part (skip the varlena header) */
1038 raw
= (char *) data
;
1039 ptr
= VARDATA_ANY(raw
);
1040 endptr
= (char *) raw
+ VARSIZE_ANY(data
);
1042 /* get the header and perform further sanity checks */
1043 memcpy(&mcvlist
->magic
, ptr
, sizeof(uint32
));
1044 ptr
+= sizeof(uint32
);
1046 memcpy(&mcvlist
->type
, ptr
, sizeof(uint32
));
1047 ptr
+= sizeof(uint32
);
1049 memcpy(&mcvlist
->nitems
, ptr
, sizeof(uint32
));
1050 ptr
+= sizeof(uint32
);
1052 memcpy(&mcvlist
->ndimensions
, ptr
, sizeof(AttrNumber
));
1053 ptr
+= sizeof(AttrNumber
);
1055 if (mcvlist
->magic
!= STATS_MCV_MAGIC
)
1056 elog(ERROR
, "invalid MCV magic %u (expected %u)",
1057 mcvlist
->magic
, STATS_MCV_MAGIC
);
1059 if (mcvlist
->type
!= STATS_MCV_TYPE_BASIC
)
1060 elog(ERROR
, "invalid MCV type %u (expected %u)",
1061 mcvlist
->type
, STATS_MCV_TYPE_BASIC
);
1063 if (mcvlist
->ndimensions
== 0)
1064 elog(ERROR
, "invalid zero-length dimension array in MCVList");
1065 else if ((mcvlist
->ndimensions
> STATS_MAX_DIMENSIONS
) ||
1066 (mcvlist
->ndimensions
< 0))
1067 elog(ERROR
, "invalid length (%d) dimension array in MCVList",
1068 mcvlist
->ndimensions
);
1070 if (mcvlist
->nitems
== 0)
1071 elog(ERROR
, "invalid zero-length item array in MCVList");
1072 else if (mcvlist
->nitems
> STATS_MCVLIST_MAX_ITEMS
)
1073 elog(ERROR
, "invalid length (%u) item array in MCVList",
1076 nitems
= mcvlist
->nitems
;
1077 ndims
= mcvlist
->ndimensions
;
1080 * Check amount of data including DimensionInfo for all dimensions and
1081 * also the serialized items (including uint16 indexes). Also, walk
1082 * through the dimension information and add it to the sum.
1084 expected_size
= SizeOfMCVList(ndims
, nitems
);
1087 * Check that we have at least the dimension and info records, along with
1088 * the items. We don't know the size of the serialized values yet. We need
1089 * to do this check first, before accessing the dimension info.
1091 if (VARSIZE_ANY(data
) < expected_size
)
1092 elog(ERROR
, "invalid MCV size %zu (expected %zu)",
1093 VARSIZE_ANY(data
), expected_size
);
1095 /* Now copy the array of type Oids. */
1096 memcpy(mcvlist
->types
, ptr
, sizeof(Oid
) * ndims
);
1097 ptr
+= (sizeof(Oid
) * ndims
);
1099 /* Now it's safe to access the dimension info. */
1100 info
= palloc(ndims
* sizeof(DimensionInfo
));
1102 memcpy(info
, ptr
, ndims
* sizeof(DimensionInfo
));
1103 ptr
+= (ndims
* sizeof(DimensionInfo
));
1105 /* account for the value arrays */
1106 for (dim
= 0; dim
< ndims
; dim
++)
1109 * XXX I wonder if we can/should rely on asserts here. Maybe those
1110 * checks should be done every time?
1112 Assert(info
[dim
].nvalues
>= 0);
1113 Assert(info
[dim
].nbytes
>= 0);
1115 expected_size
+= info
[dim
].nbytes
;
1119 * Now we know the total expected MCV size, including all the pieces
1120 * (header, dimension info. items and deduplicated data). So do the final
1123 if (VARSIZE_ANY(data
) != expected_size
)
1124 elog(ERROR
, "invalid MCV size %zu (expected %zu)",
1125 VARSIZE_ANY(data
), expected_size
);
1128 * We need an array of Datum values for each dimension, so that we can
1129 * easily translate the uint16 indexes later. We also need a top-level
1130 * array of pointers to those per-dimension arrays.
1132 * While allocating the arrays for dimensions, compute how much space we
1133 * need for a copy of the by-ref data, as we can't simply point to the
1134 * original values (it might go away).
1136 datalen
= 0; /* space for by-ref data */
1137 map
= (Datum
**) palloc(ndims
* sizeof(Datum
*));
1139 for (dim
= 0; dim
< ndims
; dim
++)
1141 map
[dim
] = (Datum
*) palloc(sizeof(Datum
) * info
[dim
].nvalues
);
1143 /* space needed for a copy of data for by-ref types */
1144 datalen
+= info
[dim
].nbytes_aligned
;
1148 * Now resize the MCV list so that the allocation includes all the data.
1150 * Allocate space for a copy of the data, as we can't simply reference the
1151 * serialized data - it's not aligned properly, and it may disappear while
1152 * we're still using the MCV list, e.g. due to catcache release.
1154 * We do care about alignment here, because we will allocate all the
1155 * pieces at once, but then use pointers to different parts.
1157 mcvlen
= MAXALIGN(offsetof(MCVList
, items
) + (sizeof(MCVItem
) * nitems
));
1159 /* arrays of values and isnull flags for all MCV items */
1160 mcvlen
+= nitems
* MAXALIGN(sizeof(Datum
) * ndims
);
1161 mcvlen
+= nitems
* MAXALIGN(sizeof(bool) * ndims
);
1163 /* we don't quite need to align this, but it makes some asserts easier */
1164 mcvlen
+= MAXALIGN(datalen
);
1166 /* now resize the deserialized MCV list, and compute pointers to parts */
1167 mcvlist
= repalloc(mcvlist
, mcvlen
);
1169 /* pointer to the beginning of values/isnull arrays */
1170 valuesptr
= (char *) mcvlist
1171 + MAXALIGN(offsetof(MCVList
, items
) + (sizeof(MCVItem
) * nitems
));
1173 isnullptr
= valuesptr
+ (nitems
* MAXALIGN(sizeof(Datum
) * ndims
));
1175 dataptr
= isnullptr
+ (nitems
* MAXALIGN(sizeof(bool) * ndims
));
1178 * Build mapping (index => value) for translating the serialized data into
1179 * the in-memory representation.
1181 for (dim
= 0; dim
< ndims
; dim
++)
1183 /* remember start position in the input array */
1184 char *start PG_USED_FOR_ASSERTS_ONLY
= ptr
;
1186 if (info
[dim
].typbyval
)
1188 /* for by-val types we simply copy data into the mapping */
1189 for (i
= 0; i
< info
[dim
].nvalues
; i
++)
1193 memcpy(&v
, ptr
, info
[dim
].typlen
);
1194 ptr
+= info
[dim
].typlen
;
1196 map
[dim
][i
] = fetch_att(&v
, true, info
[dim
].typlen
);
1198 /* no under/overflow of input array */
1199 Assert(ptr
<= (start
+ info
[dim
].nbytes
));
1204 /* for by-ref types we need to also make a copy of the data */
1206 /* passed by reference, but fixed length (name, tid, ...) */
1207 if (info
[dim
].typlen
> 0)
1209 for (i
= 0; i
< info
[dim
].nvalues
; i
++)
1211 memcpy(dataptr
, ptr
, info
[dim
].typlen
);
1212 ptr
+= info
[dim
].typlen
;
1214 /* just point into the array */
1215 map
[dim
][i
] = PointerGetDatum(dataptr
);
1216 dataptr
+= MAXALIGN(info
[dim
].typlen
);
1219 else if (info
[dim
].typlen
== -1)
1222 for (i
= 0; i
< info
[dim
].nvalues
; i
++)
1226 /* read the uint32 length */
1227 memcpy(&len
, ptr
, sizeof(uint32
));
1228 ptr
+= sizeof(uint32
);
1230 /* the length is data-only */
1231 SET_VARSIZE(dataptr
, len
+ VARHDRSZ
);
1232 memcpy(VARDATA(dataptr
), ptr
, len
);
1235 /* just point into the array */
1236 map
[dim
][i
] = PointerGetDatum(dataptr
);
1238 /* skip to place of the next deserialized value */
1239 dataptr
+= MAXALIGN(len
+ VARHDRSZ
);
1242 else if (info
[dim
].typlen
== -2)
1245 for (i
= 0; i
< info
[dim
].nvalues
; i
++)
1249 memcpy(&len
, ptr
, sizeof(uint32
));
1250 ptr
+= sizeof(uint32
);
1252 memcpy(dataptr
, ptr
, len
);
1255 /* just point into the array */
1256 map
[dim
][i
] = PointerGetDatum(dataptr
);
1257 dataptr
+= MAXALIGN(len
);
1261 /* no under/overflow of input array */
1262 Assert(ptr
<= (start
+ info
[dim
].nbytes
));
1264 /* no overflow of the output mcv value */
1265 Assert(dataptr
<= ((char *) mcvlist
+ mcvlen
));
1268 /* check we consumed input data for this dimension exactly */
1269 Assert(ptr
== (start
+ info
[dim
].nbytes
));
1272 /* we should have also filled the MCV list exactly */
1273 Assert(dataptr
== ((char *) mcvlist
+ mcvlen
));
1275 /* deserialize the MCV items and translate the indexes to Datums */
1276 for (i
= 0; i
< nitems
; i
++)
1278 MCVItem
*item
= &mcvlist
->items
[i
];
1280 item
->values
= (Datum
*) valuesptr
;
1281 valuesptr
+= MAXALIGN(sizeof(Datum
) * ndims
);
1283 item
->isnull
= (bool *) isnullptr
;
1284 isnullptr
+= MAXALIGN(sizeof(bool) * ndims
);
1286 memcpy(item
->isnull
, ptr
, sizeof(bool) * ndims
);
1287 ptr
+= sizeof(bool) * ndims
;
1289 memcpy(&item
->frequency
, ptr
, sizeof(double));
1290 ptr
+= sizeof(double);
1292 memcpy(&item
->base_frequency
, ptr
, sizeof(double));
1293 ptr
+= sizeof(double);
1295 /* finally translate the indexes (for non-NULL only) */
1296 for (dim
= 0; dim
< ndims
; dim
++)
1300 memcpy(&index
, ptr
, sizeof(uint16
));
1301 ptr
+= sizeof(uint16
);
1303 if (item
->isnull
[dim
])
1306 item
->values
[dim
] = map
[dim
][index
];
1309 /* check we're not overflowing the input */
1310 Assert(ptr
<= endptr
);
1313 /* check that we processed all the data */
1314 Assert(ptr
== endptr
);
1316 /* release the buffers used for mapping */
1317 for (dim
= 0; dim
< ndims
; dim
++)
1326 * SRF with details about buckets of a histogram:
1328 * - item ID (0...nitems)
1329 * - values (string array)
1330 * - nulls only (boolean array)
1331 * - frequency (double precision)
1332 * - base_frequency (double precision)
1334 * The input is the OID of the statistics, and there are no rows returned if
1335 * the statistics contains no histogram.
1338 pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS
)
1340 FuncCallContext
*funcctx
;
1342 /* stuff done only on the first call of the function */
1343 if (SRF_IS_FIRSTCALL())
1345 MemoryContext oldcontext
;
1349 /* create a function context for cross-call persistence */
1350 funcctx
= SRF_FIRSTCALL_INIT();
1352 /* switch to memory context appropriate for multiple function calls */
1353 oldcontext
= MemoryContextSwitchTo(funcctx
->multi_call_memory_ctx
);
1355 mcvlist
= statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
1357 funcctx
->user_fctx
= mcvlist
;
1359 /* total number of tuples to be returned */
1360 funcctx
->max_calls
= 0;
1361 if (funcctx
->user_fctx
!= NULL
)
1362 funcctx
->max_calls
= mcvlist
->nitems
;
1364 /* Build a tuple descriptor for our result type */
1365 if (get_call_result_type(fcinfo
, NULL
, &tupdesc
) != TYPEFUNC_COMPOSITE
)
1367 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
1368 errmsg("function returning record called in context "
1369 "that cannot accept type record")));
1370 tupdesc
= BlessTupleDesc(tupdesc
);
1373 * generate attribute metadata needed later to produce tuples from raw
1376 funcctx
->attinmeta
= TupleDescGetAttInMetadata(tupdesc
);
1378 MemoryContextSwitchTo(oldcontext
);
1381 /* stuff done on every call of the function */
1382 funcctx
= SRF_PERCALL_SETUP();
1384 if (funcctx
->call_cntr
< funcctx
->max_calls
) /* do when there is more
1391 ArrayBuildState
*astate_values
= NULL
;
1392 ArrayBuildState
*astate_nulls
= NULL
;
1398 mcvlist
= (MCVList
*) funcctx
->user_fctx
;
1400 Assert(funcctx
->call_cntr
< mcvlist
->nitems
);
1402 item
= &mcvlist
->items
[funcctx
->call_cntr
];
1404 for (i
= 0; i
< mcvlist
->ndimensions
; i
++)
1407 astate_nulls
= accumArrayResult(astate_nulls
,
1408 BoolGetDatum(item
->isnull
[i
]),
1411 CurrentMemoryContext
);
1413 if (!item
->isnull
[i
])
1421 /* lookup output func for the type */
1422 getTypeOutputInfo(mcvlist
->types
[i
], &outfunc
, &isvarlena
);
1423 fmgr_info(outfunc
, &fmgrinfo
);
1425 val
= FunctionCall1(&fmgrinfo
, item
->values
[i
]);
1426 txt
= cstring_to_text(DatumGetPointer(val
));
1428 astate_values
= accumArrayResult(astate_values
,
1429 PointerGetDatum(txt
),
1432 CurrentMemoryContext
);
1435 astate_values
= accumArrayResult(astate_values
,
1439 CurrentMemoryContext
);
1442 values
[0] = Int32GetDatum(funcctx
->call_cntr
);
1443 values
[1] = makeArrayResult(astate_values
, CurrentMemoryContext
);
1444 values
[2] = makeArrayResult(astate_nulls
, CurrentMemoryContext
);
1445 values
[3] = Float8GetDatum(item
->frequency
);
1446 values
[4] = Float8GetDatum(item
->base_frequency
);
1448 /* no NULLs in the tuple */
1449 memset(nulls
, 0, sizeof(nulls
));
1452 tuple
= heap_form_tuple(funcctx
->attinmeta
->tupdesc
, values
, nulls
);
1454 /* make the tuple into a datum */
1455 result
= HeapTupleGetDatum(tuple
);
1457 SRF_RETURN_NEXT(funcctx
, result
);
1459 else /* do when there is no more left */
1461 SRF_RETURN_DONE(funcctx
);
1466 * pg_mcv_list_in - input routine for type pg_mcv_list.
1468 * pg_mcv_list is real enough to be a table column, but it has no operations
1469 * of its own, and disallows input too
1472 pg_mcv_list_in(PG_FUNCTION_ARGS
)
1475 * pg_mcv_list stores the data in binary form and parsing text input is
1476 * not needed, so disallow this.
1479 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
1480 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1482 PG_RETURN_VOID(); /* keep compiler quiet */
1487 * pg_mcv_list_out - output routine for type pg_mcv_list.
1489 * MCV lists are serialized into a bytea value, so we simply call byteaout()
1490 * to serialize the value into text. But it'd be nice to serialize that into
1491 * a meaningful representation (e.g. for inspection by people).
1493 * XXX This should probably return something meaningful, similar to what
1494 * pg_dependencies_out does. Not sure how to deal with the deduplicated
1495 * values, though - do we want to expand that or not?
1498 pg_mcv_list_out(PG_FUNCTION_ARGS
)
1500 return byteaout(fcinfo
);
1504 * pg_mcv_list_recv - binary input routine for type pg_mcv_list.
1507 pg_mcv_list_recv(PG_FUNCTION_ARGS
)
1510 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
1511 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1513 PG_RETURN_VOID(); /* keep compiler quiet */
1517 * pg_mcv_list_send - binary output routine for type pg_mcv_list.
1519 * MCV lists are serialized in a bytea value (although the type is named
1520 * differently), so let's just send that.
1523 pg_mcv_list_send(PG_FUNCTION_ARGS
)
1525 return byteasend(fcinfo
);
1529 * match the attribute/expression to a dimension of the statistic
1531 * Returns the zero-based index of the matching statistics dimension.
1532 * Optionally determines the collation.
1535 mcv_match_expression(Node
*expr
, Bitmapset
*keys
, List
*exprs
, Oid
*collid
)
1541 /* simple Var, so just lookup using varattno */
1542 Var
*var
= (Var
*) expr
;
1545 *collid
= var
->varcollid
;
1547 idx
= bms_member_index(keys
, var
->varattno
);
1550 elog(ERROR
, "variable not found in statistics object");
1554 /* expression - lookup in stats expressions */
1558 *collid
= exprCollation(expr
);
1560 /* expressions are stored after the simple columns */
1561 idx
= bms_num_members(keys
);
1564 Node
*stat_expr
= (Node
*) lfirst(lc
);
1566 if (equal(expr
, stat_expr
))
1573 elog(ERROR
, "expression not found in statistics object");
1580 * mcv_get_match_bitmap
1581 * Evaluate clauses using the MCV list, and update the match bitmap.
1583 * A match bitmap keeps match/mismatch status for each MCV item, and we
1584 * update it based on additional clauses. We also use it to skip items
1585 * that can't possibly match (e.g. item marked as "mismatch" can't change
1586 * to "match" when evaluating AND clause list).
1588 * The function also returns a flag indicating whether there was an
1589 * equality condition for all attributes, the minimum frequency in the MCV
1590 * list, and a total MCV frequency (sum of frequencies for all items).
1592 * XXX Currently the match bitmap uses a bool for each MCV item, which is
1593 * somewhat wasteful as we could do with just a single bit, thus reducing
1594 * the size to ~1/8. It would also allow us to combine bitmaps simply using
1595 * & and |, which should be faster than min/max. The bitmaps are fairly
1596 * small, though (thanks to the cap on the MCV list size).
1599 mcv_get_match_bitmap(PlannerInfo
*root
, List
*clauses
,
1600 Bitmapset
*keys
, List
*exprs
,
1601 MCVList
*mcvlist
, bool is_or
)
1606 /* The bitmap may be partially built. */
1607 Assert(clauses
!= NIL
);
1608 Assert(mcvlist
!= NULL
);
1609 Assert(mcvlist
->nitems
> 0);
1610 Assert(mcvlist
->nitems
<= STATS_MCVLIST_MAX_ITEMS
);
1612 matches
= palloc(sizeof(bool) * mcvlist
->nitems
);
1613 memset(matches
, !is_or
, sizeof(bool) * mcvlist
->nitems
);
1616 * Loop through the list of clauses, and for each of them evaluate all the
1617 * MCV items not yet eliminated by the preceding clauses.
1621 Node
*clause
= (Node
*) lfirst(l
);
1623 /* if it's a RestrictInfo, then extract the clause */
1624 if (IsA(clause
, RestrictInfo
))
1625 clause
= (Node
*) ((RestrictInfo
*) clause
)->clause
;
1628 * Handle the various types of clauses - OpClause, NullTest and
1631 if (is_opclause(clause
))
1633 OpExpr
*expr
= (OpExpr
*) clause
;
1636 /* valid only after examine_opclause_args returns true */
1643 fmgr_info(get_opcode(expr
->opno
), &opproc
);
1645 /* extract the var/expr and const from the expression */
1646 if (!examine_opclause_args(expr
->args
, &clause_expr
, &cst
, &expronleft
))
1647 elog(ERROR
, "incompatible clause");
1649 /* match the attribute/expression to a dimension of the statistic */
1650 idx
= mcv_match_expression(clause_expr
, keys
, exprs
, &collid
);
1653 * Walk through the MCV items and evaluate the current clause. We
1654 * can skip items that were already ruled out, and terminate if
1655 * there are no remaining MCV items that might possibly match.
1657 for (int i
= 0; i
< mcvlist
->nitems
; i
++)
1660 MCVItem
*item
= &mcvlist
->items
[i
];
1665 * When the MCV item or the Const value is NULL we can treat
1666 * this as a mismatch. We must not call the operator because
1669 if (item
->isnull
[idx
] || cst
->constisnull
)
1671 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, false);
1676 * Skip MCV items that can't change result in the bitmap. Once
1677 * the value gets false for AND-lists, or true for OR-lists,
1678 * we don't need to look at more clauses.
1680 if (RESULT_IS_FINAL(matches
[i
], is_or
))
1684 * First check whether the constant is below the lower
1685 * boundary (in that case we can skip the bucket, because
1686 * there's no overlap).
1688 * We don't store collations used to build the statistics, but
1689 * we can use the collation for the attribute itself, as
1690 * stored in varcollid. We do reset the statistics after a
1691 * type change (including collation change), so this is OK.
1692 * For expressions, we use the collation extracted from the
1693 * expression itself.
1696 match
= DatumGetBool(FunctionCall2Coll(&opproc
,
1701 match
= DatumGetBool(FunctionCall2Coll(&opproc
,
1704 item
->values
[idx
]));
1706 /* update the match bitmap with the result */
1707 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, match
);
1710 else if (IsA(clause
, ScalarArrayOpExpr
))
1712 ScalarArrayOpExpr
*expr
= (ScalarArrayOpExpr
*) clause
;
1715 /* valid only after examine_opclause_args returns true */
1722 /* array evaluation */
1723 ArrayType
*arrayval
;
1731 fmgr_info(get_opcode(expr
->opno
), &opproc
);
1733 /* extract the var/expr and const from the expression */
1734 if (!examine_opclause_args(expr
->args
, &clause_expr
, &cst
, &expronleft
))
1735 elog(ERROR
, "incompatible clause");
1737 /* We expect Var on left */
1739 elog(ERROR
, "incompatible clause");
1742 * Deconstruct the array constant, unless it's NULL (we'll cover
1745 if (!cst
->constisnull
)
1747 arrayval
= DatumGetArrayTypeP(cst
->constvalue
);
1748 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval
),
1749 &elmlen
, &elmbyval
, &elmalign
);
1750 deconstruct_array(arrayval
,
1751 ARR_ELEMTYPE(arrayval
),
1752 elmlen
, elmbyval
, elmalign
,
1753 &elem_values
, &elem_nulls
, &num_elems
);
1756 /* match the attribute/expression to a dimension of the statistic */
1757 idx
= mcv_match_expression(clause_expr
, keys
, exprs
, &collid
);
1760 * Walk through the MCV items and evaluate the current clause. We
1761 * can skip items that were already ruled out, and terminate if
1762 * there are no remaining MCV items that might possibly match.
1764 for (int i
= 0; i
< mcvlist
->nitems
; i
++)
1767 bool match
= !expr
->useOr
;
1768 MCVItem
*item
= &mcvlist
->items
[i
];
1771 * When the MCV item or the Const value is NULL we can treat
1772 * this as a mismatch. We must not call the operator because
1775 if (item
->isnull
[idx
] || cst
->constisnull
)
1777 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, false);
1782 * Skip MCV items that can't change result in the bitmap. Once
1783 * the value gets false for AND-lists, or true for OR-lists,
1784 * we don't need to look at more clauses.
1786 if (RESULT_IS_FINAL(matches
[i
], is_or
))
1789 for (j
= 0; j
< num_elems
; j
++)
1791 Datum elem_value
= elem_values
[j
];
1792 bool elem_isnull
= elem_nulls
[j
];
1795 /* NULL values always evaluate as not matching. */
1798 match
= RESULT_MERGE(match
, expr
->useOr
, false);
1803 * Stop evaluating the array elements once we reach a
1804 * matching value that can't change - ALL() is the same as
1805 * AND-list, ANY() is the same as OR-list.
1807 if (RESULT_IS_FINAL(match
, expr
->useOr
))
1810 elem_match
= DatumGetBool(FunctionCall2Coll(&opproc
,
1815 match
= RESULT_MERGE(match
, expr
->useOr
, elem_match
);
1818 /* update the match bitmap with the result */
1819 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, match
);
1822 else if (IsA(clause
, NullTest
))
1824 NullTest
*expr
= (NullTest
*) clause
;
1825 Node
*clause_expr
= (Node
*) (expr
->arg
);
1827 /* match the attribute/expression to a dimension of the statistic */
1828 int idx
= mcv_match_expression(clause_expr
, keys
, exprs
, NULL
);
1831 * Walk through the MCV items and evaluate the current clause. We
1832 * can skip items that were already ruled out, and terminate if
1833 * there are no remaining MCV items that might possibly match.
1835 for (int i
= 0; i
< mcvlist
->nitems
; i
++)
1837 bool match
= false; /* assume mismatch */
1838 MCVItem
*item
= &mcvlist
->items
[i
];
1840 /* if the clause mismatches the MCV item, update the bitmap */
1841 switch (expr
->nulltesttype
)
1844 match
= (item
->isnull
[idx
]) ? true : match
;
1848 match
= (!item
->isnull
[idx
]) ? true : match
;
1852 /* now, update the match bitmap, depending on OR/AND type */
1853 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, match
);
1856 else if (is_orclause(clause
) || is_andclause(clause
))
1858 /* AND/OR clause, with all subclauses being compatible */
1861 BoolExpr
*bool_clause
= ((BoolExpr
*) clause
);
1862 List
*bool_clauses
= bool_clause
->args
;
1864 /* match/mismatch bitmap for each MCV item */
1865 bool *bool_matches
= NULL
;
1867 Assert(bool_clauses
!= NIL
);
1868 Assert(list_length(bool_clauses
) >= 2);
1870 /* build the match bitmap for the OR-clauses */
1871 bool_matches
= mcv_get_match_bitmap(root
, bool_clauses
, keys
, exprs
,
1872 mcvlist
, is_orclause(clause
));
1875 * Merge the bitmap produced by mcv_get_match_bitmap into the
1876 * current one. We need to consider if we're evaluating AND or OR
1877 * condition when merging the results.
1879 for (i
= 0; i
< mcvlist
->nitems
; i
++)
1880 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, bool_matches
[i
]);
1882 pfree(bool_matches
);
1884 else if (is_notclause(clause
))
1886 /* NOT clause, with all subclauses compatible */
1889 BoolExpr
*not_clause
= ((BoolExpr
*) clause
);
1890 List
*not_args
= not_clause
->args
;
1892 /* match/mismatch bitmap for each MCV item */
1893 bool *not_matches
= NULL
;
1895 Assert(not_args
!= NIL
);
1896 Assert(list_length(not_args
) == 1);
1898 /* build the match bitmap for the NOT-clause */
1899 not_matches
= mcv_get_match_bitmap(root
, not_args
, keys
, exprs
,
1903 * Merge the bitmap produced by mcv_get_match_bitmap into the
1904 * current one. We're handling a NOT clause, so invert the result
1905 * before merging it into the global bitmap.
1907 for (i
= 0; i
< mcvlist
->nitems
; i
++)
1908 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, !not_matches
[i
]);
1912 else if (IsA(clause
, Var
))
1914 /* Var (has to be a boolean Var, possibly from below NOT) */
1916 Var
*var
= (Var
*) (clause
);
1918 /* match the attribute to a dimension of the statistic */
1919 int idx
= bms_member_index(keys
, var
->varattno
);
1921 Assert(var
->vartype
== BOOLOID
);
1924 * Walk through the MCV items and evaluate the current clause. We
1925 * can skip items that were already ruled out, and terminate if
1926 * there are no remaining MCV items that might possibly match.
1928 for (int i
= 0; i
< mcvlist
->nitems
; i
++)
1930 MCVItem
*item
= &mcvlist
->items
[i
];
1933 /* if the item is NULL, it's a mismatch */
1934 if (!item
->isnull
[idx
] && DatumGetBool(item
->values
[idx
]))
1937 /* update the result bitmap */
1938 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, match
);
1943 /* Otherwise, it must be a bare boolean-returning expression */
1946 /* match the expression to a dimension of the statistic */
1947 idx
= mcv_match_expression(clause
, keys
, exprs
, NULL
);
1950 * Walk through the MCV items and evaluate the current clause. We
1951 * can skip items that were already ruled out, and terminate if
1952 * there are no remaining MCV items that might possibly match.
1954 for (int i
= 0; i
< mcvlist
->nitems
; i
++)
1957 MCVItem
*item
= &mcvlist
->items
[i
];
1959 /* "match" just means it's bool TRUE */
1960 match
= !item
->isnull
[idx
] && DatumGetBool(item
->values
[idx
]);
1962 /* now, update the match bitmap, depending on OR/AND type */
1963 matches
[i
] = RESULT_MERGE(matches
[i
], is_or
, match
);
1973 * mcv_combine_selectivities
1974 * Combine per-column and multi-column MCV selectivity estimates.
1976 * simple_sel is a "simple" selectivity estimate (produced without using any
1977 * extended statistics, essentially assuming independence of columns/clauses).
1979 * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
1980 * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then
1981 * essentially interpreted as a correction to be added to simple_sel, as
1984 * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
1985 * matching ones). This is used as an upper bound on the portion of the
1986 * selectivity estimates not covered by the MCV statistics.
1988 * Note: While simple and base selectivities are defined in a quite similar
1989 * way, the values are computed differently and are not therefore equal. The
1990 * simple selectivity is computed as a product of per-clause estimates, while
1991 * the base selectivity is computed by adding up base frequencies of matching
1992 * items of the multi-column MCV list. So the values may differ for two main
1993 * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1994 * the MCV items did not match the estimated clauses.
1996 * As both (a) and (b) reduce the base selectivity value, it generally holds
1997 * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
1998 * values may be equal.
2000 * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
2001 * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
2002 * correction for the part covered by the MCV list. Those two statements are
2003 * actually equivalent.
2006 mcv_combine_selectivities(Selectivity simple_sel
,
2007 Selectivity mcv_sel
,
2008 Selectivity mcv_basesel
,
2009 Selectivity mcv_totalsel
)
2011 Selectivity other_sel
;
2014 /* estimated selectivity of values not covered by MCV matches */
2015 other_sel
= simple_sel
- mcv_basesel
;
2016 CLAMP_PROBABILITY(other_sel
);
2018 /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2019 if (other_sel
> 1.0 - mcv_totalsel
)
2020 other_sel
= 1.0 - mcv_totalsel
;
2022 /* overall selectivity is the sum of the MCV and non-MCV parts */
2023 sel
= mcv_sel
+ other_sel
;
2024 CLAMP_PROBABILITY(sel
);
2031 * mcv_clauselist_selectivity
2032 * Use MCV statistics to estimate the selectivity of an implicitly-ANDed
2035 * This determines which MCV items match every clause in the list and returns
2036 * the sum of the frequencies of those items.
2038 * In addition, it returns the sum of the base frequencies of each of those
2039 * items (that is the sum of the selectivities that each item would have if
2040 * the columns were independent of one another), and the total selectivity of
2041 * all the MCV items (not just the matching ones). These are expected to be
2042 * used together with a "simple" selectivity estimate (one based only on
2043 * per-column statistics) to produce an overall selectivity estimate that
2044 * makes use of both per-column and multi-column statistics --- see
2045 * mcv_combine_selectivities().
2048 mcv_clauselist_selectivity(PlannerInfo
*root
, StatisticExtInfo
*stat
,
2049 List
*clauses
, int varRelid
,
2050 JoinType jointype
, SpecialJoinInfo
*sjinfo
,
2052 Selectivity
*basesel
, Selectivity
*totalsel
)
2056 Selectivity s
= 0.0;
2057 RangeTblEntry
*rte
= root
->simple_rte_array
[rel
->relid
];
2059 /* match/mismatch bitmap for each MCV item */
2060 bool *matches
= NULL
;
2062 /* load the MCV list stored in the statistics object */
2063 mcv
= statext_mcv_load(stat
->statOid
, rte
->inh
);
2065 /* build a match bitmap for the clauses */
2066 matches
= mcv_get_match_bitmap(root
, clauses
, stat
->keys
, stat
->exprs
,
2069 /* sum frequencies for all the matching MCV items */
2072 for (i
= 0; i
< mcv
->nitems
; i
++)
2074 *totalsel
+= mcv
->items
[i
].frequency
;
2076 if (matches
[i
] != false)
2078 *basesel
+= mcv
->items
[i
].base_frequency
;
2079 s
+= mcv
->items
[i
].frequency
;
2088 * mcv_clause_selectivity_or
2089 * Use MCV statistics to estimate the selectivity of a clause that
2090 * appears in an ORed list of clauses.
2092 * As with mcv_clauselist_selectivity() this determines which MCV items match
2093 * the clause and returns both the sum of the frequencies and the sum of the
2094 * base frequencies of those items, as well as the sum of the frequencies of
2095 * all MCV items (not just the matching ones) so that this information can be
2096 * used by mcv_combine_selectivities() to produce a selectivity estimate that
2097 * makes use of both per-column and multi-column statistics.
2099 * Additionally, we return information to help compute the overall selectivity
2100 * of the ORed list of clauses assumed to contain this clause. This function
2101 * is intended to be called for each clause in the ORed list of clauses,
2102 * allowing the overall selectivity to be computed using the following
2105 * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
2106 * of the first n clauses in the list. Then the combined selectivity taking
2107 * into account the next clause C[n+1] can be written as
2109 * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
2111 * The final term above represents the overlap between the clauses examined so
2112 * far and the (n+1)'th clause. To estimate its selectivity, we track the
2113 * match bitmap for the ORed list of clauses examined so far and examine its
2114 * intersection with the match bitmap for the (n+1)'th clause.
2116 * We then also return the sums of the MCV item frequencies and base
2117 * frequencies for the match bitmap intersection corresponding to the overlap
2118 * term above, so that they can be combined with a simple selectivity estimate
2121 * The parameter "or_matches" is an in/out parameter tracking the match bitmap
2122 * for the clauses examined so far. The caller is expected to set it to NULL
2123 * the first time it calls this function.
2126 mcv_clause_selectivity_or(PlannerInfo
*root
, StatisticExtInfo
*stat
,
2127 MCVList
*mcv
, Node
*clause
, bool **or_matches
,
2128 Selectivity
*basesel
, Selectivity
*overlap_mcvsel
,
2129 Selectivity
*overlap_basesel
, Selectivity
*totalsel
)
2131 Selectivity s
= 0.0;
2135 /* build the OR-matches bitmap, if not built already */
2136 if (*or_matches
== NULL
)
2137 *or_matches
= palloc0(sizeof(bool) * mcv
->nitems
);
2139 /* build the match bitmap for the new clause */
2140 new_matches
= mcv_get_match_bitmap(root
, list_make1(clause
), stat
->keys
,
2141 stat
->exprs
, mcv
, false);
2144 * Sum the frequencies for all the MCV items matching this clause and also
2145 * those matching the overlap between this clause and any of the preceding
2146 * clauses as described above.
2149 *overlap_mcvsel
= 0.0;
2150 *overlap_basesel
= 0.0;
2152 for (i
= 0; i
< mcv
->nitems
; i
++)
2154 *totalsel
+= mcv
->items
[i
].frequency
;
2158 s
+= mcv
->items
[i
].frequency
;
2159 *basesel
+= mcv
->items
[i
].base_frequency
;
2161 if ((*or_matches
)[i
])
2163 *overlap_mcvsel
+= mcv
->items
[i
].frequency
;
2164 *overlap_basesel
+= mcv
->items
[i
].base_frequency
;
2168 /* update the OR-matches bitmap for the next clause */
2169 (*or_matches
)[i
] = (*or_matches
)[i
] || new_matches
[i
];