src/backend/statistics/mcv.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * mcv.c
   4  *        POSTGRES multivariate MCV lists
   5  *
   6  *
   7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
   8  * Portions Copyright (c) 1994, Regents of the University of California
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/statistics/mcv.c
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <math.h>
  18
  19 #include "access/htup_details.h"
  20 #include "catalog/pg_statistic_ext.h"
  21 #include "catalog/pg_statistic_ext_data.h"
  22 #include "fmgr.h"
  23 #include "funcapi.h"
  24 #include "nodes/nodeFuncs.h"
  25 #include "statistics/extended_stats_internal.h"
  26 #include "statistics/statistics.h"
  27 #include "utils/array.h"
  28 #include "utils/builtins.h"
  29 #include "utils/fmgrprotos.h"
  30 #include "utils/lsyscache.h"
  31 #include "utils/selfuncs.h"
  32 #include "utils/syscache.h"
  33 #include "utils/typcache.h"
  34
  35 /*
  36  * Computes size of a serialized MCV item, depending on the number of
  37  * dimensions (columns) the statistic is defined on. The datum values are
  38  * stored in a separate array (deduplicated, to minimize the size), and
  39  * so the serialized items only store uint16 indexes into that array.
  40  *
  41  * Each serialized item stores (in this order):
  42  *
  43  * - indexes to values    (ndim * sizeof(uint16))
  44  * - null flags                   (ndim * sizeof(bool))
  45  * - frequency                    (sizeof(double))
  46  * - base_frequency               (sizeof(double))
  47  *
  48  * There is no alignment padding within an MCV item.
  49  * So in total each MCV item requires this many bytes:
  50  *
  51  *       ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
  52  */
  53 #define ITEM_SIZE(ndims)        \
  54         ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
  55
  56 /*
  57  * Used to compute size of serialized MCV list representation.
  58  */
  59 #define MinSizeOfMCVList                \
  60         (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
  61
  62 /*
  63  * Size of the serialized MCV list, excluding the space needed for
  64  * deduplicated per-dimension values. The macro is meant to be used
  65  * when it's not yet safe to access the serialized info about amount
  66  * of data for each column.
  67  */
  68 #define SizeOfMCVList(ndims,nitems)     \
  69         ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
  70          ((ndims) * sizeof(DimensionInfo)) + \
  71          ((nitems) * ITEM_SIZE(ndims)))
  72
  73 static MultiSortSupport build_mss(StatsBuildData *data);
  74
  75 static SortItem *build_distinct_groups(int numrows, SortItem *items,
  76                                                                            MultiSortSupport mss, int *ndistinct);
  77
  78 static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
  79                                                                                    MultiSortSupport mss, int *ncounts);
  80
  81 static int      count_distinct_groups(int numrows, SortItem *items,
  82                                                                   MultiSortSupport mss);
  83
  84 /*
  85  * Compute new value for bitmap item, considering whether it's used for
  86  * clauses connected by AND/OR.
  87  */
  88 #define RESULT_MERGE(value, is_or, match) \
  89         ((is_or) ? ((value) || (match)) : ((value) && (match)))
  90
  91 /*
  92  * When processing a list of clauses, the bitmap item may get set to a value
  93  * such that additional clauses can't change it. For example, when processing
  94  * a list of clauses connected to AND, as soon as the item gets set to 'false'
  95  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
  96  *
  97  * Returns true when the value in the bitmap can't change no matter how the
  98  * remaining clauses are evaluated.
  99  */
 100 #define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
 101
 102 /*
 103  * get_mincount_for_mcv_list
 104  *              Determine the minimum number of times a value needs to appear in
 105  *              the sample for it to be included in the MCV list.
 106  *
 107  * We want to keep only values that appear sufficiently often in the
 108  * sample that it is reasonable to extrapolate their sample frequencies to
 109  * the entire table.  We do this by placing an upper bound on the relative
 110  * standard error of the sample frequency, so that any estimates the
 111  * planner generates from the MCV statistics can be expected to be
 112  * reasonably accurate.
 113  *
 114  * Since we are sampling without replacement, the sample frequency of a
 115  * particular value is described by a hypergeometric distribution.  A
 116  * common rule of thumb when estimating errors in this situation is to
 117  * require at least 10 instances of the value in the sample, in which case
 118  * the distribution can be approximated by a normal distribution, and
 119  * standard error analysis techniques can be applied.  Given a sample size
 120  * of n, a population size of N, and a sample frequency of p=cnt/n, the
 121  * standard error of the proportion p is given by
 122  *              SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
 123  * where the second term is the finite population correction.  To get
 124  * reasonably accurate planner estimates, we impose an upper bound on the
 125  * relative standard error of 20% -- i.e., SE/p < 0.2.  This 20% relative
 126  * error bound is fairly arbitrary, but has been found empirically to work
 127  * well.  Rearranging this formula gives a lower bound on the number of
 128  * instances of the value seen:
 129  *              cnt > n*(N-n) / (N-n+0.04*n*(N-1))
 130  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
 131  * case where n approaches 0 cannot happen in practice, since the sample
 132  * size is at least 300.  The case where n approaches N corresponds to
 133  * sampling the whole table, in which case it is reasonable to keep
 134  * the whole MCV list (have no lower bound), so it makes sense to apply
 135  * this formula for all inputs, even though the above derivation is
 136  * technically only valid when the right hand side is at least around 10.
 137  *
 138  * An alternative way to look at this formula is as follows -- assume that
 139  * the number of instances of the value seen scales up to the entire
 140  * table, so that the population count is K=N*cnt/n. Then the distribution
 141  * in the sample is a hypergeometric distribution parameterised by N, n
 142  * and K, and the bound above is mathematically equivalent to demanding
 143  * that the standard deviation of that distribution is less than 20% of
 144  * its mean.  Thus the relative errors in any planner estimates produced
 145  * from the MCV statistics are likely to be not too large.
 146  */
 147 static double
 148 get_mincount_for_mcv_list(int samplerows, double totalrows)
 149 {
 150         double          n = samplerows;
 151         double          N = totalrows;
 152         double          numer,
 153                                 denom;
 154
 155         numer = n * (N - n);
 156         denom = N - n + 0.04 * n * (N - 1);
 157
 158         /* Guard against division by zero (possible if n = N = 1) */
 159         if (denom == 0.0)
 160                 return 0.0;
 161
 162         return numer / denom;
 163 }
 164
 165 /*
 166  * Builds MCV list from the set of sampled rows.
 167  *
 168  * The algorithm is quite simple:
 169  *
 170  *         (1) sort the data (default collation, '<' for the data type)
 171  *
 172  *         (2) count distinct groups, decide how many to keep
 173  *
 174  *         (3) build the MCV list using the threshold determined in (2)
 175  *
 176  *         (4) remove rows represented by the MCV from the sample
 177  *
 178  */
 179 MCVList *
 180 statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
 181 {
 182         int                     i,
 183                                 numattrs,
 184                                 numrows,
 185                                 ngroups,
 186                                 nitems;
 187         double          mincount;
 188         SortItem   *items;
 189         SortItem   *groups;
 190         MCVList    *mcvlist = NULL;
 191         MultiSortSupport mss;
 192
 193         /* comparator for all the columns */
 194         mss = build_mss(data);
 195
 196         /* sort the rows */
 197         items = build_sorted_items(data, &nitems, mss,
 198                                                            data->nattnums, data->attnums);
 199
 200         if (!items)
 201                 return NULL;
 202
 203         /* for convenience */
 204         numattrs = data->nattnums;
 205         numrows = data->numrows;
 206
 207         /* transform the sorted rows into groups (sorted by frequency) */
 208         groups = build_distinct_groups(nitems, items, mss, &ngroups);
 209
 210         /*
 211          * The maximum number of MCV items to store, based on the statistics
 212          * target we computed for the statistics object (from the target set for
 213          * the object itself, attributes and the system default). In any case, we
 214          * can't keep more groups than we have available.
 215          */
 216         nitems = stattarget;
 217         if (nitems > ngroups)
 218                 nitems = ngroups;
 219
 220         /*
 221          * Decide how many items to keep in the MCV list. We can't use the same
 222          * algorithm as per-column MCV lists, because that only considers the
 223          * actual group frequency - but we're primarily interested in how the
 224          * actual frequency differs from the base frequency (product of simple
 225          * per-column frequencies, as if the columns were independent).
 226          *
 227          * Using the same algorithm might exclude items that are close to the
 228          * "average" frequency of the sample. But that does not say whether the
 229          * observed frequency is close to the base frequency or not. We also need
 230          * to consider unexpectedly uncommon items (again, compared to the base
 231          * frequency), and the single-column algorithm does not have to.
 232          *
 233          * We simply decide how many items to keep by computing the minimum count
 234          * using get_mincount_for_mcv_list() and then keep all items that seem to
 235          * be more common than that.
 236          */
 237         mincount = get_mincount_for_mcv_list(numrows, totalrows);
 238
 239         /*
 240          * Walk the groups until we find the first group with a count below the
 241          * mincount threshold (the index of that group is the number of groups we
 242          * want to keep).
 243          */
 244         for (i = 0; i < nitems; i++)
 245         {
 246                 if (groups[i].count < mincount)
 247                 {
 248                         nitems = i;
 249                         break;
 250                 }
 251         }
 252
 253         /*
 254          * At this point, we know the number of items for the MCV list. There
 255          * might be none (for uniform distribution with many groups), and in that
 256          * case, there will be no MCV list. Otherwise, construct the MCV list.
 257          */
 258         if (nitems > 0)
 259         {
 260                 int                     j;
 261                 SortItem        key;
 262                 MultiSortSupport tmp;
 263
 264                 /* frequencies for values in each attribute */
 265                 SortItem  **freqs;
 266                 int                *nfreqs;
 267
 268                 /* used to search values */
 269                 tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
 270                                                                                 + sizeof(SortSupportData));
 271
 272                 /* compute frequencies for values in each column */
 273                 nfreqs = (int *) palloc0(sizeof(int) * numattrs);
 274                 freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
 275
 276                 /*
 277                  * Allocate the MCV list structure, set the global parameters.
 278                  */
 279                 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
 280                                                                           sizeof(MCVItem) * nitems);
 281
 282                 mcvlist->magic = STATS_MCV_MAGIC;
 283                 mcvlist->type = STATS_MCV_TYPE_BASIC;
 284                 mcvlist->ndimensions = numattrs;
 285                 mcvlist->nitems = nitems;
 286
 287                 /* store info about data type OIDs */
 288                 for (i = 0; i < numattrs; i++)
 289                         mcvlist->types[i] = data->stats[i]->attrtypid;
 290
 291                 /* Copy the first chunk of groups into the result. */
 292                 for (i = 0; i < nitems; i++)
 293                 {
 294                         /* just point to the proper place in the list */
 295                         MCVItem    *item = &mcvlist->items[i];
 296
 297                         item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
 298                         item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
 299
 300                         /* copy values for the group */
 301                         memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
 302                         memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
 303
 304                         /* groups should be sorted by frequency in descending order */
 305                         Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
 306
 307                         /* group frequency */
 308                         item->frequency = (double) groups[i].count / numrows;
 309
 310                         /* base frequency, if the attributes were independent */
 311                         item->base_frequency = 1.0;
 312                         for (j = 0; j < numattrs; j++)
 313                         {
 314                                 SortItem   *freq;
 315
 316                                 /* single dimension */
 317                                 tmp->ndims = 1;
 318                                 tmp->ssup[0] = mss->ssup[j];
 319
 320                                 /* fill search key */
 321                                 key.values = &groups[i].values[j];
 322                                 key.isnull = &groups[i].isnull[j];
 323
 324                                 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
 325                                                                                                 sizeof(SortItem),
 326                                                                                                 multi_sort_compare, tmp);
 327
 328                                 item->base_frequency *= ((double) freq->count) / numrows;
 329                         }
 330                 }
 331
 332                 pfree(nfreqs);
 333                 pfree(freqs);
 334         }
 335
 336         pfree(items);
 337         pfree(groups);
 338
 339         return mcvlist;
 340 }
 341
 342 /*
 343  * build_mss
 344  *              Build a MultiSortSupport for the given StatsBuildData.
 345  */
 346 static MultiSortSupport
 347 build_mss(StatsBuildData *data)
 348 {
 349         int                     i;
 350         int                     numattrs = data->nattnums;
 351
 352         /* Sort by multiple columns (using array of SortSupport) */
 353         MultiSortSupport mss = multi_sort_init(numattrs);
 354
 355         /* prepare the sort functions for all the attributes */
 356         for (i = 0; i < numattrs; i++)
 357         {
 358                 VacAttrStats *colstat = data->stats[i];
 359                 TypeCacheEntry *type;
 360
 361                 type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
 362                 if (type->lt_opr == InvalidOid) /* shouldn't happen */
 363                         elog(ERROR, "cache lookup failed for ordering operator for type %u",
 364                                  colstat->attrtypid);
 365
 366                 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
 367         }
 368
 369         return mss;
 370 }
 371
 372 /*
 373  * count_distinct_groups
 374  *              Count distinct combinations of SortItems in the array.
 375  *
 376  * The array is assumed to be sorted according to the MultiSortSupport.
 377  */
 378 static int
 379 count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
 380 {
 381         int                     i;
 382         int                     ndistinct;
 383
 384         ndistinct = 1;
 385         for (i = 1; i < numrows; i++)
 386         {
 387                 /* make sure the array really is sorted */
 388                 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
 389
 390                 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
 391                         ndistinct += 1;
 392         }
 393
 394         return ndistinct;
 395 }
 396
 397 /*
 398  * compare_sort_item_count
 399  *              Comparator for sorting items by count (frequencies) in descending
 400  *              order.
 401  */
 402 static int
 403 compare_sort_item_count(const void *a, const void *b, void *arg)
 404 {
 405         SortItem   *ia = (SortItem *) a;
 406         SortItem   *ib = (SortItem *) b;
 407
 408         if (ia->count == ib->count)
 409                 return 0;
 410         else if (ia->count > ib->count)
 411                 return -1;
 412
 413         return 1;
 414 }
 415
 416 /*
 417  * build_distinct_groups
 418  *              Build an array of SortItems for distinct groups and counts matching
 419  *              items.
 420  *
 421  * The 'items' array is assumed to be sorted.
 422  */
 423 static SortItem *
 424 build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
 425                                           int *ndistinct)
 426 {
 427         int                     i,
 428                                 j;
 429         int                     ngroups = count_distinct_groups(numrows, items, mss);
 430
 431         SortItem   *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
 432
 433         j = 0;
 434         groups[0] = items[0];
 435         groups[0].count = 1;
 436
 437         for (i = 1; i < numrows; i++)
 438         {
 439                 /* Assume sorted in ascending order. */
 440                 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
 441
 442                 /* New distinct group detected. */
 443                 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
 444                 {
 445                         groups[++j] = items[i];
 446                         groups[j].count = 0;
 447                 }
 448
 449                 groups[j].count++;
 450         }
 451
 452         /* ensure we filled the expected number of distinct groups */
 453         Assert(j + 1 == ngroups);
 454
 455         /* Sort the distinct groups by frequency (in descending order). */
 456         qsort_interruptible(groups, ngroups, sizeof(SortItem),
 457                                                 compare_sort_item_count, NULL);
 458
 459         *ndistinct = ngroups;
 460         return groups;
 461 }
 462
 463 /* compare sort items (single dimension) */
 464 static int
 465 sort_item_compare(const void *a, const void *b, void *arg)
 466 {
 467         SortSupport ssup = (SortSupport) arg;
 468         SortItem   *ia = (SortItem *) a;
 469         SortItem   *ib = (SortItem *) b;
 470
 471         return ApplySortComparator(ia->values[0], ia->isnull[0],
 472                                                            ib->values[0], ib->isnull[0],
 473                                                            ssup);
 474 }
 475
 476 /*
 477  * build_column_frequencies
 478  *              Compute frequencies of values in each column.
 479  *
 480  * This returns an array of SortItems for each attribute the MCV is built
 481  * on, with a frequency (number of occurrences) for each value. This is
 482  * then used to compute "base" frequency of MCV items.
 483  *
 484  * All the memory is allocated in a single chunk, so that a single pfree
 485  * is enough to release it. We do not allocate space for values/isnull
 486  * arrays in the SortItems, because we can simply point into the input
 487  * groups directly.
 488  */
 489 static SortItem **
 490 build_column_frequencies(SortItem *groups, int ngroups,
 491                                                  MultiSortSupport mss, int *ncounts)
 492 {
 493         int                     i,
 494                                 dim;
 495         SortItem  **result;
 496         char       *ptr;
 497
 498         Assert(groups);
 499         Assert(ncounts);
 500
 501         /* allocate arrays for all columns as a single chunk */
 502         ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
 503                                  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
 504
 505         /* initial array of pointers */
 506         result = (SortItem **) ptr;
 507         ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
 508
 509         for (dim = 0; dim < mss->ndims; dim++)
 510         {
 511                 SortSupport ssup = &mss->ssup[dim];
 512
 513                 /* array of values for a single column */
 514                 result[dim] = (SortItem *) ptr;
 515                 ptr += MAXALIGN(sizeof(SortItem) * ngroups);
 516
 517                 /* extract data for the dimension */
 518                 for (i = 0; i < ngroups; i++)
 519                 {
 520                         /* point into the input groups */
 521                         result[dim][i].values = &groups[i].values[dim];
 522                         result[dim][i].isnull = &groups[i].isnull[dim];
 523                         result[dim][i].count = groups[i].count;
 524                 }
 525
 526                 /* sort the values, deduplicate */
 527                 qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
 528                                                         sort_item_compare, ssup);
 529
 530                 /*
 531                  * Identify distinct values, compute frequency (there might be
 532                  * multiple MCV items containing this value, so we need to sum counts
 533                  * from all of them.
 534                  */
 535                 ncounts[dim] = 1;
 536                 for (i = 1; i < ngroups; i++)
 537                 {
 538                         if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
 539                         {
 540                                 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
 541                                 continue;
 542                         }
 543
 544                         result[dim][ncounts[dim]] = result[dim][i];
 545
 546                         ncounts[dim]++;
 547                 }
 548         }
 549
 550         return result;
 551 }
 552
 553 /*
 554  * statext_mcv_load
 555  *              Load the MCV list for the indicated pg_statistic_ext_data tuple.
 556  */
 557 MCVList *
 558 statext_mcv_load(Oid mvoid, bool inh)
 559 {
 560         MCVList    *result;
 561         bool            isnull;
 562         Datum           mcvlist;
 563         HeapTuple       htup = SearchSysCache2(STATEXTDATASTXOID,
 564                                                                            ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
 565
 566         if (!HeapTupleIsValid(htup))
 567                 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
 568
 569         mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
 570                                                           Anum_pg_statistic_ext_data_stxdmcv, &isnull);
 571
 572         if (isnull)
 573                 elog(ERROR,
 574                          "requested statistics kind \"%c\" is not yet built for statistics object %u",
 575                          STATS_EXT_MCV, mvoid);
 576
 577         result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
 578
 579         ReleaseSysCache(htup);
 580
 581         return result;
 582 }
 583
 584
 585 /*
 586  * statext_mcv_serialize
 587  *              Serialize MCV list into a pg_mcv_list value.
 588  *
 589  * The MCV items may include values of various data types, and it's reasonable
 590  * to expect redundancy (values for a given attribute, repeated for multiple
 591  * MCV list items). So we deduplicate the values into arrays, and then replace
 592  * the values by indexes into those arrays.
 593  *
 594  * The overall structure of the serialized representation looks like this:
 595  *
 596  * +---------------+----------------+---------------------+-------+
 597  * | header fields | dimension info | deduplicated values | items |
 598  * +---------------+----------------+---------------------+-------+
 599  *
 600  * Where dimension info stores information about the type of the K-th
 601  * attribute (e.g. typlen, typbyval and length of deduplicated values).
 602  * Deduplicated values store deduplicated values for each attribute.  And
 603  * items store the actual MCV list items, with values replaced by indexes into
 604  * the arrays.
 605  *
 606  * When serializing the items, we use uint16 indexes. The number of MCV items
 607  * is limited by the statistics target (which is capped to 10k at the moment).
 608  * We might increase this to 65k and still fit into uint16, so there's a bit of
 609  * slack. Furthermore, this limit is on the number of distinct values per column,
 610  * and we usually have few of those (and various combinations of them for the
 611  * those MCV list). So uint16 seems fine for now.
 612  *
 613  * We don't really expect the serialization to save as much space as for
 614  * histograms, as we are not doing any bucket splits (which is the source
 615  * of high redundancy in histograms).
 616  *
 617  * TODO: Consider packing boolean flags (NULL) for each item into a single char
 618  * (or a longer type) instead of using an array of bool items.
 619  */
 620 bytea *
 621 statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
 622 {
 623         int                     i;
 624         int                     dim;
 625         int                     ndims = mcvlist->ndimensions;
 626
 627         SortSupport ssup;
 628         DimensionInfo *info;
 629
 630         Size            total_length;
 631
 632         /* serialized items (indexes into arrays, etc.) */
 633         bytea      *raw;
 634         char       *ptr;
 635         char       *endptr PG_USED_FOR_ASSERTS_ONLY;
 636
 637         /* values per dimension (and number of non-NULL values) */
 638         Datum     **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
 639         int                *counts = (int *) palloc0(sizeof(int) * ndims);
 640
 641         /*
 642          * We'll include some rudimentary information about the attribute types
 643          * (length, by-val flag), so that we don't have to look them up while
 644          * deserializing the MCV list (we already have the type OID in the
 645          * header).  This is safe because when changing the type of the attribute
 646          * the statistics gets dropped automatically.  We need to store the info
 647          * about the arrays of deduplicated values anyway.
 648          */
 649         info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
 650
 651         /* sort support data for all attributes included in the MCV list */
 652         ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
 653
 654         /* collect and deduplicate values for each dimension (attribute) */
 655         for (dim = 0; dim < ndims; dim++)
 656         {
 657                 int                     ndistinct;
 658                 TypeCacheEntry *typentry;
 659
 660                 /*
 661                  * Lookup the LT operator (can't get it from stats extra_data, as we
 662                  * don't know how to interpret that - scalar vs. array etc.).
 663                  */
 664                 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
 665
 666                 /* copy important info about the data type (length, by-value) */
 667                 info[dim].typlen = stats[dim]->attrtype->typlen;
 668                 info[dim].typbyval = stats[dim]->attrtype->typbyval;
 669
 670                 /* allocate space for values in the attribute and collect them */
 671                 values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
 672
 673                 for (i = 0; i < mcvlist->nitems; i++)
 674                 {
 675                         /* skip NULL values - we don't need to deduplicate those */
 676                         if (mcvlist->items[i].isnull[dim])
 677                                 continue;
 678
 679                         /* append the value at the end */
 680                         values[dim][counts[dim]] = mcvlist->items[i].values[dim];
 681                         counts[dim] += 1;
 682                 }
 683
 684                 /* if there are just NULL values in this dimension, we're done */
 685                 if (counts[dim] == 0)
 686                         continue;
 687
 688                 /* sort and deduplicate the data */
 689                 ssup[dim].ssup_cxt = CurrentMemoryContext;
 690                 ssup[dim].ssup_collation = stats[dim]->attrcollid;
 691                 ssup[dim].ssup_nulls_first = false;
 692
 693                 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
 694
 695                 qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
 696                                                         compare_scalars_simple, &ssup[dim]);
 697
 698                 /*
 699                  * Walk through the array and eliminate duplicate values, but keep the
 700                  * ordering (so that we can do a binary search later). We know there's
 701                  * at least one item as (counts[dim] != 0), so we can skip the first
 702                  * element.
 703                  */
 704                 ndistinct = 1;                  /* number of distinct values */
 705                 for (i = 1; i < counts[dim]; i++)
 706                 {
 707                         /* expect sorted array */
 708                         Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
 709
 710                         /* if the value is the same as the previous one, we can skip it */
 711                         if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
 712                                 continue;
 713
 714                         values[dim][ndistinct] = values[dim][i];
 715                         ndistinct += 1;
 716                 }
 717
 718                 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
 719                 Assert(ndistinct <= PG_UINT16_MAX);
 720
 721                 /*
 722                  * Store additional info about the attribute - number of deduplicated
 723                  * values, and also size of the serialized data. For fixed-length data
 724                  * types this is trivial to compute, for varwidth types we need to
 725                  * actually walk the array and sum the sizes.
 726                  */
 727                 info[dim].nvalues = ndistinct;
 728
 729                 if (info[dim].typbyval) /* by-value data types */
 730                 {
 731                         info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
 732
 733                         /*
 734                          * We copy the data into the MCV item during deserialization, so
 735                          * we don't need to allocate any extra space.
 736                          */
 737                         info[dim].nbytes_aligned = 0;
 738                 }
 739                 else if (info[dim].typlen > 0)  /* fixed-length by-ref */
 740                 {
 741                         /*
 742                          * We don't care about alignment in the serialized data, so we
 743                          * pack the data as much as possible. But we also track how much
 744                          * data will be needed after deserialization, and in that case we
 745                          * need to account for alignment of each item.
 746                          *
 747                          * Note: As the items are fixed-length, we could easily compute
 748                          * this during deserialization, but we do it here anyway.
 749                          */
 750                         info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
 751                         info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
 752                 }
 753                 else if (info[dim].typlen == -1)        /* varlena */
 754                 {
 755                         info[dim].nbytes = 0;
 756                         info[dim].nbytes_aligned = 0;
 757                         for (i = 0; i < info[dim].nvalues; i++)
 758                         {
 759                                 Size            len;
 760
 761                                 /*
 762                                  * For varlena values, we detoast the values and store the
 763                                  * length and data separately. We don't bother with alignment
 764                                  * here, which means that during deserialization we need to
 765                                  * copy the fields and only access the copies.
 766                                  */
 767                                 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
 768
 769                                 /* serialized length (uint32 length + data) */
 770                                 len = VARSIZE_ANY_EXHDR(values[dim][i]);
 771                                 info[dim].nbytes += sizeof(uint32); /* length */
 772                                 info[dim].nbytes += len;        /* value (no header) */
 773
 774                                 /*
 775                                  * During deserialization we'll build regular varlena values
 776                                  * with full headers, and we need to align them properly.
 777                                  */
 778                                 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
 779                         }
 780                 }
 781                 else if (info[dim].typlen == -2)        /* cstring */
 782                 {
 783                         info[dim].nbytes = 0;
 784                         info[dim].nbytes_aligned = 0;
 785                         for (i = 0; i < info[dim].nvalues; i++)
 786                         {
 787                                 Size            len;
 788
 789                                 /*
 790                                  * cstring is handled similar to varlena - first we store the
 791                                  * length as uint32 and then the data. We don't care about
 792                                  * alignment, which means that during deserialization we need
 793                                  * to copy the fields and only access the copies.
 794                                  */
 795
 796                                 /* c-strings include terminator, so +1 byte */
 797                                 len = strlen(DatumGetCString(values[dim][i])) + 1;
 798                                 info[dim].nbytes += sizeof(uint32); /* length */
 799                                 info[dim].nbytes += len;        /* value */
 800
 801                                 /* space needed for properly aligned deserialized copies */
 802                                 info[dim].nbytes_aligned += MAXALIGN(len);
 803                         }
 804                 }
 805
 806                 /* we know (count>0) so there must be some data */
 807                 Assert(info[dim].nbytes > 0);
 808         }
 809
 810         /*
 811          * Now we can finally compute how much space we'll actually need for the
 812          * whole serialized MCV list (varlena header, MCV header, dimension info
 813          * for each attribute, deduplicated values and items).
 814          */
 815         total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
 816                 + sizeof(AttrNumber)    /* ndimensions */
 817                 + (ndims * sizeof(Oid));        /* attribute types */
 818
 819         /* dimension info */
 820         total_length += ndims * sizeof(DimensionInfo);
 821
 822         /* add space for the arrays of deduplicated values */
 823         for (i = 0; i < ndims; i++)
 824                 total_length += info[i].nbytes;
 825
 826         /*
 827          * And finally account for the items (those are fixed-length, thanks to
 828          * replacing values with uint16 indexes into the deduplicated arrays).
 829          */
 830         total_length += mcvlist->nitems * ITEM_SIZE(dim);
 831
 832         /*
 833          * Allocate space for the whole serialized MCV list (we'll skip bytes, so
 834          * we set them to zero to make the result more compressible).
 835          */
 836         raw = (bytea *) palloc0(VARHDRSZ + total_length);
 837         SET_VARSIZE(raw, VARHDRSZ + total_length);
 838
 839         ptr = VARDATA(raw);
 840         endptr = ptr + total_length;
 841
 842         /* copy the MCV list header fields, one by one */
 843         memcpy(ptr, &mcvlist->magic, sizeof(uint32));
 844         ptr += sizeof(uint32);
 845
 846         memcpy(ptr, &mcvlist->type, sizeof(uint32));
 847         ptr += sizeof(uint32);
 848
 849         memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
 850         ptr += sizeof(uint32);
 851
 852         memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
 853         ptr += sizeof(AttrNumber);
 854
 855         memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
 856         ptr += (sizeof(Oid) * ndims);
 857
 858         /* store information about the attributes (data amounts, ...) */
 859         memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
 860         ptr += sizeof(DimensionInfo) * ndims;
 861
 862         /* Copy the deduplicated values for all attributes to the output. */
 863         for (dim = 0; dim < ndims; dim++)
 864         {
 865                 /* remember the starting point for Asserts later */
 866                 char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
 867
 868                 for (i = 0; i < info[dim].nvalues; i++)
 869                 {
 870                         Datum           value = values[dim][i];
 871
 872                         if (info[dim].typbyval) /* passed by value */
 873                         {
 874                                 Datum           tmp;
 875
 876                                 /*
 877                                  * For byval types, we need to copy just the significant bytes
 878                                  * - we can't use memcpy directly, as that assumes
 879                                  * little-endian behavior.  store_att_byval does almost what
 880                                  * we need, but it requires a properly aligned buffer - the
 881                                  * output buffer does not guarantee that. So we simply use a
 882                                  * local Datum variable (which guarantees proper alignment),
 883                                  * and then copy the value from it.
 884                                  */
 885                                 store_att_byval(&tmp, value, info[dim].typlen);
 886
 887                                 memcpy(ptr, &tmp, info[dim].typlen);
 888                                 ptr += info[dim].typlen;
 889                         }
 890                         else if (info[dim].typlen > 0)  /* passed by reference */
 891                         {
 892                                 /* no special alignment needed, treated as char array */
 893                                 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
 894                                 ptr += info[dim].typlen;
 895                         }
 896                         else if (info[dim].typlen == -1)        /* varlena */
 897                         {
 898                                 uint32          len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
 899
 900                                 /* copy the length */
 901                                 memcpy(ptr, &len, sizeof(uint32));
 902                                 ptr += sizeof(uint32);
 903
 904                                 /* data from the varlena value (without the header) */
 905                                 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
 906                                 ptr += len;
 907                         }
 908                         else if (info[dim].typlen == -2)        /* cstring */
 909                         {
 910                                 uint32          len = (uint32) strlen(DatumGetCString(value)) + 1;
 911
 912                                 /* copy the length */
 913                                 memcpy(ptr, &len, sizeof(uint32));
 914                                 ptr += sizeof(uint32);
 915
 916                                 /* value */
 917                                 memcpy(ptr, DatumGetCString(value), len);
 918                                 ptr += len;
 919                         }
 920
 921                         /* no underflows or overflows */
 922                         Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
 923                 }
 924
 925                 /* we should get exactly nbytes of data for this dimension */
 926                 Assert((ptr - start) == info[dim].nbytes);
 927         }
 928
 929         /* Serialize the items, with uint16 indexes instead of the values. */
 930         for (i = 0; i < mcvlist->nitems; i++)
 931         {
 932                 MCVItem    *mcvitem = &mcvlist->items[i];
 933
 934                 /* don't write beyond the allocated space */
 935                 Assert(ptr <= (endptr - ITEM_SIZE(dim)));
 936
 937                 /* copy NULL and frequency flags into the serialized MCV */
 938                 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
 939                 ptr += sizeof(bool) * ndims;
 940
 941                 memcpy(ptr, &mcvitem->frequency, sizeof(double));
 942                 ptr += sizeof(double);
 943
 944                 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
 945                 ptr += sizeof(double);
 946
 947                 /* store the indexes last */
 948                 for (dim = 0; dim < ndims; dim++)
 949                 {
 950                         uint16          index = 0;
 951                         Datum      *value;
 952
 953                         /* do the lookup only for non-NULL values */
 954                         if (!mcvitem->isnull[dim])
 955                         {
 956                                 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
 957                                                                                           info[dim].nvalues, sizeof(Datum),
 958                                                                                           compare_scalars_simple, &ssup[dim]);
 959
 960                                 Assert(value != NULL);  /* serialization or deduplication
 961                                                                                  * error */
 962
 963                                 /* compute index within the deduplicated array */
 964                                 index = (uint16) (value - values[dim]);
 965
 966                                 /* check the index is within expected bounds */
 967                                 Assert(index < info[dim].nvalues);
 968                         }
 969
 970                         /* copy the index into the serialized MCV */
 971                         memcpy(ptr, &index, sizeof(uint16));
 972                         ptr += sizeof(uint16);
 973                 }
 974
 975                 /* make sure we don't overflow the allocated value */
 976                 Assert(ptr <= endptr);
 977         }
 978
 979         /* at this point we expect to match the total_length exactly */
 980         Assert(ptr == endptr);
 981
 982         pfree(values);
 983         pfree(counts);
 984
 985         return raw;
 986 }
 987
 988 /*
 989  * statext_mcv_deserialize
 990  *              Reads serialized MCV list into MCVList structure.
 991  *
 992  * All the memory needed by the MCV list is allocated as a single chunk, so
 993  * it's possible to simply pfree() it at once.
 994  */
 995 MCVList *
 996 statext_mcv_deserialize(bytea *data)
 997 {
 998         int                     dim,
 999                                 i;
1000         Size            expected_size;
1001         MCVList    *mcvlist;
1002         char       *raw;
1003         char       *ptr;
1004         char       *endptr PG_USED_FOR_ASSERTS_ONLY;
1005
1006         int                     ndims,
1007                                 nitems;
1008         DimensionInfo *info = NULL;
1009
1010         /* local allocation buffer (used only for deserialization) */
1011         Datum     **map = NULL;
1012
1013         /* MCV list */
1014         Size            mcvlen;
1015
1016         /* buffer used for the result */
1017         Size            datalen;
1018         char       *dataptr;
1019         char       *valuesptr;
1020         char       *isnullptr;
1021
1022         if (data == NULL)
1023                 return NULL;
1024
1025         /*
1026          * We can't possibly deserialize a MCV list if there's not even a complete
1027          * header. We need an explicit formula here, because we serialize the
1028          * header fields one by one, so we need to ignore struct alignment.
1029          */
1030         if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1031                 elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1032                          VARSIZE_ANY(data), MinSizeOfMCVList);
1033
1034         /* read the MCV list header */
1035         mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1036
1037         /* pointer to the data part (skip the varlena header) */
1038         raw = (char *) data;
1039         ptr = VARDATA_ANY(raw);
1040         endptr = (char *) raw + VARSIZE_ANY(data);
1041
1042         /* get the header and perform further sanity checks */
1043         memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1044         ptr += sizeof(uint32);
1045
1046         memcpy(&mcvlist->type, ptr, sizeof(uint32));
1047         ptr += sizeof(uint32);
1048
1049         memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1050         ptr += sizeof(uint32);
1051
1052         memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1053         ptr += sizeof(AttrNumber);
1054
1055         if (mcvlist->magic != STATS_MCV_MAGIC)
1056                 elog(ERROR, "invalid MCV magic %u (expected %u)",
1057                          mcvlist->magic, STATS_MCV_MAGIC);
1058
1059         if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1060                 elog(ERROR, "invalid MCV type %u (expected %u)",
1061                          mcvlist->type, STATS_MCV_TYPE_BASIC);
1062
1063         if (mcvlist->ndimensions == 0)
1064                 elog(ERROR, "invalid zero-length dimension array in MCVList");
1065         else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1066                          (mcvlist->ndimensions < 0))
1067                 elog(ERROR, "invalid length (%d) dimension array in MCVList",
1068                          mcvlist->ndimensions);
1069
1070         if (mcvlist->nitems == 0)
1071                 elog(ERROR, "invalid zero-length item array in MCVList");
1072         else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1073                 elog(ERROR, "invalid length (%u) item array in MCVList",
1074                          mcvlist->nitems);
1075
1076         nitems = mcvlist->nitems;
1077         ndims = mcvlist->ndimensions;
1078
1079         /*
1080          * Check amount of data including DimensionInfo for all dimensions and
1081          * also the serialized items (including uint16 indexes). Also, walk
1082          * through the dimension information and add it to the sum.
1083          */
1084         expected_size = SizeOfMCVList(ndims, nitems);
1085
1086         /*
1087          * Check that we have at least the dimension and info records, along with
1088          * the items. We don't know the size of the serialized values yet. We need
1089          * to do this check first, before accessing the dimension info.
1090          */
1091         if (VARSIZE_ANY(data) < expected_size)
1092                 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1093                          VARSIZE_ANY(data), expected_size);
1094
1095         /* Now copy the array of type Oids. */
1096         memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1097         ptr += (sizeof(Oid) * ndims);
1098
1099         /* Now it's safe to access the dimension info. */
1100         info = palloc(ndims * sizeof(DimensionInfo));
1101
1102         memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1103         ptr += (ndims * sizeof(DimensionInfo));
1104
1105         /* account for the value arrays */
1106         for (dim = 0; dim < ndims; dim++)
1107         {
1108                 /*
1109                  * XXX I wonder if we can/should rely on asserts here. Maybe those
1110                  * checks should be done every time?
1111                  */
1112                 Assert(info[dim].nvalues >= 0);
1113                 Assert(info[dim].nbytes >= 0);
1114
1115                 expected_size += info[dim].nbytes;
1116         }
1117
1118         /*
1119          * Now we know the total expected MCV size, including all the pieces
1120          * (header, dimension info. items and deduplicated data). So do the final
1121          * check on size.
1122          */
1123         if (VARSIZE_ANY(data) != expected_size)
1124                 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1125                          VARSIZE_ANY(data), expected_size);
1126
1127         /*
1128          * We need an array of Datum values for each dimension, so that we can
1129          * easily translate the uint16 indexes later. We also need a top-level
1130          * array of pointers to those per-dimension arrays.
1131          *
1132          * While allocating the arrays for dimensions, compute how much space we
1133          * need for a copy of the by-ref data, as we can't simply point to the
1134          * original values (it might go away).
1135          */
1136         datalen = 0;                            /* space for by-ref data */
1137         map = (Datum **) palloc(ndims * sizeof(Datum *));
1138
1139         for (dim = 0; dim < ndims; dim++)
1140         {
1141                 map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1142
1143                 /* space needed for a copy of data for by-ref types */
1144                 datalen += info[dim].nbytes_aligned;
1145         }
1146
1147         /*
1148          * Now resize the MCV list so that the allocation includes all the data.
1149          *
1150          * Allocate space for a copy of the data, as we can't simply reference the
1151          * serialized data - it's not aligned properly, and it may disappear while
1152          * we're still using the MCV list, e.g. due to catcache release.
1153          *
1154          * We do care about alignment here, because we will allocate all the
1155          * pieces at once, but then use pointers to different parts.
1156          */
1157         mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1158
1159         /* arrays of values and isnull flags for all MCV items */
1160         mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1161         mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1162
1163         /* we don't quite need to align this, but it makes some asserts easier */
1164         mcvlen += MAXALIGN(datalen);
1165
1166         /* now resize the deserialized MCV list, and compute pointers to parts */
1167         mcvlist = repalloc(mcvlist, mcvlen);
1168
1169         /* pointer to the beginning of values/isnull arrays */
1170         valuesptr = (char *) mcvlist
1171                 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1172
1173         isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1174
1175         dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1176
1177         /*
1178          * Build mapping (index => value) for translating the serialized data into
1179          * the in-memory representation.
1180          */
1181         for (dim = 0; dim < ndims; dim++)
1182         {
1183                 /* remember start position in the input array */
1184                 char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1185
1186                 if (info[dim].typbyval)
1187                 {
1188                         /* for by-val types we simply copy data into the mapping */
1189                         for (i = 0; i < info[dim].nvalues; i++)
1190                         {
1191                                 Datum           v = 0;
1192
1193                                 memcpy(&v, ptr, info[dim].typlen);
1194                                 ptr += info[dim].typlen;
1195
1196                                 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1197
1198                                 /* no under/overflow of input array */
1199                                 Assert(ptr <= (start + info[dim].nbytes));
1200                         }
1201                 }
1202                 else
1203                 {
1204                         /* for by-ref types we need to also make a copy of the data */
1205
1206                         /* passed by reference, but fixed length (name, tid, ...) */
1207                         if (info[dim].typlen > 0)
1208                         {
1209                                 for (i = 0; i < info[dim].nvalues; i++)
1210                                 {
1211                                         memcpy(dataptr, ptr, info[dim].typlen);
1212                                         ptr += info[dim].typlen;
1213
1214                                         /* just point into the array */
1215                                         map[dim][i] = PointerGetDatum(dataptr);
1216                                         dataptr += MAXALIGN(info[dim].typlen);
1217                                 }
1218                         }
1219                         else if (info[dim].typlen == -1)
1220                         {
1221                                 /* varlena */
1222                                 for (i = 0; i < info[dim].nvalues; i++)
1223                                 {
1224                                         uint32          len;
1225
1226                                         /* read the uint32 length */
1227                                         memcpy(&len, ptr, sizeof(uint32));
1228                                         ptr += sizeof(uint32);
1229
1230                                         /* the length is data-only */
1231                                         SET_VARSIZE(dataptr, len + VARHDRSZ);
1232                                         memcpy(VARDATA(dataptr), ptr, len);
1233                                         ptr += len;
1234
1235                                         /* just point into the array */
1236                                         map[dim][i] = PointerGetDatum(dataptr);
1237
1238                                         /* skip to place of the next deserialized value */
1239                                         dataptr += MAXALIGN(len + VARHDRSZ);
1240                                 }
1241                         }
1242                         else if (info[dim].typlen == -2)
1243                         {
1244                                 /* cstring */
1245                                 for (i = 0; i < info[dim].nvalues; i++)
1246                                 {
1247                                         uint32          len;
1248
1249                                         memcpy(&len, ptr, sizeof(uint32));
1250                                         ptr += sizeof(uint32);
1251
1252                                         memcpy(dataptr, ptr, len);
1253                                         ptr += len;
1254
1255                                         /* just point into the array */
1256                                         map[dim][i] = PointerGetDatum(dataptr);
1257                                         dataptr += MAXALIGN(len);
1258                                 }
1259                         }
1260
1261                         /* no under/overflow of input array */
1262                         Assert(ptr <= (start + info[dim].nbytes));
1263
1264                         /* no overflow of the output mcv value */
1265                         Assert(dataptr <= ((char *) mcvlist + mcvlen));
1266                 }
1267
1268                 /* check we consumed input data for this dimension exactly */
1269                 Assert(ptr == (start + info[dim].nbytes));
1270         }
1271
1272         /* we should have also filled the MCV list exactly */
1273         Assert(dataptr == ((char *) mcvlist + mcvlen));
1274
1275         /* deserialize the MCV items and translate the indexes to Datums */
1276         for (i = 0; i < nitems; i++)
1277         {
1278                 MCVItem    *item = &mcvlist->items[i];
1279
1280                 item->values = (Datum *) valuesptr;
1281                 valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1282
1283                 item->isnull = (bool *) isnullptr;
1284                 isnullptr += MAXALIGN(sizeof(bool) * ndims);
1285
1286                 memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1287                 ptr += sizeof(bool) * ndims;
1288
1289                 memcpy(&item->frequency, ptr, sizeof(double));
1290                 ptr += sizeof(double);
1291
1292                 memcpy(&item->base_frequency, ptr, sizeof(double));
1293                 ptr += sizeof(double);
1294
1295                 /* finally translate the indexes (for non-NULL only) */
1296                 for (dim = 0; dim < ndims; dim++)
1297                 {
1298                         uint16          index;
1299
1300                         memcpy(&index, ptr, sizeof(uint16));
1301                         ptr += sizeof(uint16);
1302
1303                         if (item->isnull[dim])
1304                                 continue;
1305
1306                         item->values[dim] = map[dim][index];
1307                 }
1308
1309                 /* check we're not overflowing the input */
1310                 Assert(ptr <= endptr);
1311         }
1312
1313         /* check that we processed all the data */
1314         Assert(ptr == endptr);
1315
1316         /* release the buffers used for mapping */
1317         for (dim = 0; dim < ndims; dim++)
1318                 pfree(map[dim]);
1319
1320         pfree(map);
1321
1322         return mcvlist;
1323 }
1324
1325 /*
1326  * SRF with details about buckets of a histogram:
1327  *
1328  * - item ID (0...nitems)
1329  * - values (string array)
1330  * - nulls only (boolean array)
1331  * - frequency (double precision)
1332  * - base_frequency (double precision)
1333  *
1334  * The input is the OID of the statistics, and there are no rows returned if
1335  * the statistics contains no histogram.
1336  */
1337 Datum
1338 pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
1339 {
1340         FuncCallContext *funcctx;
1341
1342         /* stuff done only on the first call of the function */
1343         if (SRF_IS_FIRSTCALL())
1344         {
1345                 MemoryContext oldcontext;
1346                 MCVList    *mcvlist;
1347                 TupleDesc       tupdesc;
1348
1349                 /* create a function context for cross-call persistence */
1350                 funcctx = SRF_FIRSTCALL_INIT();
1351
1352                 /* switch to memory context appropriate for multiple function calls */
1353                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1354
1355                 mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
1356
1357                 funcctx->user_fctx = mcvlist;
1358
1359                 /* total number of tuples to be returned */
1360                 funcctx->max_calls = 0;
1361                 if (funcctx->user_fctx != NULL)
1362                         funcctx->max_calls = mcvlist->nitems;
1363
1364                 /* Build a tuple descriptor for our result type */
1365                 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1366                         ereport(ERROR,
1367                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1368                                          errmsg("function returning record called in context "
1369                                                         "that cannot accept type record")));
1370                 tupdesc = BlessTupleDesc(tupdesc);
1371
1372                 /*
1373                  * generate attribute metadata needed later to produce tuples from raw
1374                  * C strings
1375                  */
1376                 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1377
1378                 MemoryContextSwitchTo(oldcontext);
1379         }
1380
1381         /* stuff done on every call of the function */
1382         funcctx = SRF_PERCALL_SETUP();
1383
1384         if (funcctx->call_cntr < funcctx->max_calls)    /* do when there is more
1385                                                                                                          * left to send */
1386         {
1387                 Datum           values[5];
1388                 bool            nulls[5];
1389                 HeapTuple       tuple;
1390                 Datum           result;
1391                 ArrayBuildState *astate_values = NULL;
1392                 ArrayBuildState *astate_nulls = NULL;
1393
1394                 int                     i;
1395                 MCVList    *mcvlist;
1396                 MCVItem    *item;
1397
1398                 mcvlist = (MCVList *) funcctx->user_fctx;
1399
1400                 Assert(funcctx->call_cntr < mcvlist->nitems);
1401
1402                 item = &mcvlist->items[funcctx->call_cntr];
1403
1404                 for (i = 0; i < mcvlist->ndimensions; i++)
1405                 {
1406
1407                         astate_nulls = accumArrayResult(astate_nulls,
1408                                                                                         BoolGetDatum(item->isnull[i]),
1409                                                                                         false,
1410                                                                                         BOOLOID,
1411                                                                                         CurrentMemoryContext);
1412
1413                         if (!item->isnull[i])
1414                         {
1415                                 bool            isvarlena;
1416                                 Oid                     outfunc;
1417                                 FmgrInfo        fmgrinfo;
1418                                 Datum           val;
1419                                 text       *txt;
1420
1421                                 /* lookup output func for the type */
1422                                 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1423                                 fmgr_info(outfunc, &fmgrinfo);
1424
1425                                 val = FunctionCall1(&fmgrinfo, item->values[i]);
1426                                 txt = cstring_to_text(DatumGetPointer(val));
1427
1428                                 astate_values = accumArrayResult(astate_values,
1429                                                                                                  PointerGetDatum(txt),
1430                                                                                                  false,
1431                                                                                                  TEXTOID,
1432                                                                                                  CurrentMemoryContext);
1433                         }
1434                         else
1435                                 astate_values = accumArrayResult(astate_values,
1436                                                                                                  (Datum) 0,
1437                                                                                                  true,
1438                                                                                                  TEXTOID,
1439                                                                                                  CurrentMemoryContext);
1440                 }
1441
1442                 values[0] = Int32GetDatum(funcctx->call_cntr);
1443                 values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
1444                 values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
1445                 values[3] = Float8GetDatum(item->frequency);
1446                 values[4] = Float8GetDatum(item->base_frequency);
1447
1448                 /* no NULLs in the tuple */
1449                 memset(nulls, 0, sizeof(nulls));
1450
1451                 /* build a tuple */
1452                 tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1453
1454                 /* make the tuple into a datum */
1455                 result = HeapTupleGetDatum(tuple);
1456
1457                 SRF_RETURN_NEXT(funcctx, result);
1458         }
1459         else                                            /* do when there is no more left */
1460         {
1461                 SRF_RETURN_DONE(funcctx);
1462         }
1463 }
1464
1465 /*
1466  * pg_mcv_list_in               - input routine for type pg_mcv_list.
1467  *
1468  * pg_mcv_list is real enough to be a table column, but it has no operations
1469  * of its own, and disallows input too
1470  */
1471 Datum
1472 pg_mcv_list_in(PG_FUNCTION_ARGS)
1473 {
1474         /*
1475          * pg_mcv_list stores the data in binary form and parsing text input is
1476          * not needed, so disallow this.
1477          */
1478         ereport(ERROR,
1479                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1480                          errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1481
1482         PG_RETURN_VOID();                       /* keep compiler quiet */
1483 }
1484
1485
1486 /*
1487  * pg_mcv_list_out              - output routine for type pg_mcv_list.
1488  *
1489  * MCV lists are serialized into a bytea value, so we simply call byteaout()
1490  * to serialize the value into text. But it'd be nice to serialize that into
1491  * a meaningful representation (e.g. for inspection by people).
1492  *
1493  * XXX This should probably return something meaningful, similar to what
1494  * pg_dependencies_out does. Not sure how to deal with the deduplicated
1495  * values, though - do we want to expand that or not?
1496  */
1497 Datum
1498 pg_mcv_list_out(PG_FUNCTION_ARGS)
1499 {
1500         return byteaout(fcinfo);
1501 }
1502
1503 /*
1504  * pg_mcv_list_recv             - binary input routine for type pg_mcv_list.
1505  */
1506 Datum
1507 pg_mcv_list_recv(PG_FUNCTION_ARGS)
1508 {
1509         ereport(ERROR,
1510                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1511                          errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1512
1513         PG_RETURN_VOID();                       /* keep compiler quiet */
1514 }
1515
1516 /*
1517  * pg_mcv_list_send             - binary output routine for type pg_mcv_list.
1518  *
1519  * MCV lists are serialized in a bytea value (although the type is named
1520  * differently), so let's just send that.
1521  */
1522 Datum
1523 pg_mcv_list_send(PG_FUNCTION_ARGS)
1524 {
1525         return byteasend(fcinfo);
1526 }
1527
1528 /*
1529  * match the attribute/expression to a dimension of the statistic
1530  *
1531  * Returns the zero-based index of the matching statistics dimension.
1532  * Optionally determines the collation.
1533  */
1534 static int
1535 mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
1536 {
1537         int                     idx;
1538
1539         if (IsA(expr, Var))
1540         {
1541                 /* simple Var, so just lookup using varattno */
1542                 Var                *var = (Var *) expr;
1543
1544                 if (collid)
1545                         *collid = var->varcollid;
1546
1547                 idx = bms_member_index(keys, var->varattno);
1548
1549                 if (idx < 0)
1550                         elog(ERROR, "variable not found in statistics object");
1551         }
1552         else
1553         {
1554                 /* expression - lookup in stats expressions */
1555                 ListCell   *lc;
1556
1557                 if (collid)
1558                         *collid = exprCollation(expr);
1559
1560                 /* expressions are stored after the simple columns */
1561                 idx = bms_num_members(keys);
1562                 foreach(lc, exprs)
1563                 {
1564                         Node       *stat_expr = (Node *) lfirst(lc);
1565
1566                         if (equal(expr, stat_expr))
1567                                 break;
1568
1569                         idx++;
1570                 }
1571
1572                 if (lc == NULL)
1573                         elog(ERROR, "expression not found in statistics object");
1574         }
1575
1576         return idx;
1577 }
1578
1579 /*
1580  * mcv_get_match_bitmap
1581  *      Evaluate clauses using the MCV list, and update the match bitmap.
1582  *
1583  * A match bitmap keeps match/mismatch status for each MCV item, and we
1584  * update it based on additional clauses. We also use it to skip items
1585  * that can't possibly match (e.g. item marked as "mismatch" can't change
1586  * to "match" when evaluating AND clause list).
1587  *
1588  * The function also returns a flag indicating whether there was an
1589  * equality condition for all attributes, the minimum frequency in the MCV
1590  * list, and a total MCV frequency (sum of frequencies for all items).
1591  *
1592  * XXX Currently the match bitmap uses a bool for each MCV item, which is
1593  * somewhat wasteful as we could do with just a single bit, thus reducing
1594  * the size to ~1/8. It would also allow us to combine bitmaps simply using
1595  * & and |, which should be faster than min/max. The bitmaps are fairly
1596  * small, though (thanks to the cap on the MCV list size).
1597  */
1598 static bool *
1599 mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
1600                                          Bitmapset *keys, List *exprs,
1601                                          MCVList *mcvlist, bool is_or)
1602 {
1603         ListCell   *l;
1604         bool       *matches;
1605
1606         /* The bitmap may be partially built. */
1607         Assert(clauses != NIL);
1608         Assert(mcvlist != NULL);
1609         Assert(mcvlist->nitems > 0);
1610         Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1611
1612         matches = palloc(sizeof(bool) * mcvlist->nitems);
1613         memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
1614
1615         /*
1616          * Loop through the list of clauses, and for each of them evaluate all the
1617          * MCV items not yet eliminated by the preceding clauses.
1618          */
1619         foreach(l, clauses)
1620         {
1621                 Node       *clause = (Node *) lfirst(l);
1622
1623                 /* if it's a RestrictInfo, then extract the clause */
1624                 if (IsA(clause, RestrictInfo))
1625                         clause = (Node *) ((RestrictInfo *) clause)->clause;
1626
1627                 /*
1628                  * Handle the various types of clauses - OpClause, NullTest and
1629                  * AND/OR/NOT
1630                  */
1631                 if (is_opclause(clause))
1632                 {
1633                         OpExpr     *expr = (OpExpr *) clause;
1634                         FmgrInfo        opproc;
1635
1636                         /* valid only after examine_opclause_args returns true */
1637                         Node       *clause_expr;
1638                         Const      *cst;
1639                         bool            expronleft;
1640                         int                     idx;
1641                         Oid                     collid;
1642
1643                         fmgr_info(get_opcode(expr->opno), &opproc);
1644
1645                         /* extract the var/expr and const from the expression */
1646                         if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1647                                 elog(ERROR, "incompatible clause");
1648
1649                         /* match the attribute/expression to a dimension of the statistic */
1650                         idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1651
1652                         /*
1653                          * Walk through the MCV items and evaluate the current clause. We
1654                          * can skip items that were already ruled out, and terminate if
1655                          * there are no remaining MCV items that might possibly match.
1656                          */
1657                         for (int i = 0; i < mcvlist->nitems; i++)
1658                         {
1659                                 bool            match = true;
1660                                 MCVItem    *item = &mcvlist->items[i];
1661
1662                                 Assert(idx >= 0);
1663
1664                                 /*
1665                                  * When the MCV item or the Const value is NULL we can treat
1666                                  * this as a mismatch. We must not call the operator because
1667                                  * of strictness.
1668                                  */
1669                                 if (item->isnull[idx] || cst->constisnull)
1670                                 {
1671                                         matches[i] = RESULT_MERGE(matches[i], is_or, false);
1672                                         continue;
1673                                 }
1674
1675                                 /*
1676                                  * Skip MCV items that can't change result in the bitmap. Once
1677                                  * the value gets false for AND-lists, or true for OR-lists,
1678                                  * we don't need to look at more clauses.
1679                                  */
1680                                 if (RESULT_IS_FINAL(matches[i], is_or))
1681                                         continue;
1682
1683                                 /*
1684                                  * First check whether the constant is below the lower
1685                                  * boundary (in that case we can skip the bucket, because
1686                                  * there's no overlap).
1687                                  *
1688                                  * We don't store collations used to build the statistics, but
1689                                  * we can use the collation for the attribute itself, as
1690                                  * stored in varcollid. We do reset the statistics after a
1691                                  * type change (including collation change), so this is OK.
1692                                  * For expressions, we use the collation extracted from the
1693                                  * expression itself.
1694                                  */
1695                                 if (expronleft)
1696                                         match = DatumGetBool(FunctionCall2Coll(&opproc,
1697                                                                                                                    collid,
1698                                                                                                                    item->values[idx],
1699                                                                                                                    cst->constvalue));
1700                                 else
1701                                         match = DatumGetBool(FunctionCall2Coll(&opproc,
1702                                                                                                                    collid,
1703                                                                                                                    cst->constvalue,
1704                                                                                                                    item->values[idx]));
1705
1706                                 /* update the match bitmap with the result */
1707                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1708                         }
1709                 }
1710                 else if (IsA(clause, ScalarArrayOpExpr))
1711                 {
1712                         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1713                         FmgrInfo        opproc;
1714
1715                         /* valid only after examine_opclause_args returns true */
1716                         Node       *clause_expr;
1717                         Const      *cst;
1718                         bool            expronleft;
1719                         Oid                     collid;
1720                         int                     idx;
1721
1722                         /* array evaluation */
1723                         ArrayType  *arrayval;
1724                         int16           elmlen;
1725                         bool            elmbyval;
1726                         char            elmalign;
1727                         int                     num_elems;
1728                         Datum      *elem_values;
1729                         bool       *elem_nulls;
1730
1731                         fmgr_info(get_opcode(expr->opno), &opproc);
1732
1733                         /* extract the var/expr and const from the expression */
1734                         if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1735                                 elog(ERROR, "incompatible clause");
1736
1737                         /* We expect Var on left */
1738                         if (!expronleft)
1739                                 elog(ERROR, "incompatible clause");
1740
1741                         /*
1742                          * Deconstruct the array constant, unless it's NULL (we'll cover
1743                          * that case below)
1744                          */
1745                         if (!cst->constisnull)
1746                         {
1747                                 arrayval = DatumGetArrayTypeP(cst->constvalue);
1748                                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1749                                                                          &elmlen, &elmbyval, &elmalign);
1750                                 deconstruct_array(arrayval,
1751                                                                   ARR_ELEMTYPE(arrayval),
1752                                                                   elmlen, elmbyval, elmalign,
1753                                                                   &elem_values, &elem_nulls, &num_elems);
1754                         }
1755
1756                         /* match the attribute/expression to a dimension of the statistic */
1757                         idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1758
1759                         /*
1760                          * Walk through the MCV items and evaluate the current clause. We
1761                          * can skip items that were already ruled out, and terminate if
1762                          * there are no remaining MCV items that might possibly match.
1763                          */
1764                         for (int i = 0; i < mcvlist->nitems; i++)
1765                         {
1766                                 int                     j;
1767                                 bool            match = !expr->useOr;
1768                                 MCVItem    *item = &mcvlist->items[i];
1769
1770                                 /*
1771                                  * When the MCV item or the Const value is NULL we can treat
1772                                  * this as a mismatch. We must not call the operator because
1773                                  * of strictness.
1774                                  */
1775                                 if (item->isnull[idx] || cst->constisnull)
1776                                 {
1777                                         matches[i] = RESULT_MERGE(matches[i], is_or, false);
1778                                         continue;
1779                                 }
1780
1781                                 /*
1782                                  * Skip MCV items that can't change result in the bitmap. Once
1783                                  * the value gets false for AND-lists, or true for OR-lists,
1784                                  * we don't need to look at more clauses.
1785                                  */
1786                                 if (RESULT_IS_FINAL(matches[i], is_or))
1787                                         continue;
1788
1789                                 for (j = 0; j < num_elems; j++)
1790                                 {
1791                                         Datum           elem_value = elem_values[j];
1792                                         bool            elem_isnull = elem_nulls[j];
1793                                         bool            elem_match;
1794
1795                                         /* NULL values always evaluate as not matching. */
1796                                         if (elem_isnull)
1797                                         {
1798                                                 match = RESULT_MERGE(match, expr->useOr, false);
1799                                                 continue;
1800                                         }
1801
1802                                         /*
1803                                          * Stop evaluating the array elements once we reach a
1804                                          * matching value that can't change - ALL() is the same as
1805                                          * AND-list, ANY() is the same as OR-list.
1806                                          */
1807                                         if (RESULT_IS_FINAL(match, expr->useOr))
1808                                                 break;
1809
1810                                         elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1811                                                                                                                                 collid,
1812                                                                                                                                 item->values[idx],
1813                                                                                                                                 elem_value));
1814
1815                                         match = RESULT_MERGE(match, expr->useOr, elem_match);
1816                                 }
1817
1818                                 /* update the match bitmap with the result */
1819                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1820                         }
1821                 }
1822                 else if (IsA(clause, NullTest))
1823                 {
1824                         NullTest   *expr = (NullTest *) clause;
1825                         Node       *clause_expr = (Node *) (expr->arg);
1826
1827                         /* match the attribute/expression to a dimension of the statistic */
1828                         int                     idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1829
1830                         /*
1831                          * Walk through the MCV items and evaluate the current clause. We
1832                          * can skip items that were already ruled out, and terminate if
1833                          * there are no remaining MCV items that might possibly match.
1834                          */
1835                         for (int i = 0; i < mcvlist->nitems; i++)
1836                         {
1837                                 bool            match = false;  /* assume mismatch */
1838                                 MCVItem    *item = &mcvlist->items[i];
1839
1840                                 /* if the clause mismatches the MCV item, update the bitmap */
1841                                 switch (expr->nulltesttype)
1842                                 {
1843                                         case IS_NULL:
1844                                                 match = (item->isnull[idx]) ? true : match;
1845                                                 break;
1846
1847                                         case IS_NOT_NULL:
1848                                                 match = (!item->isnull[idx]) ? true : match;
1849                                                 break;
1850                                 }
1851
1852                                 /* now, update the match bitmap, depending on OR/AND type */
1853                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1854                         }
1855                 }
1856                 else if (is_orclause(clause) || is_andclause(clause))
1857                 {
1858                         /* AND/OR clause, with all subclauses being compatible */
1859
1860                         int                     i;
1861                         BoolExpr   *bool_clause = ((BoolExpr *) clause);
1862                         List       *bool_clauses = bool_clause->args;
1863
1864                         /* match/mismatch bitmap for each MCV item */
1865                         bool       *bool_matches = NULL;
1866
1867                         Assert(bool_clauses != NIL);
1868                         Assert(list_length(bool_clauses) >= 2);
1869
1870                         /* build the match bitmap for the OR-clauses */
1871                         bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
1872                                                                                                 mcvlist, is_orclause(clause));
1873
1874                         /*
1875                          * Merge the bitmap produced by mcv_get_match_bitmap into the
1876                          * current one. We need to consider if we're evaluating AND or OR
1877                          * condition when merging the results.
1878                          */
1879                         for (i = 0; i < mcvlist->nitems; i++)
1880                                 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1881
1882                         pfree(bool_matches);
1883                 }
1884                 else if (is_notclause(clause))
1885                 {
1886                         /* NOT clause, with all subclauses compatible */
1887
1888                         int                     i;
1889                         BoolExpr   *not_clause = ((BoolExpr *) clause);
1890                         List       *not_args = not_clause->args;
1891
1892                         /* match/mismatch bitmap for each MCV item */
1893                         bool       *not_matches = NULL;
1894
1895                         Assert(not_args != NIL);
1896                         Assert(list_length(not_args) == 1);
1897
1898                         /* build the match bitmap for the NOT-clause */
1899                         not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
1900                                                                                            mcvlist, false);
1901
1902                         /*
1903                          * Merge the bitmap produced by mcv_get_match_bitmap into the
1904                          * current one. We're handling a NOT clause, so invert the result
1905                          * before merging it into the global bitmap.
1906                          */
1907                         for (i = 0; i < mcvlist->nitems; i++)
1908                                 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1909
1910                         pfree(not_matches);
1911                 }
1912                 else if (IsA(clause, Var))
1913                 {
1914                         /* Var (has to be a boolean Var, possibly from below NOT) */
1915
1916                         Var                *var = (Var *) (clause);
1917
1918                         /* match the attribute to a dimension of the statistic */
1919                         int                     idx = bms_member_index(keys, var->varattno);
1920
1921                         Assert(var->vartype == BOOLOID);
1922
1923                         /*
1924                          * Walk through the MCV items and evaluate the current clause. We
1925                          * can skip items that were already ruled out, and terminate if
1926                          * there are no remaining MCV items that might possibly match.
1927                          */
1928                         for (int i = 0; i < mcvlist->nitems; i++)
1929                         {
1930                                 MCVItem    *item = &mcvlist->items[i];
1931                                 bool            match = false;
1932
1933                                 /* if the item is NULL, it's a mismatch */
1934                                 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1935                                         match = true;
1936
1937                                 /* update the result bitmap */
1938                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1939                         }
1940                 }
1941                 else
1942                 {
1943                         /* Otherwise, it must be a bare boolean-returning expression */
1944                         int                     idx;
1945
1946                         /* match the expression to a dimension of the statistic */
1947                         idx = mcv_match_expression(clause, keys, exprs, NULL);
1948
1949                         /*
1950                          * Walk through the MCV items and evaluate the current clause. We
1951                          * can skip items that were already ruled out, and terminate if
1952                          * there are no remaining MCV items that might possibly match.
1953                          */
1954                         for (int i = 0; i < mcvlist->nitems; i++)
1955                         {
1956                                 bool            match;
1957                                 MCVItem    *item = &mcvlist->items[i];
1958
1959                                 /* "match" just means it's bool TRUE */
1960                                 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
1961
1962                                 /* now, update the match bitmap, depending on OR/AND type */
1963                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1964                         }
1965                 }
1966         }
1967
1968         return matches;
1969 }
1970
1971
1972 /*
1973  * mcv_combine_selectivities
1974  *              Combine per-column and multi-column MCV selectivity estimates.
1975  *
1976  * simple_sel is a "simple" selectivity estimate (produced without using any
1977  * extended statistics, essentially assuming independence of columns/clauses).
1978  *
1979  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
1980  * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
1981  * essentially interpreted as a correction to be added to simple_sel, as
1982  * described below.
1983  *
1984  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
1985  * matching ones).  This is used as an upper bound on the portion of the
1986  * selectivity estimates not covered by the MCV statistics.
1987  *
1988  * Note: While simple and base selectivities are defined in a quite similar
1989  * way, the values are computed differently and are not therefore equal. The
1990  * simple selectivity is computed as a product of per-clause estimates, while
1991  * the base selectivity is computed by adding up base frequencies of matching
1992  * items of the multi-column MCV list. So the values may differ for two main
1993  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1994  * the MCV items did not match the estimated clauses.
1995  *
1996  * As both (a) and (b) reduce the base selectivity value, it generally holds
1997  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
1998  * values may be equal.
1999  *
2000  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
2001  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
2002  * correction for the part covered by the MCV list. Those two statements are
2003  * actually equivalent.
2004  */
2005 Selectivity
2006 mcv_combine_selectivities(Selectivity simple_sel,
2007                                                   Selectivity mcv_sel,
2008                                                   Selectivity mcv_basesel,
2009                                                   Selectivity mcv_totalsel)
2010 {
2011         Selectivity other_sel;
2012         Selectivity sel;
2013
2014         /* estimated selectivity of values not covered by MCV matches */
2015         other_sel = simple_sel - mcv_basesel;
2016         CLAMP_PROBABILITY(other_sel);
2017
2018         /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2019         if (other_sel > 1.0 - mcv_totalsel)
2020                 other_sel = 1.0 - mcv_totalsel;
2021
2022         /* overall selectivity is the sum of the MCV and non-MCV parts */
2023         sel = mcv_sel + other_sel;
2024         CLAMP_PROBABILITY(sel);
2025
2026         return sel;
2027 }
2028
2029
2030 /*
2031  * mcv_clauselist_selectivity
2032  *              Use MCV statistics to estimate the selectivity of an implicitly-ANDed
2033  *              list of clauses.
2034  *
2035  * This determines which MCV items match every clause in the list and returns
2036  * the sum of the frequencies of those items.
2037  *
2038  * In addition, it returns the sum of the base frequencies of each of those
2039  * items (that is the sum of the selectivities that each item would have if
2040  * the columns were independent of one another), and the total selectivity of
2041  * all the MCV items (not just the matching ones).  These are expected to be
2042  * used together with a "simple" selectivity estimate (one based only on
2043  * per-column statistics) to produce an overall selectivity estimate that
2044  * makes use of both per-column and multi-column statistics --- see
2045  * mcv_combine_selectivities().
2046  */
2047 Selectivity
2048 mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
2049                                                    List *clauses, int varRelid,
2050                                                    JoinType jointype, SpecialJoinInfo *sjinfo,
2051                                                    RelOptInfo *rel,
2052                                                    Selectivity *basesel, Selectivity *totalsel)
2053 {
2054         int                     i;
2055         MCVList    *mcv;
2056         Selectivity s = 0.0;
2057         RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2058
2059         /* match/mismatch bitmap for each MCV item */
2060         bool       *matches = NULL;
2061
2062         /* load the MCV list stored in the statistics object */
2063         mcv = statext_mcv_load(stat->statOid, rte->inh);
2064
2065         /* build a match bitmap for the clauses */
2066         matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2067                                                                    mcv, false);
2068
2069         /* sum frequencies for all the matching MCV items */
2070         *basesel = 0.0;
2071         *totalsel = 0.0;
2072         for (i = 0; i < mcv->nitems; i++)
2073         {
2074                 *totalsel += mcv->items[i].frequency;
2075
2076                 if (matches[i] != false)
2077                 {
2078                         *basesel += mcv->items[i].base_frequency;
2079                         s += mcv->items[i].frequency;
2080                 }
2081         }
2082
2083         return s;
2084 }
2085
2086
2087 /*
2088  * mcv_clause_selectivity_or
2089  *              Use MCV statistics to estimate the selectivity of a clause that
2090  *              appears in an ORed list of clauses.
2091  *
2092  * As with mcv_clauselist_selectivity() this determines which MCV items match
2093  * the clause and returns both the sum of the frequencies and the sum of the
2094  * base frequencies of those items, as well as the sum of the frequencies of
2095  * all MCV items (not just the matching ones) so that this information can be
2096  * used by mcv_combine_selectivities() to produce a selectivity estimate that
2097  * makes use of both per-column and multi-column statistics.
2098  *
2099  * Additionally, we return information to help compute the overall selectivity
2100  * of the ORed list of clauses assumed to contain this clause.  This function
2101  * is intended to be called for each clause in the ORed list of clauses,
2102  * allowing the overall selectivity to be computed using the following
2103  * algorithm:
2104  *
2105  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
2106  * of the first n clauses in the list.  Then the combined selectivity taking
2107  * into account the next clause C[n+1] can be written as
2108  *
2109  *              P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
2110  *
2111  * The final term above represents the overlap between the clauses examined so
2112  * far and the (n+1)'th clause.  To estimate its selectivity, we track the
2113  * match bitmap for the ORed list of clauses examined so far and examine its
2114  * intersection with the match bitmap for the (n+1)'th clause.
2115  *
2116  * We then also return the sums of the MCV item frequencies and base
2117  * frequencies for the match bitmap intersection corresponding to the overlap
2118  * term above, so that they can be combined with a simple selectivity estimate
2119  * for that term.
2120  *
2121  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
2122  * for the clauses examined so far.  The caller is expected to set it to NULL
2123  * the first time it calls this function.
2124  */
2125 Selectivity
2126 mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
2127                                                   MCVList *mcv, Node *clause, bool **or_matches,
2128                                                   Selectivity *basesel, Selectivity *overlap_mcvsel,
2129                                                   Selectivity *overlap_basesel, Selectivity *totalsel)
2130 {
2131         Selectivity s = 0.0;
2132         bool       *new_matches;
2133         int                     i;
2134
2135         /* build the OR-matches bitmap, if not built already */
2136         if (*or_matches == NULL)
2137                 *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2138
2139         /* build the match bitmap for the new clause */
2140         new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2141                                                                            stat->exprs, mcv, false);
2142
2143         /*
2144          * Sum the frequencies for all the MCV items matching this clause and also
2145          * those matching the overlap between this clause and any of the preceding
2146          * clauses as described above.
2147          */
2148         *basesel = 0.0;
2149         *overlap_mcvsel = 0.0;
2150         *overlap_basesel = 0.0;
2151         *totalsel = 0.0;
2152         for (i = 0; i < mcv->nitems; i++)
2153         {
2154                 *totalsel += mcv->items[i].frequency;
2155
2156                 if (new_matches[i])
2157                 {
2158                         s += mcv->items[i].frequency;
2159                         *basesel += mcv->items[i].base_frequency;
2160
2161                         if ((*or_matches)[i])
2162                         {
2163                                 *overlap_mcvsel += mcv->items[i].frequency;
2164                                 *overlap_basesel += mcv->items[i].base_frequency;
2165                         }
2166                 }
2167
2168                 /* update the OR-matches bitmap for the next clause */
2169                 (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2170         }
2171
2172         pfree(new_matches);
2173
2174         return s;
2175 }