src/backend/statistics/extended_stats.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * extended_stats.c
   4  *        POSTGRES extended statistics
   5  *
   6  * Generic code supporting statistics objects created via CREATE STATISTICS.
   7  *
   8  *
   9  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  10  * Portions Copyright (c) 1994, Regents of the University of California
  11  *
  12  * IDENTIFICATION
  13  *        src/backend/statistics/extended_stats.c
  14  *
  15  *-------------------------------------------------------------------------
  16  */
  17 #include "postgres.h"
  18
  19 #include "access/detoast.h"
  20 #include "access/genam.h"
  21 #include "access/htup_details.h"
  22 #include "access/table.h"
  23 #include "catalog/indexing.h"
  24 #include "catalog/pg_statistic_ext.h"
  25 #include "catalog/pg_statistic_ext_data.h"
  26 #include "commands/defrem.h"
  27 #include "commands/progress.h"
  28 #include "executor/executor.h"
  29 #include "miscadmin.h"
  30 #include "nodes/nodeFuncs.h"
  31 #include "optimizer/optimizer.h"
  32 #include "parser/parsetree.h"
  33 #include "pgstat.h"
  34 #include "postmaster/autovacuum.h"
  35 #include "statistics/extended_stats_internal.h"
  36 #include "statistics/statistics.h"
  37 #include "utils/acl.h"
  38 #include "utils/array.h"
  39 #include "utils/attoptcache.h"
  40 #include "utils/builtins.h"
  41 #include "utils/datum.h"
  42 #include "utils/fmgroids.h"
  43 #include "utils/lsyscache.h"
  44 #include "utils/memutils.h"
  45 #include "utils/rel.h"
  46 #include "utils/selfuncs.h"
  47 #include "utils/syscache.h"
  48
  49 /*
  50  * To avoid consuming too much memory during analysis and/or too much space
  51  * in the resulting pg_statistic rows, we ignore varlena datums that are wider
  52  * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
  53  * and distinct-value calculations since a wide value is unlikely to be
  54  * duplicated at all, much less be a most-common value.  For the same reason,
  55  * ignoring wide values will not affect our estimates of histogram bin
  56  * boundaries very much.
  57  */
  58 #define WIDTH_THRESHOLD  1024
  59
  60 /*
  61  * Used internally to refer to an individual statistics object, i.e.,
  62  * a pg_statistic_ext entry.
  63  */
  64 typedef struct StatExtEntry
  65 {
  66         Oid                     statOid;                /* OID of pg_statistic_ext entry */
  67         char       *schema;                     /* statistics object's schema */
  68         char       *name;                       /* statistics object's name */
  69         Bitmapset  *columns;            /* attribute numbers covered by the object */
  70         List       *types;                      /* 'char' list of enabled statistics kinds */
  71         int                     stattarget;             /* statistics target (-1 for default) */
  72         List       *exprs;                      /* expressions */
  73 } StatExtEntry;
  74
  75
  76 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
  77 static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
  78                                                                                         int nvacatts, VacAttrStats **vacatts);
  79 static void statext_store(Oid statOid, bool inh,
  80                                                   MVNDistinct *ndistinct, MVDependencies *dependencies,
  81                                                   MCVList *mcv, Datum exprs, VacAttrStats **stats);
  82 static int      statext_compute_stattarget(int stattarget,
  83                                                                            int nattrs, VacAttrStats **stats);
  84
  85 /* Information needed to analyze a single simple expression. */
  86 typedef struct AnlExprData
  87 {
  88         Node       *expr;                       /* expression to analyze */
  89         VacAttrStats *vacattrstat;      /* statistics attrs to analyze */
  90 } AnlExprData;
  91
  92 static void compute_expr_stats(Relation onerel, double totalrows,
  93                                                            AnlExprData *exprdata, int nexprs,
  94                                                            HeapTuple *rows, int numrows);
  95 static Datum serialize_expr_stats(AnlExprData *exprdata, int nexprs);
  96 static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  97 static AnlExprData *build_expr_data(List *exprs, int stattarget);
  98
  99 static StatsBuildData *make_build_data(Relation rel, StatExtEntry *stat,
 100                                                                            int numrows, HeapTuple *rows,
 101                                                                            VacAttrStats **stats, int stattarget);
 102
 103
 104 /*
 105  * Compute requested extended stats, using the rows sampled for the plain
 106  * (single-column) stats.
 107  *
 108  * This fetches a list of stats types from pg_statistic_ext, computes the
 109  * requested stats, and serializes them back into the catalog.
 110  */
 111 void
 112 BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
 113                                                    int numrows, HeapTuple *rows,
 114                                                    int natts, VacAttrStats **vacattrstats)
 115 {
 116         Relation        pg_stext;
 117         ListCell   *lc;
 118         List       *statslist;
 119         MemoryContext cxt;
 120         MemoryContext oldcxt;
 121         int64           ext_cnt;
 122
 123         /* Do nothing if there are no columns to analyze. */
 124         if (!natts)
 125                 return;
 126
 127         /* the list of stats has to be allocated outside the memory context */
 128         pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
 129         statslist = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
 130
 131         /* memory context for building each statistics object */
 132         cxt = AllocSetContextCreate(CurrentMemoryContext,
 133                                                                 "BuildRelationExtStatistics",
 134                                                                 ALLOCSET_DEFAULT_SIZES);
 135         oldcxt = MemoryContextSwitchTo(cxt);
 136
 137         /* report this phase */
 138         if (statslist != NIL)
 139         {
 140                 const int       index[] = {
 141                         PROGRESS_ANALYZE_PHASE,
 142                         PROGRESS_ANALYZE_EXT_STATS_TOTAL
 143                 };
 144                 const int64 val[] = {
 145                         PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS,
 146                         list_length(statslist)
 147                 };
 148
 149                 pgstat_progress_update_multi_param(2, index, val);
 150         }
 151
 152         ext_cnt = 0;
 153         foreach(lc, statslist)
 154         {
 155                 StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
 156                 MVNDistinct *ndistinct = NULL;
 157                 MVDependencies *dependencies = NULL;
 158                 MCVList    *mcv = NULL;
 159                 Datum           exprstats = (Datum) 0;
 160                 VacAttrStats **stats;
 161                 ListCell   *lc2;
 162                 int                     stattarget;
 163                 StatsBuildData *data;
 164
 165                 /*
 166                  * Check if we can build these stats based on the column analyzed. If
 167                  * not, report this fact (except in autovacuum) and move on.
 168                  */
 169                 stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
 170                                                                           natts, vacattrstats);
 171                 if (!stats)
 172                 {
 173                         if (!AmAutoVacuumWorkerProcess())
 174                                 ereport(WARNING,
 175                                                 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
 176                                                  errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
 177                                                                 stat->schema, stat->name,
 178                                                                 get_namespace_name(onerel->rd_rel->relnamespace),
 179                                                                 RelationGetRelationName(onerel)),
 180                                                  errtable(onerel)));
 181                         continue;
 182                 }
 183
 184                 /* compute statistics target for this statistics object */
 185                 stattarget = statext_compute_stattarget(stat->stattarget,
 186                                                                                                 bms_num_members(stat->columns),
 187                                                                                                 stats);
 188
 189                 /*
 190                  * Don't rebuild statistics objects with statistics target set to 0
 191                  * (we just leave the existing values around, just like we do for
 192                  * regular per-column statistics).
 193                  */
 194                 if (stattarget == 0)
 195                         continue;
 196
 197                 /* evaluate expressions (if the statistics object has any) */
 198                 data = make_build_data(onerel, stat, numrows, rows, stats, stattarget);
 199
 200                 /* compute statistic of each requested type */
 201                 foreach(lc2, stat->types)
 202                 {
 203                         char            t = (char) lfirst_int(lc2);
 204
 205                         if (t == STATS_EXT_NDISTINCT)
 206                                 ndistinct = statext_ndistinct_build(totalrows, data);
 207                         else if (t == STATS_EXT_DEPENDENCIES)
 208                                 dependencies = statext_dependencies_build(data);
 209                         else if (t == STATS_EXT_MCV)
 210                                 mcv = statext_mcv_build(data, totalrows, stattarget);
 211                         else if (t == STATS_EXT_EXPRESSIONS)
 212                         {
 213                                 AnlExprData *exprdata;
 214                                 int                     nexprs;
 215
 216                                 /* should not happen, thanks to checks when defining stats */
 217                                 if (!stat->exprs)
 218                                         elog(ERROR, "requested expression stats, but there are no expressions");
 219
 220                                 exprdata = build_expr_data(stat->exprs, stattarget);
 221                                 nexprs = list_length(stat->exprs);
 222
 223                                 compute_expr_stats(onerel, totalrows,
 224                                                                    exprdata, nexprs,
 225                                                                    rows, numrows);
 226
 227                                 exprstats = serialize_expr_stats(exprdata, nexprs);
 228                         }
 229                 }
 230
 231                 /* store the statistics in the catalog */
 232                 statext_store(stat->statOid, inh,
 233                                           ndistinct, dependencies, mcv, exprstats, stats);
 234
 235                 /* for reporting progress */
 236                 pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
 237                                                                          ++ext_cnt);
 238
 239                 /* free the data used for building this statistics object */
 240                 MemoryContextReset(cxt);
 241         }
 242
 243         MemoryContextSwitchTo(oldcxt);
 244         MemoryContextDelete(cxt);
 245
 246         list_free(statslist);
 247
 248         table_close(pg_stext, RowExclusiveLock);
 249 }
 250
 251 /*
 252  * ComputeExtStatisticsRows
 253  *              Compute number of rows required by extended statistics on a table.
 254  *
 255  * Computes number of rows we need to sample to build extended statistics on a
 256  * table. This only looks at statistics we can actually build - for example
 257  * when analyzing only some of the columns, this will skip statistics objects
 258  * that would require additional columns.
 259  *
 260  * See statext_compute_stattarget for details about how we compute the
 261  * statistics target for a statistics object (from the object target,
 262  * attribute targets and default statistics target).
 263  */
 264 int
 265 ComputeExtStatisticsRows(Relation onerel,
 266                                                  int natts, VacAttrStats **vacattrstats)
 267 {
 268         Relation        pg_stext;
 269         ListCell   *lc;
 270         List       *lstats;
 271         MemoryContext cxt;
 272         MemoryContext oldcxt;
 273         int                     result = 0;
 274
 275         /* If there are no columns to analyze, just return 0. */
 276         if (!natts)
 277                 return 0;
 278
 279         cxt = AllocSetContextCreate(CurrentMemoryContext,
 280                                                                 "ComputeExtStatisticsRows",
 281                                                                 ALLOCSET_DEFAULT_SIZES);
 282         oldcxt = MemoryContextSwitchTo(cxt);
 283
 284         pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
 285         lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
 286
 287         foreach(lc, lstats)
 288         {
 289                 StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
 290                 int                     stattarget;
 291                 VacAttrStats **stats;
 292                 int                     nattrs = bms_num_members(stat->columns);
 293
 294                 /*
 295                  * Check if we can build this statistics object based on the columns
 296                  * analyzed. If not, ignore it (don't report anything, we'll do that
 297                  * during the actual build BuildRelationExtStatistics).
 298                  */
 299                 stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
 300                                                                           natts, vacattrstats);
 301
 302                 if (!stats)
 303                         continue;
 304
 305                 /*
 306                  * Compute statistics target, based on what's set for the statistic
 307                  * object itself, and for its attributes.
 308                  */
 309                 stattarget = statext_compute_stattarget(stat->stattarget,
 310                                                                                                 nattrs, stats);
 311
 312                 /* Use the largest value for all statistics objects. */
 313                 if (stattarget > result)
 314                         result = stattarget;
 315         }
 316
 317         table_close(pg_stext, RowExclusiveLock);
 318
 319         MemoryContextSwitchTo(oldcxt);
 320         MemoryContextDelete(cxt);
 321
 322         /* compute sample size based on the statistics target */
 323         return (300 * result);
 324 }
 325
 326 /*
 327  * statext_compute_stattarget
 328  *              compute statistics target for an extended statistic
 329  *
 330  * When computing target for extended statistics objects, we consider three
 331  * places where the target may be set - the statistics object itself,
 332  * attributes the statistics object is defined on, and then the default
 333  * statistics target.
 334  *
 335  * First we look at what's set for the statistics object itself, using the
 336  * ALTER STATISTICS ... SET STATISTICS command. If we find a valid value
 337  * there (i.e. not -1) we're done. Otherwise we look at targets set for any
 338  * of the attributes the statistic is defined on, and if there are columns
 339  * with defined target, we use the maximum value. We do this mostly for
 340  * backwards compatibility, because this is what we did before having
 341  * statistics target for extended statistics.
 342  *
 343  * And finally, if we still don't have a statistics target, we use the value
 344  * set in default_statistics_target.
 345  */
 346 static int
 347 statext_compute_stattarget(int stattarget, int nattrs, VacAttrStats **stats)
 348 {
 349         int                     i;
 350
 351         /*
 352          * If there's statistics target set for the statistics object, use it. It
 353          * may be set to 0 which disables building of that statistic.
 354          */
 355         if (stattarget >= 0)
 356                 return stattarget;
 357
 358         /*
 359          * The target for the statistics object is set to -1, in which case we
 360          * look at the maximum target set for any of the attributes the object is
 361          * defined on.
 362          */
 363         for (i = 0; i < nattrs; i++)
 364         {
 365                 /* keep the maximum statistics target */
 366                 if (stats[i]->attstattarget > stattarget)
 367                         stattarget = stats[i]->attstattarget;
 368         }
 369
 370         /*
 371          * If the value is still negative (so neither the statistics object nor
 372          * any of the columns have custom statistics target set), use the global
 373          * default target.
 374          */
 375         if (stattarget < 0)
 376                 stattarget = default_statistics_target;
 377
 378         /* As this point we should have a valid statistics target. */
 379         Assert((stattarget >= 0) && (stattarget <= MAX_STATISTICS_TARGET));
 380
 381         return stattarget;
 382 }
 383
 384 /*
 385  * statext_is_kind_built
 386  *              Is this stat kind built in the given pg_statistic_ext_data tuple?
 387  */
 388 bool
 389 statext_is_kind_built(HeapTuple htup, char type)
 390 {
 391         AttrNumber      attnum;
 392
 393         switch (type)
 394         {
 395                 case STATS_EXT_NDISTINCT:
 396                         attnum = Anum_pg_statistic_ext_data_stxdndistinct;
 397                         break;
 398
 399                 case STATS_EXT_DEPENDENCIES:
 400                         attnum = Anum_pg_statistic_ext_data_stxddependencies;
 401                         break;
 402
 403                 case STATS_EXT_MCV:
 404                         attnum = Anum_pg_statistic_ext_data_stxdmcv;
 405                         break;
 406
 407                 case STATS_EXT_EXPRESSIONS:
 408                         attnum = Anum_pg_statistic_ext_data_stxdexpr;
 409                         break;
 410
 411                 default:
 412                         elog(ERROR, "unexpected statistics type requested: %d", type);
 413         }
 414
 415         return !heap_attisnull(htup, attnum, NULL);
 416 }
 417
 418 /*
 419  * Return a list (of StatExtEntry) of statistics objects for the given relation.
 420  */
 421 static List *
 422 fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 423 {
 424         SysScanDesc scan;
 425         ScanKeyData skey;
 426         HeapTuple       htup;
 427         List       *result = NIL;
 428
 429         /*
 430          * Prepare to scan pg_statistic_ext for entries having stxrelid = this
 431          * rel.
 432          */
 433         ScanKeyInit(&skey,
 434                                 Anum_pg_statistic_ext_stxrelid,
 435                                 BTEqualStrategyNumber, F_OIDEQ,
 436                                 ObjectIdGetDatum(relid));
 437
 438         scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
 439                                                           NULL, 1, &skey);
 440
 441         while (HeapTupleIsValid(htup = systable_getnext(scan)))
 442         {
 443                 StatExtEntry *entry;
 444                 Datum           datum;
 445                 bool            isnull;
 446                 int                     i;
 447                 ArrayType  *arr;
 448                 char       *enabled;
 449                 Form_pg_statistic_ext staForm;
 450                 List       *exprs = NIL;
 451
 452                 entry = palloc0(sizeof(StatExtEntry));
 453                 staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
 454                 entry->statOid = staForm->oid;
 455                 entry->schema = get_namespace_name(staForm->stxnamespace);
 456                 entry->name = pstrdup(NameStr(staForm->stxname));
 457                 for (i = 0; i < staForm->stxkeys.dim1; i++)
 458                 {
 459                         entry->columns = bms_add_member(entry->columns,
 460                                                                                         staForm->stxkeys.values[i]);
 461                 }
 462
 463                 datum = SysCacheGetAttr(STATEXTOID, htup, Anum_pg_statistic_ext_stxstattarget, &isnull);
 464                 entry->stattarget = isnull ? -1 : DatumGetInt16(datum);
 465
 466                 /* decode the stxkind char array into a list of chars */
 467                 datum = SysCacheGetAttrNotNull(STATEXTOID, htup,
 468                                                                            Anum_pg_statistic_ext_stxkind);
 469                 arr = DatumGetArrayTypeP(datum);
 470                 if (ARR_NDIM(arr) != 1 ||
 471                         ARR_HASNULL(arr) ||
 472                         ARR_ELEMTYPE(arr) != CHAROID)
 473                         elog(ERROR, "stxkind is not a 1-D char array");
 474                 enabled = (char *) ARR_DATA_PTR(arr);
 475                 for (i = 0; i < ARR_DIMS(arr)[0]; i++)
 476                 {
 477                         Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
 478                                    (enabled[i] == STATS_EXT_DEPENDENCIES) ||
 479                                    (enabled[i] == STATS_EXT_MCV) ||
 480                                    (enabled[i] == STATS_EXT_EXPRESSIONS));
 481                         entry->types = lappend_int(entry->types, (int) enabled[i]);
 482                 }
 483
 484                 /* decode expression (if any) */
 485                 datum = SysCacheGetAttr(STATEXTOID, htup,
 486                                                                 Anum_pg_statistic_ext_stxexprs, &isnull);
 487
 488                 if (!isnull)
 489                 {
 490                         char       *exprsString;
 491
 492                         exprsString = TextDatumGetCString(datum);
 493                         exprs = (List *) stringToNode(exprsString);
 494
 495                         pfree(exprsString);
 496
 497                         /*
 498                          * Run the expressions through eval_const_expressions. This is not
 499                          * just an optimization, but is necessary, because the planner
 500                          * will be comparing them to similarly-processed qual clauses, and
 501                          * may fail to detect valid matches without this.  We must not use
 502                          * canonicalize_qual, however, since these aren't qual
 503                          * expressions.
 504                          */
 505                         exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
 506
 507                         /* May as well fix opfuncids too */
 508                         fix_opfuncids((Node *) exprs);
 509                 }
 510
 511                 entry->exprs = exprs;
 512
 513                 result = lappend(result, entry);
 514         }
 515
 516         systable_endscan(scan);
 517
 518         return result;
 519 }
 520
 521 /*
 522  * examine_attribute -- pre-analysis of a single column
 523  *
 524  * Determine whether the column is analyzable; if so, create and initialize
 525  * a VacAttrStats struct for it.  If not, return NULL.
 526  */
 527 static VacAttrStats *
 528 examine_attribute(Node *expr)
 529 {
 530         HeapTuple       typtuple;
 531         VacAttrStats *stats;
 532         int                     i;
 533         bool            ok;
 534
 535         /*
 536          * Create the VacAttrStats struct.
 537          */
 538         stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
 539         stats->attstattarget = -1;
 540
 541         /*
 542          * When analyzing an expression, believe the expression tree's type not
 543          * the column datatype --- the latter might be the opckeytype storage type
 544          * of the opclass, which is not interesting for our purposes.  (Note: if
 545          * we did anything with non-expression statistics columns, we'd need to
 546          * figure out where to get the correct type info from, but for now that's
 547          * not a problem.)      It's not clear whether anyone will care about the
 548          * typmod, but we store that too just in case.
 549          */
 550         stats->attrtypid = exprType(expr);
 551         stats->attrtypmod = exprTypmod(expr);
 552         stats->attrcollid = exprCollation(expr);
 553
 554         typtuple = SearchSysCacheCopy1(TYPEOID,
 555                                                                    ObjectIdGetDatum(stats->attrtypid));
 556         if (!HeapTupleIsValid(typtuple))
 557                 elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
 558         stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
 559
 560         /*
 561          * We don't actually analyze individual attributes, so no need to set the
 562          * memory context.
 563          */
 564         stats->anl_context = NULL;
 565         stats->tupattnum = InvalidAttrNumber;
 566
 567         /*
 568          * The fields describing the stats->stavalues[n] element types default to
 569          * the type of the data being analyzed, but the type-specific typanalyze
 570          * function can change them if it wants to store something else.
 571          */
 572         for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
 573         {
 574                 stats->statypid[i] = stats->attrtypid;
 575                 stats->statyplen[i] = stats->attrtype->typlen;
 576                 stats->statypbyval[i] = stats->attrtype->typbyval;
 577                 stats->statypalign[i] = stats->attrtype->typalign;
 578         }
 579
 580         /*
 581          * Call the type-specific typanalyze function.  If none is specified, use
 582          * std_typanalyze().
 583          */
 584         if (OidIsValid(stats->attrtype->typanalyze))
 585                 ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
 586                                                                                    PointerGetDatum(stats)));
 587         else
 588                 ok = std_typanalyze(stats);
 589
 590         if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
 591         {
 592                 heap_freetuple(typtuple);
 593                 pfree(stats);
 594                 return NULL;
 595         }
 596
 597         return stats;
 598 }
 599
 600 /*
 601  * examine_expression -- pre-analysis of a single expression
 602  *
 603  * Determine whether the expression is analyzable; if so, create and initialize
 604  * a VacAttrStats struct for it.  If not, return NULL.
 605  */
 606 static VacAttrStats *
 607 examine_expression(Node *expr, int stattarget)
 608 {
 609         HeapTuple       typtuple;
 610         VacAttrStats *stats;
 611         int                     i;
 612         bool            ok;
 613
 614         Assert(expr != NULL);
 615
 616         /*
 617          * Create the VacAttrStats struct.
 618          */
 619         stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
 620
 621         /*
 622          * We can't have statistics target specified for the expression, so we
 623          * could use either the default_statistics_target, or the target computed
 624          * for the extended statistics. The second option seems more reasonable.
 625          */
 626         stats->attstattarget = stattarget;
 627
 628         /*
 629          * When analyzing an expression, believe the expression tree's type.
 630          */
 631         stats->attrtypid = exprType(expr);
 632         stats->attrtypmod = exprTypmod(expr);
 633
 634         /*
 635          * We don't allow collation to be specified in CREATE STATISTICS, so we
 636          * have to use the collation specified for the expression. It's possible
 637          * to specify the collation in the expression "(col COLLATE "en_US")" in
 638          * which case exprCollation() does the right thing.
 639          */
 640         stats->attrcollid = exprCollation(expr);
 641
 642         typtuple = SearchSysCacheCopy1(TYPEOID,
 643                                                                    ObjectIdGetDatum(stats->attrtypid));
 644         if (!HeapTupleIsValid(typtuple))
 645                 elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
 646
 647         stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
 648         stats->anl_context = CurrentMemoryContext;      /* XXX should be using
 649                                                                                                  * something else? */
 650         stats->tupattnum = InvalidAttrNumber;
 651
 652         /*
 653          * The fields describing the stats->stavalues[n] element types default to
 654          * the type of the data being analyzed, but the type-specific typanalyze
 655          * function can change them if it wants to store something else.
 656          */
 657         for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
 658         {
 659                 stats->statypid[i] = stats->attrtypid;
 660                 stats->statyplen[i] = stats->attrtype->typlen;
 661                 stats->statypbyval[i] = stats->attrtype->typbyval;
 662                 stats->statypalign[i] = stats->attrtype->typalign;
 663         }
 664
 665         /*
 666          * Call the type-specific typanalyze function.  If none is specified, use
 667          * std_typanalyze().
 668          */
 669         if (OidIsValid(stats->attrtype->typanalyze))
 670                 ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
 671                                                                                    PointerGetDatum(stats)));
 672         else
 673                 ok = std_typanalyze(stats);
 674
 675         if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
 676         {
 677                 heap_freetuple(typtuple);
 678                 pfree(stats);
 679                 return NULL;
 680         }
 681
 682         return stats;
 683 }
 684
 685 /*
 686  * Using 'vacatts' of size 'nvacatts' as input data, return a newly-built
 687  * VacAttrStats array which includes only the items corresponding to
 688  * attributes indicated by 'attrs'.  If we don't have all of the per-column
 689  * stats available to compute the extended stats, then we return NULL to
 690  * indicate to the caller that the stats should not be built.
 691  */
 692 static VacAttrStats **
 693 lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
 694                                           int nvacatts, VacAttrStats **vacatts)
 695 {
 696         int                     i = 0;
 697         int                     x = -1;
 698         int                     natts;
 699         VacAttrStats **stats;
 700         ListCell   *lc;
 701
 702         natts = bms_num_members(attrs) + list_length(exprs);
 703
 704         stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *));
 705
 706         /* lookup VacAttrStats info for the requested columns (same attnum) */
 707         while ((x = bms_next_member(attrs, x)) >= 0)
 708         {
 709                 int                     j;
 710
 711                 stats[i] = NULL;
 712                 for (j = 0; j < nvacatts; j++)
 713                 {
 714                         if (x == vacatts[j]->tupattnum)
 715                         {
 716                                 stats[i] = vacatts[j];
 717                                 break;
 718                         }
 719                 }
 720
 721                 if (!stats[i])
 722                 {
 723                         /*
 724                          * Looks like stats were not gathered for one of the columns
 725                          * required. We'll be unable to build the extended stats without
 726                          * this column.
 727                          */
 728                         pfree(stats);
 729                         return NULL;
 730                 }
 731
 732                 i++;
 733         }
 734
 735         /* also add info for expressions */
 736         foreach(lc, exprs)
 737         {
 738                 Node       *expr = (Node *) lfirst(lc);
 739
 740                 stats[i] = examine_attribute(expr);
 741
 742                 /*
 743                  * XXX We need tuple descriptor later, and we just grab it from
 744                  * stats[0]->tupDesc (see e.g. statext_mcv_build). But as coded
 745                  * examine_attribute does not set that, so just grab it from the first
 746                  * vacatts element.
 747                  */
 748                 stats[i]->tupDesc = vacatts[0]->tupDesc;
 749
 750                 i++;
 751         }
 752
 753         return stats;
 754 }
 755
 756 /*
 757  * statext_store
 758  *      Serializes the statistics and stores them into the pg_statistic_ext_data
 759  *      tuple.
 760  */
 761 static void
 762 statext_store(Oid statOid, bool inh,
 763                           MVNDistinct *ndistinct, MVDependencies *dependencies,
 764                           MCVList *mcv, Datum exprs, VacAttrStats **stats)
 765 {
 766         Relation        pg_stextdata;
 767         HeapTuple       stup;
 768         Datum           values[Natts_pg_statistic_ext_data];
 769         bool            nulls[Natts_pg_statistic_ext_data];
 770
 771         pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
 772
 773         memset(nulls, true, sizeof(nulls));
 774         memset(values, 0, sizeof(values));
 775
 776         /* basic info */
 777         values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statOid);
 778         nulls[Anum_pg_statistic_ext_data_stxoid - 1] = false;
 779
 780         values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inh);
 781         nulls[Anum_pg_statistic_ext_data_stxdinherit - 1] = false;
 782
 783         /*
 784          * Construct a new pg_statistic_ext_data tuple, replacing the calculated
 785          * stats.
 786          */
 787         if (ndistinct != NULL)
 788         {
 789                 bytea      *data = statext_ndistinct_serialize(ndistinct);
 790
 791                 nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = (data == NULL);
 792                 values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PointerGetDatum(data);
 793         }
 794
 795         if (dependencies != NULL)
 796         {
 797                 bytea      *data = statext_dependencies_serialize(dependencies);
 798
 799                 nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = (data == NULL);
 800                 values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PointerGetDatum(data);
 801         }
 802         if (mcv != NULL)
 803         {
 804                 bytea      *data = statext_mcv_serialize(mcv, stats);
 805
 806                 nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
 807                 values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
 808         }
 809         if (exprs != (Datum) 0)
 810         {
 811                 nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = false;
 812                 values[Anum_pg_statistic_ext_data_stxdexpr - 1] = exprs;
 813         }
 814
 815         /*
 816          * Delete the old tuple if it exists, and insert a new one. It's easier
 817          * than trying to update or insert, based on various conditions.
 818          */
 819         RemoveStatisticsDataById(statOid, inh);
 820
 821         /* form and insert a new tuple */
 822         stup = heap_form_tuple(RelationGetDescr(pg_stextdata), values, nulls);
 823         CatalogTupleInsert(pg_stextdata, stup);
 824
 825         heap_freetuple(stup);
 826
 827         table_close(pg_stextdata, RowExclusiveLock);
 828 }
 829
 830 /* initialize multi-dimensional sort */
 831 MultiSortSupport
 832 multi_sort_init(int ndims)
 833 {
 834         MultiSortSupport mss;
 835
 836         Assert(ndims >= 2);
 837
 838         mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
 839                                                                          + sizeof(SortSupportData) * ndims);
 840
 841         mss->ndims = ndims;
 842
 843         return mss;
 844 }
 845
 846 /*
 847  * Prepare sort support info using the given sort operator and collation
 848  * at the position 'sortdim'
 849  */
 850 void
 851 multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
 852                                                  Oid oper, Oid collation)
 853 {
 854         SortSupport ssup = &mss->ssup[sortdim];
 855
 856         ssup->ssup_cxt = CurrentMemoryContext;
 857         ssup->ssup_collation = collation;
 858         ssup->ssup_nulls_first = false;
 859
 860         PrepareSortSupportFromOrderingOp(oper, ssup);
 861 }
 862
 863 /* compare all the dimensions in the selected order */
 864 int
 865 multi_sort_compare(const void *a, const void *b, void *arg)
 866 {
 867         MultiSortSupport mss = (MultiSortSupport) arg;
 868         SortItem   *ia = (SortItem *) a;
 869         SortItem   *ib = (SortItem *) b;
 870         int                     i;
 871
 872         for (i = 0; i < mss->ndims; i++)
 873         {
 874                 int                     compare;
 875
 876                 compare = ApplySortComparator(ia->values[i], ia->isnull[i],
 877                                                                           ib->values[i], ib->isnull[i],
 878                                                                           &mss->ssup[i]);
 879
 880                 if (compare != 0)
 881                         return compare;
 882         }
 883
 884         /* equal by default */
 885         return 0;
 886 }
 887
 888 /* compare selected dimension */
 889 int
 890 multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
 891                                            MultiSortSupport mss)
 892 {
 893         return ApplySortComparator(a->values[dim], a->isnull[dim],
 894                                                            b->values[dim], b->isnull[dim],
 895                                                            &mss->ssup[dim]);
 896 }
 897
 898 int
 899 multi_sort_compare_dims(int start, int end,
 900                                                 const SortItem *a, const SortItem *b,
 901                                                 MultiSortSupport mss)
 902 {
 903         int                     dim;
 904
 905         for (dim = start; dim <= end; dim++)
 906         {
 907                 int                     r = ApplySortComparator(a->values[dim], a->isnull[dim],
 908                                                                                         b->values[dim], b->isnull[dim],
 909                                                                                         &mss->ssup[dim]);
 910
 911                 if (r != 0)
 912                         return r;
 913         }
 914
 915         return 0;
 916 }
 917
 918 int
 919 compare_scalars_simple(const void *a, const void *b, void *arg)
 920 {
 921         return compare_datums_simple(*(Datum *) a,
 922                                                                  *(Datum *) b,
 923                                                                  (SortSupport) arg);
 924 }
 925
 926 int
 927 compare_datums_simple(Datum a, Datum b, SortSupport ssup)
 928 {
 929         return ApplySortComparator(a, false, b, false, ssup);
 930 }
 931
 932 /*
 933  * build_attnums_array
 934  *              Transforms a bitmap into an array of AttrNumber values.
 935  *
 936  * This is used for extended statistics only, so all the attributes must be
 937  * user-defined. That means offsetting by FirstLowInvalidHeapAttributeNumber
 938  * is not necessary here (and when querying the bitmap).
 939  */
 940 AttrNumber *
 941 build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs)
 942 {
 943         int                     i,
 944                                 j;
 945         AttrNumber *attnums;
 946         int                     num = bms_num_members(attrs);
 947
 948         if (numattrs)
 949                 *numattrs = num;
 950
 951         /* build attnums from the bitmapset */
 952         attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
 953         i = 0;
 954         j = -1;
 955         while ((j = bms_next_member(attrs, j)) >= 0)
 956         {
 957                 int                     attnum = (j - nexprs);
 958
 959                 /*
 960                  * Make sure the bitmap contains only user-defined attributes. As
 961                  * bitmaps can't contain negative values, this can be violated in two
 962                  * ways. Firstly, the bitmap might contain 0 as a member, and secondly
 963                  * the integer value might be larger than MaxAttrNumber.
 964                  */
 965                 Assert(AttributeNumberIsValid(attnum));
 966                 Assert(attnum <= MaxAttrNumber);
 967                 Assert(attnum >= (-nexprs));
 968
 969                 attnums[i++] = (AttrNumber) attnum;
 970
 971                 /* protect against overflows */
 972                 Assert(i <= num);
 973         }
 974
 975         return attnums;
 976 }
 977
 978 /*
 979  * build_sorted_items
 980  *              build a sorted array of SortItem with values from rows
 981  *
 982  * Note: All the memory is allocated in a single chunk, so that the caller
 983  * can simply pfree the return value to release all of it.
 984  */
 985 SortItem *
 986 build_sorted_items(StatsBuildData *data, int *nitems,
 987                                    MultiSortSupport mss,
 988                                    int numattrs, AttrNumber *attnums)
 989 {
 990         int                     i,
 991                                 j,
 992                                 len,
 993                                 nrows;
 994         int                     nvalues = data->numrows * numattrs;
 995
 996         SortItem   *items;
 997         Datum      *values;
 998         bool       *isnull;
 999         char       *ptr;
1000         int                *typlen;
1001
1002         /* Compute the total amount of memory we need (both items and values). */
1003         len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
1004
1005         /* Allocate the memory and split it into the pieces. */
1006         ptr = palloc0(len);
1007
1008         /* items to sort */
1009         items = (SortItem *) ptr;
1010         ptr += data->numrows * sizeof(SortItem);
1011
1012         /* values and null flags */
1013         values = (Datum *) ptr;
1014         ptr += nvalues * sizeof(Datum);
1015
1016         isnull = (bool *) ptr;
1017         ptr += nvalues * sizeof(bool);
1018
1019         /* make sure we consumed the whole buffer exactly */
1020         Assert((ptr - (char *) items) == len);
1021
1022         /* fix the pointers to Datum and bool arrays */
1023         nrows = 0;
1024         for (i = 0; i < data->numrows; i++)
1025         {
1026                 items[nrows].values = &values[nrows * numattrs];
1027                 items[nrows].isnull = &isnull[nrows * numattrs];
1028
1029                 nrows++;
1030         }
1031
1032         /* build a local cache of typlen for all attributes */
1033         typlen = (int *) palloc(sizeof(int) * data->nattnums);
1034         for (i = 0; i < data->nattnums; i++)
1035                 typlen[i] = get_typlen(data->stats[i]->attrtypid);
1036
1037         nrows = 0;
1038         for (i = 0; i < data->numrows; i++)
1039         {
1040                 bool            toowide = false;
1041
1042                 /* load the values/null flags from sample rows */
1043                 for (j = 0; j < numattrs; j++)
1044                 {
1045                         Datum           value;
1046                         bool            isnull;
1047                         int                     attlen;
1048                         AttrNumber      attnum = attnums[j];
1049
1050                         int                     idx;
1051
1052                         /* match attnum to the pre-calculated data */
1053                         for (idx = 0; idx < data->nattnums; idx++)
1054                         {
1055                                 if (attnum == data->attnums[idx])
1056                                         break;
1057                         }
1058
1059                         Assert(idx < data->nattnums);
1060
1061                         value = data->values[idx][i];
1062                         isnull = data->nulls[idx][i];
1063                         attlen = typlen[idx];
1064
1065                         /*
1066                          * If this is a varlena value, check if it's too wide and if yes
1067                          * then skip the whole item. Otherwise detoast the value.
1068                          *
1069                          * XXX It may happen that we've already detoasted some preceding
1070                          * values for the current item. We don't bother to cleanup those
1071                          * on the assumption that those are small (below WIDTH_THRESHOLD)
1072                          * and will be discarded at the end of analyze.
1073                          */
1074                         if ((!isnull) && (attlen == -1))
1075                         {
1076                                 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
1077                                 {
1078                                         toowide = true;
1079                                         break;
1080                                 }
1081
1082                                 value = PointerGetDatum(PG_DETOAST_DATUM(value));
1083                         }
1084
1085                         items[nrows].values[j] = value;
1086                         items[nrows].isnull[j] = isnull;
1087                 }
1088
1089                 if (toowide)
1090                         continue;
1091
1092                 nrows++;
1093         }
1094
1095         /* store the actual number of items (ignoring the too-wide ones) */
1096         *nitems = nrows;
1097
1098         /* all items were too wide */
1099         if (nrows == 0)
1100         {
1101                 /* everything is allocated as a single chunk */
1102                 pfree(items);
1103                 return NULL;
1104         }
1105
1106         /* do the sort, using the multi-sort */
1107         qsort_interruptible(items, nrows, sizeof(SortItem),
1108                                                 multi_sort_compare, mss);
1109
1110         return items;
1111 }
1112
1113 /*
1114  * has_stats_of_kind
1115  *              Check whether the list contains statistic of a given kind
1116  */
1117 bool
1118 has_stats_of_kind(List *stats, char requiredkind)
1119 {
1120         ListCell   *l;
1121
1122         foreach(l, stats)
1123         {
1124                 StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
1125
1126                 if (stat->kind == requiredkind)
1127                         return true;
1128         }
1129
1130         return false;
1131 }
1132
1133 /*
1134  * stat_find_expression
1135  *              Search for an expression in statistics object's list of expressions.
1136  *
1137  * Returns the index of the expression in the statistics object's list of
1138  * expressions, or -1 if not found.
1139  */
1140 static int
1141 stat_find_expression(StatisticExtInfo *stat, Node *expr)
1142 {
1143         ListCell   *lc;
1144         int                     idx;
1145
1146         idx = 0;
1147         foreach(lc, stat->exprs)
1148         {
1149                 Node       *stat_expr = (Node *) lfirst(lc);
1150
1151                 if (equal(stat_expr, expr))
1152                         return idx;
1153                 idx++;
1154         }
1155
1156         /* Expression not found */
1157         return -1;
1158 }
1159
1160 /*
1161  * stat_covers_expressions
1162  *              Test whether a statistics object covers all expressions in a list.
1163  *
1164  * Returns true if all expressions are covered.  If expr_idxs is non-NULL, it
1165  * is populated with the indexes of the expressions found.
1166  */
1167 static bool
1168 stat_covers_expressions(StatisticExtInfo *stat, List *exprs,
1169                                                 Bitmapset **expr_idxs)
1170 {
1171         ListCell   *lc;
1172
1173         foreach(lc, exprs)
1174         {
1175                 Node       *expr = (Node *) lfirst(lc);
1176                 int                     expr_idx;
1177
1178                 expr_idx = stat_find_expression(stat, expr);
1179                 if (expr_idx == -1)
1180                         return false;
1181
1182                 if (expr_idxs != NULL)
1183                         *expr_idxs = bms_add_member(*expr_idxs, expr_idx);
1184         }
1185
1186         /* If we reach here, all expressions are covered */
1187         return true;
1188 }
1189
1190 /*
1191  * choose_best_statistics
1192  *              Look for and return statistics with the specified 'requiredkind' which
1193  *              have keys that match at least two of the given attnums.  Return NULL if
1194  *              there's no match.
1195  *
1196  * The current selection criteria is very simple - we choose the statistics
1197  * object referencing the most attributes in covered (and still unestimated
1198  * clauses), breaking ties in favor of objects with fewer keys overall.
1199  *
1200  * The clause_attnums is an array of bitmaps, storing attnums for individual
1201  * clauses. A NULL element means the clause is either incompatible or already
1202  * estimated.
1203  *
1204  * XXX If multiple statistics objects tie on both criteria, then which object
1205  * is chosen depends on the order that they appear in the stats list. Perhaps
1206  * further tiebreakers are needed.
1207  */
1208 StatisticExtInfo *
1209 choose_best_statistics(List *stats, char requiredkind, bool inh,
1210                                            Bitmapset **clause_attnums, List **clause_exprs,
1211                                            int nclauses)
1212 {
1213         ListCell   *lc;
1214         StatisticExtInfo *best_match = NULL;
1215         int                     best_num_matched = 2;   /* goal #1: maximize */
1216         int                     best_match_keys = (STATS_MAX_DIMENSIONS + 1);   /* goal #2: minimize */
1217
1218         foreach(lc, stats)
1219         {
1220                 int                     i;
1221                 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1222                 Bitmapset  *matched_attnums = NULL;
1223                 Bitmapset  *matched_exprs = NULL;
1224                 int                     num_matched;
1225                 int                     numkeys;
1226
1227                 /* skip statistics that are not of the correct type */
1228                 if (info->kind != requiredkind)
1229                         continue;
1230
1231                 /* skip statistics with mismatching inheritance flag */
1232                 if (info->inherit != inh)
1233                         continue;
1234
1235                 /*
1236                  * Collect attributes and expressions in remaining (unestimated)
1237                  * clauses fully covered by this statistic object.
1238                  *
1239                  * We know already estimated clauses have both clause_attnums and
1240                  * clause_exprs set to NULL. We leave the pointers NULL if already
1241                  * estimated, or we reset them to NULL after estimating the clause.
1242                  */
1243                 for (i = 0; i < nclauses; i++)
1244                 {
1245                         Bitmapset  *expr_idxs = NULL;
1246
1247                         /* ignore incompatible/estimated clauses */
1248                         if (!clause_attnums[i] && !clause_exprs[i])
1249                                 continue;
1250
1251                         /* ignore clauses that are not covered by this object */
1252                         if (!bms_is_subset(clause_attnums[i], info->keys) ||
1253                                 !stat_covers_expressions(info, clause_exprs[i], &expr_idxs))
1254                                 continue;
1255
1256                         /* record attnums and indexes of expressions covered */
1257                         matched_attnums = bms_add_members(matched_attnums, clause_attnums[i]);
1258                         matched_exprs = bms_add_members(matched_exprs, expr_idxs);
1259                 }
1260
1261                 num_matched = bms_num_members(matched_attnums) + bms_num_members(matched_exprs);
1262
1263                 bms_free(matched_attnums);
1264                 bms_free(matched_exprs);
1265
1266                 /*
1267                  * save the actual number of keys in the stats so that we can choose
1268                  * the narrowest stats with the most matching keys.
1269                  */
1270                 numkeys = bms_num_members(info->keys) + list_length(info->exprs);
1271
1272                 /*
1273                  * Use this object when it increases the number of matched attributes
1274                  * and expressions or when it matches the same number of attributes
1275                  * and expressions but these stats have fewer keys than any previous
1276                  * match.
1277                  */
1278                 if (num_matched > best_num_matched ||
1279                         (num_matched == best_num_matched && numkeys < best_match_keys))
1280                 {
1281                         best_match = info;
1282                         best_num_matched = num_matched;
1283                         best_match_keys = numkeys;
1284                 }
1285         }
1286
1287         return best_match;
1288 }
1289
1290 /*
1291  * statext_is_compatible_clause_internal
1292  *              Determines if the clause is compatible with MCV lists.
1293  *
1294  * To be compatible, the given clause must be a combination of supported
1295  * clauses built from Vars or sub-expressions (where a sub-expression is
1296  * something that exactly matches an expression found in statistics objects).
1297  * This function recursively examines the clause and extracts any
1298  * sub-expressions that will need to be matched against statistics.
1299  *
1300  * Currently, we only support the following types of clauses:
1301  *
 * (a) OpExprs of the form (Var/Expr op Const), or (Const op Var/Expr), where
 * the op is one of ("=", "<>", "<", ">", ">=", "<=")
1304  *
1305  * (b) (Var/Expr IS [NOT] NULL)
1306  *
1307  * (c) combinations using AND/OR/NOT
1308  *
1309  * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (Const)) or
1310  * (Var/Expr op ALL (Const))
1311  *
1312  * In the future, the range of supported clauses may be expanded to more
1313  * complex cases, for example (Var op Var).
1314  *
1315  * Arguments:
1316  * clause: (sub)clause to be inspected (bare clause, not a RestrictInfo)
1317  * relid: rel that all Vars in clause must belong to
1318  * *attnums: input/output parameter collecting attribute numbers of all
1319  *              mentioned Vars.  Note that we do not offset the attribute numbers,
1320  *              so we can't cope with system columns.
1321  * *exprs: input/output parameter collecting primitive subclauses within
1322  *              the clause tree
1323  *
1324  * Returns false if there is something we definitively can't handle.
1325  * On true return, we can proceed to match the *exprs against statistics.
1326  */
1327 static bool
1328 statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
1329                                                                           Index relid, Bitmapset **attnums,
1330                                                                           List **exprs)
1331 {
1332         /* Look inside any binary-compatible relabeling (as in examine_variable) */
1333         if (IsA(clause, RelabelType))
1334                 clause = (Node *) ((RelabelType *) clause)->arg;
1335
1336         /* plain Var references (boolean Vars or recursive checks) */
1337         if (IsA(clause, Var))
1338         {
1339                 Var                *var = (Var *) clause;
1340
1341                 /* Ensure var is from the correct relation */
1342                 if (var->varno != relid)
1343                         return false;
1344
1345                 /* we also better ensure the Var is from the current level */
1346                 if (var->varlevelsup > 0)
1347                         return false;
1348
1349                 /*
1350                  * Also reject system attributes and whole-row Vars (we don't allow
1351                  * stats on those).
1352                  */
1353                 if (!AttrNumberIsForUserDefinedAttr(var->varattno))
1354                         return false;
1355
1356                 /* OK, record the attnum for later permissions checks. */
1357                 *attnums = bms_add_member(*attnums, var->varattno);
1358
1359                 return true;
1360         }
1361
1362         /* (Var/Expr op Const) or (Const op Var/Expr) */
1363         if (is_opclause(clause))
1364         {
1365                 RangeTblEntry *rte = root->simple_rte_array[relid];
1366                 OpExpr     *expr = (OpExpr *) clause;
1367                 Node       *clause_expr;
1368
1369                 /* Only expressions with two arguments are considered compatible. */
1370                 if (list_length(expr->args) != 2)
1371                         return false;
1372
1373                 /* Check if the expression has the right shape */
1374                 if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
1375                         return false;
1376
1377                 /*
1378                  * If it's not one of the supported operators ("=", "<", ">", etc.),
1379                  * just ignore the clause, as it's not compatible with MCV lists.
1380                  *
1381                  * This uses the function for estimating selectivity, not the operator
1382                  * directly (a bit awkward, but well ...).
1383                  */
1384                 switch (get_oprrest(expr->opno))
1385                 {
1386                         case F_EQSEL:
1387                         case F_NEQSEL:
1388                         case F_SCALARLTSEL:
1389                         case F_SCALARLESEL:
1390                         case F_SCALARGTSEL:
1391                         case F_SCALARGESEL:
1392                                 /* supported, will continue with inspection of the Var/Expr */
1393                                 break;
1394
1395                         default:
1396                                 /* other estimators are considered unknown/unsupported */
1397                                 return false;
1398                 }
1399
1400                 /*
1401                  * If there are any securityQuals on the RTE from security barrier
1402                  * views or RLS policies, then the user may not have access to all the
1403                  * table's data, and we must check that the operator is leak-proof.
1404                  *
1405                  * If the operator is leaky, then we must ignore this clause for the
1406                  * purposes of estimating with MCV lists, otherwise the operator might
1407                  * reveal values from the MCV list that the user doesn't have
1408                  * permission to see.
1409                  */
1410                 if (rte->securityQuals != NIL &&
1411                         !get_func_leakproof(get_opcode(expr->opno)))
1412                         return false;
1413
1414                 /* Check (Var op Const) or (Const op Var) clauses by recursing. */
1415                 if (IsA(clause_expr, Var))
1416                         return statext_is_compatible_clause_internal(root, clause_expr,
1417                                                                                                                  relid, attnums, exprs);
1418
1419                 /* Otherwise we have (Expr op Const) or (Const op Expr). */
1420                 *exprs = lappend(*exprs, clause_expr);
1421                 return true;
1422         }
1423
1424         /* Var/Expr IN Array */
1425         if (IsA(clause, ScalarArrayOpExpr))
1426         {
1427                 RangeTblEntry *rte = root->simple_rte_array[relid];
1428                 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1429                 Node       *clause_expr;
1430                 bool            expronleft;
1431
1432                 /* Only expressions with two arguments are considered compatible. */
1433                 if (list_length(expr->args) != 2)
1434                         return false;
1435
1436                 /* Check if the expression has the right shape (one Var, one Const) */
1437                 if (!examine_opclause_args(expr->args, &clause_expr, NULL, &expronleft))
1438                         return false;
1439
1440                 /* We only support Var on left, Const on right */
1441                 if (!expronleft)
1442                         return false;
1443
1444                 /*
1445                  * If it's not one of the supported operators ("=", "<", ">", etc.),
1446                  * just ignore the clause, as it's not compatible with MCV lists.
1447                  *
1448                  * This uses the function for estimating selectivity, not the operator
1449                  * directly (a bit awkward, but well ...).
1450                  */
1451                 switch (get_oprrest(expr->opno))
1452                 {
1453                         case F_EQSEL:
1454                         case F_NEQSEL:
1455                         case F_SCALARLTSEL:
1456                         case F_SCALARLESEL:
1457                         case F_SCALARGTSEL:
1458                         case F_SCALARGESEL:
1459                                 /* supported, will continue with inspection of the Var/Expr */
1460                                 break;
1461
1462                         default:
1463                                 /* other estimators are considered unknown/unsupported */
1464                                 return false;
1465                 }
1466
1467                 /*
1468                  * If there are any securityQuals on the RTE from security barrier
1469                  * views or RLS policies, then the user may not have access to all the
1470                  * table's data, and we must check that the operator is leak-proof.
1471                  *
1472                  * If the operator is leaky, then we must ignore this clause for the
1473                  * purposes of estimating with MCV lists, otherwise the operator might
1474                  * reveal values from the MCV list that the user doesn't have
1475                  * permission to see.
1476                  */
1477                 if (rte->securityQuals != NIL &&
1478                         !get_func_leakproof(get_opcode(expr->opno)))
1479                         return false;
1480
1481                 /* Check Var IN Array clauses by recursing. */
1482                 if (IsA(clause_expr, Var))
1483                         return statext_is_compatible_clause_internal(root, clause_expr,
1484                                                                                                                  relid, attnums, exprs);
1485
1486                 /* Otherwise we have Expr IN Array. */
1487                 *exprs = lappend(*exprs, clause_expr);
1488                 return true;
1489         }
1490
1491         /* AND/OR/NOT clause */
1492         if (is_andclause(clause) ||
1493                 is_orclause(clause) ||
1494                 is_notclause(clause))
1495         {
1496                 /*
1497                  * AND/OR/NOT-clauses are supported if all sub-clauses are supported
1498                  *
1499                  * Perhaps we could improve this by handling mixed cases, when some of
1500                  * the clauses are supported and some are not. Selectivity for the
1501                  * supported subclauses would be computed using extended statistics,
1502                  * and the remaining clauses would be estimated using the traditional
1503                  * algorithm (product of selectivities).
1504                  *
1505                  * It however seems overly complex, and in a way we already do that
1506                  * because if we reject the whole clause as unsupported here, it will
1507                  * be eventually passed to clauselist_selectivity() which does exactly
1508                  * this (split into supported/unsupported clauses etc).
1509                  */
1510                 BoolExpr   *expr = (BoolExpr *) clause;
1511                 ListCell   *lc;
1512
1513                 foreach(lc, expr->args)
1514                 {
1515                         /*
1516                          * If we find an incompatible clause in the arguments, treat the
1517                          * whole clause as incompatible.
1518                          */
1519                         if (!statext_is_compatible_clause_internal(root,
1520                                                                                                            (Node *) lfirst(lc),
1521                                                                                                            relid, attnums, exprs))
1522                                 return false;
1523                 }
1524
1525                 return true;
1526         }
1527
1528         /* Var/Expr IS NULL */
1529         if (IsA(clause, NullTest))
1530         {
1531                 NullTest   *nt = (NullTest *) clause;
1532
1533                 /* Check Var IS NULL clauses by recursing. */
1534                 if (IsA(nt->arg, Var))
1535                         return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
1536                                                                                                                  relid, attnums, exprs);
1537
1538                 /* Otherwise we have Expr IS NULL. */
1539                 *exprs = lappend(*exprs, nt->arg);
1540                 return true;
1541         }
1542
1543         /*
1544          * Treat any other expressions as bare expressions to be matched against
1545          * expressions in statistics objects.
1546          */
1547         *exprs = lappend(*exprs, clause);
1548         return true;
1549 }
1550
1551 /*
1552  * statext_is_compatible_clause
1553  *              Determines if the clause is compatible with MCV lists.
1554  *
1555  * See statext_is_compatible_clause_internal, above, for the basic rules.
1556  * This layer deals with RestrictInfo superstructure and applies permissions
1557  * checks to verify that it's okay to examine all mentioned Vars.
1558  *
1559  * Arguments:
1560  * clause: clause to be inspected (in RestrictInfo form)
1561  * relid: rel that all Vars in clause must belong to
1562  * *attnums: input/output parameter collecting attribute numbers of all
1563  *              mentioned Vars.  Note that we do not offset the attribute numbers,
1564  *              so we can't cope with system columns.
1565  * *exprs: input/output parameter collecting primitive subclauses within
1566  *              the clause tree
1567  *
1568  * Returns false if there is something we definitively can't handle.
1569  * On true return, we can proceed to match the *exprs against statistics.
1570  */
1571 static bool
1572 statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
1573                                                          Bitmapset **attnums, List **exprs)
1574 {
1575         RangeTblEntry *rte = root->simple_rte_array[relid];
1576         RelOptInfo *rel = root->simple_rel_array[relid];
1577         RestrictInfo *rinfo;
1578         int                     clause_relid;
1579         Oid                     userid;
1580
1581         /*
1582          * Special-case handling for bare BoolExpr AND clauses, because the
1583          * restrictinfo machinery doesn't build RestrictInfos on top of AND
1584          * clauses.
1585          */
1586         if (is_andclause(clause))
1587         {
1588                 BoolExpr   *expr = (BoolExpr *) clause;
1589                 ListCell   *lc;
1590
1591                 /*
1592                  * Check that each sub-clause is compatible.  We expect these to be
1593                  * RestrictInfos.
1594                  */
1595                 foreach(lc, expr->args)
1596                 {
1597                         if (!statext_is_compatible_clause(root, (Node *) lfirst(lc),
1598                                                                                           relid, attnums, exprs))
1599                                 return false;
1600                 }
1601
1602                 return true;
1603         }
1604
1605         /* Otherwise it must be a RestrictInfo. */
1606         if (!IsA(clause, RestrictInfo))
1607                 return false;
1608         rinfo = (RestrictInfo *) clause;
1609
1610         /* Pseudoconstants are not really interesting here. */
1611         if (rinfo->pseudoconstant)
1612                 return false;
1613
1614         /* Clauses referencing other varnos are incompatible. */
1615         if (!bms_get_singleton_member(rinfo->clause_relids, &clause_relid) ||
1616                 clause_relid != relid)
1617                 return false;
1618
1619         /* Check the clause and determine what attributes it references. */
1620         if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
1621                                                                                            relid, attnums, exprs))
1622                 return false;
1623
1624         /*
1625          * Check that the user has permission to read all required attributes.
1626          */
1627         userid = OidIsValid(rel->userid) ? rel->userid : GetUserId();
1628
1629         /* Table-level SELECT privilege is sufficient for all columns */
1630         if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK)
1631         {
1632                 Bitmapset  *clause_attnums = NULL;
1633                 int                     attnum = -1;
1634
1635                 /*
1636                  * We have to check per-column privileges.  *attnums has the attnums
1637                  * for individual Vars we saw, but there may also be Vars within
1638                  * subexpressions in *exprs.  We can use pull_varattnos() to extract
1639                  * those, but there's an impedance mismatch: attnums returned by
1640                  * pull_varattnos() are offset by FirstLowInvalidHeapAttributeNumber,
1641                  * while attnums within *attnums aren't.  Convert *attnums to the
1642                  * offset style so we can combine the results.
1643                  */
1644                 while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
1645                 {
1646                         clause_attnums =
1647                                 bms_add_member(clause_attnums,
1648                                                            attnum - FirstLowInvalidHeapAttributeNumber);
1649                 }
1650
1651                 /* Now merge attnums from *exprs into clause_attnums */
1652                 if (*exprs != NIL)
1653                         pull_varattnos((Node *) *exprs, relid, &clause_attnums);
1654
1655                 attnum = -1;
1656                 while ((attnum = bms_next_member(clause_attnums, attnum)) >= 0)
1657                 {
1658                         /* Undo the offset */
1659                         AttrNumber      attno = attnum + FirstLowInvalidHeapAttributeNumber;
1660
1661                         if (attno == InvalidAttrNumber)
1662                         {
1663                                 /* Whole-row reference, so must have access to all columns */
1664                                 if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
1665                                                                                           ACLMASK_ALL) != ACLCHECK_OK)
1666                                         return false;
1667                         }
1668                         else
1669                         {
1670                                 if (pg_attribute_aclcheck(rte->relid, attno, userid,
1671                                                                                   ACL_SELECT) != ACLCHECK_OK)
1672                                         return false;
1673                         }
1674                 }
1675         }
1676
1677         /* If we reach here, the clause is OK */
1678         return true;
1679 }
1680
1681 /*
1682  * statext_mcv_clauselist_selectivity
1683  *              Estimate clauses using the best multi-column statistics.
1684  *
1685  * Applies available extended (multi-column) statistics on a table. There may
1686  * be multiple applicable statistics (with respect to the clauses), in which
1687  * case we use greedy approach. In each round we select the best statistic on
1688  * a table (measured by the number of attributes extracted from the clauses
1689  * and covered by it), and compute the selectivity for the supplied clauses.
1690  * We repeat this process with the remaining clauses (if any), until none of
1691  * the available statistics can be used.
1692  *
1693  * One of the main challenges with using MCV lists is how to extrapolate the
1694  * estimate to the data not covered by the MCV list. To do that, we compute
1695  * not only the "MCV selectivity" (selectivities for MCV items matching the
1696  * supplied clauses), but also the following related selectivities:
1697  *
1698  * - simple selectivity:  Computed without extended statistics, i.e. as if the
1699  * columns/clauses were independent.
1700  *
1701  * - base selectivity:  Similar to simple selectivity, but is computed using
1702  * the extended statistic by adding up the base frequencies (that we compute
1703  * and store for each MCV item) of matching MCV items.
1704  *
1705  * - total selectivity: Selectivity covered by the whole MCV list.
1706  *
1707  * These are passed to mcv_combine_selectivities() which combines them to
1708  * produce a selectivity estimate that makes use of both per-column statistics
1709  * and the multi-column MCV statistics.
1710  *
1711  * 'estimatedclauses' is an input/output parameter.  We set bits for the
1712  * 0-based 'clauses' indexes we estimate for and also skip clause items that
1713  * already have a bit set.
1714  */
1715 static Selectivity
1716 statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
1717                                                                    JoinType jointype, SpecialJoinInfo *sjinfo,
1718                                                                    RelOptInfo *rel, Bitmapset **estimatedclauses,
1719                                                                    bool is_or)
1720 {
1721         ListCell   *l;
1722         Bitmapset **list_attnums;       /* attnums extracted from the clause */
1723         List      **list_exprs;         /* expressions matched to any statistic */
1724         int                     listidx;
1725         Selectivity sel = (is_or) ? 0.0 : 1.0;
1726         RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1727
1728         /* check if there's any stats that might be useful for us. */
1729         if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
1730                 return sel;
1731
1732         list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
1733                                                                                  list_length(clauses));
1734
1735         /* expressions extracted from complex expressions */
1736         list_exprs = (List **) palloc(sizeof(Node *) * list_length(clauses));
1737
1738         /*
1739          * Pre-process the clauses list to extract the attnums and expressions
1740          * seen in each item.  We need to determine if there are any clauses which
1741          * will be useful for selectivity estimations with extended stats.  Along
1742          * the way we'll record all of the attnums and expressions for each clause
1743          * in lists which we'll reference later so we don't need to repeat the
1744          * same work again.
1745          *
1746          * We also skip clauses that we already estimated using different types of
1747          * statistics (we treat them as incompatible).
1748          */
1749         listidx = 0;
1750         foreach(l, clauses)
1751         {
1752                 Node       *clause = (Node *) lfirst(l);
1753                 Bitmapset  *attnums = NULL;
1754                 List       *exprs = NIL;
1755
1756                 if (!bms_is_member(listidx, *estimatedclauses) &&
1757                         statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs))
1758                 {
1759                         list_attnums[listidx] = attnums;
1760                         list_exprs[listidx] = exprs;
1761                 }
1762                 else
1763                 {
1764                         list_attnums[listidx] = NULL;
1765                         list_exprs[listidx] = NIL;
1766                 }
1767
1768                 listidx++;
1769         }
1770
1771         /* apply as many extended statistics as possible */
1772         while (true)
1773         {
1774                 StatisticExtInfo *stat;
1775                 List       *stat_clauses;
1776                 Bitmapset  *simple_clauses;
1777
1778                 /* find the best suited statistics object for these attnums */
1779                 stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, rte->inh,
1780                                                                           list_attnums, list_exprs,
1781                                                                           list_length(clauses));
1782
1783                 /*
1784                  * if no (additional) matching stats could be found then we've nothing
1785                  * to do
1786                  */
1787                 if (!stat)
1788                         break;
1789
1790                 /* Ensure choose_best_statistics produced an expected stats type. */
1791                 Assert(stat->kind == STATS_EXT_MCV);
1792
1793                 /* now filter the clauses to be estimated using the selected MCV */
1794                 stat_clauses = NIL;
1795
1796                 /* record which clauses are simple (single column or expression) */
1797                 simple_clauses = NULL;
1798
1799                 listidx = -1;
1800                 foreach(l, clauses)
1801                 {
1802                         /* Increment the index before we decide if to skip the clause. */
1803                         listidx++;
1804
1805                         /*
1806                          * Ignore clauses from which we did not extract any attnums or
1807                          * expressions (this needs to be consistent with what we do in
1808                          * choose_best_statistics).
1809                          *
1810                          * This also eliminates already estimated clauses - both those
1811                          * estimated before and during applying extended statistics.
1812                          *
1813                          * XXX This check is needed because both bms_is_subset and
1814                          * stat_covers_expressions return true for empty attnums and
1815                          * expressions.
1816                          */
1817                         if (!list_attnums[listidx] && !list_exprs[listidx])
1818                                 continue;
1819
1820                         /*
1821                          * The clause was not estimated yet, and we've extracted either
1822                          * attnums or expressions from it. Ignore it if it's not fully
1823                          * covered by the chosen statistics object.
1824                          *
1825                          * We need to check both attributes and expressions, and reject if
1826                          * either is not covered.
1827                          */
1828                         if (!bms_is_subset(list_attnums[listidx], stat->keys) ||
1829                                 !stat_covers_expressions(stat, list_exprs[listidx], NULL))
1830                                 continue;
1831
1832                         /*
1833                          * Now we know the clause is compatible (we have either attnums or
1834                          * expressions extracted from it), and was not estimated yet.
1835                          */
1836
1837                         /* record simple clauses (single column or expression) */
1838                         if ((list_attnums[listidx] == NULL &&
1839                                  list_length(list_exprs[listidx]) == 1) ||
1840                                 (list_exprs[listidx] == NIL &&
1841                                  bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
1842                                 simple_clauses = bms_add_member(simple_clauses,
1843                                                                                                 list_length(stat_clauses));
1844
1845                         /* add clause to list and mark it as estimated */
1846                         stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
1847                         *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1848
1849                         /*
1850                          * Reset the pointers, so that choose_best_statistics knows this
1851                          * clause was estimated and does not consider it again.
1852                          */
1853                         bms_free(list_attnums[listidx]);
1854                         list_attnums[listidx] = NULL;
1855
1856                         list_free(list_exprs[listidx]);
1857                         list_exprs[listidx] = NULL;
1858                 }
1859
1860                 if (is_or)
1861                 {
1862                         bool       *or_matches = NULL;
1863                         Selectivity simple_or_sel = 0.0,
1864                                                 stat_sel = 0.0;
1865                         MCVList    *mcv_list;
1866
1867                         /* Load the MCV list stored in the statistics object */
1868                         mcv_list = statext_mcv_load(stat->statOid, rte->inh);
1869
1870                         /*
1871                          * Compute the selectivity of the ORed list of clauses covered by
1872                          * this statistics object by estimating each in turn and combining
1873                          * them using the formula P(A OR B) = P(A) + P(B) - P(A AND B).
1874                          * This allows us to use the multivariate MCV stats to better
1875                          * estimate the individual terms and their overlap.
1876                          *
1877                          * Each time we iterate this formula, the clause "A" above is
1878                          * equal to all the clauses processed so far, combined with "OR".
1879                          */
1880                         listidx = 0;
1881                         foreach(l, stat_clauses)
1882                         {
1883                                 Node       *clause = (Node *) lfirst(l);
1884                                 Selectivity simple_sel,
1885                                                         overlap_simple_sel,
1886                                                         mcv_sel,
1887                                                         mcv_basesel,
1888                                                         overlap_mcvsel,
1889                                                         overlap_basesel,
1890                                                         mcv_totalsel,
1891                                                         clause_sel,
1892                                                         overlap_sel;
1893
1894                                 /*
1895                                  * "Simple" selectivity of the next clause and its overlap
1896                                  * with any of the previous clauses.  These are our initial
1897                                  * estimates of P(B) and P(A AND B), assuming independence of
1898                                  * columns/clauses.
1899                                  */
1900                                 simple_sel = clause_selectivity_ext(root, clause, varRelid,
1901                                                                                                         jointype, sjinfo, false);
1902
1903                                 overlap_simple_sel = simple_or_sel * simple_sel;
1904
1905                                 /*
1906                                  * New "simple" selectivity of all clauses seen so far,
1907                                  * assuming independence.
1908                                  */
1909                                 simple_or_sel += simple_sel - overlap_simple_sel;
1910                                 CLAMP_PROBABILITY(simple_or_sel);
1911
1912                                 /*
1913                                  * Multi-column estimate of this clause using MCV statistics,
1914                                  * along with base and total selectivities, and corresponding
1915                                  * selectivities for the overlap term P(A AND B).
1916                                  */
1917                                 mcv_sel = mcv_clause_selectivity_or(root, stat, mcv_list,
1918                                                                                                         clause, &or_matches,
1919                                                                                                         &mcv_basesel,
1920                                                                                                         &overlap_mcvsel,
1921                                                                                                         &overlap_basesel,
1922                                                                                                         &mcv_totalsel);
1923
1924                                 /*
1925                                  * Combine the simple and multi-column estimates.
1926                                  *
1927                                  * If this clause is a simple single-column clause, then we
1928                                  * just use the simple selectivity estimate for it, since the
1929                                  * multi-column statistics are unlikely to improve on that
1930                                  * (and in fact could make it worse).  For the overlap, we
1931                                  * always make use of the multi-column statistics.
1932                                  */
1933                                 if (bms_is_member(listidx, simple_clauses))
1934                                         clause_sel = simple_sel;
1935                                 else
1936                                         clause_sel = mcv_combine_selectivities(simple_sel,
1937                                                                                                                    mcv_sel,
1938                                                                                                                    mcv_basesel,
1939                                                                                                                    mcv_totalsel);
1940
1941                                 overlap_sel = mcv_combine_selectivities(overlap_simple_sel,
1942                                                                                                                 overlap_mcvsel,
1943                                                                                                                 overlap_basesel,
1944                                                                                                                 mcv_totalsel);
1945
1946                                 /* Factor these into the result for this statistics object */
1947                                 stat_sel += clause_sel - overlap_sel;
1948                                 CLAMP_PROBABILITY(stat_sel);
1949
1950                                 listidx++;
1951                         }
1952
1953                         /*
1954                          * Factor the result for this statistics object into the overall
1955                          * result.  We treat the results from each separate statistics
1956                          * object as independent of one another.
1957                          */
1958                         sel = sel + stat_sel - sel * stat_sel;
1959                 }
1960                 else                                    /* Implicitly-ANDed list of clauses */
1961                 {
1962                         Selectivity simple_sel,
1963                                                 mcv_sel,
1964                                                 mcv_basesel,
1965                                                 mcv_totalsel,
1966                                                 stat_sel;
1967
1968                         /*
1969                          * "Simple" selectivity, i.e. without any extended statistics,
1970                          * essentially assuming independence of the columns/clauses.
1971                          */
1972                         simple_sel = clauselist_selectivity_ext(root, stat_clauses,
1973                                                                                                         varRelid, jointype,
1974                                                                                                         sjinfo, false);
1975
1976                         /*
1977                          * Multi-column estimate using MCV statistics, along with base and
1978                          * total selectivities.
1979                          */
1980                         mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses,
1981                                                                                                  varRelid, jointype, sjinfo,
1982                                                                                                  rel, &mcv_basesel,
1983                                                                                                  &mcv_totalsel);
1984
1985                         /* Combine the simple and multi-column estimates. */
1986                         stat_sel = mcv_combine_selectivities(simple_sel,
1987                                                                                                  mcv_sel,
1988                                                                                                  mcv_basesel,
1989                                                                                                  mcv_totalsel);
1990
1991                         /* Factor this into the overall result */
1992                         sel *= stat_sel;
1993                 }
1994         }
1995
1996         return sel;
1997 }
1998
1999 /*
2000  * statext_clauselist_selectivity
2001  *              Estimate clauses using the best multi-column statistics.
2002  */
2003 Selectivity
2004 statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
2005                                                            JoinType jointype, SpecialJoinInfo *sjinfo,
2006                                                            RelOptInfo *rel, Bitmapset **estimatedclauses,
2007                                                            bool is_or)
2008 {
2009         Selectivity sel;
2010
2011         /* First, try estimating clauses using a multivariate MCV list. */
2012         sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
2013                                                                                          sjinfo, rel, estimatedclauses, is_or);
2014
2015         /*
2016          * Functional dependencies only work for clauses connected by AND, so for
2017          * OR clauses we're done.
2018          */
2019         if (is_or)
2020                 return sel;
2021
2022         /*
2023          * Then, apply functional dependencies on the remaining clauses by calling
2024          * dependencies_clauselist_selectivity.  Pass 'estimatedclauses' so the
2025          * function can properly skip clauses already estimated above.
2026          *
2027          * The reasoning for applying dependencies last is that the more complex
2028          * stats can track more complex correlations between the attributes, and
2029          * so may be considered more reliable.
2030          *
2031          * For example, MCV list can give us an exact selectivity for values in
2032          * two columns, while functional dependencies can only provide information
2033          * about the overall strength of the dependency.
2034          */
2035         sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
2036                                                                                            jointype, sjinfo, rel,
2037                                                                                            estimatedclauses);
2038
2039         return sel;
2040 }
2041
2042 /*
2043  * examine_opclause_args
2044  *              Split an operator expression's arguments into Expr and Const parts.
2045  *
2046  * Attempts to match the arguments to either (Expr op Const) or (Const op
2047  * Expr), possibly with a RelabelType on top. When the expression matches this
2048  * form, returns true, otherwise returns false.
2049  *
2050  * Optionally returns pointers to the extracted Expr/Const nodes, when passed
2051  * non-null pointers (exprp, cstp and expronleftp). The expronleftp flag
2052  * specifies on which side of the operator we found the expression node.
2053  */
2054 bool
2055 examine_opclause_args(List *args, Node **exprp, Const **cstp,
2056                                           bool *expronleftp)
2057 {
2058         Node       *expr;
2059         Const      *cst;
2060         bool            expronleft;
2061         Node       *leftop,
2062                            *rightop;
2063
2064         /* enforced by statext_is_compatible_clause_internal */
2065         Assert(list_length(args) == 2);
2066
2067         leftop = linitial(args);
2068         rightop = lsecond(args);
2069
2070         /* strip RelabelType from either side of the expression */
2071         if (IsA(leftop, RelabelType))
2072                 leftop = (Node *) ((RelabelType *) leftop)->arg;
2073
2074         if (IsA(rightop, RelabelType))
2075                 rightop = (Node *) ((RelabelType *) rightop)->arg;
2076
2077         if (IsA(rightop, Const))
2078         {
2079                 expr = (Node *) leftop;
2080                 cst = (Const *) rightop;
2081                 expronleft = true;
2082         }
2083         else if (IsA(leftop, Const))
2084         {
2085                 expr = (Node *) rightop;
2086                 cst = (Const *) leftop;
2087                 expronleft = false;
2088         }
2089         else
2090                 return false;
2091
2092         /* return pointers to the extracted parts if requested */
2093         if (exprp)
2094                 *exprp = expr;
2095
2096         if (cstp)
2097                 *cstp = cst;
2098
2099         if (expronleftp)
2100                 *expronleftp = expronleft;
2101
2102         return true;
2103 }
2104
2105
2106 /*
2107  * Compute statistics about expressions of a relation.
2108  */
2109 static void
2110 compute_expr_stats(Relation onerel, double totalrows,
2111                                    AnlExprData *exprdata, int nexprs,
2112                                    HeapTuple *rows, int numrows)
2113 {
2114         MemoryContext expr_context,
2115                                 old_context;
2116         int                     ind,
2117                                 i;
2118
2119         expr_context = AllocSetContextCreate(CurrentMemoryContext,
2120                                                                                  "Analyze Expression",
2121                                                                                  ALLOCSET_DEFAULT_SIZES);
2122         old_context = MemoryContextSwitchTo(expr_context);
2123
2124         for (ind = 0; ind < nexprs; ind++)
2125         {
2126                 AnlExprData *thisdata = &exprdata[ind];
2127                 VacAttrStats *stats = thisdata->vacattrstat;
2128                 Node       *expr = thisdata->expr;
2129                 TupleTableSlot *slot;
2130                 EState     *estate;
2131                 ExprContext *econtext;
2132                 Datum      *exprvals;
2133                 bool       *exprnulls;
2134                 ExprState  *exprstate;
2135                 int                     tcnt;
2136
2137                 /* Are we still in the main context? */
2138                 Assert(CurrentMemoryContext == expr_context);
2139
2140                 /*
2141                  * Need an EState for evaluation of expressions.  Create it in the
2142                  * per-expression context to be sure it gets cleaned up at the bottom
2143                  * of the loop.
2144                  */
2145                 estate = CreateExecutorState();
2146                 econtext = GetPerTupleExprContext(estate);
2147
2148                 /* Set up expression evaluation state */
2149                 exprstate = ExecPrepareExpr((Expr *) expr, estate);
2150
2151                 /* Need a slot to hold the current heap tuple, too */
2152                 slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
2153                                                                                 &TTSOpsHeapTuple);
2154
2155                 /* Arrange for econtext's scan tuple to be the tuple under test */
2156                 econtext->ecxt_scantuple = slot;
2157
2158                 /* Compute and save expression values */
2159                 exprvals = (Datum *) palloc(numrows * sizeof(Datum));
2160                 exprnulls = (bool *) palloc(numrows * sizeof(bool));
2161
2162                 tcnt = 0;
2163                 for (i = 0; i < numrows; i++)
2164                 {
2165                         Datum           datum;
2166                         bool            isnull;
2167
2168                         /*
2169                          * Reset the per-tuple context each time, to reclaim any cruft
2170                          * left behind by evaluating the statistics expressions.
2171                          */
2172                         ResetExprContext(econtext);
2173
2174                         /* Set up for expression evaluation */
2175                         ExecStoreHeapTuple(rows[i], slot, false);
2176
2177                         /*
2178                          * Evaluate the expression. We do this in the per-tuple context so
2179                          * as not to leak memory, and then copy the result into the
2180                          * context created at the beginning of this function.
2181                          */
2182                         datum = ExecEvalExprSwitchContext(exprstate,
2183                                                                                           GetPerTupleExprContext(estate),
2184                                                                                           &isnull);
2185                         if (isnull)
2186                         {
2187                                 exprvals[tcnt] = (Datum) 0;
2188                                 exprnulls[tcnt] = true;
2189                         }
2190                         else
2191                         {
2192                                 /* Make sure we copy the data into the context. */
2193                                 Assert(CurrentMemoryContext == expr_context);
2194
2195                                 exprvals[tcnt] = datumCopy(datum,
2196                                                                                    stats->attrtype->typbyval,
2197                                                                                    stats->attrtype->typlen);
2198                                 exprnulls[tcnt] = false;
2199                         }
2200
2201                         tcnt++;
2202                 }
2203
2204                 /*
2205                  * Now we can compute the statistics for the expression columns.
2206                  *
2207                  * XXX Unlike compute_index_stats we don't need to switch and reset
2208                  * memory contexts here, because we're only computing stats for a
2209                  * single expression (and not iterating over many indexes), so we just
2210                  * do it in expr_context. Note that compute_stats copies the result
2211                  * into stats->anl_context, so it does not disappear.
2212                  */
2213                 if (tcnt > 0)
2214                 {
2215                         AttributeOpts *aopt =
2216                                 get_attribute_options(onerel->rd_id, stats->tupattnum);
2217
2218                         stats->exprvals = exprvals;
2219                         stats->exprnulls = exprnulls;
2220                         stats->rowstride = 1;
2221                         stats->compute_stats(stats,
2222                                                                  expr_fetch_func,
2223                                                                  tcnt,
2224                                                                  tcnt);
2225
2226                         /*
2227                          * If the n_distinct option is specified, it overrides the above
2228                          * computation.
2229                          */
2230                         if (aopt != NULL && aopt->n_distinct != 0.0)
2231                                 stats->stadistinct = aopt->n_distinct;
2232                 }
2233
2234                 /* And clean up */
2235                 MemoryContextSwitchTo(expr_context);
2236
2237                 ExecDropSingleTupleTableSlot(slot);
2238                 FreeExecutorState(estate);
2239                 MemoryContextReset(expr_context);
2240         }
2241
2242         MemoryContextSwitchTo(old_context);
2243         MemoryContextDelete(expr_context);
2244 }
2245
2246
2247 /*
2248  * Fetch function for analyzing statistics object expressions.
2249  *
2250  * We have not bothered to construct tuples from the data, instead the data
2251  * is just in Datum arrays.
2252  */
2253 static Datum
2254 expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
2255 {
2256         int                     i;
2257
2258         /* exprvals and exprnulls are already offset for proper column */
2259         i = rownum * stats->rowstride;
2260         *isNull = stats->exprnulls[i];
2261         return stats->exprvals[i];
2262 }
2263
2264 /*
2265  * Build analyze data for a list of expressions. As this is not tied
2266  * directly to a relation (table or index), we have to fake some of
2267  * the fields in examine_expression().
2268  */
2269 static AnlExprData *
2270 build_expr_data(List *exprs, int stattarget)
2271 {
2272         int                     idx;
2273         int                     nexprs = list_length(exprs);
2274         AnlExprData *exprdata;
2275         ListCell   *lc;
2276
2277         exprdata = (AnlExprData *) palloc0(nexprs * sizeof(AnlExprData));
2278
2279         idx = 0;
2280         foreach(lc, exprs)
2281         {
2282                 Node       *expr = (Node *) lfirst(lc);
2283                 AnlExprData *thisdata = &exprdata[idx];
2284
2285                 thisdata->expr = expr;
2286                 thisdata->vacattrstat = examine_expression(expr, stattarget);
2287                 idx++;
2288         }
2289
2290         return exprdata;
2291 }
2292
2293 /* form an array of pg_statistic rows (per update_attstats) */
2294 static Datum
2295 serialize_expr_stats(AnlExprData *exprdata, int nexprs)
2296 {
2297         int                     exprno;
2298         Oid                     typOid;
2299         Relation        sd;
2300
2301         ArrayBuildState *astate = NULL;
2302
2303         sd = table_open(StatisticRelationId, RowExclusiveLock);
2304
2305         /* lookup OID of composite type for pg_statistic */
2306         typOid = get_rel_type_id(StatisticRelationId);
2307         if (!OidIsValid(typOid))
2308                 ereport(ERROR,
2309                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2310                                  errmsg("relation \"%s\" does not have a composite type",
2311                                                 "pg_statistic")));
2312
2313         for (exprno = 0; exprno < nexprs; exprno++)
2314         {
2315                 int                     i,
2316                                         k;
2317                 VacAttrStats *stats = exprdata[exprno].vacattrstat;
2318
2319                 Datum           values[Natts_pg_statistic];
2320                 bool            nulls[Natts_pg_statistic];
2321                 HeapTuple       stup;
2322
2323                 if (!stats->stats_valid)
2324                 {
2325                         astate = accumArrayResult(astate,
2326                                                                           (Datum) 0,
2327                                                                           true,
2328                                                                           typOid,
2329                                                                           CurrentMemoryContext);
2330                         continue;
2331                 }
2332
2333                 /*
2334                  * Construct a new pg_statistic tuple
2335                  */
2336                 for (i = 0; i < Natts_pg_statistic; ++i)
2337                 {
2338                         nulls[i] = false;
2339                 }
2340
2341                 values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(InvalidOid);
2342                 values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(InvalidAttrNumber);
2343                 values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(false);
2344                 values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
2345                 values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
2346                 values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
2347                 i = Anum_pg_statistic_stakind1 - 1;
2348                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
2349                 {
2350                         values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
2351                 }
2352                 i = Anum_pg_statistic_staop1 - 1;
2353                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
2354                 {
2355                         values[i++] = ObjectIdGetDatum(stats->staop[k]);        /* staopN */
2356                 }
2357                 i = Anum_pg_statistic_stacoll1 - 1;
2358                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
2359                 {
2360                         values[i++] = ObjectIdGetDatum(stats->stacoll[k]);      /* stacollN */
2361                 }
2362                 i = Anum_pg_statistic_stanumbers1 - 1;
2363                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
2364                 {
2365                         int                     nnum = stats->numnumbers[k];
2366
2367                         if (nnum > 0)
2368                         {
2369                                 int                     n;
2370                                 Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
2371                                 ArrayType  *arry;
2372
2373                                 for (n = 0; n < nnum; n++)
2374                                         numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
2375                                 arry = construct_array_builtin(numdatums, nnum, FLOAT4OID);
2376                                 values[i++] = PointerGetDatum(arry);    /* stanumbersN */
2377                         }
2378                         else
2379                         {
2380                                 nulls[i] = true;
2381                                 values[i++] = (Datum) 0;
2382                         }
2383                 }
2384                 i = Anum_pg_statistic_stavalues1 - 1;
2385                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
2386                 {
2387                         if (stats->numvalues[k] > 0)
2388                         {
2389                                 ArrayType  *arry;
2390
2391                                 arry = construct_array(stats->stavalues[k],
2392                                                                            stats->numvalues[k],
2393                                                                            stats->statypid[k],
2394                                                                            stats->statyplen[k],
2395                                                                            stats->statypbyval[k],
2396                                                                            stats->statypalign[k]);
2397                                 values[i++] = PointerGetDatum(arry);    /* stavaluesN */
2398                         }
2399                         else
2400                         {
2401                                 nulls[i] = true;
2402                                 values[i++] = (Datum) 0;
2403                         }
2404                 }
2405
2406                 stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
2407
2408                 astate = accumArrayResult(astate,
2409                                                                   heap_copy_tuple_as_datum(stup, RelationGetDescr(sd)),
2410                                                                   false,
2411                                                                   typOid,
2412                                                                   CurrentMemoryContext);
2413         }
2414
2415         table_close(sd, RowExclusiveLock);
2416
2417         return makeArrayResult(astate, CurrentMemoryContext);
2418 }
2419
2420 /*
2421  * Loads pg_statistic record from expression statistics for expression
2422  * identified by the supplied index.
2423  */
2424 HeapTuple
2425 statext_expressions_load(Oid stxoid, bool inh, int idx)
2426 {
2427         bool            isnull;
2428         Datum           value;
2429         HeapTuple       htup;
2430         ExpandedArrayHeader *eah;
2431         HeapTupleHeader td;
2432         HeapTupleData tmptup;
2433         HeapTuple       tup;
2434
2435         htup = SearchSysCache2(STATEXTDATASTXOID,
2436                                                    ObjectIdGetDatum(stxoid), BoolGetDatum(inh));
2437         if (!HeapTupleIsValid(htup))
2438                 elog(ERROR, "cache lookup failed for statistics object %u", stxoid);
2439
2440         value = SysCacheGetAttr(STATEXTDATASTXOID, htup,
2441                                                         Anum_pg_statistic_ext_data_stxdexpr, &isnull);
2442         if (isnull)
2443                 elog(ERROR,
2444                          "requested statistics kind \"%c\" is not yet built for statistics object %u",
2445                          STATS_EXT_EXPRESSIONS, stxoid);
2446
2447         eah = DatumGetExpandedArray(value);
2448
2449         deconstruct_expanded_array(eah);
2450
2451         td = DatumGetHeapTupleHeader(eah->dvalues[idx]);
2452
2453         /* Build a temporary HeapTuple control structure */
2454         tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
2455         ItemPointerSetInvalid(&(tmptup.t_self));
2456         tmptup.t_tableOid = InvalidOid;
2457         tmptup.t_data = td;
2458
2459         tup = heap_copytuple(&tmptup);
2460
2461         ReleaseSysCache(htup);
2462
2463         return tup;
2464 }
2465
2466 /*
2467  * Evaluate the expressions, so that we can use the results to build
2468  * all the requested statistics types. This matters especially for
2469  * expensive expressions, of course.
2470  */
2471 static StatsBuildData *
2472 make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows,
2473                                 VacAttrStats **stats, int stattarget)
2474 {
2475         /* evaluated expressions */
2476         StatsBuildData *result;
2477         char       *ptr;
2478         Size            len;
2479
2480         int                     i;
2481         int                     k;
2482         int                     idx;
2483         TupleTableSlot *slot;
2484         EState     *estate;
2485         ExprContext *econtext;
2486         List       *exprstates = NIL;
2487         int                     nkeys = bms_num_members(stat->columns) + list_length(stat->exprs);
2488         ListCell   *lc;
2489
2490         /* allocate everything as a single chunk, so we can free it easily */
2491         len = MAXALIGN(sizeof(StatsBuildData));
2492         len += MAXALIGN(sizeof(AttrNumber) * nkeys);    /* attnums */
2493         len += MAXALIGN(sizeof(VacAttrStats *) * nkeys);        /* stats */
2494
2495         /* values */
2496         len += MAXALIGN(sizeof(Datum *) * nkeys);
2497         len += nkeys * MAXALIGN(sizeof(Datum) * numrows);
2498
2499         /* nulls */
2500         len += MAXALIGN(sizeof(bool *) * nkeys);
2501         len += nkeys * MAXALIGN(sizeof(bool) * numrows);
2502
2503         ptr = palloc(len);
2504
2505         /* set the pointers */
2506         result = (StatsBuildData *) ptr;
2507         ptr += MAXALIGN(sizeof(StatsBuildData));
2508
2509         /* attnums */
2510         result->attnums = (AttrNumber *) ptr;
2511         ptr += MAXALIGN(sizeof(AttrNumber) * nkeys);
2512
2513         /* stats */
2514         result->stats = (VacAttrStats **) ptr;
2515         ptr += MAXALIGN(sizeof(VacAttrStats *) * nkeys);
2516
2517         /* values */
2518         result->values = (Datum **) ptr;
2519         ptr += MAXALIGN(sizeof(Datum *) * nkeys);
2520
2521         /* nulls */
2522         result->nulls = (bool **) ptr;
2523         ptr += MAXALIGN(sizeof(bool *) * nkeys);
2524
2525         for (i = 0; i < nkeys; i++)
2526         {
2527                 result->values[i] = (Datum *) ptr;
2528                 ptr += MAXALIGN(sizeof(Datum) * numrows);
2529
2530                 result->nulls[i] = (bool *) ptr;
2531                 ptr += MAXALIGN(sizeof(bool) * numrows);
2532         }
2533
2534         Assert((ptr - (char *) result) == len);
2535
2536         /* we have it allocated, so let's fill the values */
2537         result->nattnums = nkeys;
2538         result->numrows = numrows;
2539
2540         /* fill the attribute info - first attributes, then expressions */
2541         idx = 0;
2542         k = -1;
2543         while ((k = bms_next_member(stat->columns, k)) >= 0)
2544         {
2545                 result->attnums[idx] = k;
2546                 result->stats[idx] = stats[idx];
2547
2548                 idx++;
2549         }
2550
2551         k = -1;
2552         foreach(lc, stat->exprs)
2553         {
2554                 Node       *expr = (Node *) lfirst(lc);
2555
2556                 result->attnums[idx] = k;
2557                 result->stats[idx] = examine_expression(expr, stattarget);
2558
2559                 idx++;
2560                 k--;
2561         }
2562
2563         /* first extract values for all the regular attributes */
2564         for (i = 0; i < numrows; i++)
2565         {
2566                 idx = 0;
2567                 k = -1;
2568                 while ((k = bms_next_member(stat->columns, k)) >= 0)
2569                 {
2570                         result->values[idx][i] = heap_getattr(rows[i], k,
2571                                                                                                   result->stats[idx]->tupDesc,
2572                                                                                                   &result->nulls[idx][i]);
2573
2574                         idx++;
2575                 }
2576         }
2577
2578         /* Need an EState for evaluation expressions. */
2579         estate = CreateExecutorState();
2580         econtext = GetPerTupleExprContext(estate);
2581
2582         /* Need a slot to hold the current heap tuple, too */
2583         slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
2584                                                                         &TTSOpsHeapTuple);
2585
2586         /* Arrange for econtext's scan tuple to be the tuple under test */
2587         econtext->ecxt_scantuple = slot;
2588
2589         /* Set up expression evaluation state */
2590         exprstates = ExecPrepareExprList(stat->exprs, estate);
2591
2592         for (i = 0; i < numrows; i++)
2593         {
2594                 /*
2595                  * Reset the per-tuple context each time, to reclaim any cruft left
2596                  * behind by evaluating the statistics object expressions.
2597                  */
2598                 ResetExprContext(econtext);
2599
2600                 /* Set up for expression evaluation */
2601                 ExecStoreHeapTuple(rows[i], slot, false);
2602
2603                 idx = bms_num_members(stat->columns);
2604                 foreach(lc, exprstates)
2605                 {
2606                         Datum           datum;
2607                         bool            isnull;
2608                         ExprState  *exprstate = (ExprState *) lfirst(lc);
2609
2610                         /*
2611                          * XXX This probably leaks memory. Maybe we should use
2612                          * ExecEvalExprSwitchContext but then we need to copy the result
2613                          * somewhere else.
2614                          */
2615                         datum = ExecEvalExpr(exprstate,
2616                                                                  GetPerTupleExprContext(estate),
2617                                                                  &isnull);
2618                         if (isnull)
2619                         {
2620                                 result->values[idx][i] = (Datum) 0;
2621                                 result->nulls[idx][i] = true;
2622                         }
2623                         else
2624                         {
2625                                 result->values[idx][i] = (Datum) datum;
2626                                 result->nulls[idx][i] = false;
2627                         }
2628
2629                         idx++;
2630                 }
2631         }
2632
2633         ExecDropSingleTupleTableSlot(slot);
2634         FreeExecutorState(estate);
2635
2636         return result;
2637 }