Correct obsolete UNION hash aggs comment.
[pgsql.git] / src / backend / optimizer / prep / prepunion.c
blob6588f83d5ec6fde6491db599b12565ac736ec8bc
/*-------------------------------------------------------------------------
 *
 * prepunion.c
 *	  Routines to plan set-operation queries.  The filename is a leftover
 *	  from a time when only UNIONs were implemented.
 *
 * There are two code paths in the planner for set-operation queries.
 * If a subquery consists entirely of simple UNION ALL operations, it
 * is converted into an "append relation".  Otherwise, it is handled
 * by the general code in this module (plan_set_operations and its
 * subroutines).  There is some support code here for the append-relation
 * case, but most of the heavy lifting for that is done elsewhere,
 * notably in prepjointree.c and allpaths.c.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/prep/prepunion.c
 *
 *-------------------------------------------------------------------------
 */
24 #include "postgres.h"
26 #include "access/htup_details.h"
27 #include "access/sysattr.h"
28 #include "catalog/partition.h"
29 #include "catalog/pg_inherits.h"
30 #include "catalog/pg_type.h"
31 #include "miscadmin.h"
32 #include "nodes/makefuncs.h"
33 #include "nodes/nodeFuncs.h"
34 #include "optimizer/cost.h"
35 #include "optimizer/pathnode.h"
36 #include "optimizer/paths.h"
37 #include "optimizer/planmain.h"
38 #include "optimizer/planner.h"
39 #include "optimizer/prep.h"
40 #include "optimizer/tlist.h"
41 #include "parser/parse_coerce.h"
42 #include "parser/parsetree.h"
43 #include "utils/lsyscache.h"
44 #include "utils/rel.h"
45 #include "utils/selfuncs.h"
46 #include "utils/syscache.h"
49 static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
50 List *colTypes, List *colCollations,
51 bool junkOK,
52 int flag, List *refnames_tlist,
53 List **pTargetList,
54 double *pNumGroups);
55 static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
56 PlannerInfo *root,
57 List *refnames_tlist,
58 List **pTargetList);
59 static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
60 List *refnames_tlist,
61 List **pTargetList);
62 static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
63 List *refnames_tlist,
64 List **pTargetList);
65 static List *plan_union_children(PlannerInfo *root,
66 SetOperationStmt *top_union,
67 List *refnames_tlist,
68 List **tlist_list);
69 static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
70 PlannerInfo *root);
71 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
72 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
73 Path *input_path,
74 double dNumGroups, double dNumOutputRows,
75 const char *construct);
76 static List *generate_setop_tlist(List *colTypes, List *colCollations,
77 int flag,
78 Index varno,
79 bool hack_constants,
80 List *input_tlist,
81 List *refnames_tlist);
82 static List *generate_append_tlist(List *colTypes, List *colCollations,
83 bool flag,
84 List *input_tlists,
85 List *refnames_tlist);
86 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
90 * plan_set_operations
92 * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
94 * This routine only deals with the setOperations tree of the given query.
95 * Any top-level ORDER BY requested in root->parse->sortClause will be handled
96 * when we return to grouping_planner; likewise for LIMIT.
98 * What we return is an "upperrel" RelOptInfo containing at least one Path
99 * that implements the set-operation tree. In addition, root->processed_tlist
100 * receives a targetlist representing the output of the topmost setop node.
102 RelOptInfo *
103 plan_set_operations(PlannerInfo *root)
105 Query *parse = root->parse;
106 SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
107 Node *node;
108 RangeTblEntry *leftmostRTE;
109 Query *leftmostQuery;
110 RelOptInfo *setop_rel;
111 List *top_tlist;
113 Assert(topop);
115 /* check for unsupported stuff */
116 Assert(parse->jointree->fromlist == NIL);
117 Assert(parse->jointree->quals == NULL);
118 Assert(parse->groupClause == NIL);
119 Assert(parse->havingQual == NULL);
120 Assert(parse->windowClause == NIL);
121 Assert(parse->distinctClause == NIL);
124 * In the outer query level, we won't have any true equivalences to deal
125 * with; but we do want to be able to make pathkeys, which will require
126 * single-member EquivalenceClasses. Indicate that EC merging is complete
127 * so that pathkeys.c won't complain.
129 Assert(root->eq_classes == NIL);
130 root->ec_merging_done = true;
133 * We'll need to build RelOptInfos for each of the leaf subqueries, which
134 * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
135 * arrays for those, and for AppendRelInfos in case they're needed.
137 setup_simple_rel_arrays(root);
140 * Find the leftmost component Query. We need to use its column names for
141 * all generated tlists (else SELECT INTO won't work right).
143 node = topop->larg;
144 while (node && IsA(node, SetOperationStmt))
145 node = ((SetOperationStmt *) node)->larg;
146 Assert(node && IsA(node, RangeTblRef));
147 leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
148 leftmostQuery = leftmostRTE->subquery;
149 Assert(leftmostQuery != NULL);
152 * If the topmost node is a recursive union, it needs special processing.
154 if (root->hasRecursion)
156 setop_rel = generate_recursion_path(topop, root,
157 leftmostQuery->targetList,
158 &top_tlist);
160 else
163 * Recurse on setOperations tree to generate paths for set ops. The
164 * final output paths should have just the column types shown as the
165 * output from the top-level node, plus possibly resjunk working
166 * columns (we can rely on upper-level nodes to deal with that).
168 setop_rel = recurse_set_operations((Node *) topop, root,
169 topop->colTypes, topop->colCollations,
170 true, -1,
171 leftmostQuery->targetList,
172 &top_tlist,
173 NULL);
176 /* Must return the built tlist into root->processed_tlist. */
177 root->processed_tlist = top_tlist;
179 return setop_rel;
183 * recurse_set_operations
184 * Recursively handle one step in a tree of set operations
186 * colTypes: OID list of set-op's result column datatypes
187 * colCollations: OID list of set-op's result column collations
188 * junkOK: if true, child resjunk columns may be left in the result
189 * flag: if >= 0, add a resjunk output column indicating value of flag
190 * refnames_tlist: targetlist to take column names from
192 * Returns a RelOptInfo for the subtree, as well as these output parameters:
193 * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
194 * *pNumGroups: if not NULL, we estimate the number of distinct groups
195 * in the result, and store it there
197 * The pTargetList output parameter is mostly redundant with the pathtarget
198 * of the returned RelOptInfo, but for the moment we need it because much of
199 * the logic in this file depends on flag columns being marked resjunk.
200 * Pending a redesign of how that works, this is the easy way out.
202 * We don't have to care about typmods here: the only allowed difference
203 * between set-op input and output typmods is input is a specific typmod
204 * and output is -1, and that does not require a coercion.
206 static RelOptInfo *
207 recurse_set_operations(Node *setOp, PlannerInfo *root,
208 List *colTypes, List *colCollations,
209 bool junkOK,
210 int flag, List *refnames_tlist,
211 List **pTargetList,
212 double *pNumGroups)
214 RelOptInfo *rel = NULL; /* keep compiler quiet */
216 /* Guard against stack overflow due to overly complex setop nests */
217 check_stack_depth();
219 if (IsA(setOp, RangeTblRef))
221 RangeTblRef *rtr = (RangeTblRef *) setOp;
222 RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
223 Query *subquery = rte->subquery;
224 PlannerInfo *subroot;
225 RelOptInfo *final_rel;
226 Path *subpath;
227 Path *path;
228 List *tlist;
230 Assert(subquery != NULL);
232 /* Build a RelOptInfo for this leaf subquery. */
233 rel = build_simple_rel(root, rtr->rtindex, NULL);
235 /* plan_params should not be in use in current query level */
236 Assert(root->plan_params == NIL);
238 /* Generate a subroot and Paths for the subquery */
239 subroot = rel->subroot = subquery_planner(root->glob, subquery,
240 root,
241 false,
242 root->tuple_fraction);
245 * It should not be possible for the primitive query to contain any
246 * cross-references to other primitive queries in the setop tree.
248 if (root->plan_params)
249 elog(ERROR, "unexpected outer reference in set operation subquery");
251 /* Figure out the appropriate target list for this subquery. */
252 tlist = generate_setop_tlist(colTypes, colCollations,
253 flag,
254 rtr->rtindex,
255 true,
256 subroot->processed_tlist,
257 refnames_tlist);
258 rel->reltarget = create_pathtarget(root, tlist);
260 /* Return the fully-fledged tlist to caller, too */
261 *pTargetList = tlist;
264 * Mark rel with estimated output rows, width, etc. Note that we have
265 * to do this before generating outer-query paths, else
266 * cost_subqueryscan is not happy.
268 set_subquery_size_estimates(root, rel);
271 * Since we may want to add a partial path to this relation, we must
272 * set its consider_parallel flag correctly.
274 final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
275 rel->consider_parallel = final_rel->consider_parallel;
278 * For the moment, we consider only a single Path for the subquery.
279 * This should change soon (make it look more like
280 * set_subquery_pathlist).
282 subpath = get_cheapest_fractional_path(final_rel,
283 root->tuple_fraction);
286 * Stick a SubqueryScanPath atop that.
288 * We don't bother to determine the subquery's output ordering since
289 * it won't be reflected in the set-op result anyhow; so just label
290 * the SubqueryScanPath with nil pathkeys. (XXX that should change
291 * soon too, likely.)
293 path = (Path *) create_subqueryscan_path(root, rel, subpath,
294 NIL, NULL);
296 add_path(rel, path);
299 * If we have a partial path for the child relation, we can use that
300 * to build a partial path for this relation. But there's no point in
301 * considering any path but the cheapest.
303 if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
304 final_rel->partial_pathlist != NIL)
306 Path *partial_subpath;
307 Path *partial_path;
309 partial_subpath = linitial(final_rel->partial_pathlist);
310 partial_path = (Path *)
311 create_subqueryscan_path(root, rel, partial_subpath,
312 NIL, NULL);
313 add_partial_path(rel, partial_path);
317 * Estimate number of groups if caller wants it. If the subquery used
318 * grouping or aggregation, its output is probably mostly unique
319 * anyway; otherwise do statistical estimation.
321 * XXX you don't really want to know about this: we do the estimation
322 * using the subquery's original targetlist expressions, not the
323 * subroot->processed_tlist which might seem more appropriate. The
324 * reason is that if the subquery is itself a setop, it may return a
325 * processed_tlist containing "varno 0" Vars generated by
326 * generate_append_tlist, and those would confuse estimate_num_groups
327 * mightily. We ought to get rid of the "varno 0" hack, but that
328 * requires a redesign of the parsetree representation of setops, so
329 * that there can be an RTE corresponding to each setop's output.
331 if (pNumGroups)
333 if (subquery->groupClause || subquery->groupingSets ||
334 subquery->distinctClause ||
335 subroot->hasHavingQual || subquery->hasAggs)
336 *pNumGroups = subpath->rows;
337 else
338 *pNumGroups = estimate_num_groups(subroot,
339 get_tlist_exprs(subquery->targetList, false),
340 subpath->rows,
341 NULL);
344 else if (IsA(setOp, SetOperationStmt))
346 SetOperationStmt *op = (SetOperationStmt *) setOp;
348 /* UNIONs are much different from INTERSECT/EXCEPT */
349 if (op->op == SETOP_UNION)
350 rel = generate_union_paths(op, root,
351 refnames_tlist,
352 pTargetList);
353 else
354 rel = generate_nonunion_paths(op, root,
355 refnames_tlist,
356 pTargetList);
357 if (pNumGroups)
358 *pNumGroups = rel->rows;
361 * If necessary, add a Result node to project the caller-requested
362 * output columns.
364 * XXX you don't really want to know about this: setrefs.c will apply
365 * fix_upper_expr() to the Result node's tlist. This would fail if the
366 * Vars generated by generate_setop_tlist() were not exactly equal()
367 * to the corresponding tlist entries of the subplan. However, since
368 * the subplan was generated by generate_union_paths() or
369 * generate_nonunion_paths(), and hence its tlist was generated by
370 * generate_append_tlist(), this will work. We just tell
371 * generate_setop_tlist() to use varno 0.
373 if (flag >= 0 ||
374 !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
375 !tlist_same_collations(*pTargetList, colCollations, junkOK))
377 PathTarget *target;
378 ListCell *lc;
380 *pTargetList = generate_setop_tlist(colTypes, colCollations,
381 flag,
383 false,
384 *pTargetList,
385 refnames_tlist);
386 target = create_pathtarget(root, *pTargetList);
388 /* Apply projection to each path */
389 foreach(lc, rel->pathlist)
391 Path *subpath = (Path *) lfirst(lc);
392 Path *path;
394 Assert(subpath->param_info == NULL);
395 path = apply_projection_to_path(root, subpath->parent,
396 subpath, target);
397 /* If we had to add a Result, path is different from subpath */
398 if (path != subpath)
399 lfirst(lc) = path;
402 /* Apply projection to each partial path */
403 foreach(lc, rel->partial_pathlist)
405 Path *subpath = (Path *) lfirst(lc);
406 Path *path;
408 Assert(subpath->param_info == NULL);
410 /* avoid apply_projection_to_path, in case of multiple refs */
411 path = (Path *) create_projection_path(root, subpath->parent,
412 subpath, target);
413 lfirst(lc) = path;
417 else
419 elog(ERROR, "unrecognized node type: %d",
420 (int) nodeTag(setOp));
421 *pTargetList = NIL;
424 postprocess_setop_rel(root, rel);
426 return rel;
430 * Generate paths for a recursive UNION node
432 static RelOptInfo *
433 generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
434 List *refnames_tlist,
435 List **pTargetList)
437 RelOptInfo *result_rel;
438 Path *path;
439 RelOptInfo *lrel,
440 *rrel;
441 Path *lpath;
442 Path *rpath;
443 List *lpath_tlist;
444 List *rpath_tlist;
445 List *tlist;
446 List *groupList;
447 double dNumGroups;
449 /* Parser should have rejected other cases */
450 if (setOp->op != SETOP_UNION)
451 elog(ERROR, "only UNION queries can be recursive");
452 /* Worktable ID should be assigned */
453 Assert(root->wt_param_id >= 0);
456 * Unlike a regular UNION node, process the left and right inputs
457 * separately without any intention of combining them into one Append.
459 lrel = recurse_set_operations(setOp->larg, root,
460 setOp->colTypes, setOp->colCollations,
461 false, -1,
462 refnames_tlist,
463 &lpath_tlist,
464 NULL);
465 lpath = lrel->cheapest_total_path;
466 /* The right path will want to look at the left one ... */
467 root->non_recursive_path = lpath;
468 rrel = recurse_set_operations(setOp->rarg, root,
469 setOp->colTypes, setOp->colCollations,
470 false, -1,
471 refnames_tlist,
472 &rpath_tlist,
473 NULL);
474 rpath = rrel->cheapest_total_path;
475 root->non_recursive_path = NULL;
478 * Generate tlist for RecursiveUnion path node --- same as in Append cases
480 tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
481 list_make2(lpath_tlist, rpath_tlist),
482 refnames_tlist);
484 *pTargetList = tlist;
486 /* Build result relation. */
487 result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
488 bms_union(lrel->relids, rrel->relids));
489 result_rel->reltarget = create_pathtarget(root, tlist);
492 * If UNION, identify the grouping operators
494 if (setOp->all)
496 groupList = NIL;
497 dNumGroups = 0;
499 else
501 /* Identify the grouping semantics */
502 groupList = generate_setop_grouplist(setOp, tlist);
504 /* We only support hashing here */
505 if (!grouping_is_hashable(groupList))
506 ereport(ERROR,
507 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
508 errmsg("could not implement recursive UNION"),
509 errdetail("All column datatypes must be hashable.")));
512 * For the moment, take the number of distinct groups as equal to the
513 * total input size, ie, the worst case.
515 dNumGroups = lpath->rows + rpath->rows * 10;
519 * And make the path node.
521 path = (Path *) create_recursiveunion_path(root,
522 result_rel,
523 lpath,
524 rpath,
525 result_rel->reltarget,
526 groupList,
527 root->wt_param_id,
528 dNumGroups);
530 add_path(result_rel, path);
531 postprocess_setop_rel(root, result_rel);
532 return result_rel;
536 * Generate paths for a UNION or UNION ALL node
538 static RelOptInfo *
539 generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
540 List *refnames_tlist,
541 List **pTargetList)
543 Relids relids = NULL;
544 RelOptInfo *result_rel;
545 double save_fraction = root->tuple_fraction;
546 ListCell *lc;
547 List *pathlist = NIL;
548 List *partial_pathlist = NIL;
549 bool partial_paths_valid = true;
550 bool consider_parallel = true;
551 List *rellist;
552 List *tlist_list;
553 List *tlist;
554 Path *path;
557 * If plain UNION, tell children to fetch all tuples.
559 * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
560 * each arm of the UNION ALL. One could make a case for reducing the
561 * tuple fraction for later arms (discounting by the expected size of the
562 * earlier arms' results) but it seems not worth the trouble. The normal
563 * case where tuple_fraction isn't already zero is a LIMIT at top level,
564 * and passing it down as-is is usually enough to get the desired result
565 * of preferring fast-start plans.
567 if (!op->all)
568 root->tuple_fraction = 0.0;
571 * If any of my children are identical UNION nodes (same op, all-flag, and
572 * colTypes) then they can be merged into this node so that we generate
573 * only one Append and unique-ification for the lot. Recurse to find such
574 * nodes and compute their children's paths.
576 rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
579 * Generate tlist for Append plan node.
581 * The tlist for an Append plan isn't important as far as the Append is
582 * concerned, but we must make it look real anyway for the benefit of the
583 * next plan level up.
585 tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
586 tlist_list, refnames_tlist);
588 *pTargetList = tlist;
590 /* Build path lists and relid set. */
591 foreach(lc, rellist)
593 RelOptInfo *rel = lfirst(lc);
595 pathlist = lappend(pathlist, rel->cheapest_total_path);
597 if (consider_parallel)
599 if (!rel->consider_parallel)
601 consider_parallel = false;
602 partial_paths_valid = false;
604 else if (rel->partial_pathlist == NIL)
605 partial_paths_valid = false;
606 else
607 partial_pathlist = lappend(partial_pathlist,
608 linitial(rel->partial_pathlist));
611 relids = bms_union(relids, rel->relids);
614 /* Build result relation. */
615 result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
616 result_rel->reltarget = create_pathtarget(root, tlist);
617 result_rel->consider_parallel = consider_parallel;
620 * Append the child results together.
622 path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
623 NIL, NULL, 0, false, NIL, -1);
626 * For UNION ALL, we just need the Append path. For UNION, need to add
627 * node(s) to remove duplicates.
629 if (!op->all)
630 path = make_union_unique(op, path, tlist, root);
632 add_path(result_rel, path);
635 * Estimate number of groups. For now we just assume the output is unique
636 * --- this is certainly true for the UNION case, and we want worst-case
637 * estimates anyway.
639 result_rel->rows = path->rows;
642 * Now consider doing the same thing using the partial paths plus Append
643 * plus Gather.
645 if (partial_paths_valid)
647 Path *ppath;
648 ListCell *lc;
649 int parallel_workers = 0;
651 /* Find the highest number of workers requested for any subpath. */
652 foreach(lc, partial_pathlist)
654 Path *path = lfirst(lc);
656 parallel_workers = Max(parallel_workers, path->parallel_workers);
658 Assert(parallel_workers > 0);
661 * If the use of parallel append is permitted, always request at least
662 * log2(# of children) paths. We assume it can be useful to have
663 * extra workers in this case because they will be spread out across
664 * the children. The precise formula is just a guess; see
665 * add_paths_to_append_rel.
667 if (enable_parallel_append)
669 parallel_workers = Max(parallel_workers,
670 fls(list_length(partial_pathlist)));
671 parallel_workers = Min(parallel_workers,
672 max_parallel_workers_per_gather);
674 Assert(parallel_workers > 0);
676 ppath = (Path *)
677 create_append_path(root, result_rel, NIL, partial_pathlist,
678 NIL, NULL,
679 parallel_workers, enable_parallel_append,
680 NIL, -1);
681 ppath = (Path *)
682 create_gather_path(root, result_rel, ppath,
683 result_rel->reltarget, NULL, NULL);
684 if (!op->all)
685 ppath = make_union_unique(op, ppath, tlist, root);
686 add_path(result_rel, ppath);
689 /* Undo effects of possibly forcing tuple_fraction to 0 */
690 root->tuple_fraction = save_fraction;
692 return result_rel;
696 * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
698 static RelOptInfo *
699 generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
700 List *refnames_tlist,
701 List **pTargetList)
703 RelOptInfo *result_rel;
704 RelOptInfo *lrel,
705 *rrel;
706 double save_fraction = root->tuple_fraction;
707 Path *lpath,
708 *rpath,
709 *path;
710 List *lpath_tlist,
711 *rpath_tlist,
712 *tlist_list,
713 *tlist,
714 *groupList,
715 *pathlist;
716 double dLeftGroups,
717 dRightGroups,
718 dNumGroups,
719 dNumOutputRows;
720 bool use_hash;
721 SetOpCmd cmd;
722 int firstFlag;
725 * Tell children to fetch all tuples.
727 root->tuple_fraction = 0.0;
729 /* Recurse on children, ensuring their outputs are marked */
730 lrel = recurse_set_operations(op->larg, root,
731 op->colTypes, op->colCollations,
732 false, 0,
733 refnames_tlist,
734 &lpath_tlist,
735 &dLeftGroups);
736 lpath = lrel->cheapest_total_path;
737 rrel = recurse_set_operations(op->rarg, root,
738 op->colTypes, op->colCollations,
739 false, 1,
740 refnames_tlist,
741 &rpath_tlist,
742 &dRightGroups);
743 rpath = rrel->cheapest_total_path;
745 /* Undo effects of forcing tuple_fraction to 0 */
746 root->tuple_fraction = save_fraction;
749 * For EXCEPT, we must put the left input first. For INTERSECT, either
750 * order should give the same results, and we prefer to put the smaller
751 * input first in order to minimize the size of the hash table in the
752 * hashing case. "Smaller" means the one with the fewer groups.
754 if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
756 pathlist = list_make2(lpath, rpath);
757 tlist_list = list_make2(lpath_tlist, rpath_tlist);
758 firstFlag = 0;
760 else
762 pathlist = list_make2(rpath, lpath);
763 tlist_list = list_make2(rpath_tlist, lpath_tlist);
764 firstFlag = 1;
768 * Generate tlist for Append plan node.
770 * The tlist for an Append plan isn't important as far as the Append is
771 * concerned, but we must make it look real anyway for the benefit of the
772 * next plan level up. In fact, it has to be real enough that the flag
773 * column is shown as a variable not a constant, else setrefs.c will get
774 * confused.
776 tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
777 tlist_list, refnames_tlist);
779 *pTargetList = tlist;
781 /* Build result relation. */
782 result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
783 bms_union(lrel->relids, rrel->relids));
784 result_rel->reltarget = create_pathtarget(root, tlist);
787 * Append the child results together.
789 path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
790 NIL, NULL, 0, false, NIL, -1);
792 /* Identify the grouping semantics */
793 groupList = generate_setop_grouplist(op, tlist);
796 * Estimate number of distinct groups that we'll need hashtable entries
797 * for; this is the size of the left-hand input for EXCEPT, or the smaller
798 * input for INTERSECT. Also estimate the number of eventual output rows.
799 * In non-ALL cases, we estimate each group produces one output row; in
800 * ALL cases use the relevant relation size. These are worst-case
801 * estimates, of course, but we need to be conservative.
803 if (op->op == SETOP_EXCEPT)
805 dNumGroups = dLeftGroups;
806 dNumOutputRows = op->all ? lpath->rows : dNumGroups;
808 else
810 dNumGroups = Min(dLeftGroups, dRightGroups);
811 dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
815 * Decide whether to hash or sort, and add a sort node if needed.
817 use_hash = choose_hashed_setop(root, groupList, path,
818 dNumGroups, dNumOutputRows,
819 (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
821 if (groupList && !use_hash)
822 path = (Path *) create_sort_path(root,
823 result_rel,
824 path,
825 make_pathkeys_for_sortclauses(root,
826 groupList,
827 tlist),
828 -1.0);
831 * Finally, add a SetOp path node to generate the correct output.
833 switch (op->op)
835 case SETOP_INTERSECT:
836 cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
837 break;
838 case SETOP_EXCEPT:
839 cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
840 break;
841 default:
842 elog(ERROR, "unrecognized set op: %d", (int) op->op);
843 cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
844 break;
846 path = (Path *) create_setop_path(root,
847 result_rel,
848 path,
849 cmd,
850 use_hash ? SETOP_HASHED : SETOP_SORTED,
851 groupList,
852 list_length(op->colTypes) + 1,
853 use_hash ? firstFlag : -1,
854 dNumGroups,
855 dNumOutputRows);
857 result_rel->rows = path->rows;
858 add_path(result_rel, path);
859 return result_rel;
863 * Pull up children of a UNION node that are identically-propertied UNIONs.
865 * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
866 * output rows will be lost anyway.
868 * NOTE: currently, we ignore collations while determining if a child has
869 * the same properties. This is semantically sound only so long as all
870 * collations have the same notion of equality. It is valid from an
871 * implementation standpoint because we don't care about the ordering of
872 * a UNION child's result: UNION ALL results are always unordered, and
873 * generate_union_paths will force a fresh sort if the top level is a UNION.
875 static List *
876 plan_union_children(PlannerInfo *root,
877 SetOperationStmt *top_union,
878 List *refnames_tlist,
879 List **tlist_list)
881 List *pending_rels = list_make1(top_union);
882 List *result = NIL;
883 List *child_tlist;
885 *tlist_list = NIL;
887 while (pending_rels != NIL)
889 Node *setOp = linitial(pending_rels);
891 pending_rels = list_delete_first(pending_rels);
893 if (IsA(setOp, SetOperationStmt))
895 SetOperationStmt *op = (SetOperationStmt *) setOp;
897 if (op->op == top_union->op &&
898 (op->all == top_union->all || op->all) &&
899 equal(op->colTypes, top_union->colTypes))
901 /* Same UNION, so fold children into parent */
902 pending_rels = lcons(op->rarg, pending_rels);
903 pending_rels = lcons(op->larg, pending_rels);
904 continue;
909 * Not same, so plan this child separately.
911 * Note we disallow any resjunk columns in child results. This is
912 * necessary since the Append node that implements the union won't do
913 * any projection, and upper levels will get confused if some of our
914 * output tuples have junk and some don't. This case only arises when
915 * we have an EXCEPT or INTERSECT as child, else there won't be
916 * resjunk anyway.
918 result = lappend(result, recurse_set_operations(setOp, root,
919 top_union->colTypes,
920 top_union->colCollations,
921 false, -1,
922 refnames_tlist,
923 &child_tlist,
924 NULL));
925 *tlist_list = lappend(*tlist_list, child_tlist);
928 return result;
932 * Add nodes to the given path tree to unique-ify the result of a UNION.
934 static Path *
935 make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
936 PlannerInfo *root)
938 RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
939 List *groupList;
940 double dNumGroups;
942 /* Identify the grouping semantics */
943 groupList = generate_setop_grouplist(op, tlist);
946 * XXX for the moment, take the number of distinct groups as equal to the
947 * total input size, ie, the worst case. This is too conservative, but
948 * it's not clear how to get a decent estimate of the true size. One
949 * should note as well the propensity of novices to write UNION rather
950 * than UNION ALL even when they don't expect any duplicates...
952 dNumGroups = path->rows;
954 /* Decide whether to hash or sort */
955 if (choose_hashed_setop(root, groupList, path,
956 dNumGroups, dNumGroups,
957 "UNION"))
959 /* Hashed aggregate plan --- no sort needed */
960 path = (Path *) create_agg_path(root,
961 result_rel,
962 path,
963 create_pathtarget(root, tlist),
964 AGG_HASHED,
965 AGGSPLIT_SIMPLE,
966 groupList,
967 NIL,
968 NULL,
969 dNumGroups);
971 else
973 /* Sort and Unique */
974 if (groupList)
975 path = (Path *)
976 create_sort_path(root,
977 result_rel,
978 path,
979 make_pathkeys_for_sortclauses(root,
980 groupList,
981 tlist),
982 -1.0);
983 path = (Path *) create_upper_unique_path(root,
984 result_rel,
985 path,
986 list_length(path->pathkeys),
987 dNumGroups);
990 return path;
994 * postprocess_setop_rel - perform steps required after adding paths
996 static void
997 postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1000 * We don't currently worry about allowing FDWs to contribute paths to
1001 * this relation, but give extensions a chance.
1003 if (create_upper_paths_hook)
1004 (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1005 NULL, rel, NULL);
1007 /* Select cheapest path */
1008 set_cheapest(rel);
1012 * choose_hashed_setop - should we use hashing for a set operation?
1014 static bool
1015 choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1016 Path *input_path,
1017 double dNumGroups, double dNumOutputRows,
1018 const char *construct)
1020 int numGroupCols = list_length(groupClauses);
1021 bool can_sort;
1022 bool can_hash;
1023 Size hashentrysize;
1024 Path hashed_p;
1025 Path sorted_p;
1026 double tuple_fraction;
1028 /* Check whether the operators support sorting or hashing */
1029 can_sort = grouping_is_sortable(groupClauses);
1030 can_hash = grouping_is_hashable(groupClauses);
1031 if (can_hash && can_sort)
1033 /* we have a meaningful choice to make, continue ... */
1035 else if (can_hash)
1036 return true;
1037 else if (can_sort)
1038 return false;
1039 else
1040 ereport(ERROR,
1041 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1042 /* translator: %s is UNION, INTERSECT, or EXCEPT */
1043 errmsg("could not implement %s", construct),
1044 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1046 /* Prefer sorting when enable_hashagg is off */
1047 if (!enable_hashagg)
1048 return false;
1051 * Don't do it if it doesn't look like the hashtable will fit into
1052 * work_mem.
1054 hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1056 if (hashentrysize * dNumGroups > work_mem * 1024L)
1057 return false;
1060 * See if the estimated cost is no more than doing it the other way.
1062 * We need to consider input_plan + hashagg versus input_plan + sort +
1063 * group. Note that the actual result plan might involve a SetOp or
1064 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1065 * should be close enough for our purposes here.
1067 * These path variables are dummies that just hold cost fields; we don't
1068 * make actual Paths for these steps.
1070 cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1071 numGroupCols, dNumGroups,
1072 NIL,
1073 input_path->startup_cost, input_path->total_cost,
1074 input_path->rows, input_path->pathtarget->width);
1077 * Now for the sorted case. Note that the input is *always* unsorted,
1078 * since it was made by appending unrelated sub-relations together.
1080 sorted_p.startup_cost = input_path->startup_cost;
1081 sorted_p.total_cost = input_path->total_cost;
1082 /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1083 cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1084 input_path->rows, input_path->pathtarget->width,
1085 0.0, work_mem, -1.0);
1086 cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1087 NIL,
1088 sorted_p.startup_cost, sorted_p.total_cost,
1089 input_path->rows);
1092 * Now make the decision using the top-level tuple fraction. First we
1093 * have to convert an absolute count (LIMIT) into fractional form.
1095 tuple_fraction = root->tuple_fraction;
1096 if (tuple_fraction >= 1.0)
1097 tuple_fraction /= dNumOutputRows;
1099 if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1100 tuple_fraction) < 0)
1102 /* Hashed is cheaper, so use it */
1103 return true;
1105 return false;
1109 * Generate targetlist for a set-operation plan node
1111 * colTypes: OID list of set-op's result column datatypes
1112 * colCollations: OID list of set-op's result column collations
1113 * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1114 * varno: varno to use in generated Vars
1115 * hack_constants: true to copy up constants (see comments in code)
1116 * input_tlist: targetlist of this node's input node
1117 * refnames_tlist: targetlist to take column names from
1119 static List *
1120 generate_setop_tlist(List *colTypes, List *colCollations,
1121 int flag,
1122 Index varno,
1123 bool hack_constants,
1124 List *input_tlist,
1125 List *refnames_tlist)
1127 List *tlist = NIL;
1128 int resno = 1;
1129 ListCell *ctlc,
1130 *cclc,
1131 *itlc,
1132 *rtlc;
1133 TargetEntry *tle;
1134 Node *expr;
1136 forfour(ctlc, colTypes, cclc, colCollations,
1137 itlc, input_tlist, rtlc, refnames_tlist)
1139 Oid colType = lfirst_oid(ctlc);
1140 Oid colColl = lfirst_oid(cclc);
1141 TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1142 TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1144 Assert(inputtle->resno == resno);
1145 Assert(reftle->resno == resno);
1146 Assert(!inputtle->resjunk);
1147 Assert(!reftle->resjunk);
1150 * Generate columns referencing input columns and having appropriate
1151 * data types and column names. Insert datatype coercions where
1152 * necessary.
1154 * HACK: constants in the input's targetlist are copied up as-is
1155 * rather than being referenced as subquery outputs. This is mainly
1156 * to ensure that when we try to coerce them to the output column's
1157 * datatype, the right things happen for UNKNOWN constants. But do
1158 * this only at the first level of subquery-scan plans; we don't want
1159 * phony constants appearing in the output tlists of upper-level
1160 * nodes!
1162 if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1163 expr = (Node *) inputtle->expr;
1164 else
1165 expr = (Node *) makeVar(varno,
1166 inputtle->resno,
1167 exprType((Node *) inputtle->expr),
1168 exprTypmod((Node *) inputtle->expr),
1169 exprCollation((Node *) inputtle->expr),
1172 if (exprType(expr) != colType)
1175 * Note: it's not really cool to be applying coerce_to_common_type
1176 * here; one notable point is that assign_expr_collations never
1177 * gets run on any generated nodes. For the moment that's not a
1178 * problem because we force the correct exposed collation below.
1179 * It would likely be best to make the parser generate the correct
1180 * output tlist for every set-op to begin with, though.
1182 expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1183 expr,
1184 colType,
1185 "UNION/INTERSECT/EXCEPT");
1189 * Ensure the tlist entry's exposed collation matches the set-op. This
1190 * is necessary because plan_set_operations() reports the result
1191 * ordering as a list of SortGroupClauses, which don't carry collation
1192 * themselves but just refer to tlist entries. If we don't show the
1193 * right collation then planner.c might do the wrong thing in
1194 * higher-level queries.
1196 * Note we use RelabelType, not CollateExpr, since this expression
1197 * will reach the executor without any further processing.
1199 if (exprCollation(expr) != colColl)
1201 expr = (Node *) makeRelabelType((Expr *) expr,
1202 exprType(expr),
1203 exprTypmod(expr),
1204 colColl,
1205 COERCE_IMPLICIT_CAST);
1208 tle = makeTargetEntry((Expr *) expr,
1209 (AttrNumber) resno++,
1210 pstrdup(reftle->resname),
1211 false);
1214 * By convention, all non-resjunk columns in a setop tree have
1215 * ressortgroupref equal to their resno. In some cases the ref isn't
1216 * needed, but this is a cleaner way than modifying the tlist later.
1218 tle->ressortgroupref = tle->resno;
1220 tlist = lappend(tlist, tle);
1223 if (flag >= 0)
1225 /* Add a resjunk flag column */
1226 /* flag value is the given constant */
1227 expr = (Node *) makeConst(INT4OID,
1229 InvalidOid,
1230 sizeof(int32),
1231 Int32GetDatum(flag),
1232 false,
1233 true);
1234 tle = makeTargetEntry((Expr *) expr,
1235 (AttrNumber) resno++,
1236 pstrdup("flag"),
1237 true);
1238 tlist = lappend(tlist, tle);
1241 return tlist;
1245 * Generate targetlist for a set-operation Append node
1247 * colTypes: OID list of set-op's result column datatypes
1248 * colCollations: OID list of set-op's result column collations
1249 * flag: true to create a flag column copied up from subplans
1250 * input_tlists: list of tlists for sub-plans of the Append
1251 * refnames_tlist: targetlist to take column names from
1253 * The entries in the Append's targetlist should always be simple Vars;
1254 * we just have to make sure they have the right datatypes/typmods/collations.
1255 * The Vars are always generated with varno 0.
1257 * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1258 * cannot figure out a realistic width for the tlist we make here. But we
1259 * ought to refactor this code to produce a PathTarget directly, anyway.
1261 static List *
1262 generate_append_tlist(List *colTypes, List *colCollations,
1263 bool flag,
1264 List *input_tlists,
1265 List *refnames_tlist)
1267 List *tlist = NIL;
1268 int resno = 1;
1269 ListCell *curColType;
1270 ListCell *curColCollation;
1271 ListCell *ref_tl_item;
1272 int colindex;
1273 TargetEntry *tle;
1274 Node *expr;
1275 ListCell *tlistl;
1276 int32 *colTypmods;
1279 * First extract typmods to use.
1281 * If the inputs all agree on type and typmod of a particular column, use
1282 * that typmod; else use -1.
1284 colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1286 foreach(tlistl, input_tlists)
1288 List *subtlist = (List *) lfirst(tlistl);
1289 ListCell *subtlistl;
1291 curColType = list_head(colTypes);
1292 colindex = 0;
1293 foreach(subtlistl, subtlist)
1295 TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1297 if (subtle->resjunk)
1298 continue;
1299 Assert(curColType != NULL);
1300 if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1302 /* If first subplan, copy the typmod; else compare */
1303 int32 subtypmod = exprTypmod((Node *) subtle->expr);
1305 if (tlistl == list_head(input_tlists))
1306 colTypmods[colindex] = subtypmod;
1307 else if (subtypmod != colTypmods[colindex])
1308 colTypmods[colindex] = -1;
1310 else
1312 /* types disagree, so force typmod to -1 */
1313 colTypmods[colindex] = -1;
1315 curColType = lnext(colTypes, curColType);
1316 colindex++;
1318 Assert(curColType == NULL);
1322 * Now we can build the tlist for the Append.
1324 colindex = 0;
1325 forthree(curColType, colTypes, curColCollation, colCollations,
1326 ref_tl_item, refnames_tlist)
1328 Oid colType = lfirst_oid(curColType);
1329 int32 colTypmod = colTypmods[colindex++];
1330 Oid colColl = lfirst_oid(curColCollation);
1331 TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1333 Assert(reftle->resno == resno);
1334 Assert(!reftle->resjunk);
1335 expr = (Node *) makeVar(0,
1336 resno,
1337 colType,
1338 colTypmod,
1339 colColl,
1341 tle = makeTargetEntry((Expr *) expr,
1342 (AttrNumber) resno++,
1343 pstrdup(reftle->resname),
1344 false);
1347 * By convention, all non-resjunk columns in a setop tree have
1348 * ressortgroupref equal to their resno. In some cases the ref isn't
1349 * needed, but this is a cleaner way than modifying the tlist later.
1351 tle->ressortgroupref = tle->resno;
1353 tlist = lappend(tlist, tle);
1356 if (flag)
1358 /* Add a resjunk flag column */
1359 /* flag value is shown as copied up from subplan */
1360 expr = (Node *) makeVar(0,
1361 resno,
1362 INT4OID,
1364 InvalidOid,
1366 tle = makeTargetEntry((Expr *) expr,
1367 (AttrNumber) resno++,
1368 pstrdup("flag"),
1369 true);
1370 tlist = lappend(tlist, tle);
1373 pfree(colTypmods);
1375 return tlist;
1379 * generate_setop_grouplist
1380 * Build a SortGroupClause list defining the sort/grouping properties
1381 * of the setop's output columns.
1383 * Parse analysis already determined the properties and built a suitable
1384 * list, except that the entries do not have sortgrouprefs set because
1385 * the parser output representation doesn't include a tlist for each
1386 * setop. So what we need to do here is copy that list and install
1387 * proper sortgrouprefs into it (copying those from the targetlist).
1389 static List *
1390 generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1392 List *grouplist = copyObject(op->groupClauses);
1393 ListCell *lg;
1394 ListCell *lt;
1396 lg = list_head(grouplist);
1397 foreach(lt, targetlist)
1399 TargetEntry *tle = (TargetEntry *) lfirst(lt);
1400 SortGroupClause *sgc;
1402 if (tle->resjunk)
1404 /* resjunk columns should not have sortgrouprefs */
1405 Assert(tle->ressortgroupref == 0);
1406 continue; /* ignore resjunk columns */
1409 /* non-resjunk columns should have sortgroupref = resno */
1410 Assert(tle->ressortgroupref == tle->resno);
1412 /* non-resjunk columns should have grouping clauses */
1413 Assert(lg != NULL);
1414 sgc = (SortGroupClause *) lfirst(lg);
1415 lg = lnext(grouplist, lg);
1416 Assert(sgc->tleSortGroupRef == 0);
1418 sgc->tleSortGroupRef = tle->ressortgroupref;
1420 Assert(lg == NULL);
1421 return grouplist;