Correct obsolete UNION hash aggs comment.
[pgsql.git] / src / backend / optimizer / prep / prepunion.c
blob6588f83d5ec6fde6491db599b12565ac736ec8bc
/*-------------------------------------------------------------------------
 *
 * prepunion.c
 *	  Routines to plan set-operation queries.  The filename is a leftover
 *	  from a time when only UNIONs were implemented.
 *
 * There are two code paths in the planner for set-operation queries.
 * If a subquery consists entirely of simple UNION ALL operations, it
 * is converted into an "append relation".  Otherwise, it is handled
 * by the general code in this module (plan_set_operations and its
 * subroutines).  There is some support code here for the append-relation
 * case, but most of the heavy lifting for that is done elsewhere,
 * notably in prepjointree.c and allpaths.c.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/prep/prepunion.c
 *
 *-------------------------------------------------------------------------
 */
24 #include "postgres.h"
26 #include "access/htup_details.h"
27 #include "access/sysattr.h"
28 #include "catalog/partition.h"
29 #include "catalog/pg_inherits.h"
30 #include "catalog/pg_type.h"
31 #include "miscadmin.h"
32 #include "nodes/makefuncs.h"
33 #include "nodes/nodeFuncs.h"
34 #include "optimizer/cost.h"
35 #include "optimizer/pathnode.h"
36 #include "optimizer/paths.h"
37 #include "optimizer/planmain.h"
38 #include "optimizer/planner.h"
39 #include "optimizer/prep.h"
40 #include "optimizer/tlist.h"
41 #include "parser/parse_coerce.h"
42 #include "parser/parsetree.h"
43 #include "utils/lsyscache.h"
44 #include "utils/rel.h"
45 #include "utils/selfuncs.h"
46 #include "utils/syscache.h"
49 static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
50 List *colTypes, List *colCollations,
51 bool junkOK,
52 int flag, List *refnames_tlist,
53 List **pTargetList,
54 double *pNumGroups);
55 static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
56 PlannerInfo *root,
57 List *refnames_tlist,
58 List **pTargetList);
59 static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
60 List *refnames_tlist,
61 List **pTargetList);
62 static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
63 List *refnames_tlist,
64 List **pTargetList);
65 static List *plan_union_children(PlannerInfo *root,
66 SetOperationStmt *top_union,
67 List *refnames_tlist,
68 List **tlist_list);
69 static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
70 PlannerInfo *root);
71 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
72 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
73 Path *input_path,
74 double dNumGroups, double dNumOutputRows,
75 const char *construct);
76 static List *generate_setop_tlist(List *colTypes, List *colCollations,
77 int flag,
78 Index varno,
79 bool hack_constants,
80 List *input_tlist,
81 List *refnames_tlist);
82 static List *generate_append_tlist(List *colTypes, List *colCollations,
83 bool flag,
84 List *input_tlists,
85 List *refnames_tlist);
86 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
90 * plan_set_operations
92 * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
94 * This routine only deals with the setOperations tree of the given query.
95 * Any top-level ORDER BY requested in root->parse->sortClause will be handled
96 * when we return to grouping_planner; likewise for LIMIT.
98 * What we return is an "upperrel" RelOptInfo containing at least one Path
99 * that implements the set-operation tree. In addition, root->processed_tlist
100 * receives a targetlist representing the output of the topmost setop node.
102 RelOptInfo *
103 plan_set_operations(PlannerInfo *root)
105 Query *parse = root->parse;
106 SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
107 Node *node;
108 RangeTblEntry *leftmostRTE;
109 Query *leftmostQuery;
110 RelOptInfo *setop_rel;
111 List *top_tlist;
113 Assert(topop);
115 /* check for unsupported stuff */
116 Assert(parse->jointree->fromlist == NIL);
117 Assert(parse->jointree->quals == NULL);
118 Assert(parse->groupClause == NIL);
119 Assert(parse->havingQual == NULL);
120 Assert(parse->windowClause == NIL);
121 Assert(parse->distinctClause == NIL);
124 * In the outer query level, we won't have any true equivalences to deal
125 * with; but we do want to be able to make pathkeys, which will require
126 * single-member EquivalenceClasses. Indicate that EC merging is complete
127 * so that pathkeys.c won't complain.
129 Assert(root->eq_classes == NIL);
130 root->ec_merging_done = true;
133 * We'll need to build RelOptInfos for each of the leaf subqueries, which
134 * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
135 * arrays for those, and for AppendRelInfos in case they're needed.
137 setup_simple_rel_arrays(root);
140 * Find the leftmost component Query. We need to use its column names for
141 * all generated tlists (else SELECT INTO won't work right).
143 node = topop->larg;
144 while (node && IsA(node, SetOperationStmt))
145 node = ((SetOperationStmt *) node)->larg;
146 Assert(node && IsA(node, RangeTblRef));
147 leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
148 leftmostQuery = leftmostRTE->subquery;
149 Assert(leftmostQuery != NULL);
152 * If the topmost node is a recursive union, it needs special processing.
154 if (root->hasRecursion)
156 setop_rel = generate_recursion_path(topop, root,
157 leftmostQuery->targetList,
158 &top_tlist);
160 else
163 * Recurse on setOperations tree to generate paths for set ops. The
164 * final output paths should have just the column types shown as the
165 * output from the top-level node, plus possibly resjunk working
166 * columns (we can rely on upper-level nodes to deal with that).
168 setop_rel = recurse_set_operations((Node *) topop, root,
169 topop->colTypes, topop->colCollations,
170 true, -1,
171 leftmostQuery->targetList,
172 &top_tlist,
173 NULL);
176 /* Must return the built tlist into root->processed_tlist. */
177 root->processed_tlist = top_tlist;
179 return setop_rel;
183 * recurse_set_operations
184 * Recursively handle one step in a tree of set operations
186 * colTypes: OID list of set-op's result column datatypes
187 * colCollations: OID list of set-op's result column collations
188 * junkOK: if true, child resjunk columns may be left in the result
189 * flag: if >= 0, add a resjunk output column indicating value of flag
190 * refnames_tlist: targetlist to take column names from
192 * Returns a RelOptInfo for the subtree, as well as these output parameters:
193 * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
194 * *pNumGroups: if not NULL, we estimate the number of distinct groups
195 * in the result, and store it there
197 * The pTargetList output parameter is mostly redundant with the pathtarget
198 * of the returned RelOptInfo, but for the moment we need it because much of
199 * the logic in this file depends on flag columns being marked resjunk.
200 * Pending a redesign of how that works, this is the easy way out.
202 * We don't have to care about typmods here: the only allowed difference
203 * between set-op input and output typmods is input is a specific typmod
204 * and output is -1, and that does not require a coercion.
206 static RelOptInfo *
207 recurse_set_operations(Node *setOp, PlannerInfo *root,
208 List *colTypes, List *colCollations,
209 bool junkOK,
210 int flag, List *refnames_tlist,
211 List **pTargetList,
212 double *pNumGroups)
214 RelOptInfo *rel = NULL; /* keep compiler quiet */
216 /* Guard against stack overflow due to overly complex setop nests */
217 check_stack_depth();
219 if (IsA(setOp, RangeTblRef))
221 RangeTblRef *rtr = (RangeTblRef *) setOp;
222 RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
223 Query *subquery = rte->subquery;
224 PlannerInfo *subroot;
225 RelOptInfo *final_rel;
226 Path *subpath;
227 Path *path;
228 List *tlist;
230 Assert(subquery != NULL);
232 /* Build a RelOptInfo for this leaf subquery. */
233 rel = build_simple_rel(root, rtr->rtindex, NULL);
235 /* plan_params should not be in use in current query level */
236 Assert(root->plan_params == NIL);
238 /* Generate a subroot and Paths for the subquery */
239 subroot = rel->subroot = subquery_planner(root->glob, subquery,
240 root,
241 false,
242 root->tuple_fraction);
245 * It should not be possible for the primitive query to contain any
246 * cross-references to other primitive queries in the setop tree.
248 if (root->plan_params)
249 elog(ERROR, "unexpected outer reference in set operation subquery");
251 /* Figure out the appropriate target list for this subquery. */
252 tlist = generate_setop_tlist(colTypes, colCollations,
253 flag,
254 rtr->rtindex,
255 true,
256 subroot->processed_tlist,
257 refnames_tlist);
258 rel->reltarget = create_pathtarget(root, tlist);
260 /* Return the fully-fledged tlist to caller, too */
261 *pTargetList = tlist;
264 * Mark rel with estimated output rows, width, etc. Note that we have
265 * to do this before generating outer-query paths, else
266 * cost_subqueryscan is not happy.
268 set_subquery_size_estimates(root, rel);
271 * Since we may want to add a partial path to this relation, we must
272 * set its consider_parallel flag correctly.
274 final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
275 rel->consider_parallel = final_rel->consider_parallel;
278 * For the moment, we consider only a single Path for the subquery.
279 * This should change soon (make it look more like
280 * set_subquery_pathlist).
282 subpath = get_cheapest_fractional_path(final_rel,
283 root->tuple_fraction);
286 * Stick a SubqueryScanPath atop that.
288 * We don't bother to determine the subquery's output ordering since
289 * it won't be reflected in the set-op result anyhow; so just label
290 * the SubqueryScanPath with nil pathkeys. (XXX that should change
291 * soon too, likely.)
293 path = (Path *) create_subqueryscan_path(root, rel, subpath,
294 NIL, NULL);
296 add_path(rel, path);
299 * If we have a partial path for the child relation, we can use that
300 * to build a partial path for this relation. But there's no point in
301 * considering any path but the cheapest.
303 if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
304 final_rel->partial_pathlist != NIL)
306 Path *partial_subpath;
307 Path *partial_path;
309 partial_subpath = linitial(final_rel->partial_pathlist);
310 partial_path = (Path *)
311 create_subqueryscan_path(root, rel, partial_subpath,
312 NIL, NULL);
313 add_partial_path(rel, partial_path);
317 * Estimate number of groups if caller wants it. If the subquery used
318 * grouping or aggregation, its output is probably mostly unique
319 * anyway; otherwise do statistical estimation.
321 * XXX you don't really want to know about this: we do the estimation
322 * using the subquery's original targetlist expressions, not the
323 * subroot->processed_tlist which might seem more appropriate. The
324 * reason is that if the subquery is itself a setop, it may return a
325 * processed_tlist containing "varno 0" Vars generated by
326 * generate_append_tlist, and those would confuse estimate_num_groups
327 * mightily. We ought to get rid of the "varno 0" hack, but that
328 * requires a redesign of the parsetree representation of setops, so
329 * that there can be an RTE corresponding to each setop's output.
331 if (pNumGroups)
333 if (subquery->groupClause || subquery->groupingSets ||
334 subquery->distinctClause ||
335 subroot->hasHavingQual || subquery->hasAggs)
336 *pNumGroups = subpath->rows;
337 else
338 *pNumGroups = estimate_num_groups(subroot,
339 get_tlist_exprs(subquery->targetList, false),
340 subpath->rows,
341 NULL);
344 else if (IsA(setOp, SetOperationStmt))
346 SetOperationStmt *op = (SetOperationStmt *) setOp;
348 /* UNIONs are much different from INTERSECT/EXCEPT */
349 if (op->op == SETOP_UNION)
350 rel = generate_union_paths(op, root,
351 refnames_tlist,
352 pTargetList);
353 else
354 rel = generate_nonunion_paths(op, root,
355 refnames_tlist,
356 pTargetList);
357 if (pNumGroups)
358 *pNumGroups = rel->rows;
361 * If necessary, add a Result node to project the caller-requested
362 * output columns.
364 * XXX you don't really want to know about this: setrefs.c will apply
365 * fix_upper_expr() to the Result node's tlist. This would fail if the
366 * Vars generated by generate_setop_tlist() were not exactly equal()
367 * to the corresponding tlist entries of the subplan. However, since
368 * the subplan was generated by generate_union_paths() or
369 * generate_nonunion_paths(), and hence its tlist was generated by
370 * generate_append_tlist(), this will work. We just tell
371 * generate_setop_tlist() to use varno 0.
373 if (flag >= 0 ||
374 !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
375 !tlist_same_collations(*pTargetList, colCollations, junkOK))
377 PathTarget *target;
378 ListCell *lc;
380 *pTargetList = generate_setop_tlist(colTypes, colCollations,
381 flag,
383 false,
384 *pTargetList,
385 refnames_tlist);
386 target = create_pathtarget(root, *pTargetList);
388 /* Apply projection to each path */
389 foreach(lc, rel->pathlist)
391 Path *subpath = (Path *) lfirst(lc);
392 Path *path;
394 Assert(subpath->param_info == NULL);
395 path = apply_projection_to_path(root, subpath->parent,
396 subpath, target);
397 /* If we had to add a Result, path is different from subpath */
398 if (path != subpath)
399 lfirst(lc) = path;
402 /* Apply projection to each partial path */
403 foreach(lc, rel->partial_pathlist)
405 Path *subpath = (Path *) lfirst(lc);
406 Path *path;
408 Assert(subpath->param_info == NULL);
410 /* avoid apply_projection_to_path, in case of multiple refs */
411 path = (Path *) create_projection_path(root, subpath->parent,
412 subpath, target);
413 lfirst(lc) = path;
417 else
419 elog(ERROR, "unrecognized node type: %d",
420 (int) nodeTag(setOp));
421 *pTargetList = NIL;
424 postprocess_setop_rel(root, rel);
426 return rel;
430 * Generate paths for a recursive UNION node
432 static RelOptInfo *
433 generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
434 List *refnames_tlist,
435 List **pTargetList)
437 RelOptInfo *result_rel;
438 Path *path;
439 RelOptInfo *lrel,
440 *rrel;
441 Path *lpath;
442 Path *rpath;
443 List *lpath_tlist;
444 List *rpath_tlist;
445 List *tlist;
446 List *groupList;
447 double dNumGroups;
449 /* Parser should have rejected other cases */
450 if (setOp->op != SETOP_UNION)
451 elog(ERROR, "only UNION queries can be recursive");
452 /* Worktable ID should be assigned */
453 Assert(root->wt_param_id >= 0);
456 * Unlike a regular UNION node, process the left and right inputs
457 * separately without any intention of combining them into one Append.
459 lrel = recurse_set_operations(setOp->larg, root,
460 setOp->colTypes, setOp->colCollations,
461 false, -1,
462 refnames_tlist,
463 &lpath_tlist,
464 NULL);
465 lpath = lrel->cheapest_total_path;
466 /* The right path will want to look at the left one ... */
467 root->non_recursive_path = lpath;
468 rrel = recurse_set_operations(setOp->rarg, root,
469 setOp->colTypes, setOp->colCollations,
470 false, -1,
471 refnames_tlist,
472 &rpath_tlist,
473 NULL);
474 rpath = rrel->cheapest_total_path;
475 root->non_recursive_path = NULL;
478 * Generate tlist for RecursiveUnion path node --- same as in Append cases
480 tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
481 list_make2(lpath_tlist, rpath_tlist),
482 refnames_tlist);
484 *pTargetList = tlist;
486 /* Build result relation. */
487 result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
488 bms_union(lrel->relids, rrel->relids));
489 result_rel->reltarget = create_pathtarget(root, tlist);
492 * If UNION, identify the grouping operators
494 if (setOp->all)
496 groupList = NIL;
497 dNumGroups = 0;
499 else
501 /* Identify the grouping semantics */
502 groupList = generate_setop_grouplist(setOp, tlist);
504 /* We only support hashing here */
505 if (!grouping_is_hashable(groupList))
506 ereport(ERROR,
507 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
508 errmsg("could not implement recursive UNION"),
509 errdetail("All column datatypes must be hashable.")));
512 * For the moment, take the number of distinct groups as equal to the
513 * total input size, ie, the worst case.
515 dNumGroups = lpath->rows + rpath->rows * 10;
519 * And make the path node.
521 path = (Path *) create_recursiveunion_path(root,
522 result_rel,
523 lpath,
524 rpath,
525 result_rel->reltarget,
526 groupList,
527 root->wt_param_id,
528 dNumGroups);
530 add_path(result_rel, path);
531 postprocess_setop_rel(root, result_rel);
532 return result_rel;
536 * Generate paths for a UNION or UNION ALL node
538 static RelOptInfo *
539 generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
540 List *refnames_tlist,
541 List **pTargetList)
543 Relids relids = NULL;
544 RelOptInfo *result_rel;
545 double save_fraction = root->tuple_fraction;
546 ListCell *lc;
547 List *pathlist = NIL;
548 List *partial_pathlist = NIL;
549 bool partial_paths_valid = true;
550 bool consider_parallel = true;
551 List *rellist;
552 List *tlist_list;
553 List *tlist;
554 Path *path;
557 * If plain UNION, tell children to fetch all tuples.
559 * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
560 * each arm of the UNION ALL. One could make a case for reducing the
561 * tuple fraction for later arms (discounting by the expected size of the
562 * earlier arms' results) but it seems not worth the trouble. The normal
563 * case where tuple_fraction isn't already zero is a LIMIT at top level,
564 * and passing it down as-is is usually enough to get the desired result
565 * of preferring fast-start plans.
567 if (!op->all)
568 root->tuple_fraction = 0.0;
571 * If any of my children are identical UNION nodes (same op, all-flag, and
572 * colTypes) then they can be merged into this node so that we generate
573 * only one Append and unique-ification for the lot. Recurse to find such
574 * nodes and compute their children's paths.
576 rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
579 * Generate tlist for Append plan node.
581 * The tlist for an Append plan isn't important as far as the Append is
582 * concerned, but we must make it look real anyway for the benefit of the
583 * next plan level up.
585 tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
586 tlist_list, refnames_tlist);
588 *pTargetList = tlist;
590 /* Build path lists and relid set. */
591 foreach(lc, rellist)
593 RelOptInfo *rel = lfirst(lc);
595 pathlist = lappend(pathlist, rel->cheapest_total_path);
597 if (consider_parallel)
599 if (!rel->consider_parallel)
601 consider_parallel = false;
602 partial_paths_valid = false;
604 else if (rel->partial_pathlist == NIL)
605 partial_paths_valid = false;
606 else
607 partial_pathlist = lappend(partial_pathlist,
608 linitial(rel->partial_pathlist));
611 relids = bms_union(relids, rel->relids);
614 /* Build result relation. */
615 result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
616 result_rel->reltarget = create_pathtarget(root, tlist);
617 result_rel->consider_parallel = consider_parallel;
620 * Append the child results together.
622 path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
623 NIL, NULL, 0, false, NIL, -1);
626 * For UNION ALL, we just need the Append path. For UNION, need to add
627 * node(s) to remove duplicates.
629 if (!op->all)
630 path = make_union_unique(op, path, tlist, root);
632 add_path(result_rel, path);
635 * Estimate number of groups. For now we just assume the output is unique
636 * --- this is certainly true for the UNION case, and we want worst-case
637 * estimates anyway.
639 result_rel->rows = path->rows;
642 * Now consider doing the same thing using the partial paths plus Append
643 * plus Gather.
645 if (partial_paths_valid)
647 Path *ppath;
648 ListCell *lc;
649 int parallel_workers = 0;
651 /* Find the highest number of workers requested for any subpath. */
652 foreach(lc, partial_pathlist)
654 Path *path = lfirst(lc);
656 parallel_workers = Max(parallel_workers, path->parallel_workers);
658 Assert(parallel_workers > 0);
661 * If the use of parallel append is permitted, always request at least
662 * log2(# of children) paths. We assume it can be useful to have
663 * extra workers in this case because they will be spread out across
664 * the children. The precise formula is just a guess; see
665 * add_paths_to_append_rel.
667 if (enable_parallel_append)
669 parallel_workers = Max(parallel_workers,
670 fls(list_length(partial_pathlist)));
671 parallel_workers = Min(parallel_workers,
672 max_parallel_workers_per_gather);
674 Assert(parallel_workers > 0);
676 ppath = (Path *)
677 create_append_path(root, result_rel, NIL, partial_pathlist,
678 NIL, NULL,
679 parallel_workers, enable_parallel_append,
680 NIL, -1);
681 ppath = (Path *)
682 create_gather_path(root, result_rel, ppath,
683 result_rel->reltarget, NULL, NULL);
684 if (!op->all)
685 ppath = make_union_unique(op, ppath, tlist, root);
686 add_path(result_rel, ppath);
689 /* Undo effects of possibly forcing tuple_fraction to 0 */
690 root->tuple_fraction = save_fraction;
692 return result_rel;
696 * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
698 static RelOptInfo *
699 generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
700 List *refnames_tlist,
701 List **pTargetList)
703 RelOptInfo *result_rel;
704 RelOptInfo *lrel,
705 *rrel;
706 double save_fraction = root->tuple_fraction;
707 Path *lpath,
708 *rpath,
709 *path;
710 List *lpath_tlist,
711 *rpath_tlist,
712 *tlist_list,
713 *tlist,
714 *groupList,
715 *pathlist;
716 double dLeftGroups,
717 dRightGroups,
718 dNumGroups,
719 dNumOutputRows;
720 bool use_hash;
721 SetOpCmd cmd;
722 int firstFlag;
725 * Tell children to fetch all tuples.
727 root->tuple_fraction = 0.0;
729 /* Recurse on children, ensuring their outputs are marked */
730 lrel = recurse_set_operations(op->larg, root,
731 op->colTypes, op->colCollations,
732 false, 0,
733 refnames_tlist,
734 &lpath_tlist,
735 &dLeftGroups);
736 lpath = lrel->cheapest_total_path;
737 rrel = recurse_set_operations(op->rarg, root,
738 op->colTypes, op->colCollations,
739 false, 1,
740 refnames_tlist,
741 &rpath_tlist,
742 &dRightGroups);
743 rpath = rrel->cheapest_total_path;
745 /* Undo effects of forcing tuple_fraction to 0 */
746 root->tuple_fraction = save_fraction;
749 * For EXCEPT, we must put the left input first. For INTERSECT, either
750 * order should give the same results, and we prefer to put the smaller
751 * input first in order to minimize the size of the hash table in the
752 * hashing case. "Smaller" means the one with the fewer groups.
754 if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
756 pathlist = list_make2(lpath, rpath);
757 tlist_list = list_make2(lpath_tlist, rpath_tlist);
758 firstFlag = 0;
760 else
762 pathlist = list_make2(rpath, lpath);
763 tlist_list = list_make2(rpath_tlist, lpath_tlist);
764 firstFlag = 1;
768 * Generate tlist for Append plan node.
770 * The tlist for an Append plan isn't important as far as the Append is
771 * concerned, but we must make it look real anyway for the benefit of the
772 * next plan level up. In fact, it has to be real enough that the flag
773 * column is shown as a variable not a constant, else setrefs.c will get
774 * confused.
776 tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
777 tlist_list, refnames_tlist);
779 *pTargetList = tlist;
781 /* Build result relation. */
782 result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
783 bms_union(lrel->relids, rrel->relids));
784 result_rel->reltarget = create_pathtarget(root, tlist);
787 * Append the child results together.
789 path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
790 NIL, NULL, 0, false, NIL, -1);
792 /* Identify the grouping semantics */
793 groupList = generate_setop_grouplist(op, tlist);
796 * Estimate number of distinct groups that we'll need hashtable entries
797 * for; this is the size of the left-hand input for EXCEPT, or the smaller
798 * input for INTERSECT. Also estimate the number of eventual output rows.
799 * In non-ALL cases, we estimate each group produces one output row; in
800 * ALL cases use the relevant relation size. These are worst-case
801 * estimates, of course, but we need to be conservative.
803 if (op->op == SETOP_EXCEPT)
805 dNumGroups = dLeftGroups;
806 dNumOutputRows = op->all ? lpath->rows : dNumGroups;
808 else
810 dNumGroups = Min(dLeftGroups, dRightGroups);
811 dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
815 * Decide whether to hash or sort, and add a sort node if needed.
817 use_hash = choose_hashed_setop(root, groupList, path,
818 dNumGroups, dNumOutputRows,
819 (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
821 if (groupList && !use_hash)
822 path = (Path *) create_sort_path(root,
823 result_rel,
824 path,
825 make_pathkeys_for_sortclauses(root,
826 groupList,
827 tlist),
828 -1.0);
831 * Finally, add a SetOp path node to generate the correct output.
833 switch (op->op)
835 case SETOP_INTERSECT:
836 cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
837 break;
838 case SETOP_EXCEPT:
839 cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
840 break;
841 default:
842 elog(ERROR, "unrecognized set op: %d", (int) op->op);
843 cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
844 break;
846 path = (Path *) create_setop_path(root,
847 result_rel,
848 path,
849 cmd,
850 use_hash ? SETOP_HASHED : SETOP_SORTED,
851 groupList,
852 list_length(op->colTypes) + 1,
853 use_hash ? firstFlag : -1,
854 dNumGroups,
855 dNumOutputRows);
857 result_rel->rows = path->rows;
858 add_path(result_rel, path);
859 return result_rel;
863 * Pull up children of a UNION node that are identically-propertied UNIONs.
865 * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
866 * output rows will be lost anyway.
868 * NOTE: currently, we ignore collations while determining if a child has
869 * the same properties. This is semantically sound only so long as all
870 * collations have the same notion of equality. It is valid from an
871 * implementation standpoint because we don't care about the ordering of
872 * a UNION child's result: UNION ALL results are always unordered, and
873 * generate_union_paths will force a fresh sort if the top level is a UNION.
875 static List *
876 plan_union_children(PlannerInfo *root,
877 SetOperationStmt *top_union,
878 List *refnames_tlist,
879 List **tlist_list)
881 List *pending_rels = list_make1(top_union);
882 List *result = NIL;
883 List *child_tlist;
885 *tlist_list = NIL;
887 while (pending_rels != NIL)
889 Node *setOp = linitial(pending_rels);
891 pending_rels = list_delete_first(pending_rels);
893 if (IsA(setOp, SetOperationStmt))
895 SetOperationStmt *op = (SetOperationStmt *) setOp;
897 if (op->op == top_union->op &&
898 (op->all == top_union->all || op->all) &&
899 equal(op->colTypes, top_union->colTypes))
901 /* Same UNION, so fold children into parent */
902 pending_rels = lcons(op->rarg, pending_rels);
903 pending_rels = lcons(op->larg, pending_rels);
904 continue;
909 * Not same, so plan this child separately.
911 * Note we disallow any resjunk columns in child results. This is
912 * necessary since the Append node that implements the union won't do
913 * any projection, and upper levels will get confused if some of our
914 * output tuples have junk and some don't. This case only arises when
915 * we have an EXCEPT or INTERSECT as child, else there won't be
916 * resjunk anyway.
918 result = lappend(result, recurse_set_operations(setOp, root,
919 top_union->colTypes,
920 top_union->colCollations,
921 false, -1,
922 refnames_tlist,
923 &child_tlist,
924 NULL));
925 *tlist_list = lappend(*tlist_list, child_tlist);
928 return result;
932 * Add nodes to the given path tree to unique-ify the result of a UNION.
934 static Path *
935 make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
936 PlannerInfo *root)
938 RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
939 List *groupList;
940 double dNumGroups;
942 /* Identify the grouping semantics */
943 groupList = generate_setop_grouplist(op, tlist);
946 * XXX for the moment, take the number of distinct groups as equal to the
947 * total input size, ie, the worst case. This is too conservative, but
948 * it's not clear how to get a decent estimate of the true size. One
949 * should note as well the propensity of novices to write UNION rather
950 * than UNION ALL even when they don't expect any duplicates...
952 dNumGroups = path->rows;
954 /* Decide whether to hash or sort */
955 if (choose_hashed_setop(root, groupList, path,
956 dNumGroups, dNumGroups,
957 "UNION"))
959 /* Hashed aggregate plan --- no sort needed */
960 path = (Path *) create_agg_path(root,
961 result_rel,
962 path,
963 create_pathtarget(root, tlist),
964 AGG_HASHED,
965 AGGSPLIT_SIMPLE,
966 groupList,
967 NIL,
968 NULL,
969 dNumGroups);
971 else
973 /* Sort and Unique */
974 if (groupList)
975 path = (Path *)
976 create_sort_path(root,
977 result_rel,
978 path,
979 make_pathkeys_for_sortclauses(root,
980 groupList,
981 tlist),
982 -1.0);
983 path = (Path *) create_upper_unique_path(root,
984 result_rel,
985 path,
986 list_length(path->pathkeys),
987 dNumGroups);
990 return path;
994 * postprocess_setop_rel - perform steps required after adding paths
996 static void
997 postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1000 * We don't currently worry about allowing FDWs to contribute paths to
1001 * this relation, but give extensions a chance.
1003 if (create_upper_paths_hook)
1004 (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1005 NULL, rel, NULL);
1007 /* Select cheapest path */
1008 set_cheapest(rel);
1012 * choose_hashed_setop - should we use hashing for a set operation?
1014 static bool
1015 choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1016 Path *input_path,
1017 double dNumGroups, double dNumOutputRows,
1018 const char *construct)
1020 int numGroupCols = list_length(groupClauses);
1021 bool can_sort;
1022 bool can_hash;
1023 Size hashentrysize;
1024 Path hashed_p;
1025 Path sorted_p;
1026 double tuple_fraction;
1028 /* Check whether the operators support sorting or hashing */
1029 can_sort = grouping_is_sortable(groupClauses);
1030 can_hash = grouping_is_hashable(groupClauses);
1031 if (can_hash && can_sort)
1033 /* we have a meaningful choice to make, continue ... */
1035 else if (can_hash)
1036 return true;
1037 else if (can_sort)
1038 return false;
1039 else
1040 ereport(ERROR,
1041 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1042 /* translator: %s is UNION, INTERSECT, or EXCEPT */
1043 errmsg("could not implement %s", construct),
1044 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1046 /* Prefer sorting when enable_hashagg is off */
1047 if (!enable_hashagg)
1048 return false;
1051 * Don't do it if it doesn't look like the hashtable will fit into
1052 * work_mem.
1054 hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1056 if (hashentrysize * dNumGroups > work_mem * 1024L)
1057 return false;
1060 * See if the estimated cost is no more than doing it the other way.
1062 * We need to consider input_plan + hashagg versus input_plan + sort +
1063 * group. Note that the actual result plan might involve a SetOp or
1064 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1065 * should be close enough for our purposes here.
1067 * These path variables are dummies that just hold cost fields; we don't
1068 * make actual Paths for these steps.
1070 cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1071 numGroupCols, dNumGroups,
1072 NIL,
1073 input_path->startup_cost, input_path->total_cost,
1074 input_path->rows, input_path->pathtarget->width);
1077 * Now for the sorted case. Note that the input is *always* unsorted,
1078 * since it was made by appending unrelated sub-relations together.
1080 sorted_p.startup_cost = input_path->startup_cost;
1081 sorted_p.total_cost = input_path->total_cost;
1082 /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1083 cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1084 input_path->rows, input_path->pathtarget->width,
1085 0.0, work_mem, -1.0);
1086 cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1087 NIL,
1088 sorted_p.startup_cost, sorted_p.total_cost,
1089 input_path->rows);
1092 * Now make the decision using the top-level tuple fraction. First we
1093 * have to convert an absolute count (LIMIT) into fractional form.
1095 tuple_fraction = root->tuple_fraction;
1096 if (tuple_fraction >= 1.0)
1097 tuple_fraction /= dNumOutputRows;
1099 if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1100 tuple_fraction) < 0)
1102 /* Hashed is cheaper, so use it */
1103 return true;
1105 return false;
1109 * Generate targetlist for a set-operation plan node
1111 * colTypes: OID list of set-op's result column datatypes
1112 * colCollations: OID list of set-op's result column collations
1113 * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1114 * varno: varno to use in generated Vars
1115 * hack_constants: true to copy up constants (see comments in code)
1116 * input_tlist: targetlist of this node's input node
1117 * refnames_tlist: targetlist to take column names from
1119 static List *
1120 generate_setop_tlist(List *colTypes, List *colCollations,
1121 int flag,
1122 Index varno,
1123 bool hack_constants,
1124 List *input_tlist,
1125 List *refnames_tlist)
1127 List *tlist = NIL;
1128 int resno = 1;
1129 ListCell *ctlc,
1130 *cclc,
1131 *itlc,
1132 *rtlc;
1133 TargetEntry *tle;
1134 Node *expr;
1136 forfour(ctlc, colTypes, cclc, colCollations,
1137 itlc, input_tlist, rtlc, refnames_tlist)
1139 Oid colType = lfirst_oid(ctlc);
1140 Oid colColl = lfirst_oid(cclc);
1141 TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1142 TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1144 Assert(inputtle->resno == resno);
1145 Assert(reftle->resno == resno);
1146 Assert(!inputtle->resjunk);
1147 Assert(!reftle->resjunk);
1150 * Generate columns referencing input columns and having appropriate
1151 * data types and column names. Insert datatype coercions where
1152 * necessary.
1154 * HACK: constants in the input's targetlist are copied up as-is
1155 * rather than being referenced as subquery outputs. This is mainly
1156 * to ensure that when we try to coerce them to the output column's
1157 * datatype, the right things happen for UNKNOWN constants. But do
1158 * this only at the first level of subquery-scan plans; we don't want
1159 * phony constants appearing in the output tlists of upper-level
1160 * nodes!
1162 if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1163 expr = (Node *) inputtle->expr;
1164 else
1165 expr = (Node *) makeVar(varno,
1166 inputtle->resno,
1167 exprType((Node *) inputtle->expr),
1168 exprTypmod((Node *) inputtle->expr),
1169 exprCollation((Node *) inputtle->expr),
1172 if (exprType(expr) != colType)
1175 * Note: it's not really cool to be applying coerce_to_common_type
1176 * here; one notable point is that assign_expr_collations never
1177 * gets run on any generated nodes. For the moment that's not a
1178 * problem because we force the correct exposed collation below.
1179 * It would likely be best to make the parser generate the correct
1180 * output tlist for every set-op to begin with, though.
1182 expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1183 expr,
1184 colType,
1185 "UNION/INTERSECT/EXCEPT");
1189 * Ensure the tlist entry's exposed collation matches the set-op. This
1190 * is necessary because plan_set_operations() reports the result
1191 * ordering as a list of SortGroupClauses, which don't carry collation
1192 * themselves but just refer to tlist entries. If we don't show the
1193 * right collation then planner.c might do the wrong thing in
1194 * higher-level queries.
1196 * Note we use RelabelType, not CollateExpr, since this expression
1197 * will reach the executor without any further processing.
1199 if (exprCollation(expr) != colColl)
1201 expr = (Node *) makeRelabelType((Expr *) expr,
1202 exprType(expr),
1203 exprTypmod(expr),
1204 colColl,
1205 COERCE_IMPLICIT_CAST);
1208 tle = makeTargetEntry((Expr *) expr,
1209 (AttrNumber) resno++,
1210 pstrdup(reftle->resname),
1211 false);
1214 * By convention, all non-resjunk columns in a setop tree have
1215 * ressortgroupref equal to their resno. In some cases the ref isn't
1216 * needed, but this is a cleaner way than modifying the tlist later.
1218 tle->ressortgroupref = tle->resno;
1220 tlist = lappend(tlist, tle);
1223 if (flag >= 0)
1225 /* Add a resjunk flag column */
1226 /* flag value is the given constant */
1227 expr = (Node *) makeConst(INT4OID,
1229 InvalidOid,
1230 sizeof(int32),
1231 Int32GetDatum(flag),
1232 false,
1233 true);
1234 tle = makeTargetEntry((Expr *) expr,
1235 (AttrNumber) resno++,
1236 pstrdup("flag"),
1237 true);
1238 tlist = lappend(tlist, tle);
1241 return tlist;
1245 * Generate targetlist for a set-operation Append node
1247 * colTypes: OID list of set-op's result column datatypes
1248 * colCollations: OID list of set-op's result column collations
1249 * flag: true to create a flag column copied up from subplans
1250 * input_tlists: list of tlists for sub-plans of the Append
1251 * refnames_tlist: targetlist to take column names from
1253 * The entries in the Append's targetlist should always be simple Vars;
1254 * we just have to make sure they have the right datatypes/typmods/collations.
1255 * The Vars are always generated with varno 0.
1257 * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1258 * cannot figure out a realistic width for the tlist we make here. But we
1259 * ought to refactor this code to produce a PathTarget directly, anyway.
1261 static List *
1262 generate_append_tlist(List *colTypes, List *colCollations,
1263 bool flag,
1264 List *input_tlists,
1265 List *refnames_tlist)
1267 List *tlist = NIL;
1268 int resno = 1;
1269 ListCell *curColType;
1270 ListCell *curColCollation;
1271 ListCell *ref_tl_item;
1272 int colindex;
1273 TargetEntry *tle;
1274 Node *expr;
1275 ListCell *tlistl;
1276 int32 *colTypmods;
1279 * First extract typmods to use.
1281 * If the inputs all agree on type and typmod of a particular column, use
1282 * that typmod; else use -1.
1284 colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1286 foreach(tlistl, input_tlists)
1288 List *subtlist = (List *) lfirst(tlistl);
1289 ListCell *subtlistl;
1291 curColType = list_head(colTypes);
1292 colindex = 0;
1293 foreach(subtlistl, subtlist)
1295 TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1297 if (subtle->resjunk)
1298 continue;
1299 Assert(curColType != NULL);
1300 if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1302 /* If first subplan, copy the typmod; else compare */
1303 int32 subtypmod = exprTypmod((Node *) subtle->expr);
1305 if (tlistl == list_head(input_tlists))
1306 colTypmods[colindex] = subtypmod;
1307 else if (subtypmod != colTypmods[colindex])
1308 colTypmods[colindex] = -1;
1310 else
1312 /* types disagree, so force typmod to -1 */
1313 colTypmods[colindex] = -1;
1315 curColType = lnext(colTypes, curColType);
1316 colindex++;
1318 Assert(curColType == NULL);
1322 * Now we can build the tlist for the Append.
1324 colindex = 0;
1325 forthree(curColType, colTypes, curColCollation, colCollations,
1326 ref_tl_item, refnames_tlist)
1328 Oid colType = lfirst_oid(curColType);
1329 int32 colTypmod = colTypmods[colindex++];
1330 Oid colColl = lfirst_oid(curColCollation);
1331 TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1333 Assert(reftle->resno == resno);
1334 Assert(!reftle->resjunk);
1335 expr = (Node *) makeVar(0,
1336 resno,
1337 colType,
1338 colTypmod,
1339 colColl,
1341 tle = makeTargetEntry((Expr *) expr,
1342 (AttrNumber) resno++,
1343 pstrdup(reftle->resname),
1344 false);
1347 * By convention, all non-resjunk columns in a setop tree have
1348 * ressortgroupref equal to their resno. In some cases the ref isn't
1349 * needed, but this is a cleaner way than modifying the tlist later.
1351 tle->ressortgroupref = tle->resno;
1353 tlist = lappend(tlist, tle);
1356 if (flag)
1358 /* Add a resjunk flag column */
1359 /* flag value is shown as copied up from subplan */
1360 expr = (Node *) makeVar(0,
1361 resno,
1362 INT4OID,
1364 InvalidOid,
1366 tle = makeTargetEntry((Expr *) expr,
1367 (AttrNumber) resno++,
1368 pstrdup("flag"),
1369 true);
1370 tlist = lappend(tlist, tle);
1373 pfree(colTypmods);
1375 return tlist;
1379 * generate_setop_grouplist
1380 * Build a SortGroupClause list defining the sort/grouping properties
1381 * of the setop's output columns.
1383 * Parse analysis already determined the properties and built a suitable
1384 * list, except that the entries do not have sortgrouprefs set because
1385 * the parser output representation doesn't include a tlist for each
1386 * setop. So what we need to do here is copy that list and install
1387 * proper sortgrouprefs into it (copying those from the targetlist).
1389 static List *
1390 generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1392 List *grouplist = copyObject(op->groupClauses);
1393 ListCell *lg;
1394 ListCell *lt;
1396 lg = list_head(grouplist);
1397 foreach(lt, targetlist)
1399 TargetEntry *tle = (TargetEntry *) lfirst(lt);
1400 SortGroupClause *sgc;
1402 if (tle->resjunk)
1404 /* resjunk columns should not have sortgrouprefs */
1405 Assert(tle->ressortgroupref == 0);
1406 continue; /* ignore resjunk columns */
1409 /* non-resjunk columns should have sortgroupref = resno */
1410 Assert(tle->ressortgroupref == tle->resno);
1412 /* non-resjunk columns should have grouping clauses */
1413 Assert(lg != NULL);
1414 sgc = (SortGroupClause *) lfirst(lg);
1415 lg = lnext(grouplist, lg);
1416 Assert(sgc->tleSortGroupRef == 0);
1418 sgc->tleSortGroupRef = tle->ressortgroupref;
1420 Assert(lg == NULL);
1421 return grouplist;