/*-------------------------------------------------------------------------
 *
 * prepunion.c
 *    Routines to plan set-operation queries.  The filename is a leftover
 *    from a time when only UNIONs were implemented.
 *
 * There are two code paths in the planner for set-operation queries.
 * If a subquery consists entirely of simple UNION ALL operations, it
 * is converted into an "append relation".  Otherwise, it is handled
 * by the general code in this module (plan_set_operations and its
 * subroutines).  There is some support code here for the append-relation
 * case, but most of the heavy lifting for that is done elsewhere,
 * notably in prepjointree.c and allpaths.c.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/optimizer/prep/prepunion.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/tlist.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"

static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
                                          List *colTypes, List *colCollations,
                                          bool junkOK,
                                          int flag, List *refnames_tlist,
                                          List **pTargetList,
                                          double *pNumGroups);
static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
                                           PlannerInfo *root,
                                           List *refnames_tlist,
                                           List **pTargetList);
static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                                        List *refnames_tlist,
                                        List **pTargetList);
static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                                           List *refnames_tlist,
                                           List **pTargetList);
static List *plan_union_children(PlannerInfo *root,
                                 SetOperationStmt *top_union,
                                 List *refnames_tlist,
                                 List **tlist_list);
static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
                               PlannerInfo *root);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
                                Path *input_path,
                                double dNumGroups, double dNumOutputRows,
                                const char *construct);
static List *generate_setop_tlist(List *colTypes, List *colCollations,
                                  int flag,
                                  Index varno,
                                  bool hack_constants,
                                  List *input_tlist,
                                  List *refnames_tlist);
static List *generate_append_tlist(List *colTypes, List *colCollations,
                                   bool flag,
                                   List *input_tlists,
                                   List *refnames_tlist);
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);

/*
 * plan_set_operations
 *
 *    Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
 *
 * This routine only deals with the setOperations tree of the given query.
 * Any top-level ORDER BY requested in root->parse->sortClause will be handled
 * when we return to grouping_planner; likewise for LIMIT.
 *
 * What we return is an "upperrel" RelOptInfo containing at least one Path
 * that implements the set-operation tree.  In addition, root->processed_tlist
 * receives a targetlist representing the output of the topmost setop node.
 */
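/*
 * For instance (illustrative): given
 *
 *      SELECT x FROM t1 UNION SELECT x FROM t2 ORDER BY x LIMIT 10
 *
 * this routine plans only the UNION; the ORDER BY and LIMIT are added on
 * top afterwards by grouping_planner.
 */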
RelOptInfo *
plan_set_operations(PlannerInfo *root)
{
    Query      *parse = root->parse;
    SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
    Node       *node;
    RangeTblEntry *leftmostRTE;
    Query      *leftmostQuery;
    RelOptInfo *setop_rel;
    List       *top_tlist;

    Assert(topop);

    /* check for unsupported stuff */
    Assert(parse->jointree->fromlist == NIL);
    Assert(parse->jointree->quals == NULL);
    Assert(parse->groupClause == NIL);
    Assert(parse->havingQual == NULL);
    Assert(parse->windowClause == NIL);
    Assert(parse->distinctClause == NIL);

    /*
     * In the outer query level, we won't have any true equivalences to deal
     * with; but we do want to be able to make pathkeys, which will require
     * single-member EquivalenceClasses.  Indicate that EC merging is complete
     * so that pathkeys.c won't complain.
     */
    Assert(root->eq_classes == NIL);
    root->ec_merging_done = true;

    /*
     * We'll need to build RelOptInfos for each of the leaf subqueries, which
     * are RTE_SUBQUERY rangetable entries in this Query.  Prepare the index
     * arrays for those, and for AppendRelInfos in case they're needed.
     */
    setup_simple_rel_arrays(root);

    /*
     * Find the leftmost component Query.  We need to use its column names for
     * all generated tlists (else SELECT INTO won't work right).
     */
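    /*
     * For instance (illustrative): in "SELECT a AS x FROM t1 UNION SELECT b
     * FROM t2", the result column is named "x", taken from the leftmost arm.
     */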
    node = topop->larg;
    while (node && IsA(node, SetOperationStmt))
        node = ((SetOperationStmt *) node)->larg;
    Assert(node && IsA(node, RangeTblRef));
    leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
    leftmostQuery = leftmostRTE->subquery;
    Assert(leftmostQuery != NULL);

    /*
     * If the topmost node is a recursive union, it needs special processing.
     */
    if (root->hasRecursion)
    {
        setop_rel = generate_recursion_path(topop, root,
                                            leftmostQuery->targetList,
                                            &top_tlist);
    }
    else
    {
        /*
         * Recurse on setOperations tree to generate paths for set ops. The
         * final output paths should have just the column types shown as the
         * output from the top-level node, plus possibly resjunk working
         * columns (we can rely on upper-level nodes to deal with that).
         */
        setop_rel = recurse_set_operations((Node *) topop, root,
                                           topop->colTypes, topop->colCollations,
                                           true, -1,
                                           leftmostQuery->targetList,
                                           &top_tlist,
                                           NULL);
    }

    /* Must return the built tlist into root->processed_tlist. */
    root->processed_tlist = top_tlist;

    return setop_rel;
}
/*
 * recurse_set_operations
 *    Recursively handle one step in a tree of set operations
 *
 * colTypes: OID list of set-op's result column datatypes
 * colCollations: OID list of set-op's result column collations
 * junkOK: if true, child resjunk columns may be left in the result
 * flag: if >= 0, add a resjunk output column indicating value of flag
 * refnames_tlist: targetlist to take column names from
 *
 * Returns a RelOptInfo for the subtree, as well as these output parameters:
 * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
 * *pNumGroups: if not NULL, we estimate the number of distinct groups
 *      in the result, and store it there
 *
 * The pTargetList output parameter is mostly redundant with the pathtarget
 * of the returned RelOptInfo, but for the moment we need it because much of
 * the logic in this file depends on flag columns being marked resjunk.
 * Pending a redesign of how that works, this is the easy way out.
 *
 * We don't have to care about typmods here: the only allowed difference
 * between set-op input and output typmods is input is a specific typmod
 * and output is -1, and that does not require a coercion.
 */
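/*
 * For example (illustrative): a UNION of a varchar(10) column with a
 * varchar(20) column exposes the result as plain varchar (typmod -1), so
 * neither input needs a length-coercion node.
 */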
static RelOptInfo *
recurse_set_operations(Node *setOp, PlannerInfo *root,
                       List *colTypes, List *colCollations,
                       bool junkOK,
                       int flag, List *refnames_tlist,
                       List **pTargetList,
                       double *pNumGroups)
{
    RelOptInfo *rel = NULL;     /* keep compiler quiet */

    /* Guard against stack overflow due to overly complex setop nests */
    check_stack_depth();

    if (IsA(setOp, RangeTblRef))
    {
        RangeTblRef *rtr = (RangeTblRef *) setOp;
        RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
        Query      *subquery = rte->subquery;
        PlannerInfo *subroot;
        RelOptInfo *final_rel;
        Path       *subpath;
        Path       *path;
        List       *tlist;

        Assert(subquery != NULL);

        /* Build a RelOptInfo for this leaf subquery. */
        rel = build_simple_rel(root, rtr->rtindex, NULL);

        /* plan_params should not be in use in current query level */
        Assert(root->plan_params == NIL);

        /* Generate a subroot and Paths for the subquery */
        subroot = rel->subroot = subquery_planner(root->glob, subquery,
                                                  root, false,
                                                  root->tuple_fraction);

        /*
         * It should not be possible for the primitive query to contain any
         * cross-references to other primitive queries in the setop tree.
         */
        if (root->plan_params)
            elog(ERROR, "unexpected outer reference in set operation subquery");

        /* Figure out the appropriate target list for this subquery. */
        tlist = generate_setop_tlist(colTypes, colCollations,
                                     flag,
                                     rtr->rtindex,
                                     true,
                                     subroot->processed_tlist,
                                     refnames_tlist);
        rel->reltarget = create_pathtarget(root, tlist);

        /* Return the fully-fledged tlist to caller, too */
        *pTargetList = tlist;

        /*
         * Mark rel with estimated output rows, width, etc.  Note that we have
         * to do this before generating outer-query paths, else
         * cost_subqueryscan is not happy.
         */
        set_subquery_size_estimates(root, rel);

        /*
         * Since we may want to add a partial path to this relation, we must
         * set its consider_parallel flag correctly.
         */
        final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
        rel->consider_parallel = final_rel->consider_parallel;

        /*
         * For the moment, we consider only a single Path for the subquery.
         * This should change soon (make it look more like
         * set_subquery_pathlist).
         */
        subpath = get_cheapest_fractional_path(final_rel,
                                               root->tuple_fraction);

        /*
         * Stick a SubqueryScanPath atop that.
         *
         * We don't bother to determine the subquery's output ordering since
         * it won't be reflected in the set-op result anyhow; so just label
         * the SubqueryScanPath with nil pathkeys.  (XXX that should change
         * soon too, likely.)
         */
        path = (Path *) create_subqueryscan_path(root, rel, subpath,
                                                 NIL, NULL);

        add_path(rel, path);

        /*
         * If we have a partial path for the child relation, we can use that
         * to build a partial path for this relation.  But there's no point in
         * considering any path but the cheapest.
         */
        if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
            final_rel->partial_pathlist != NIL)
        {
            Path       *partial_subpath;
            Path       *partial_path;

            partial_subpath = linitial(final_rel->partial_pathlist);
            partial_path = (Path *)
                create_subqueryscan_path(root, rel, partial_subpath,
                                         NIL, NULL);
            add_partial_path(rel, partial_path);
        }

        /*
         * Estimate number of groups if caller wants it.  If the subquery used
         * grouping or aggregation, its output is probably mostly unique
         * anyway; otherwise do statistical estimation.
         *
         * XXX you don't really want to know about this: we do the estimation
         * using the subquery's original targetlist expressions, not the
         * subroot->processed_tlist which might seem more appropriate.  The
         * reason is that if the subquery is itself a setop, it may return a
         * processed_tlist containing "varno 0" Vars generated by
         * generate_append_tlist, and those would confuse estimate_num_groups
         * mightily.  We ought to get rid of the "varno 0" hack, but that
         * requires a redesign of the parsetree representation of setops, so
         * that there can be an RTE corresponding to each setop's output.
         */
        if (pNumGroups)
        {
            if (subquery->groupClause || subquery->groupingSets ||
                subquery->distinctClause ||
                subroot->hasHavingQual || subquery->hasAggs)
                *pNumGroups = subpath->rows;
            else
                *pNumGroups = estimate_num_groups(subroot,
                                                  get_tlist_exprs(subquery->targetList, false),
                                                  subpath->rows,
                                                  NULL);
        }
    }
    else if (IsA(setOp, SetOperationStmt))
    {
        SetOperationStmt *op = (SetOperationStmt *) setOp;

        /* UNIONs are much different from INTERSECT/EXCEPT */
        if (op->op == SETOP_UNION)
            rel = generate_union_paths(op, root,
                                       refnames_tlist,
                                       pTargetList);
        else
            rel = generate_nonunion_paths(op, root,
                                          refnames_tlist,
                                          pTargetList);
        if (pNumGroups)
            *pNumGroups = rel->rows;

        /*
         * If necessary, add a Result node to project the caller-requested
         * output columns.
         *
         * XXX you don't really want to know about this: setrefs.c will apply
         * fix_upper_expr() to the Result node's tlist. This would fail if the
         * Vars generated by generate_setop_tlist() were not exactly equal()
         * to the corresponding tlist entries of the subplan. However, since
         * the subplan was generated by generate_union_paths() or
         * generate_nonunion_paths(), and hence its tlist was generated by
         * generate_append_tlist(), this will work.  We just tell
         * generate_setop_tlist() to use varno 0.
         */
        if (flag >= 0 ||
            !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
            !tlist_same_collations(*pTargetList, colCollations, junkOK))
        {
            PathTarget *target;
            ListCell   *lc;

            /* Build the new tlist, and apply it to all paths */
            *pTargetList = generate_setop_tlist(colTypes, colCollations,
                                                flag,
                                                0,
                                                false,
                                                *pTargetList,
                                                refnames_tlist);
            target = create_pathtarget(root, *pTargetList);

            /* Apply projection to each path */
            foreach(lc, rel->pathlist)
            {
                Path       *subpath = (Path *) lfirst(lc);
                Path       *path;

                Assert(subpath->param_info == NULL);
                path = apply_projection_to_path(root, subpath->parent,
                                                subpath, target);
                /* If we had to add a Result, path is different from subpath */
                if (path != subpath)
                    lfirst(lc) = path;
            }

            /* Apply projection to each partial path */
            foreach(lc, rel->partial_pathlist)
            {
                Path       *subpath = (Path *) lfirst(lc);
                Path       *path;

                Assert(subpath->param_info == NULL);

                /* avoid apply_projection_to_path, in case of multiple refs */
                path = (Path *) create_projection_path(root, subpath->parent,
                                                       subpath, target);
                lfirst(lc) = path;
            }
        }
    }
    else
    {
        elog(ERROR, "unrecognized node type: %d",
             (int) nodeTag(setOp));
        *pTargetList = NIL;
    }

    postprocess_setop_rel(root, rel);

    return rel;
}
/*
 * Generate paths for a recursive UNION node
 */
static RelOptInfo *
generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
                        List *refnames_tlist,
                        List **pTargetList)
{
    RelOptInfo *result_rel;
    Path       *path;
    RelOptInfo *lrel,
               *rrel;
    Path       *lpath;
    Path       *rpath;
    List       *lpath_tlist;
    List       *rpath_tlist;
    List       *tlist;
    List       *groupList;
    double      dNumGroups;

    /* Parser should have rejected other cases */
    if (setOp->op != SETOP_UNION)
        elog(ERROR, "only UNION queries can be recursive");
    /* Worktable ID should be assigned */
    Assert(root->wt_param_id >= 0);

    /*
     * Unlike a regular UNION node, process the left and right inputs
     * separately without any intention of combining them into one Append.
     */
    lrel = recurse_set_operations(setOp->larg, root,
                                  setOp->colTypes, setOp->colCollations,
                                  false, -1,
                                  refnames_tlist,
                                  &lpath_tlist,
                                  NULL);
    lpath = lrel->cheapest_total_path;
    /* The right path will want to look at the left one ... */
    root->non_recursive_path = lpath;
    rrel = recurse_set_operations(setOp->rarg, root,
                                  setOp->colTypes, setOp->colCollations,
                                  false, -1,
                                  refnames_tlist,
                                  &rpath_tlist,
                                  NULL);
    rpath = rrel->cheapest_total_path;
    root->non_recursive_path = NULL;

    /*
     * Generate tlist for RecursiveUnion path node --- same as in Append cases
     */
    tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
                                  list_make2(lpath_tlist, rpath_tlist),
                                  refnames_tlist);

    *pTargetList = tlist;

    /* Build result relation. */
    result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
                                 bms_union(lrel->relids, rrel->relids));
    result_rel->reltarget = create_pathtarget(root, tlist);

    /*
     * If UNION, identify the grouping operators
     */
    if (setOp->all)
    {
        groupList = NIL;
        dNumGroups = 0;
    }
    else
    {
        /* Identify the grouping semantics */
        groupList = generate_setop_grouplist(setOp, tlist);

        /* We only support hashing here */
        if (!grouping_is_hashable(groupList))
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("could not implement recursive UNION"),
                     errdetail("All column datatypes must be hashable.")));

        /*
         * For the moment, take the number of distinct groups as equal to the
         * total input size, ie, the worst case.
         */
        dNumGroups = lpath->rows + rpath->rows * 10;
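        /*
         * The factor of 10 applied to the recursive arm's row count is
         * presumably meant to allow for that arm being re-executed over
         * multiple iterations of the recursion.
         */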
    }

    /*
     * And make the path node.
     */
    path = (Path *) create_recursiveunion_path(root,
                                               result_rel,
                                               lpath,
                                               rpath,
                                               result_rel->reltarget,
                                               groupList,
                                               root->wt_param_id,
                                               dNumGroups);

    add_path(result_rel, path);
    postprocess_setop_rel(root, result_rel);
    return result_rel;
}
/*
 * Generate paths for a UNION or UNION ALL node
 */
static RelOptInfo *
generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                     List *refnames_tlist,
                     List **pTargetList)
{
    Relids      relids = NULL;
    RelOptInfo *result_rel;
    double      save_fraction = root->tuple_fraction;
    ListCell   *lc;
    List       *pathlist = NIL;
    List       *partial_pathlist = NIL;
    bool        partial_paths_valid = true;
    bool        consider_parallel = true;
    List       *rellist;
    List       *tlist_list;
    List       *tlist;
    Path       *path;

    /*
     * If plain UNION, tell children to fetch all tuples.
     *
     * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
     * each arm of the UNION ALL.  One could make a case for reducing the
     * tuple fraction for later arms (discounting by the expected size of the
     * earlier arms' results) but it seems not worth the trouble. The normal
     * case where tuple_fraction isn't already zero is a LIMIT at top level,
     * and passing it down as-is is usually enough to get the desired result
     * of preferring fast-start plans.
     */
    if (!op->all)
        root->tuple_fraction = 0.0;

    /*
     * If any of my children are identical UNION nodes (same op, all-flag, and
     * colTypes) then they can be merged into this node so that we generate
     * only one Append and unique-ification for the lot.  Recurse to find such
     * nodes and compute their children's paths.
     */
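    /*
     * For example (illustrative): "SELECT 1 UNION SELECT 2 UNION SELECT 3"
     * parses as two nested UNION nodes, but both get folded here, so a
     * single Append plus one duplicate-elimination step covers all three
     * arms.
     */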
    rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);

    /*
     * Generate tlist for Append plan node.
     *
     * The tlist for an Append plan isn't important as far as the Append is
     * concerned, but we must make it look real anyway for the benefit of the
     * next plan level up.
     */
    tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
                                  tlist_list, refnames_tlist);

    *pTargetList = tlist;

    /* Build path lists and relid set. */
    foreach(lc, rellist)
    {
        RelOptInfo *rel = lfirst(lc);

        pathlist = lappend(pathlist, rel->cheapest_total_path);

        if (consider_parallel)
        {
            if (!rel->consider_parallel)
            {
                consider_parallel = false;
                partial_paths_valid = false;
            }
            else if (rel->partial_pathlist == NIL)
                partial_paths_valid = false;
            else
                partial_pathlist = lappend(partial_pathlist,
                                           linitial(rel->partial_pathlist));
        }

        relids = bms_union(relids, rel->relids);
    }

    /* Build result relation. */
    result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
    result_rel->reltarget = create_pathtarget(root, tlist);
    result_rel->consider_parallel = consider_parallel;

    /*
     * Append the child results together.
     */
    path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
                                       NIL, NULL, 0, false, NIL, -1);

    /*
     * For UNION ALL, we just need the Append path.  For UNION, need to add
     * node(s) to remove duplicates.
     */
    if (!op->all)
        path = make_union_unique(op, path, tlist, root);

    add_path(result_rel, path);

    /*
     * Estimate number of groups.  For now we just assume the output is unique
     * --- this is certainly true for the UNION case, and we want worst-case
     * estimates anyway.
     */
    result_rel->rows = path->rows;

    /*
     * Now consider doing the same thing using the partial paths plus Append
     * plus Gather.
     */
    if (partial_paths_valid)
    {
        Path       *ppath;
        ListCell   *lc;
        int         parallel_workers = 0;

        /* Find the highest number of workers requested for any subpath. */
        foreach(lc, partial_pathlist)
        {
            Path       *path = lfirst(lc);

            parallel_workers = Max(parallel_workers, path->parallel_workers);
        }
        Assert(parallel_workers > 0);

        /*
         * If the use of parallel append is permitted, always request at least
         * log2(# of children) paths.  We assume it can be useful to have
         * extra workers in this case because they will be spread out across
         * the children.  The precise formula is just a guess; see
         * add_paths_to_append_rel.
         */
        if (enable_parallel_append)
        {
            parallel_workers = Max(parallel_workers,
                                   fls(list_length(partial_pathlist)));
            parallel_workers = Min(parallel_workers,
                                   max_parallel_workers_per_gather);
        }
        Assert(parallel_workers > 0);

        ppath = (Path *)
            create_append_path(root, result_rel, NIL, partial_pathlist,
                               NIL, NULL,
                               parallel_workers, enable_parallel_append,
                               NIL, -1);
        ppath = (Path *)
            create_gather_path(root, result_rel, ppath,
                               result_rel->reltarget, NULL, NULL);
        if (!op->all)
            ppath = make_union_unique(op, ppath, tlist, root);
        add_path(result_rel, ppath);
    }

    /* Undo effects of possibly forcing tuple_fraction to 0 */
    root->tuple_fraction = save_fraction;

    return result_rel;
}
/*
 * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
 */
static RelOptInfo *
generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                        List *refnames_tlist,
                        List **pTargetList)
{
    RelOptInfo *result_rel;
    RelOptInfo *lrel,
               *rrel;
    double      save_fraction = root->tuple_fraction;
    Path       *lpath,
               *rpath,
               *path;
    List       *lpath_tlist,
               *rpath_tlist,
               *tlist_list,
               *tlist,
               *groupList,
               *pathlist;
    double      dLeftGroups,
                dRightGroups,
                dNumGroups,
                dNumOutputRows;
    bool        use_hash;
    SetOpCmd    cmd;
    int         firstFlag;

    /*
     * Tell children to fetch all tuples.
     */
    root->tuple_fraction = 0.0;

    /* Recurse on children, ensuring their outputs are marked */
    lrel = recurse_set_operations(op->larg, root,
                                  op->colTypes, op->colCollations,
                                  false, 0,
                                  refnames_tlist,
                                  &lpath_tlist,
                                  &dLeftGroups);
    lpath = lrel->cheapest_total_path;
    rrel = recurse_set_operations(op->rarg, root,
                                  op->colTypes, op->colCollations,
                                  false, 1,
                                  refnames_tlist,
                                  &rpath_tlist,
                                  &dRightGroups);
    rpath = rrel->cheapest_total_path;

    /* Undo effects of forcing tuple_fraction to 0 */
    root->tuple_fraction = save_fraction;

    /*
     * For EXCEPT, we must put the left input first.  For INTERSECT, either
     * order should give the same results, and we prefer to put the smaller
     * input first in order to minimize the size of the hash table in the
     * hashing case.  "Smaller" means the one with the fewer groups.
     */
    if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
    {
        pathlist = list_make2(lpath, rpath);
        tlist_list = list_make2(lpath_tlist, rpath_tlist);
        firstFlag = 0;
    }
    else
    {
        pathlist = list_make2(rpath, lpath);
        tlist_list = list_make2(rpath_tlist, lpath_tlist);
        firstFlag = 1;
    }

    /*
     * Generate tlist for Append plan node.
     *
     * The tlist for an Append plan isn't important as far as the Append is
     * concerned, but we must make it look real anyway for the benefit of the
     * next plan level up.  In fact, it has to be real enough that the flag
     * column is shown as a variable not a constant, else setrefs.c will get
     * confused.
     */
    tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
                                  tlist_list, refnames_tlist);

    *pTargetList = tlist;

    /* Build result relation. */
    result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
                                 bms_union(lrel->relids, rrel->relids));
    result_rel->reltarget = create_pathtarget(root, tlist);

    /*
     * Append the child results together.
     */
    path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
                                       NIL, NULL, 0, false, NIL, -1);

    /* Identify the grouping semantics */
    groupList = generate_setop_grouplist(op, tlist);

    /*
     * Estimate number of distinct groups that we'll need hashtable entries
     * for; this is the size of the left-hand input for EXCEPT, or the smaller
     * input for INTERSECT.  Also estimate the number of eventual output rows.
     * In non-ALL cases, we estimate each group produces one output row; in
     * ALL cases use the relevant relation size.  These are worst-case
     * estimates, of course, but we need to be conservative.
     */
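    /*
     * E.g. (illustrative): for "a EXCEPT ALL b" where a yields 1000 rows in
     * an estimated 300 distinct groups, dNumGroups is 300 and dNumOutputRows
     * is 1000; plain "a EXCEPT b" would use 300 for both.
     */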
    if (op->op == SETOP_EXCEPT)
    {
        dNumGroups = dLeftGroups;
        dNumOutputRows = op->all ? lpath->rows : dNumGroups;
    }
    else
    {
        dNumGroups = Min(dLeftGroups, dRightGroups);
        dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
    }

    /*
     * Decide whether to hash or sort, and add a sort node if needed.
     */
    use_hash = choose_hashed_setop(root, groupList, path,
                                   dNumGroups, dNumOutputRows,
                                   (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");

    if (groupList && !use_hash)
        path = (Path *) create_sort_path(root,
                                         result_rel,
                                         path,
                                         make_pathkeys_for_sortclauses(root,
                                                                       groupList,
                                                                       tlist),
                                         -1.0);

    /*
     * Finally, add a SetOp path node to generate the correct output.
     */
    switch (op->op)
    {
        case SETOP_INTERSECT:
            cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
            break;
        case SETOP_EXCEPT:
            cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
            break;
        default:
            elog(ERROR, "unrecognized set op: %d", (int) op->op);
            cmd = SETOPCMD_INTERSECT;   /* keep compiler quiet */
            break;
    }
    path = (Path *) create_setop_path(root,
                                      result_rel,
                                      path,
                                      cmd,
                                      use_hash ? SETOP_HASHED : SETOP_SORTED,
                                      groupList,
                                      list_length(op->colTypes) + 1,
                                      use_hash ? firstFlag : -1,
                                      dNumGroups,
                                      dNumOutputRows);

    result_rel->rows = path->rows;
    add_path(result_rel, path);
    return result_rel;
}
/*
 * Pull up children of a UNION node that are identically-propertied UNIONs.
 *
 * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
 * output rows will be lost anyway.
 *
 * NOTE: currently, we ignore collations while determining if a child has
 * the same properties.  This is semantically sound only so long as all
 * collations have the same notion of equality.  It is valid from an
 * implementation standpoint because we don't care about the ordering of
 * a UNION child's result: UNION ALL results are always unordered, and
 * generate_union_paths will force a fresh sort if the top level is a UNION.
 */
static List *
plan_union_children(PlannerInfo *root,
                    SetOperationStmt *top_union,
                    List *refnames_tlist,
                    List **tlist_list)
{
    List       *pending_rels = list_make1(top_union);
    List       *result = NIL;
    List       *child_tlist;

    *tlist_list = NIL;

    while (pending_rels != NIL)
    {
        Node       *setOp = linitial(pending_rels);

        pending_rels = list_delete_first(pending_rels);

        if (IsA(setOp, SetOperationStmt))
        {
            SetOperationStmt *op = (SetOperationStmt *) setOp;

            if (op->op == top_union->op &&
                (op->all == top_union->all || op->all) &&
                equal(op->colTypes, top_union->colTypes))
            {
                /* Same UNION, so fold children into parent */
                pending_rels = lcons(op->rarg, pending_rels);
                pending_rels = lcons(op->larg, pending_rels);
                continue;
            }
        }

        /*
         * Not same, so plan this child separately.
         *
         * Note we disallow any resjunk columns in child results.  This is
         * necessary since the Append node that implements the union won't do
         * any projection, and upper levels will get confused if some of our
         * output tuples have junk and some don't.  This case only arises when
         * we have an EXCEPT or INTERSECT as child, else there won't be
         * resjunk anyway.
         */
        result = lappend(result, recurse_set_operations(setOp, root,
                                                        top_union->colTypes,
                                                        top_union->colCollations,
                                                        false, -1,
                                                        refnames_tlist,
                                                        &child_tlist,
                                                        NULL));
        *tlist_list = lappend(*tlist_list, child_tlist);
    }

    return result;
}
/*
 * Add nodes to the given path tree to unique-ify the result of a UNION.
 */
static Path *
make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
                  PlannerInfo *root)
{
    RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
    List       *groupList;
    double      dNumGroups;

    /* Identify the grouping semantics */
    groupList = generate_setop_grouplist(op, tlist);

    /*
     * XXX for the moment, take the number of distinct groups as equal to the
     * total input size, ie, the worst case.  This is too conservative, but
     * it's not clear how to get a decent estimate of the true size.  One
     * should note as well the propensity of novices to write UNION rather
     * than UNION ALL even when they don't expect any duplicates...
     */
    dNumGroups = path->rows;

    /* Decide whether to hash or sort */
    if (choose_hashed_setop(root, groupList, path,
                            dNumGroups, dNumGroups,
                            "UNION"))
    {
        /* Hashed aggregate plan --- no sort needed */
        path = (Path *) create_agg_path(root,
                                        result_rel,
                                        path,
                                        create_pathtarget(root, tlist),
                                        AGG_HASHED,
                                        AGGSPLIT_SIMPLE,
                                        groupList,
                                        NIL,
                                        NULL,
                                        dNumGroups);
    }
    else
    {
        /* Sort and Unique */
        if (groupList)
            path = (Path *)
                create_sort_path(root,
                                 result_rel,
                                 path,
                                 make_pathkeys_for_sortclauses(root,
                                                               groupList,
                                                               tlist),
                                 -1.0);
        path = (Path *) create_upper_unique_path(root,
                                                 result_rel,
                                                 path,
                                                 list_length(path->pathkeys),
                                                 dNumGroups);
    }

    return path;
}
/*
 * postprocess_setop_rel - perform steps required after adding paths
 */
static void
postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
{
    /*
     * We don't currently worry about allowing FDWs to contribute paths to
     * this relation, but give extensions a chance.
     */
    if (create_upper_paths_hook)
        (*create_upper_paths_hook) (root, UPPERREL_SETOP,
                                    NULL, rel, NULL);

    /* Select cheapest path */
    set_cheapest(rel);
}
/*
 * choose_hashed_setop - should we use hashing for a set operation?
 */
static bool
choose_hashed_setop(PlannerInfo *root, List *groupClauses,
                    Path *input_path,
                    double dNumGroups, double dNumOutputRows,
                    const char *construct)
{
    int         numGroupCols = list_length(groupClauses);
    bool        can_sort;
    bool        can_hash;
    Size        hashentrysize;
    Path        hashed_p;
    Path        sorted_p;
    double      tuple_fraction;

    /* Check whether the operators support sorting or hashing */
    can_sort = grouping_is_sortable(groupClauses);
    can_hash = grouping_is_hashable(groupClauses);
    if (can_hash && can_sort)
    {
        /* we have a meaningful choice to make, continue ... */
    }
    else if (can_hash)
        return true;
    else if (can_sort)
        return false;
    else
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
        /* translator: %s is UNION, INTERSECT, or EXCEPT */
                 errmsg("could not implement %s", construct),
                 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));

    /* Prefer sorting when enable_hashagg is off */
    if (!enable_hashagg)
        return false;

    /*
     * Don't do it if it doesn't look like the hashtable will fit into
     * work_mem.
     */
    hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);

    if (hashentrysize * dNumGroups > work_mem * 1024L)
        return false;
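    /*
     * Note: work_mem is a GUC measured in kilobytes, so the "* 1024L" above
     * converts it to bytes for comparison with the estimated hashtable size.
     */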
    /*
     * See if the estimated cost is no more than doing it the other way.
     *
     * We need to consider input_plan + hashagg versus input_plan + sort +
     * group.  Note that the actual result plan might involve a SetOp or
     * Unique node, not Agg or Group, but the cost estimates for Agg and Group
     * should be close enough for our purposes here.
     *
     * These path variables are dummies that just hold cost fields; we don't
     * make actual Paths for these steps.
     */
    cost_agg(&hashed_p, root, AGG_HASHED, NULL,
             numGroupCols, dNumGroups,
             NIL,
             input_path->startup_cost, input_path->total_cost,
             input_path->rows, input_path->pathtarget->width);

    /*
     * Now for the sorted case.  Note that the input is *always* unsorted,
     * since it was made by appending unrelated sub-relations together.
     */
    sorted_p.startup_cost = input_path->startup_cost;
    sorted_p.total_cost = input_path->total_cost;
    /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
    cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
              input_path->rows, input_path->pathtarget->width,
              0.0, work_mem, -1.0);
    cost_group(&sorted_p, root, numGroupCols, dNumGroups,
               NIL,
               sorted_p.startup_cost, sorted_p.total_cost,
               input_path->rows);

    /*
     * Now make the decision using the top-level tuple fraction.  First we
     * have to convert an absolute count (LIMIT) into fractional form.
     */
    tuple_fraction = root->tuple_fraction;
    if (tuple_fraction >= 1.0)
        tuple_fraction /= dNumOutputRows;
    if (compare_fractional_path_costs(&hashed_p, &sorted_p,
                                      tuple_fraction) < 0)
    {
        /* Hashed is cheaper, so use it */
        return true;
    }
    return false;
}
/*
 * Generate targetlist for a set-operation plan node
 *
 * colTypes: OID list of set-op's result column datatypes
 * colCollations: OID list of set-op's result column collations
 * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
 * varno: varno to use in generated Vars
 * hack_constants: true to copy up constants (see comments in code)
 * input_tlist: targetlist of this node's input node
 * refnames_tlist: targetlist to take column names from
 */
static List *
generate_setop_tlist(List *colTypes, List *colCollations,
                     int flag,
                     Index varno,
                     bool hack_constants,
                     List *input_tlist,
                     List *refnames_tlist)
{
    List       *tlist = NIL;
    int         resno = 1;
    ListCell   *ctlc,
               *cclc,
               *itlc,
               *rtlc;
    TargetEntry *tle;
    Node       *expr;

    forfour(ctlc, colTypes, cclc, colCollations,
            itlc, input_tlist, rtlc, refnames_tlist)
    {
        Oid         colType = lfirst_oid(ctlc);
        Oid         colColl = lfirst_oid(cclc);
        TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
        TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);

        Assert(inputtle->resno == resno);
        Assert(reftle->resno == resno);
        Assert(!inputtle->resjunk);
        Assert(!reftle->resjunk);

        /*
         * Generate columns referencing input columns and having appropriate
         * data types and column names.  Insert datatype coercions where
         * necessary.
         *
         * HACK: constants in the input's targetlist are copied up as-is
         * rather than being referenced as subquery outputs.  This is mainly
         * to ensure that when we try to coerce them to the output column's
         * datatype, the right things happen for UNKNOWN constants.  But do
         * this only at the first level of subquery-scan plans; we don't want
         * phony constants appearing in the output tlists of upper-level
         * nodes.
         */
&& inputtle
->expr
&& IsA(inputtle
->expr
, Const
))
1163 expr
= (Node
*) inputtle
->expr
;
1165 expr
= (Node
*) makeVar(varno
,
1167 exprType((Node
*) inputtle
->expr
),
1168 exprTypmod((Node
*) inputtle
->expr
),
1169 exprCollation((Node
*) inputtle
->expr
),
1172 if (exprType(expr
) != colType
)
1175 * Note: it's not really cool to be applying coerce_to_common_type
1176 * here; one notable point is that assign_expr_collations never
1177 * gets run on any generated nodes. For the moment that's not a
1178 * problem because we force the correct exposed collation below.
1179 * It would likely be best to make the parser generate the correct
1180 * output tlist for every set-op to begin with, though.
1182 expr
= coerce_to_common_type(NULL
, /* no UNKNOWNs here */
1185 "UNION/INTERSECT/EXCEPT");
1189 * Ensure the tlist entry's exposed collation matches the set-op. This
1190 * is necessary because plan_set_operations() reports the result
1191 * ordering as a list of SortGroupClauses, which don't carry collation
1192 * themselves but just refer to tlist entries. If we don't show the
1193 * right collation then planner.c might do the wrong thing in
1194 * higher-level queries.
1196 * Note we use RelabelType, not CollateExpr, since this expression
1197 * will reach the executor without any further processing.
1199 if (exprCollation(expr
) != colColl
)
1201 expr
= (Node
*) makeRelabelType((Expr
*) expr
,
1205 COERCE_IMPLICIT_CAST
);
1208 tle
= makeTargetEntry((Expr
*) expr
,
1209 (AttrNumber
) resno
++,
1210 pstrdup(reftle
->resname
),
1214 * By convention, all non-resjunk columns in a setop tree have
1215 * ressortgroupref equal to their resno. In some cases the ref isn't
1216 * needed, but this is a cleaner way than modifying the tlist later.
1218 tle
->ressortgroupref
= tle
->resno
;
1220 tlist
= lappend(tlist
, tle
);
1225 /* Add a resjunk flag column */
1226 /* flag value is the given constant */
1227 expr
= (Node
*) makeConst(INT4OID
,
1231 Int32GetDatum(flag
),
1234 tle
= makeTargetEntry((Expr
*) expr
,
1235 (AttrNumber
) resno
++,
1238 tlist
= lappend(tlist
, tle
);
/*
 * Generate targetlist for a set-operation Append node
 *
 * colTypes: OID list of set-op's result column datatypes
 * colCollations: OID list of set-op's result column collations
 * flag: true to create a flag column copied up from subplans
 * input_tlists: list of tlists for sub-plans of the Append
 * refnames_tlist: targetlist to take column names from
 *
 * The entries in the Append's targetlist should always be simple Vars;
 * we just have to make sure they have the right datatypes/typmods/collations.
 * The Vars are always generated with varno 0.
 *
 * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
 * cannot figure out a realistic width for the tlist we make here.  But we
 * ought to refactor this code to produce a PathTarget directly, anyway.
 */
static List *
generate_append_tlist(List *colTypes, List *colCollations,
                      bool flag,
                      List *input_tlists,
                      List *refnames_tlist)
{
    List       *tlist = NIL;
    int         resno = 1;
    ListCell   *curColType;
    ListCell   *curColCollation;
    ListCell   *ref_tl_item;
    int         colindex;
    TargetEntry *tle;
    Node       *expr;
    ListCell   *tlistl;
    int32      *colTypmods;

    /*
     * First extract typmods to use.
     *
     * If the inputs all agree on type and typmod of a particular column, use
     * that typmod; else use -1.
     */
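    /*
     * E.g. (illustrative): if every input supplies numeric(10,2) for some
     * column, the Append's tlist keeps that typmod; if one input supplies
     * numeric(12,4) instead, the column's typmod becomes -1 (plain numeric).
     */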
    colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));

    foreach(tlistl, input_tlists)
    {
        List       *subtlist = (List *) lfirst(tlistl);
        ListCell   *subtlistl;

        curColType = list_head(colTypes);
        colindex = 0;
        foreach(subtlistl, subtlist)
        {
            TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);

            if (subtle->resjunk)
                continue;
            Assert(curColType != NULL);
            if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
            {
                /* If first subplan, copy the typmod; else compare */
                int32       subtypmod = exprTypmod((Node *) subtle->expr);

                if (tlistl == list_head(input_tlists))
                    colTypmods[colindex] = subtypmod;
                else if (subtypmod != colTypmods[colindex])
                    colTypmods[colindex] = -1;
            }
            else
            {
                /* types disagree, so force typmod to -1 */
                colTypmods[colindex] = -1;
            }
            curColType = lnext(colTypes, curColType);
            colindex++;
        }
        Assert(curColType == NULL);
    }

    /*
     * Now we can build the tlist for the Append.
     */
    colindex = 0;
    forthree(curColType, colTypes, curColCollation, colCollations,
             ref_tl_item, refnames_tlist)
    {
        Oid         colType = lfirst_oid(curColType);
        int32       colTypmod = colTypmods[colindex++];
        Oid         colColl = lfirst_oid(curColCollation);
        TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);

        Assert(reftle->resno == resno);
        Assert(!reftle->resjunk);
        expr = (Node *) makeVar(0,
                                resno,
                                colType,
                                colTypmod,
                                colColl,
                                0);
        tle = makeTargetEntry((Expr *) expr,
                              (AttrNumber) resno++,
                              pstrdup(reftle->resname),
                              false);

        /*
         * By convention, all non-resjunk columns in a setop tree have
         * ressortgroupref equal to their resno.  In some cases the ref isn't
         * needed, but this is a cleaner way than modifying the tlist later.
         */
        tle->ressortgroupref = tle->resno;

        tlist = lappend(tlist, tle);
    }

    if (flag)
    {
        /* Add a resjunk flag column */
        /* flag value is shown as copied up from subplan */
        expr = (Node *) makeVar(0,
                                resno,
                                INT4OID,
                                -1,
                                InvalidOid,
                                0);
        tle = makeTargetEntry((Expr *) expr,
                              (AttrNumber) resno++,
                              pstrdup("flag"),
                              true);
        tlist = lappend(tlist, tle);
    }

    return tlist;
}
/*
 * generate_setop_grouplist
 *      Build a SortGroupClause list defining the sort/grouping properties
 *      of the setop's output columns.
 *
 * Parse analysis already determined the properties and built a suitable
 * list, except that the entries do not have sortgrouprefs set because
 * the parser output representation doesn't include a tlist for each
 * setop.  So what we need to do here is copy that list and install
 * proper sortgrouprefs into it (copying those from the targetlist).
 */
static List *
generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
{
    List       *grouplist = copyObject(op->groupClauses);
    ListCell   *lg;
    ListCell   *lt;

    lg = list_head(grouplist);
    foreach(lt, targetlist)
    {
        TargetEntry *tle = (TargetEntry *) lfirst(lt);
        SortGroupClause *sgc;

        if (tle->resjunk)
        {
            /* resjunk columns should not have sortgrouprefs */
            Assert(tle->ressortgroupref == 0);
            continue;           /* ignore resjunk columns */
        }

        /* non-resjunk columns should have sortgroupref = resno */
        Assert(tle->ressortgroupref == tle->resno);

        /* non-resjunk columns should have grouping clauses */
        Assert(lg != NULL);
        sgc = (SortGroupClause *) lfirst(lg);
        lg = lnext(grouplist, lg);
        Assert(sgc->tleSortGroupRef == 0);

        sgc->tleSortGroupRef = tle->ressortgroupref;
    }
    Assert(lg == NULL);
    return grouplist;
}