Revert commit 66c0185a3 and follow-on patches.
[pgsql.git] / src / backend / optimizer / path / allpaths.c
blob10aeebc2c1d6e620cf371075ba6060b8c682312c
1 /*-------------------------------------------------------------------------
3 * allpaths.c
4 * Routines to find possible search paths for processing a query
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * src/backend/optimizer/path/allpaths.c
13 *-------------------------------------------------------------------------
16 #include "postgres.h"
18 #include <limits.h>
19 #include <math.h>
21 #include "access/sysattr.h"
22 #include "access/tsmapi.h"
23 #include "catalog/pg_class.h"
24 #include "catalog/pg_operator.h"
25 #include "catalog/pg_proc.h"
26 #include "foreign/fdwapi.h"
27 #include "miscadmin.h"
28 #include "nodes/makefuncs.h"
29 #include "nodes/nodeFuncs.h"
30 #include "nodes/supportnodes.h"
31 #ifdef OPTIMIZER_DEBUG
32 #include "nodes/print.h"
33 #endif
34 #include "optimizer/appendinfo.h"
35 #include "optimizer/clauses.h"
36 #include "optimizer/cost.h"
37 #include "optimizer/geqo.h"
38 #include "optimizer/optimizer.h"
39 #include "optimizer/pathnode.h"
40 #include "optimizer/paths.h"
41 #include "optimizer/plancat.h"
42 #include "optimizer/planner.h"
43 #include "optimizer/tlist.h"
44 #include "parser/parse_clause.h"
45 #include "parser/parsetree.h"
46 #include "partitioning/partbounds.h"
47 #include "port/pg_bitutils.h"
48 #include "rewrite/rewriteManip.h"
49 #include "utils/lsyscache.h"
/*
 * Bit values stored in pushdown_safety_info.unsafeFlags.  Each bit names one
 * reason recorded against a subquery target-list column; several bits may
 * be set at once.
 */
#define UNSAFE_HAS_VOLATILE_FUNC			(1 << 0)
#define UNSAFE_HAS_SET_FUNC					(1 << 1)
#define UNSAFE_NOTIN_DISTINCTON_CLAUSE		(1 << 2)
#define UNSAFE_NOTIN_PARTITIONBY_CLAUSE		(1 << 3)
#define UNSAFE_TYPE_MISMATCH				(1 << 4)
/*
 * pushdown_safety_info
 *		Results of subquery_is_pushdown_safe.
 */
typedef struct pushdown_safety_info
{
	/*
	 * Bitmask of reasons why this target list column is unsafe for qual
	 * pushdown, or 0 if no reason.
	 */
	unsigned char *unsafeFlags;

	/* don't push down volatile quals */
	bool		unsafeVolatile;

	/* don't push down leaky quals */
	bool		unsafeLeaky;
} pushdown_safety_info;
/*
 * pushdown_safe_type
 *		Verdict returned by qual_is_pushdown_safe.
 */
typedef enum pushdown_safe_type
{
	/* unsafe to push qual into subquery */
	PUSHDOWN_UNSAFE,

	/* safe to push qual into subquery */
	PUSHDOWN_SAFE,

	/* unsafe, but may work as WindowClause run condition */
	PUSHDOWN_WINDOWCLAUSE_RUNCOND,
} pushdown_safe_type;
78 /* These parameters are set by GUC */
79 bool enable_geqo = false; /* just in case GUC doesn't set it */
80 int geqo_threshold;
81 int min_parallel_table_scan_size;
82 int min_parallel_index_scan_size;
84 /* Hook for plugins to get control in set_rel_pathlist() */
85 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
87 /* Hook for plugins to replace standard_join_search() */
88 join_search_hook_type join_search_hook = NULL;
91 static void set_base_rel_consider_startup(PlannerInfo *root);
92 static void set_base_rel_sizes(PlannerInfo *root);
93 static void set_base_rel_pathlists(PlannerInfo *root);
94 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
95 Index rti, RangeTblEntry *rte);
96 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
97 Index rti, RangeTblEntry *rte);
98 static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
99 RangeTblEntry *rte);
100 static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
101 static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
102 RangeTblEntry *rte);
103 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
104 RangeTblEntry *rte);
105 static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
106 RangeTblEntry *rte);
107 static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
108 RangeTblEntry *rte);
109 static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
110 RangeTblEntry *rte);
111 static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
112 RangeTblEntry *rte);
113 static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
114 Index rti, RangeTblEntry *rte);
115 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
116 Index rti, RangeTblEntry *rte);
117 static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
118 List *live_childrels,
119 List *all_child_pathkeys);
120 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
121 RelOptInfo *rel,
122 Relids required_outer);
123 static void accumulate_append_subpath(Path *path,
124 List **subpaths,
125 List **special_subpaths);
126 static Path *get_singleton_append_subpath(Path *path);
127 static void set_dummy_rel_pathlist(RelOptInfo *rel);
128 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
129 Index rti, RangeTblEntry *rte);
130 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
131 RangeTblEntry *rte);
132 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
133 RangeTblEntry *rte);
134 static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel,
135 RangeTblEntry *rte);
136 static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
137 RangeTblEntry *rte);
138 static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
139 RangeTblEntry *rte);
140 static void set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
141 RangeTblEntry *rte);
142 static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
143 RangeTblEntry *rte);
144 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
145 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
146 pushdown_safety_info *safetyInfo);
147 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
148 pushdown_safety_info *safetyInfo);
149 static void check_output_expressions(Query *subquery,
150 pushdown_safety_info *safetyInfo);
151 static void compare_tlist_datatypes(List *tlist, List *colTypes,
152 pushdown_safety_info *safetyInfo);
153 static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query);
154 static pushdown_safe_type qual_is_pushdown_safe(Query *subquery, Index rti,
155 RestrictInfo *rinfo,
156 pushdown_safety_info *safetyInfo);
157 static void subquery_push_qual(Query *subquery,
158 RangeTblEntry *rte, Index rti, Node *qual);
159 static void recurse_push_qual(Node *setOp, Query *topquery,
160 RangeTblEntry *rte, Index rti, Node *qual);
161 static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel,
162 Bitmapset *extra_used_attrs);
166 * make_one_rel
167 * Finds all possible access paths for executing a query, returning a
168 * single rel that represents the join of all base rels in the query.
170 RelOptInfo *
171 make_one_rel(PlannerInfo *root, List *joinlist)
173 RelOptInfo *rel;
174 Index rti;
175 double total_pages;
177 /* Mark base rels as to whether we care about fast-start plans */
178 set_base_rel_consider_startup(root);
181 * Compute size estimates and consider_parallel flags for each base rel.
183 set_base_rel_sizes(root);
186 * We should now have size estimates for every actual table involved in
187 * the query, and we also know which if any have been deleted from the
188 * query by join removal, pruned by partition pruning, or eliminated by
189 * constraint exclusion. So we can now compute total_table_pages.
191 * Note that appendrels are not double-counted here, even though we don't
192 * bother to distinguish RelOptInfos for appendrel parents, because the
193 * parents will have pages = 0.
195 * XXX if a table is self-joined, we will count it once per appearance,
196 * which perhaps is the wrong thing ... but that's not completely clear,
197 * and detecting self-joins here is difficult, so ignore it for now.
199 total_pages = 0;
200 for (rti = 1; rti < root->simple_rel_array_size; rti++)
202 RelOptInfo *brel = root->simple_rel_array[rti];
204 /* there may be empty slots corresponding to non-baserel RTEs */
205 if (brel == NULL)
206 continue;
208 Assert(brel->relid == rti); /* sanity check on array */
210 if (IS_DUMMY_REL(brel))
211 continue;
213 if (IS_SIMPLE_REL(brel))
214 total_pages += (double) brel->pages;
216 root->total_table_pages = total_pages;
219 * Generate access paths for each base rel.
221 set_base_rel_pathlists(root);
224 * Generate access paths for the entire join tree.
226 rel = make_rel_from_joinlist(root, joinlist);
229 * The result should join all and only the query's base + outer-join rels.
231 Assert(bms_equal(rel->relids, root->all_query_rels));
233 return rel;
237 * set_base_rel_consider_startup
238 * Set the consider_[param_]startup flags for each base-relation entry.
240 * For the moment, we only deal with consider_param_startup here; because the
241 * logic for consider_startup is pretty trivial and is the same for every base
242 * relation, we just let build_simple_rel() initialize that flag correctly to
243 * start with. If that logic ever gets more complicated it would probably
244 * be better to move it here.
246 static void
247 set_base_rel_consider_startup(PlannerInfo *root)
250 * Since parameterized paths can only be used on the inside of a nestloop
251 * join plan, there is usually little value in considering fast-start
252 * plans for them. However, for relations that are on the RHS of a SEMI
253 * or ANTI join, a fast-start plan can be useful because we're only going
254 * to care about fetching one tuple anyway.
256 * To minimize growth of planning time, we currently restrict this to
257 * cases where the RHS is a single base relation, not a join; there is no
258 * provision for consider_param_startup to get set at all on joinrels.
259 * Also we don't worry about appendrels. costsize.c's costing rules for
260 * nestloop semi/antijoins don't consider such cases either.
262 ListCell *lc;
264 foreach(lc, root->join_info_list)
266 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
267 int varno;
269 if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
270 bms_get_singleton_member(sjinfo->syn_righthand, &varno))
272 RelOptInfo *rel = find_base_rel(root, varno);
274 rel->consider_param_startup = true;
280 * set_base_rel_sizes
281 * Set the size estimates (rows and widths) for each base-relation entry.
282 * Also determine whether to consider parallel paths for base relations.
284 * We do this in a separate pass over the base rels so that rowcount
285 * estimates are available for parameterized path generation, and also so
286 * that each rel's consider_parallel flag is set correctly before we begin to
287 * generate paths.
289 static void
290 set_base_rel_sizes(PlannerInfo *root)
292 Index rti;
294 for (rti = 1; rti < root->simple_rel_array_size; rti++)
296 RelOptInfo *rel = root->simple_rel_array[rti];
297 RangeTblEntry *rte;
299 /* there may be empty slots corresponding to non-baserel RTEs */
300 if (rel == NULL)
301 continue;
303 Assert(rel->relid == rti); /* sanity check on array */
305 /* ignore RTEs that are "other rels" */
306 if (rel->reloptkind != RELOPT_BASEREL)
307 continue;
309 rte = root->simple_rte_array[rti];
312 * If parallelism is allowable for this query in general, see whether
313 * it's allowable for this rel in particular. We have to do this
314 * before set_rel_size(), because (a) if this rel is an inheritance
315 * parent, set_append_rel_size() will use and perhaps change the rel's
316 * consider_parallel flag, and (b) for some RTE types, set_rel_size()
317 * goes ahead and makes paths immediately.
319 if (root->glob->parallelModeOK)
320 set_rel_consider_parallel(root, rel, rte);
322 set_rel_size(root, rel, rti, rte);
327 * set_base_rel_pathlists
328 * Finds all paths available for scanning each base-relation entry.
329 * Sequential scan and any available indices are considered.
330 * Each useful path is attached to its relation's 'pathlist' field.
332 static void
333 set_base_rel_pathlists(PlannerInfo *root)
335 Index rti;
337 for (rti = 1; rti < root->simple_rel_array_size; rti++)
339 RelOptInfo *rel = root->simple_rel_array[rti];
341 /* there may be empty slots corresponding to non-baserel RTEs */
342 if (rel == NULL)
343 continue;
345 Assert(rel->relid == rti); /* sanity check on array */
347 /* ignore RTEs that are "other rels" */
348 if (rel->reloptkind != RELOPT_BASEREL)
349 continue;
351 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
356 * set_rel_size
357 * Set size estimates for a base relation
359 static void
360 set_rel_size(PlannerInfo *root, RelOptInfo *rel,
361 Index rti, RangeTblEntry *rte)
363 if (rel->reloptkind == RELOPT_BASEREL &&
364 relation_excluded_by_constraints(root, rel, rte))
367 * We proved we don't need to scan the rel via constraint exclusion,
368 * so set up a single dummy path for it. Here we only check this for
369 * regular baserels; if it's an otherrel, CE was already checked in
370 * set_append_rel_size().
372 * In this case, we go ahead and set up the relation's path right away
373 * instead of leaving it for set_rel_pathlist to do. This is because
374 * we don't have a convention for marking a rel as dummy except by
375 * assigning a dummy path to it.
377 set_dummy_rel_pathlist(rel);
379 else if (rte->inh)
381 /* It's an "append relation", process accordingly */
382 set_append_rel_size(root, rel, rti, rte);
384 else
386 switch (rel->rtekind)
388 case RTE_RELATION:
389 if (rte->relkind == RELKIND_FOREIGN_TABLE)
391 /* Foreign table */
392 set_foreign_size(root, rel, rte);
394 else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
397 * We could get here if asked to scan a partitioned table
398 * with ONLY. In that case we shouldn't scan any of the
399 * partitions, so mark it as a dummy rel.
401 set_dummy_rel_pathlist(rel);
403 else if (rte->tablesample != NULL)
405 /* Sampled relation */
406 set_tablesample_rel_size(root, rel, rte);
408 else
410 /* Plain relation */
411 set_plain_rel_size(root, rel, rte);
413 break;
414 case RTE_SUBQUERY:
417 * Subqueries don't support making a choice between
418 * parameterized and unparameterized paths, so just go ahead
419 * and build their paths immediately.
421 set_subquery_pathlist(root, rel, rti, rte);
422 break;
423 case RTE_FUNCTION:
424 set_function_size_estimates(root, rel);
425 break;
426 case RTE_TABLEFUNC:
427 set_tablefunc_size_estimates(root, rel);
428 break;
429 case RTE_VALUES:
430 set_values_size_estimates(root, rel);
431 break;
432 case RTE_CTE:
435 * CTEs don't support making a choice between parameterized
436 * and unparameterized paths, so just go ahead and build their
437 * paths immediately.
439 if (rte->self_reference)
440 set_worktable_pathlist(root, rel, rte);
441 else
442 set_cte_pathlist(root, rel, rte);
443 break;
444 case RTE_NAMEDTUPLESTORE:
445 /* Might as well just build the path immediately */
446 set_namedtuplestore_pathlist(root, rel, rte);
447 break;
448 case RTE_RESULT:
449 /* Might as well just build the path immediately */
450 set_result_pathlist(root, rel, rte);
451 break;
452 default:
453 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
454 break;
459 * We insist that all non-dummy rels have a nonzero rowcount estimate.
461 Assert(rel->rows > 0 || IS_DUMMY_REL(rel));
465 * set_rel_pathlist
466 * Build access paths for a base relation
468 static void
469 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
470 Index rti, RangeTblEntry *rte)
472 if (IS_DUMMY_REL(rel))
474 /* We already proved the relation empty, so nothing more to do */
476 else if (rte->inh)
478 /* It's an "append relation", process accordingly */
479 set_append_rel_pathlist(root, rel, rti, rte);
481 else
483 switch (rel->rtekind)
485 case RTE_RELATION:
486 if (rte->relkind == RELKIND_FOREIGN_TABLE)
488 /* Foreign table */
489 set_foreign_pathlist(root, rel, rte);
491 else if (rte->tablesample != NULL)
493 /* Sampled relation */
494 set_tablesample_rel_pathlist(root, rel, rte);
496 else
498 /* Plain relation */
499 set_plain_rel_pathlist(root, rel, rte);
501 break;
502 case RTE_SUBQUERY:
503 /* Subquery --- fully handled during set_rel_size */
504 break;
505 case RTE_FUNCTION:
506 /* RangeFunction */
507 set_function_pathlist(root, rel, rte);
508 break;
509 case RTE_TABLEFUNC:
510 /* Table Function */
511 set_tablefunc_pathlist(root, rel, rte);
512 break;
513 case RTE_VALUES:
514 /* Values list */
515 set_values_pathlist(root, rel, rte);
516 break;
517 case RTE_CTE:
518 /* CTE reference --- fully handled during set_rel_size */
519 break;
520 case RTE_NAMEDTUPLESTORE:
521 /* tuplestore reference --- fully handled during set_rel_size */
522 break;
523 case RTE_RESULT:
524 /* simple Result --- fully handled during set_rel_size */
525 break;
526 default:
527 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
528 break;
533 * Allow a plugin to editorialize on the set of Paths for this base
534 * relation. It could add new paths (such as CustomPaths) by calling
535 * add_path(), or add_partial_path() if parallel aware. It could also
536 * delete or modify paths added by the core code.
538 if (set_rel_pathlist_hook)
539 (*set_rel_pathlist_hook) (root, rel, rti, rte);
542 * If this is a baserel, we should normally consider gathering any partial
543 * paths we may have created for it. We have to do this after calling the
544 * set_rel_pathlist_hook, else it cannot add partial paths to be included
545 * here.
547 * However, if this is an inheritance child, skip it. Otherwise, we could
548 * end up with a very large number of gather nodes, each trying to grab
549 * its own pool of workers. Instead, we'll consider gathering partial
550 * paths for the parent appendrel.
552 * Also, if this is the topmost scan/join rel, we postpone gathering until
553 * the final scan/join targetlist is available (see grouping_planner).
555 if (rel->reloptkind == RELOPT_BASEREL &&
556 !bms_equal(rel->relids, root->all_query_rels))
557 generate_useful_gather_paths(root, rel, false);
559 /* Now find the cheapest of the paths for this rel */
560 set_cheapest(rel);
562 #ifdef OPTIMIZER_DEBUG
563 pprint(rel);
564 #endif
568 * set_plain_rel_size
569 * Set size estimates for a plain relation (no subquery, no inheritance)
571 static void
572 set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
575 * Test any partial indexes of rel for applicability. We must do this
576 * first since partial unique indexes can affect size estimates.
578 check_index_predicates(root, rel);
580 /* Mark rel with estimated output rows, width, etc */
581 set_baserel_size_estimates(root, rel);
585 * If this relation could possibly be scanned from within a worker, then set
586 * its consider_parallel flag.
588 static void
589 set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
590 RangeTblEntry *rte)
593 * The flag has previously been initialized to false, so we can just
594 * return if it becomes clear that we can't safely set it.
596 Assert(!rel->consider_parallel);
598 /* Don't call this if parallelism is disallowed for the entire query. */
599 Assert(root->glob->parallelModeOK);
601 /* This should only be called for baserels and appendrel children. */
602 Assert(IS_SIMPLE_REL(rel));
604 /* Assorted checks based on rtekind. */
605 switch (rte->rtekind)
607 case RTE_RELATION:
610 * Currently, parallel workers can't access the leader's temporary
611 * tables. We could possibly relax this if we wrote all of its
612 * local buffers at the start of the query and made no changes
613 * thereafter (maybe we could allow hint bit changes), and if we
614 * taught the workers to read them. Writing a large number of
615 * temporary buffers could be expensive, though, and we don't have
616 * the rest of the necessary infrastructure right now anyway. So
617 * for now, bail out if we see a temporary table.
619 if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
620 return;
623 * Table sampling can be pushed down to workers if the sample
624 * function and its arguments are safe.
626 if (rte->tablesample != NULL)
628 char proparallel = func_parallel(rte->tablesample->tsmhandler);
630 if (proparallel != PROPARALLEL_SAFE)
631 return;
632 if (!is_parallel_safe(root, (Node *) rte->tablesample->args))
633 return;
637 * Ask FDWs whether they can support performing a ForeignScan
638 * within a worker. Most often, the answer will be no. For
639 * example, if the nature of the FDW is such that it opens a TCP
640 * connection with a remote server, each parallel worker would end
641 * up with a separate connection, and these connections might not
642 * be appropriately coordinated between workers and the leader.
644 if (rte->relkind == RELKIND_FOREIGN_TABLE)
646 Assert(rel->fdwroutine);
647 if (!rel->fdwroutine->IsForeignScanParallelSafe)
648 return;
649 if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte))
650 return;
654 * There are additional considerations for appendrels, which we'll
655 * deal with in set_append_rel_size and set_append_rel_pathlist.
656 * For now, just set consider_parallel based on the rel's own
657 * quals and targetlist.
659 break;
661 case RTE_SUBQUERY:
664 * There's no intrinsic problem with scanning a subquery-in-FROM
665 * (as distinct from a SubPlan or InitPlan) in a parallel worker.
666 * If the subquery doesn't happen to have any parallel-safe paths,
667 * then flagging it as consider_parallel won't change anything,
668 * but that's true for plain tables, too. We must set
669 * consider_parallel based on the rel's own quals and targetlist,
670 * so that if a subquery path is parallel-safe but the quals and
671 * projection we're sticking onto it are not, we correctly mark
672 * the SubqueryScanPath as not parallel-safe. (Note that
673 * set_subquery_pathlist() might push some of these quals down
674 * into the subquery itself, but that doesn't change anything.)
676 * We can't push sub-select containing LIMIT/OFFSET to workers as
677 * there is no guarantee that the row order will be fully
678 * deterministic, and applying LIMIT/OFFSET will lead to
679 * inconsistent results at the top-level. (In some cases, where
680 * the result is ordered, we could relax this restriction. But it
681 * doesn't currently seem worth expending extra effort to do so.)
684 Query *subquery = castNode(Query, rte->subquery);
686 if (limit_needed(subquery))
687 return;
689 break;
691 case RTE_JOIN:
692 /* Shouldn't happen; we're only considering baserels here. */
693 Assert(false);
694 return;
696 case RTE_FUNCTION:
697 /* Check for parallel-restricted functions. */
698 if (!is_parallel_safe(root, (Node *) rte->functions))
699 return;
700 break;
702 case RTE_TABLEFUNC:
703 /* not parallel safe */
704 return;
706 case RTE_VALUES:
707 /* Check for parallel-restricted functions. */
708 if (!is_parallel_safe(root, (Node *) rte->values_lists))
709 return;
710 break;
712 case RTE_CTE:
715 * CTE tuplestores aren't shared among parallel workers, so we
716 * force all CTE scans to happen in the leader. Also, populating
717 * the CTE would require executing a subplan that's not available
718 * in the worker, might be parallel-restricted, and must get
719 * executed only once.
721 return;
723 case RTE_NAMEDTUPLESTORE:
726 * tuplestore cannot be shared, at least without more
727 * infrastructure to support that.
729 return;
731 case RTE_RESULT:
732 /* RESULT RTEs, in themselves, are no problem. */
733 break;
737 * If there's anything in baserestrictinfo that's parallel-restricted, we
738 * give up on parallelizing access to this relation. We could consider
739 * instead postponing application of the restricted quals until we're
740 * above all the parallelism in the plan tree, but it's not clear that
741 * that would be a win in very many cases, and it might be tricky to make
742 * outer join clauses work correctly. It would likely break equivalence
743 * classes, too.
745 if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo))
746 return;
749 * Likewise, if the relation's outputs are not parallel-safe, give up.
750 * (Usually, they're just Vars, but sometimes they're not.)
752 if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs))
753 return;
755 /* We have a winner. */
756 rel->consider_parallel = true;
760 * set_plain_rel_pathlist
761 * Build access paths for a plain relation (no subquery, no inheritance)
763 static void
764 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
766 Relids required_outer;
769 * We don't support pushing join clauses into the quals of a seqscan, but
770 * it could still have required parameterization due to LATERAL refs in
771 * its tlist.
773 required_outer = rel->lateral_relids;
775 /* Consider sequential scan */
776 add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
778 /* If appropriate, consider parallel sequential scan */
779 if (rel->consider_parallel && required_outer == NULL)
780 create_plain_partial_paths(root, rel);
782 /* Consider index scans */
783 create_index_paths(root, rel);
785 /* Consider TID scans */
786 create_tidscan_paths(root, rel);
790 * create_plain_partial_paths
791 * Build partial access paths for parallel scan of a plain relation
793 static void
794 create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
796 int parallel_workers;
798 parallel_workers = compute_parallel_worker(rel, rel->pages, -1,
799 max_parallel_workers_per_gather);
801 /* If any limit was set to zero, the user doesn't want a parallel scan. */
802 if (parallel_workers <= 0)
803 return;
805 /* Add an unordered partial path based on a parallel sequential scan. */
806 add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers));
810 * set_tablesample_rel_size
811 * Set size estimates for a sampled relation
813 static void
814 set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
816 TableSampleClause *tsc = rte->tablesample;
817 TsmRoutine *tsm;
818 BlockNumber pages;
819 double tuples;
822 * Test any partial indexes of rel for applicability. We must do this
823 * first since partial unique indexes can affect size estimates.
825 check_index_predicates(root, rel);
828 * Call the sampling method's estimation function to estimate the number
829 * of pages it will read and the number of tuples it will return. (Note:
830 * we assume the function returns sane values.)
832 tsm = GetTsmRoutine(tsc->tsmhandler);
833 tsm->SampleScanGetSampleSize(root, rel, tsc->args,
834 &pages, &tuples);
837 * For the moment, because we will only consider a SampleScan path for the
838 * rel, it's okay to just overwrite the pages and tuples estimates for the
839 * whole relation. If we ever consider multiple path types for sampled
840 * rels, we'll need more complication.
842 rel->pages = pages;
843 rel->tuples = tuples;
845 /* Mark rel with estimated output rows, width, etc */
846 set_baserel_size_estimates(root, rel);
850 * set_tablesample_rel_pathlist
851 * Build access paths for a sampled relation
853 static void
854 set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
856 Relids required_outer;
857 Path *path;
860 * We don't support pushing join clauses into the quals of a samplescan,
861 * but it could still have required parameterization due to LATERAL refs
862 * in its tlist or TABLESAMPLE arguments.
864 required_outer = rel->lateral_relids;
866 /* Consider sampled scan */
867 path = create_samplescan_path(root, rel, required_outer);
870 * If the sampling method does not support repeatable scans, we must avoid
871 * plans that would scan the rel multiple times. Ideally, we'd simply
872 * avoid putting the rel on the inside of a nestloop join; but adding such
873 * a consideration to the planner seems like a great deal of complication
874 * to support an uncommon usage of second-rate sampling methods. Instead,
875 * if there is a risk that the query might perform an unsafe join, just
876 * wrap the SampleScan in a Materialize node. We can check for joins by
877 * counting the membership of all_query_rels (note that this correctly
878 * counts inheritance trees as single rels). If we're inside a subquery,
879 * we can't easily check whether a join might occur in the outer query, so
880 * just assume one is possible.
882 * GetTsmRoutine is relatively expensive compared to the other tests here,
883 * so check repeatable_across_scans last, even though that's a bit odd.
885 if ((root->query_level > 1 ||
886 bms_membership(root->all_query_rels) != BMS_SINGLETON) &&
887 !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
889 path = (Path *) create_material_path(rel, path);
892 add_path(rel, path);
894 /* For the moment, at least, there are no other paths to consider */
898 * set_foreign_size
899 * Set size estimates for a foreign table RTE
901 static void
902 set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
904 /* Mark rel with estimated output rows, width, etc */
905 set_foreign_size_estimates(root, rel);
907 /* Let FDW adjust the size estimates, if it can */
908 rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid);
910 /* ... but do not let it set the rows estimate to zero */
911 rel->rows = clamp_row_est(rel->rows);
914 * Also, make sure rel->tuples is not insane relative to rel->rows.
915 * Notably, this ensures sanity if pg_class.reltuples contains -1 and the
916 * FDW doesn't do anything to replace that.
918 rel->tuples = Max(rel->tuples, rel->rows);
922 * set_foreign_pathlist
923 * Build access paths for a foreign table RTE
925 static void
926 set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
928 /* Call the FDW's GetForeignPaths function to generate path(s) */
929 rel->fdwroutine->GetForeignPaths(root, rel, rte->relid);
933 * set_append_rel_size
934 * Set size estimates for a simple "append relation"
936 * The passed-in rel and RTE represent the entire append relation. The
937 * relation's contents are computed by appending together the output of the
938 * individual member relations. Note that in the non-partitioned inheritance
939 * case, the first member relation is actually the same table as is mentioned
940 * in the parent RTE ... but it has a different RTE and RelOptInfo. This is
941 * a good thing because their outputs are not the same size.
943 static void
944 set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
945 Index rti, RangeTblEntry *rte)
947 int parentRTindex = rti;
948 bool has_live_children;
949 double parent_rows;
950 double parent_size;
951 double *parent_attrsizes;
952 int nattrs;
953 ListCell *l;
955 /* Guard against stack overflow due to overly deep inheritance tree. */
956 check_stack_depth();
958 Assert(IS_SIMPLE_REL(rel));
961 * If this is a partitioned baserel, set the consider_partitionwise_join
962 * flag; currently, we only consider partitionwise joins with the baserel
963 * if its targetlist doesn't contain a whole-row Var.
965 if (enable_partitionwise_join &&
966 rel->reloptkind == RELOPT_BASEREL &&
967 rte->relkind == RELKIND_PARTITIONED_TABLE &&
968 bms_is_empty(rel->attr_needed[InvalidAttrNumber - rel->min_attr]))
969 rel->consider_partitionwise_join = true;
972 * Initialize to compute size estimates for whole append relation.
974 * We handle width estimates by weighting the widths of different child
975 * rels proportionally to their number of rows. This is sensible because
976 * the use of width estimates is mainly to compute the total relation
977 * "footprint" if we have to sort or hash it. To do this, we sum the
978 * total equivalent size (in "double" arithmetic) and then divide by the
979 * total rowcount estimate. This is done separately for the total rel
980 * width and each attribute.
982 * Note: if you consider changing this logic, beware that child rels could
983 * have zero rows and/or width, if they were excluded by constraints.
985 has_live_children = false;
986 parent_rows = 0;
987 parent_size = 0;
988 nattrs = rel->max_attr - rel->min_attr + 1;
989 parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
991 foreach(l, root->append_rel_list)
993 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
994 int childRTindex;
995 RangeTblEntry *childRTE;
996 RelOptInfo *childrel;
997 List *childrinfos;
998 ListCell *parentvars;
999 ListCell *childvars;
1000 ListCell *lc;
1002 /* append_rel_list contains all append rels; ignore others */
1003 if (appinfo->parent_relid != parentRTindex)
1004 continue;
1006 childRTindex = appinfo->child_relid;
1007 childRTE = root->simple_rte_array[childRTindex];
1010 * The child rel's RelOptInfo was already created during
1011 * add_other_rels_to_query.
1013 childrel = find_base_rel(root, childRTindex);
1014 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
1016 /* We may have already proven the child to be dummy. */
1017 if (IS_DUMMY_REL(childrel))
1018 continue;
1021 * We have to copy the parent's targetlist and quals to the child,
1022 * with appropriate substitution of variables. However, the
1023 * baserestrictinfo quals were already copied/substituted when the
1024 * child RelOptInfo was built. So we don't need any additional setup
1025 * before applying constraint exclusion.
1027 if (relation_excluded_by_constraints(root, childrel, childRTE))
1030 * This child need not be scanned, so we can omit it from the
1031 * appendrel.
1033 set_dummy_rel_pathlist(childrel);
1034 continue;
1038 * Constraint exclusion failed, so copy the parent's join quals and
1039 * targetlist to the child, with appropriate variable substitutions.
1041 * We skip join quals that came from above outer joins that can null
1042 * this rel, since they would be of no value while generating paths
1043 * for the child. This saves some effort while processing the child
1044 * rel, and it also avoids an implementation restriction in
1045 * adjust_appendrel_attrs (it can't apply nullingrels to a non-Var).
1047 childrinfos = NIL;
1048 foreach(lc, rel->joininfo)
1050 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1052 if (!bms_overlap(rinfo->clause_relids, rel->nulling_relids))
1053 childrinfos = lappend(childrinfos,
1054 adjust_appendrel_attrs(root,
1055 (Node *) rinfo,
1056 1, &appinfo));
1058 childrel->joininfo = childrinfos;
1061 * Now for the child's targetlist.
1063 * NB: the resulting childrel->reltarget->exprs may contain arbitrary
1064 * expressions, which otherwise would not occur in a rel's targetlist.
1065 * Code that might be looking at an appendrel child must cope with
1066 * such. (Normally, a rel's targetlist would only include Vars and
1067 * PlaceHolderVars.) XXX we do not bother to update the cost or width
1068 * fields of childrel->reltarget; not clear if that would be useful.
1070 childrel->reltarget->exprs = (List *)
1071 adjust_appendrel_attrs(root,
1072 (Node *) rel->reltarget->exprs,
1073 1, &appinfo);
1076 * We have to make child entries in the EquivalenceClass data
1077 * structures as well. This is needed either if the parent
1078 * participates in some eclass joins (because we will want to consider
1079 * inner-indexscan joins on the individual children) or if the parent
1080 * has useful pathkeys (because we should try to build MergeAppend
1081 * paths that produce those sort orderings).
1083 if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
1084 add_child_rel_equivalences(root, appinfo, rel, childrel);
1085 childrel->has_eclass_joins = rel->has_eclass_joins;
1088 * Note: we could compute appropriate attr_needed data for the child's
1089 * variables, by transforming the parent's attr_needed through the
1090 * translated_vars mapping. However, currently there's no need
1091 * because attr_needed is only examined for base relations not
1092 * otherrels. So we just leave the child's attr_needed empty.
1096 * If we consider partitionwise joins with the parent rel, do the same
1097 * for partitioned child rels.
1099 * Note: here we abuse the consider_partitionwise_join flag by setting
1100 * it for child rels that are not themselves partitioned. We do so to
1101 * tell try_partitionwise_join() that the child rel is sufficiently
1102 * valid to be used as a per-partition input, even if it later gets
1103 * proven to be dummy. (It's not usable until we've set up the
1104 * reltarget and EC entries, which we just did.)
1106 if (rel->consider_partitionwise_join)
1107 childrel->consider_partitionwise_join = true;
1110 * If parallelism is allowable for this query in general, see whether
1111 * it's allowable for this childrel in particular. But if we've
1112 * already decided the appendrel is not parallel-safe as a whole,
1113 * there's no point in considering parallelism for this child. For
1114 * consistency, do this before calling set_rel_size() for the child.
1116 if (root->glob->parallelModeOK && rel->consider_parallel)
1117 set_rel_consider_parallel(root, childrel, childRTE);
1120 * Compute the child's size.
1122 set_rel_size(root, childrel, childRTindex, childRTE);
1125 * It is possible that constraint exclusion detected a contradiction
1126 * within a child subquery, even though we didn't prove one above. If
1127 * so, we can skip this child.
1129 if (IS_DUMMY_REL(childrel))
1130 continue;
1132 /* We have at least one live child. */
1133 has_live_children = true;
1136 * If any live child is not parallel-safe, treat the whole appendrel
1137 * as not parallel-safe. In future we might be able to generate plans
1138 * in which some children are farmed out to workers while others are
1139 * not; but we don't have that today, so it's a waste to consider
1140 * partial paths anywhere in the appendrel unless it's all safe.
1141 * (Child rels visited before this one will be unmarked in
1142 * set_append_rel_pathlist().)
1144 if (!childrel->consider_parallel)
1145 rel->consider_parallel = false;
1148 * Accumulate size information from each live child.
1150 Assert(childrel->rows > 0);
1152 parent_rows += childrel->rows;
1153 parent_size += childrel->reltarget->width * childrel->rows;
1156 * Accumulate per-column estimates too. We need not do anything for
1157 * PlaceHolderVars in the parent list. If child expression isn't a
1158 * Var, or we didn't record a width estimate for it, we have to fall
1159 * back on a datatype-based estimate.
1161 * By construction, child's targetlist is 1-to-1 with parent's.
1163 forboth(parentvars, rel->reltarget->exprs,
1164 childvars, childrel->reltarget->exprs)
1166 Var *parentvar = (Var *) lfirst(parentvars);
1167 Node *childvar = (Node *) lfirst(childvars);
1169 if (IsA(parentvar, Var) && parentvar->varno == parentRTindex)
1171 int pndx = parentvar->varattno - rel->min_attr;
1172 int32 child_width = 0;
1174 if (IsA(childvar, Var) &&
1175 ((Var *) childvar)->varno == childrel->relid)
1177 int cndx = ((Var *) childvar)->varattno - childrel->min_attr;
1179 child_width = childrel->attr_widths[cndx];
1181 if (child_width <= 0)
1182 child_width = get_typavgwidth(exprType(childvar),
1183 exprTypmod(childvar));
1184 Assert(child_width > 0);
1185 parent_attrsizes[pndx] += child_width * childrel->rows;
1190 if (has_live_children)
1193 * Save the finished size estimates.
1195 int i;
1197 Assert(parent_rows > 0);
1198 rel->rows = parent_rows;
1199 rel->reltarget->width = rint(parent_size / parent_rows);
1200 for (i = 0; i < nattrs; i++)
1201 rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
1204 * Set "raw tuples" count equal to "rows" for the appendrel; needed
1205 * because some places assume rel->tuples is valid for any baserel.
1207 rel->tuples = parent_rows;
1210 * Note that we leave rel->pages as zero; this is important to avoid
1211 * double-counting the appendrel tree in total_table_pages.
1214 else
1217 * All children were excluded by constraints, so mark the whole
1218 * appendrel dummy. We must do this in this phase so that the rel's
1219 * dummy-ness is visible when we generate paths for other rels.
1221 set_dummy_rel_pathlist(rel);
1224 pfree(parent_attrsizes);
1228 * set_append_rel_pathlist
1229 * Build access paths for an "append relation"
1231 static void
1232 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
1233 Index rti, RangeTblEntry *rte)
1235 int parentRTindex = rti;
1236 List *live_childrels = NIL;
1237 ListCell *l;
1240 * Generate access paths for each member relation, and remember the
1241 * non-dummy children.
1243 foreach(l, root->append_rel_list)
1245 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
1246 int childRTindex;
1247 RangeTblEntry *childRTE;
1248 RelOptInfo *childrel;
1250 /* append_rel_list contains all append rels; ignore others */
1251 if (appinfo->parent_relid != parentRTindex)
1252 continue;
1254 /* Re-locate the child RTE and RelOptInfo */
1255 childRTindex = appinfo->child_relid;
1256 childRTE = root->simple_rte_array[childRTindex];
1257 childrel = root->simple_rel_array[childRTindex];
1260 * If set_append_rel_size() decided the parent appendrel was
1261 * parallel-unsafe at some point after visiting this child rel, we
1262 * need to propagate the unsafety marking down to the child, so that
1263 * we don't generate useless partial paths for it.
1265 if (!rel->consider_parallel)
1266 childrel->consider_parallel = false;
1269 * Compute the child's access paths.
1271 set_rel_pathlist(root, childrel, childRTindex, childRTE);
1274 * If child is dummy, ignore it.
1276 if (IS_DUMMY_REL(childrel))
1277 continue;
1280 * Child is live, so add it to the live_childrels list for use below.
1282 live_childrels = lappend(live_childrels, childrel);
1285 /* Add paths to the append relation. */
1286 add_paths_to_append_rel(root, rel, live_childrels);
1291 * add_paths_to_append_rel
1292 * Generate paths for the given append relation given the set of non-dummy
1293 * child rels.
1295 * The function collects all parameterizations and orderings supported by the
1296 * non-dummy children. For every such parameterization or ordering, it creates
1297 * an append path collecting one path from each non-dummy child with given
1298 * parameterization or ordering. Similarly it collects partial paths from
1299 * non-dummy children to create partial append paths.
1301 void
1302 add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
1303 List *live_childrels)
1305 List *subpaths = NIL;
1306 bool subpaths_valid = true;
1307 List *startup_subpaths = NIL;
1308 bool startup_subpaths_valid = true;
1309 List *partial_subpaths = NIL;
1310 List *pa_partial_subpaths = NIL;
1311 List *pa_nonpartial_subpaths = NIL;
1312 bool partial_subpaths_valid = true;
1313 bool pa_subpaths_valid;
1314 List *all_child_pathkeys = NIL;
1315 List *all_child_outers = NIL;
1316 ListCell *l;
1317 double partial_rows = -1;
1319 /* If appropriate, consider parallel append */
1320 pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;
1323 * For every non-dummy child, remember the cheapest path. Also, identify
1324 * all pathkeys (orderings) and parameterizations (required_outer sets)
1325 * available for the non-dummy member relations.
1327 foreach(l, live_childrels)
1329 RelOptInfo *childrel = lfirst(l);
1330 ListCell *lcp;
1331 Path *cheapest_partial_path = NULL;
1334 * If child has an unparameterized cheapest-total path, add that to
1335 * the unparameterized Append path we are constructing for the parent.
1336 * If not, there's no workable unparameterized path.
1338 * With partitionwise aggregates, the child rel's pathlist may be
1339 * empty, so don't assume that a path exists here.
1341 if (childrel->pathlist != NIL &&
1342 childrel->cheapest_total_path->param_info == NULL)
1343 accumulate_append_subpath(childrel->cheapest_total_path,
1344 &subpaths, NULL);
1345 else
1346 subpaths_valid = false;
1349 * When the planner is considering cheap startup plans, we'll also
1350 * collect all the cheapest_startup_paths (if set) and build an
1351 * AppendPath containing those as subpaths.
1353 if (rel->consider_startup && childrel->cheapest_startup_path != NULL)
1355 /* cheapest_startup_path must not be a parameterized path. */
1356 Assert(childrel->cheapest_startup_path->param_info == NULL);
1357 accumulate_append_subpath(childrel->cheapest_startup_path,
1358 &startup_subpaths,
1359 NULL);
1361 else
1362 startup_subpaths_valid = false;
1365 /* Same idea, but for a partial plan. */
1366 if (childrel->partial_pathlist != NIL)
1368 cheapest_partial_path = linitial(childrel->partial_pathlist);
1369 accumulate_append_subpath(cheapest_partial_path,
1370 &partial_subpaths, NULL);
1372 else
1373 partial_subpaths_valid = false;
1376 * Same idea, but for a parallel append mixing partial and non-partial
1377 * paths.
1379 if (pa_subpaths_valid)
1381 Path *nppath = NULL;
1383 nppath =
1384 get_cheapest_parallel_safe_total_inner(childrel->pathlist);
1386 if (cheapest_partial_path == NULL && nppath == NULL)
1388 /* Neither a partial nor a parallel-safe path? Forget it. */
1389 pa_subpaths_valid = false;
1391 else if (nppath == NULL ||
1392 (cheapest_partial_path != NULL &&
1393 cheapest_partial_path->total_cost < nppath->total_cost))
1395 /* Partial path is cheaper or the only option. */
1396 Assert(cheapest_partial_path != NULL);
1397 accumulate_append_subpath(cheapest_partial_path,
1398 &pa_partial_subpaths,
1399 &pa_nonpartial_subpaths);
1401 else
1404 * Either we've got only a non-partial path, or we think that
1405 * a single backend can execute the best non-partial path
1406 * faster than all the parallel backends working together can
1407 * execute the best partial path.
1409 * It might make sense to be more aggressive here. Even if
1410 * the best non-partial path is more expensive than the best
1411 * partial path, it could still be better to choose the
1412 * non-partial path if there are several such paths that can
1413 * be given to different workers. For now, we don't try to
1414 * figure that out.
1416 accumulate_append_subpath(nppath,
1417 &pa_nonpartial_subpaths,
1418 NULL);
1423 * Collect lists of all the available path orderings and
1424 * parameterizations for all the children. We use these as a
1425 * heuristic to indicate which sort orderings and parameterizations we
1426 * should build Append and MergeAppend paths for.
1428 foreach(lcp, childrel->pathlist)
1430 Path *childpath = (Path *) lfirst(lcp);
1431 List *childkeys = childpath->pathkeys;
1432 Relids childouter = PATH_REQ_OUTER(childpath);
1434 /* Unsorted paths don't contribute to pathkey list */
1435 if (childkeys != NIL)
1437 ListCell *lpk;
1438 bool found = false;
1440 /* Have we already seen this ordering? */
1441 foreach(lpk, all_child_pathkeys)
1443 List *existing_pathkeys = (List *) lfirst(lpk);
1445 if (compare_pathkeys(existing_pathkeys,
1446 childkeys) == PATHKEYS_EQUAL)
1448 found = true;
1449 break;
1452 if (!found)
1454 /* No, so add it to all_child_pathkeys */
1455 all_child_pathkeys = lappend(all_child_pathkeys,
1456 childkeys);
1460 /* Unparameterized paths don't contribute to param-set list */
1461 if (childouter)
1463 ListCell *lco;
1464 bool found = false;
1466 /* Have we already seen this param set? */
1467 foreach(lco, all_child_outers)
1469 Relids existing_outers = (Relids) lfirst(lco);
1471 if (bms_equal(existing_outers, childouter))
1473 found = true;
1474 break;
1477 if (!found)
1479 /* No, so add it to all_child_outers */
1480 all_child_outers = lappend(all_child_outers,
1481 childouter);
1488 * If we found unparameterized paths for all children, build an unordered,
1489 * unparameterized Append path for the rel. (Note: this is correct even
1490 * if we have zero or one live subpath due to constraint exclusion.)
1492 if (subpaths_valid)
1493 add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
1494 NIL, NULL, 0, false,
1495 -1));
1497 /* build an AppendPath for the cheap startup paths, if valid */
1498 if (startup_subpaths_valid)
1499 add_path(rel, (Path *) create_append_path(root, rel, startup_subpaths,
1500 NIL, NIL, NULL, 0, false, -1));
1503 * Consider an append of unordered, unparameterized partial paths. Make
1504 * it parallel-aware if possible.
1506 if (partial_subpaths_valid && partial_subpaths != NIL)
1508 AppendPath *appendpath;
1509 ListCell *lc;
1510 int parallel_workers = 0;
1512 /* Find the highest number of workers requested for any subpath. */
1513 foreach(lc, partial_subpaths)
1515 Path *path = lfirst(lc);
1517 parallel_workers = Max(parallel_workers, path->parallel_workers);
1519 Assert(parallel_workers > 0);
1522 * If the use of parallel append is permitted, always request at least
1523 * log2(# of children) workers. We assume it can be useful to have
1524 * extra workers in this case because they will be spread out across
1525 * the children. The precise formula is just a guess, but we don't
1526 * want to end up with a radically different answer for a table with N
1527 * partitions vs. an unpartitioned table with the same data, so the
1528 * use of some kind of log-scaling here seems to make some sense.
1530 if (enable_parallel_append)
1532 parallel_workers = Max(parallel_workers,
1533 pg_leftmost_one_pos32(list_length(live_childrels)) + 1);
1534 parallel_workers = Min(parallel_workers,
1535 max_parallel_workers_per_gather);
1537 Assert(parallel_workers > 0);
1539 /* Generate a partial append path. */
1540 appendpath = create_append_path(root, rel, NIL, partial_subpaths,
1541 NIL, NULL, parallel_workers,
1542 enable_parallel_append,
1543 -1);
1546 * Make sure any subsequent partial paths use the same row count
1547 * estimate.
1549 partial_rows = appendpath->path.rows;
1551 /* Add the path. */
1552 add_partial_path(rel, (Path *) appendpath);
1556 * Consider a parallel-aware append using a mix of partial and non-partial
1557 * paths. (This only makes sense if there's at least one child which has
1558 * a non-partial path that is substantially cheaper than any partial path;
1559 * otherwise, we should use the append path added in the previous step.)
1561 if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL)
1563 AppendPath *appendpath;
1564 ListCell *lc;
1565 int parallel_workers = 0;
1568 * Find the highest number of workers requested for any partial
1569 * subpath.
1571 foreach(lc, pa_partial_subpaths)
1573 Path *path = lfirst(lc);
1575 parallel_workers = Max(parallel_workers, path->parallel_workers);
1579 * Same formula here as above. It's even more important in this
1580 * instance because the non-partial paths won't contribute anything to
1581 * the planned number of parallel workers.
1583 parallel_workers = Max(parallel_workers,
1584 pg_leftmost_one_pos32(list_length(live_childrels)) + 1);
1585 parallel_workers = Min(parallel_workers,
1586 max_parallel_workers_per_gather);
1587 Assert(parallel_workers > 0);
1589 appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
1590 pa_partial_subpaths,
1591 NIL, NULL, parallel_workers, true,
1592 partial_rows);
1593 add_partial_path(rel, (Path *) appendpath);
1597 * Also build unparameterized ordered append paths based on the collected
1598 * list of child pathkeys.
1600 if (subpaths_valid)
1601 generate_orderedappend_paths(root, rel, live_childrels,
1602 all_child_pathkeys);
1605 * Build Append paths for each parameterization seen among the child rels.
1606 * (This may look pretty expensive, but in most cases of practical
1607 * interest, the child rels will expose mostly the same parameterizations,
1608 * so that not that many cases actually get considered here.)
1610 * The Append node itself cannot enforce quals, so all qual checking must
1611 * be done in the child paths. This means that to have a parameterized
1612 * Append path, we must have the exact same parameterization for each
1613 * child path; otherwise some children might be failing to check the
1614 * moved-down quals. To make them match up, we can try to increase the
1615 * parameterization of lesser-parameterized paths.
1617 foreach(l, all_child_outers)
1619 Relids required_outer = (Relids) lfirst(l);
1620 ListCell *lcr;
1622 /* Select the child paths for an Append with this parameterization */
1623 subpaths = NIL;
1624 subpaths_valid = true;
1625 foreach(lcr, live_childrels)
1627 RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
1628 Path *subpath;
1630 if (childrel->pathlist == NIL)
1632 /* failed to make a suitable path for this child */
1633 subpaths_valid = false;
1634 break;
1637 subpath = get_cheapest_parameterized_child_path(root,
1638 childrel,
1639 required_outer);
1640 if (subpath == NULL)
1642 /* failed to make a suitable path for this child */
1643 subpaths_valid = false;
1644 break;
1646 accumulate_append_subpath(subpath, &subpaths, NULL);
1649 if (subpaths_valid)
1650 add_path(rel, (Path *)
1651 create_append_path(root, rel, subpaths, NIL,
1652 NIL, required_outer, 0, false,
1653 -1));
1657 * When there is only a single child relation, the Append path can inherit
1658 * any ordering available for the child rel's path, so that it's useful to
1659 * consider ordered partial paths. Above we only considered the cheapest
1660 * partial path for each child, but let's also make paths using any
1661 * partial paths that have pathkeys.
1663 if (list_length(live_childrels) == 1)
1665 RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels);
1667 /* skip the cheapest partial path, since we already used that above */
1668 for_each_from(l, childrel->partial_pathlist, 1)
1670 Path *path = (Path *) lfirst(l);
1671 AppendPath *appendpath;
1673 /* skip paths with no pathkeys. */
1674 if (path->pathkeys == NIL)
1675 continue;
1677 appendpath = create_append_path(root, rel, NIL, list_make1(path),
1678 NIL, NULL,
1679 path->parallel_workers, true,
1680 partial_rows);
1681 add_partial_path(rel, (Path *) appendpath);
1687 * generate_orderedappend_paths
1688 * Generate ordered append paths for an append relation
1690 * Usually we generate MergeAppend paths here, but there are some special
1691 * cases where we can generate simple Append paths, because the subpaths
1692 * can provide tuples in the required order already.
1694 * We generate a path for each ordering (pathkey list) appearing in
1695 * all_child_pathkeys.
1697 * We consider both cheapest-startup and cheapest-total cases, ie, for each
1698 * interesting ordering, collect all the cheapest startup subpaths and all the
1699 * cheapest total paths, and build a suitable path for each case.
1701 * We don't currently generate any parameterized ordered paths here. While
1702 * it would not take much more code here to do so, it's very unclear that it
1703 * is worth the planning cycles to investigate such paths: there's little
1704 * use for an ordered path on the inside of a nestloop. In fact, it's likely
1705 * that the current coding of add_path would reject such paths out of hand,
1706 * because add_path gives no credit for sort ordering of parameterized paths,
1707 * and a parameterized MergeAppend is going to be more expensive than the
1708 * corresponding parameterized Append path. If we ever try harder to support
1709 * parameterized mergejoin plans, it might be worth adding support for
1710 * parameterized paths here to feed such joins. (See notes in
1711 * optimizer/README for why that might not ever happen, though.)
1713 static void
1714 generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
1715 List *live_childrels,
1716 List *all_child_pathkeys)
1718 ListCell *lcp;
1719 List *partition_pathkeys = NIL;
1720 List *partition_pathkeys_desc = NIL;
1721 bool partition_pathkeys_partial = true;
1722 bool partition_pathkeys_desc_partial = true;
1725 * Some partitioned table setups may allow us to use an Append node
1726 * instead of a MergeAppend. This is possible in cases such as RANGE
1727 * partitioned tables where it's guaranteed that an earlier partition must
1728 * contain rows which come earlier in the sort order. To detect whether
1729 * this is relevant, build pathkey descriptions of the partition ordering,
1730 * for both forward and reverse scans.
1732 if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) &&
1733 partitions_are_ordered(rel->boundinfo, rel->live_parts))
1735 partition_pathkeys = build_partition_pathkeys(root, rel,
1736 ForwardScanDirection,
1737 &partition_pathkeys_partial);
1739 partition_pathkeys_desc = build_partition_pathkeys(root, rel,
1740 BackwardScanDirection,
1741 &partition_pathkeys_desc_partial);
1744 * You might think we should truncate_useless_pathkeys here, but
1745 * allowing partition keys which are a subset of the query's pathkeys
1746 * can often be useful. For example, consider a table partitioned by
1747 * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child
1748 * paths that can produce the a, b, c ordering (perhaps via indexes on
1749 * (a, b, c)) then it works to consider the appendrel output as
1750 * ordered by a, b, c.
1754 /* Now consider each interesting sort ordering */
1755 foreach(lcp, all_child_pathkeys)
1757 List *pathkeys = (List *) lfirst(lcp);
1758 List *startup_subpaths = NIL;
1759 List *total_subpaths = NIL;
1760 List *fractional_subpaths = NIL;
1761 bool startup_neq_total = false;
1762 bool match_partition_order;
1763 bool match_partition_order_desc;
1764 int end_index;
1765 int first_index;
1766 int direction;
1769 * Determine if this sort ordering matches any partition pathkeys we
1770 * have, for both ascending and descending partition order. If the
1771 * partition pathkeys happen to be contained in pathkeys then it still
1772 * works, as described above, providing that the partition pathkeys
1773 * are complete and not just a prefix of the partition keys. (In such
1774 * cases we'll be relying on the child paths to have sorted the
1775 * lower-order columns of the required pathkeys.)
1777 match_partition_order =
1778 pathkeys_contained_in(pathkeys, partition_pathkeys) ||
1779 (!partition_pathkeys_partial &&
1780 pathkeys_contained_in(partition_pathkeys, pathkeys));
1782 match_partition_order_desc = !match_partition_order &&
1783 (pathkeys_contained_in(pathkeys, partition_pathkeys_desc) ||
1784 (!partition_pathkeys_desc_partial &&
1785 pathkeys_contained_in(partition_pathkeys_desc, pathkeys)));
1788 * When the required pathkeys match the reverse of the partition
1789 * order, we must build the list of paths in reverse starting with the
1790 * last matching partition first. We can get away without making any
1791 * special cases for this in the loop below by just looping backward
1792 * over the child relations in this case.
1794 if (match_partition_order_desc)
1796 /* loop backward */
1797 first_index = list_length(live_childrels) - 1;
1798 end_index = -1;
1799 direction = -1;
1802 * Set this to true to save us having to check for
1803 * match_partition_order_desc in the loop below.
1805 match_partition_order = true;
1807 else
1809 /* for all other case, loop forward */
1810 first_index = 0;
1811 end_index = list_length(live_childrels);
1812 direction = 1;
1815 /* Select the child paths for this ordering... */
1816 for (int i = first_index; i != end_index; i += direction)
1818 RelOptInfo *childrel = list_nth_node(RelOptInfo, live_childrels, i);
1819 Path *cheapest_startup,
1820 *cheapest_total,
1821 *cheapest_fractional = NULL;
1823 /* Locate the right paths, if they are available. */
1824 cheapest_startup =
1825 get_cheapest_path_for_pathkeys(childrel->pathlist,
1826 pathkeys,
1827 NULL,
1828 STARTUP_COST,
1829 false);
1830 cheapest_total =
1831 get_cheapest_path_for_pathkeys(childrel->pathlist,
1832 pathkeys,
1833 NULL,
1834 TOTAL_COST,
1835 false);
1838 * If we can't find any paths with the right order just use the
1839 * cheapest-total path; we'll have to sort it later.
1841 if (cheapest_startup == NULL || cheapest_total == NULL)
1843 cheapest_startup = cheapest_total =
1844 childrel->cheapest_total_path;
1845 /* Assert we do have an unparameterized path for this child */
1846 Assert(cheapest_total->param_info == NULL);
1850 * When building a fractional path, determine a cheapest
1851 * fractional path for each child relation too. Looking at startup
1852 * and total costs is not enough, because the cheapest fractional
1853 * path may be dominated by two separate paths (one for startup,
1854 * one for total).
1856 * When needed (building fractional path), determine the cheapest
1857 * fractional path too.
1859 if (root->tuple_fraction > 0)
1861 double path_fraction = (1.0 / root->tuple_fraction);
1863 cheapest_fractional =
1864 get_cheapest_fractional_path_for_pathkeys(childrel->pathlist,
1865 pathkeys,
1866 NULL,
1867 path_fraction);
1870 * If we found no path with matching pathkeys, use the
1871 * cheapest total path instead.
1873 * XXX We might consider partially sorted paths too (with an
1874 * incremental sort on top). But we'd have to build all the
1875 * incremental paths, do the costing etc.
1877 if (!cheapest_fractional)
1878 cheapest_fractional = cheapest_total;
1882 * Notice whether we actually have different paths for the
1883 * "cheapest" and "total" cases; frequently there will be no point
1884 * in two create_merge_append_path() calls.
1886 if (cheapest_startup != cheapest_total)
1887 startup_neq_total = true;
1890 * Collect the appropriate child paths. The required logic varies
1891 * for the Append and MergeAppend cases.
1893 if (match_partition_order)
1896 * We're going to make a plain Append path. We don't need
1897 * most of what accumulate_append_subpath would do, but we do
1898 * want to cut out child Appends or MergeAppends if they have
1899 * just a single subpath (and hence aren't doing anything
1900 * useful).
1902 cheapest_startup = get_singleton_append_subpath(cheapest_startup);
1903 cheapest_total = get_singleton_append_subpath(cheapest_total);
1905 startup_subpaths = lappend(startup_subpaths, cheapest_startup);
1906 total_subpaths = lappend(total_subpaths, cheapest_total);
1908 if (cheapest_fractional)
1910 cheapest_fractional = get_singleton_append_subpath(cheapest_fractional);
1911 fractional_subpaths = lappend(fractional_subpaths, cheapest_fractional);
1914 else
1917 * Otherwise, rely on accumulate_append_subpath to collect the
1918 * child paths for the MergeAppend.
1920 accumulate_append_subpath(cheapest_startup,
1921 &startup_subpaths, NULL);
1922 accumulate_append_subpath(cheapest_total,
1923 &total_subpaths, NULL);
1925 if (cheapest_fractional)
1926 accumulate_append_subpath(cheapest_fractional,
1927 &fractional_subpaths, NULL);
1931 /* ... and build the Append or MergeAppend paths */
1932 if (match_partition_order)
1934 /* We only need Append */
1935 add_path(rel, (Path *) create_append_path(root,
1936 rel,
1937 startup_subpaths,
1938 NIL,
1939 pathkeys,
1940 NULL,
1942 false,
1943 -1));
1944 if (startup_neq_total)
1945 add_path(rel, (Path *) create_append_path(root,
1946 rel,
1947 total_subpaths,
1948 NIL,
1949 pathkeys,
1950 NULL,
1952 false,
1953 -1));
1955 if (fractional_subpaths)
1956 add_path(rel, (Path *) create_append_path(root,
1957 rel,
1958 fractional_subpaths,
1959 NIL,
1960 pathkeys,
1961 NULL,
1963 false,
1964 -1));
1966 else
1968 /* We need MergeAppend */
1969 add_path(rel, (Path *) create_merge_append_path(root,
1970 rel,
1971 startup_subpaths,
1972 pathkeys,
1973 NULL));
1974 if (startup_neq_total)
1975 add_path(rel, (Path *) create_merge_append_path(root,
1976 rel,
1977 total_subpaths,
1978 pathkeys,
1979 NULL));
1981 if (fractional_subpaths)
1982 add_path(rel, (Path *) create_merge_append_path(root,
1983 rel,
1984 fractional_subpaths,
1985 pathkeys,
1986 NULL));
1992 * get_cheapest_parameterized_child_path
1993 * Get cheapest path for this relation that has exactly the requested
1994 * parameterization.
1996 * Returns NULL if unable to create such a path.
1998 static Path *
1999 get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
2000 Relids required_outer)
2002 Path *cheapest;
2003 ListCell *lc;
2006 * Look up the cheapest existing path with no more than the needed
2007 * parameterization. If it has exactly the needed parameterization, we're
2008 * done.
2010 cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
2011 NIL,
2012 required_outer,
2013 TOTAL_COST,
2014 false);
2015 Assert(cheapest != NULL);
2016 if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
2017 return cheapest;
2020 * Otherwise, we can "reparameterize" an existing path to match the given
2021 * parameterization, which effectively means pushing down additional
2022 * joinquals to be checked within the path's scan. However, some existing
2023 * paths might check the available joinquals already while others don't;
2024 * therefore, it's not clear which existing path will be cheapest after
2025 * reparameterization. We have to go through them all and find out.
2027 cheapest = NULL;
2028 foreach(lc, rel->pathlist)
2030 Path *path = (Path *) lfirst(lc);
2032 /* Can't use it if it needs more than requested parameterization */
2033 if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
2034 continue;
2037 * Reparameterization can only increase the path's cost, so if it's
2038 * already more expensive than the current cheapest, forget it.
2040 if (cheapest != NULL &&
2041 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
2042 continue;
2044 /* Reparameterize if needed, then recheck cost */
2045 if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
2047 path = reparameterize_path(root, path, required_outer, 1.0);
2048 if (path == NULL)
2049 continue; /* failed to reparameterize this one */
2050 Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
2052 if (cheapest != NULL &&
2053 compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
2054 continue;
2057 /* We have a new best path */
2058 cheapest = path;
2061 /* Return the best path, or NULL if we found no suitable candidate */
2062 return cheapest;
2066 * accumulate_append_subpath
2067 * Add a subpath to the list being built for an Append or MergeAppend.
2069 * It's possible that the child is itself an Append or MergeAppend path, in
2070 * which case we can "cut out the middleman" and just add its child paths to
2071 * our own list. (We don't try to do this earlier because we need to apply
2072 * both levels of transformation to the quals.)
2074 * Note that if we omit a child MergeAppend in this way, we are effectively
2075 * omitting a sort step, which seems fine: if the parent is to be an Append,
2076 * its result would be unsorted anyway, while if the parent is to be a
2077 * MergeAppend, there's no point in a separate sort on a child.
2079 * Normally, either path is a partial path and subpaths is a list of partial
2080 * paths, or else path is a non-partial plan and subpaths is a list of those.
2081 * However, if path is a parallel-aware Append, then we add its partial path
2082 * children to subpaths and the rest to special_subpaths. If the latter is
2083 * NULL, we don't flatten the path at all (unless it contains only partial
2084 * paths).
2086 static void
2087 accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
2089 if (IsA(path, AppendPath))
2091 AppendPath *apath = (AppendPath *) path;
2093 if (!apath->path.parallel_aware || apath->first_partial_path == 0)
2095 *subpaths = list_concat(*subpaths, apath->subpaths);
2096 return;
2098 else if (special_subpaths != NULL)
2100 List *new_special_subpaths;
2102 /* Split Parallel Append into partial and non-partial subpaths */
2103 *subpaths = list_concat(*subpaths,
2104 list_copy_tail(apath->subpaths,
2105 apath->first_partial_path));
2106 new_special_subpaths = list_copy_head(apath->subpaths,
2107 apath->first_partial_path);
2108 *special_subpaths = list_concat(*special_subpaths,
2109 new_special_subpaths);
2110 return;
2113 else if (IsA(path, MergeAppendPath))
2115 MergeAppendPath *mpath = (MergeAppendPath *) path;
2117 *subpaths = list_concat(*subpaths, mpath->subpaths);
2118 return;
2121 *subpaths = lappend(*subpaths, path);
2125 * get_singleton_append_subpath
2126 * Returns the single subpath of an Append/MergeAppend, or just
2127 * return 'path' if it's not a single sub-path Append/MergeAppend.
2129 * Note: 'path' must not be a parallel-aware path.
2131 static Path *
2132 get_singleton_append_subpath(Path *path)
2134 Assert(!path->parallel_aware);
2136 if (IsA(path, AppendPath))
2138 AppendPath *apath = (AppendPath *) path;
2140 if (list_length(apath->subpaths) == 1)
2141 return (Path *) linitial(apath->subpaths);
2143 else if (IsA(path, MergeAppendPath))
2145 MergeAppendPath *mpath = (MergeAppendPath *) path;
2147 if (list_length(mpath->subpaths) == 1)
2148 return (Path *) linitial(mpath->subpaths);
2151 return path;
2155 * set_dummy_rel_pathlist
2156 * Build a dummy path for a relation that's been excluded by constraints
2158 * Rather than inventing a special "dummy" path type, we represent this as an
2159 * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros).
2161 * (See also mark_dummy_rel, which does basically the same thing, but is
2162 * typically used to change a rel into dummy state after we already made
2163 * paths for it.)
2165 static void
2166 set_dummy_rel_pathlist(RelOptInfo *rel)
2168 /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
2169 rel->rows = 0;
2170 rel->reltarget->width = 0;
2172 /* Discard any pre-existing paths; no further need for them */
2173 rel->pathlist = NIL;
2174 rel->partial_pathlist = NIL;
2176 /* Set up the dummy path */
2177 add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
2178 NIL, rel->lateral_relids,
2179 0, false, -1));
2182 * We set the cheapest-path fields immediately, just in case they were
2183 * pointing at some discarded path. This is redundant in current usage
2184 * because set_rel_pathlist will do it later, but it's cheap so we keep it
2185 * for safety and consistency with mark_dummy_rel.
2187 set_cheapest(rel);
2191 * find_window_run_conditions
2192 * Determine if 'wfunc' is really a WindowFunc and call its prosupport
2193 * function to determine the function's monotonic properties. We then
2194 * see if 'opexpr' can be used to short-circuit execution.
2196 * For example row_number() over (order by ...) always produces a value one
2197 * higher than the previous. If someone has a window function in a subquery
2198 * and has a WHERE clause in the outer query to filter rows <= 10, then we may
2199 * as well stop processing the windowagg once the row number reaches 11. Here
2200 * we check if 'opexpr' might help us to stop doing needless extra processing
2201 * in WindowAgg nodes.
2203 * '*keep_original' is set to true if the caller should also use 'opexpr' for
2204 * its original purpose. This is set to false if the caller can assume that
2205 * the run condition will handle all of the required filtering.
2207 * Returns true if 'opexpr' was found to be useful and was added to the
2208 * WindowFunc's runCondition. We also set *keep_original accordingly and add
2209 * 'attno' to *run_cond_attrs offset by FirstLowInvalidHeapAttributeNumber.
2210 * If the 'opexpr' cannot be used then we set *keep_original to true and
2211 * return false.
2213 static bool
2214 find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti,
2215 AttrNumber attno, WindowFunc *wfunc, OpExpr *opexpr,
2216 bool wfunc_left, bool *keep_original,
2217 Bitmapset **run_cond_attrs)
2219 Oid prosupport;
2220 Expr *otherexpr;
2221 SupportRequestWFuncMonotonic req;
2222 SupportRequestWFuncMonotonic *res;
2223 WindowClause *wclause;
2224 List *opinfos;
2225 OpExpr *runopexpr;
2226 Oid runoperator;
2227 ListCell *lc;
2229 *keep_original = true;
2231 while (IsA(wfunc, RelabelType))
2232 wfunc = (WindowFunc *) ((RelabelType *) wfunc)->arg;
2234 /* we can only work with window functions */
2235 if (!IsA(wfunc, WindowFunc))
2236 return false;
2238 /* can't use it if there are subplans in the WindowFunc */
2239 if (contain_subplans((Node *) wfunc))
2240 return false;
2242 prosupport = get_func_support(wfunc->winfnoid);
2244 /* Check if there's a support function for 'wfunc' */
2245 if (!OidIsValid(prosupport))
2246 return false;
2248 /* get the Expr from the other side of the OpExpr */
2249 if (wfunc_left)
2250 otherexpr = lsecond(opexpr->args);
2251 else
2252 otherexpr = linitial(opexpr->args);
2255 * The value being compared must not change during the evaluation of the
2256 * window partition.
2258 if (!is_pseudo_constant_clause((Node *) otherexpr))
2259 return false;
2261 /* find the window clause belonging to the window function */
2262 wclause = (WindowClause *) list_nth(subquery->windowClause,
2263 wfunc->winref - 1);
2265 req.type = T_SupportRequestWFuncMonotonic;
2266 req.window_func = wfunc;
2267 req.window_clause = wclause;
2269 /* call the support function */
2270 res = (SupportRequestWFuncMonotonic *)
2271 DatumGetPointer(OidFunctionCall1(prosupport,
2272 PointerGetDatum(&req)));
2275 * Nothing to do if the function is neither monotonically increasing nor
2276 * monotonically decreasing.
2278 if (res == NULL || res->monotonic == MONOTONICFUNC_NONE)
2279 return false;
2281 runopexpr = NULL;
2282 runoperator = InvalidOid;
2283 opinfos = get_op_btree_interpretation(opexpr->opno);
2285 foreach(lc, opinfos)
2287 OpBtreeInterpretation *opinfo = (OpBtreeInterpretation *) lfirst(lc);
2288 int strategy = opinfo->strategy;
2290 /* handle < / <= */
2291 if (strategy == BTLessStrategyNumber ||
2292 strategy == BTLessEqualStrategyNumber)
2295 * < / <= is supported for monotonically increasing functions in
2296 * the form <wfunc> op <pseudoconst> and <pseudoconst> op <wfunc>
2297 * for monotonically decreasing functions.
2299 if ((wfunc_left && (res->monotonic & MONOTONICFUNC_INCREASING)) ||
2300 (!wfunc_left && (res->monotonic & MONOTONICFUNC_DECREASING)))
2302 *keep_original = false;
2303 runopexpr = opexpr;
2304 runoperator = opexpr->opno;
2306 break;
2308 /* handle > / >= */
2309 else if (strategy == BTGreaterStrategyNumber ||
2310 strategy == BTGreaterEqualStrategyNumber)
2313 * > / >= is supported for monotonically decreasing functions in
2314 * the form <wfunc> op <pseudoconst> and <pseudoconst> op <wfunc>
2315 * for monotonically increasing functions.
2317 if ((wfunc_left && (res->monotonic & MONOTONICFUNC_DECREASING)) ||
2318 (!wfunc_left && (res->monotonic & MONOTONICFUNC_INCREASING)))
2320 *keep_original = false;
2321 runopexpr = opexpr;
2322 runoperator = opexpr->opno;
2324 break;
2326 /* handle = */
2327 else if (strategy == BTEqualStrategyNumber)
2329 int16 newstrategy;
2332 * When both monotonically increasing and decreasing then the
2333 * return value of the window function will be the same each time.
2334 * We can simply use 'opexpr' as the run condition without
2335 * modifying it.
2337 if ((res->monotonic & MONOTONICFUNC_BOTH) == MONOTONICFUNC_BOTH)
2339 *keep_original = false;
2340 runopexpr = opexpr;
2341 runoperator = opexpr->opno;
2342 break;
2346 * When monotonically increasing we make a qual with <wfunc> <=
2347 * <value> or <value> >= <wfunc> in order to filter out values
2348 * which are above the value in the equality condition. For
2349 * monotonically decreasing functions we want to filter values
2350 * below the value in the equality condition.
2352 if (res->monotonic & MONOTONICFUNC_INCREASING)
2353 newstrategy = wfunc_left ? BTLessEqualStrategyNumber : BTGreaterEqualStrategyNumber;
2354 else
2355 newstrategy = wfunc_left ? BTGreaterEqualStrategyNumber : BTLessEqualStrategyNumber;
2357 /* We must keep the original equality qual */
2358 *keep_original = true;
2359 runopexpr = opexpr;
2361 /* determine the operator to use for the WindowFuncRunCondition */
2362 runoperator = get_opfamily_member(opinfo->opfamily_id,
2363 opinfo->oplefttype,
2364 opinfo->oprighttype,
2365 newstrategy);
2366 break;
2370 if (runopexpr != NULL)
2372 WindowFuncRunCondition *wfuncrc;
2374 wfuncrc = makeNode(WindowFuncRunCondition);
2375 wfuncrc->opno = runoperator;
2376 wfuncrc->inputcollid = runopexpr->inputcollid;
2377 wfuncrc->wfunc_left = wfunc_left;
2378 wfuncrc->arg = copyObject(otherexpr);
2380 wfunc->runCondition = lappend(wfunc->runCondition, wfuncrc);
2382 /* record that this attno was used in a run condition */
2383 *run_cond_attrs = bms_add_member(*run_cond_attrs,
2384 attno - FirstLowInvalidHeapAttributeNumber);
2385 return true;
2388 /* unsupported OpExpr */
2389 return false;
2393 * check_and_push_window_quals
2394 * Check if 'clause' is a qual that can be pushed into a WindowFunc
2395 * as a 'runCondition' qual. These, when present, allow some unnecessary
2396 * work to be skipped during execution.
2398 * 'run_cond_attrs' will be populated with all targetlist resnos of subquery
2399 * targets (offset by FirstLowInvalidHeapAttributeNumber) that we pushed
2400 * window quals for.
2402 * Returns true if the caller still must keep the original qual or false if
2403 * the caller can safely ignore the original qual because the WindowAgg node
2404 * will use the runCondition to stop returning tuples.
2406 static bool
2407 check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti,
2408 Node *clause, Bitmapset **run_cond_attrs)
2410 OpExpr *opexpr = (OpExpr *) clause;
2411 bool keep_original = true;
2412 Var *var1;
2413 Var *var2;
2415 /* We're only able to use OpExprs with 2 operands */
2416 if (!IsA(opexpr, OpExpr))
2417 return true;
2419 if (list_length(opexpr->args) != 2)
2420 return true;
2423 * Currently, we restrict this optimization to strict OpExprs. The reason
2424 * for this is that during execution, once the runcondition becomes false,
2425 * we stop evaluating WindowFuncs. To avoid leaving around stale window
2426 * function result values, we set them to NULL. Having only strict
2427 * OpExprs here ensures that we properly filter out the tuples with NULLs
2428 * in the top-level WindowAgg.
2430 set_opfuncid(opexpr);
2431 if (!func_strict(opexpr->opfuncid))
2432 return true;
2435 * Check for plain Vars that reference window functions in the subquery.
2436 * If we find any, we'll ask find_window_run_conditions() if 'opexpr' can
2437 * be used as part of the run condition.
2440 /* Check the left side of the OpExpr */
2441 var1 = linitial(opexpr->args);
2442 if (IsA(var1, Var) && var1->varattno > 0)
2444 TargetEntry *tle = list_nth(subquery->targetList, var1->varattno - 1);
2445 WindowFunc *wfunc = (WindowFunc *) tle->expr;
2447 if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc,
2448 opexpr, true, &keep_original,
2449 run_cond_attrs))
2450 return keep_original;
2453 /* and check the right side */
2454 var2 = lsecond(opexpr->args);
2455 if (IsA(var2, Var) && var2->varattno > 0)
2457 TargetEntry *tle = list_nth(subquery->targetList, var2->varattno - 1);
2458 WindowFunc *wfunc = (WindowFunc *) tle->expr;
2460 if (find_window_run_conditions(subquery, rte, rti, tle->resno, wfunc,
2461 opexpr, false, &keep_original,
2462 run_cond_attrs))
2463 return keep_original;
2466 return true;
2470 * set_subquery_pathlist
2471 * Generate SubqueryScan access paths for a subquery RTE
2473 * We don't currently support generating parameterized paths for subqueries
2474 * by pushing join clauses down into them; it seems too expensive to re-plan
2475 * the subquery multiple times to consider different alternatives.
2476 * (XXX that could stand to be reconsidered, now that we use Paths.)
2477 * So the paths made here will be parameterized if the subquery contains
2478 * LATERAL references, otherwise not. As long as that's true, there's no need
2479 * for a separate set_subquery_size phase: just make the paths right away.
2481 static void
2482 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
2483 Index rti, RangeTblEntry *rte)
2485 Query *parse = root->parse;
2486 Query *subquery = rte->subquery;
2487 bool trivial_pathtarget;
2488 Relids required_outer;
2489 pushdown_safety_info safetyInfo;
2490 double tuple_fraction;
2491 RelOptInfo *sub_final_rel;
2492 Bitmapset *run_cond_attrs = NULL;
2493 ListCell *lc;
2496 * Must copy the Query so that planning doesn't mess up the RTE contents
2497 * (really really need to fix the planner to not scribble on its input,
2498 * someday ... but see remove_unused_subquery_outputs to start with).
2500 subquery = copyObject(subquery);
2503 * If it's a LATERAL subquery, it might contain some Vars of the current
2504 * query level, requiring it to be treated as parameterized, even though
2505 * we don't support pushing down join quals into subqueries.
2507 required_outer = rel->lateral_relids;
2510 * Zero out result area for subquery_is_pushdown_safe, so that it can set
2511 * flags as needed while recursing. In particular, we need a workspace
2512 * for keeping track of the reasons why columns are unsafe to reference.
2513 * These reasons are stored in the bits inside unsafeFlags[i] when we
2514 * discover reasons that column i of the subquery is unsafe to be used in
2515 * a pushed-down qual.
2517 memset(&safetyInfo, 0, sizeof(safetyInfo));
2518 safetyInfo.unsafeFlags = (unsigned char *)
2519 palloc0((list_length(subquery->targetList) + 1) * sizeof(unsigned char));
2522 * If the subquery has the "security_barrier" flag, it means the subquery
2523 * originated from a view that must enforce row-level security. Then we
2524 * must not push down quals that contain leaky functions. (Ideally this
2525 * would be checked inside subquery_is_pushdown_safe, but since we don't
2526 * currently pass the RTE to that function, we must do it here.)
2528 safetyInfo.unsafeLeaky = rte->security_barrier;
2531 * If there are any restriction clauses that have been attached to the
2532 * subquery relation, consider pushing them down to become WHERE or HAVING
2533 * quals of the subquery itself. This transformation is useful because it
2534 * may allow us to generate a better plan for the subquery than evaluating
2535 * all the subquery output rows and then filtering them.
2537 * There are several cases where we cannot push down clauses. Restrictions
2538 * involving the subquery are checked by subquery_is_pushdown_safe().
2539 * Restrictions on individual clauses are checked by
2540 * qual_is_pushdown_safe(). Also, we don't want to push down
2541 * pseudoconstant clauses; better to have the gating node above the
2542 * subquery.
2544 * Non-pushed-down clauses will get evaluated as qpquals of the
2545 * SubqueryScan node.
2547 * XXX Are there any cases where we want to make a policy decision not to
2548 * push down a pushable qual, because it'd result in a worse plan?
2550 if (rel->baserestrictinfo != NIL &&
2551 subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
2553 /* OK to consider pushing down individual quals */
2554 List *upperrestrictlist = NIL;
2555 ListCell *l;
2557 foreach(l, rel->baserestrictinfo)
2559 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
2560 Node *clause = (Node *) rinfo->clause;
2562 if (rinfo->pseudoconstant)
2564 upperrestrictlist = lappend(upperrestrictlist, rinfo);
2565 continue;
2568 switch (qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo))
2570 case PUSHDOWN_SAFE:
2571 /* Push it down */
2572 subquery_push_qual(subquery, rte, rti, clause);
2573 break;
2575 case PUSHDOWN_WINDOWCLAUSE_RUNCOND:
2578 * Since we can't push the qual down into the subquery,
2579 * check if it happens to reference a window function. If
2580 * so then it might be useful to use for the WindowAgg's
2581 * runCondition.
2583 if (!subquery->hasWindowFuncs ||
2584 check_and_push_window_quals(subquery, rte, rti, clause,
2585 &run_cond_attrs))
2588 * subquery has no window funcs or the clause is not a
2589 * suitable window run condition qual or it is, but
2590 * the original must also be kept in the upper query.
2592 upperrestrictlist = lappend(upperrestrictlist, rinfo);
2594 break;
2596 case PUSHDOWN_UNSAFE:
2597 upperrestrictlist = lappend(upperrestrictlist, rinfo);
2598 break;
2601 rel->baserestrictinfo = upperrestrictlist;
2602 /* We don't bother recomputing baserestrict_min_security */
2605 pfree(safetyInfo.unsafeFlags);
2608 * The upper query might not use all the subquery's output columns; if
2609 * not, we can simplify. Pass the attributes that were pushed down into
2610 * WindowAgg run conditions to ensure we don't accidentally think those
2611 * are unused.
2613 remove_unused_subquery_outputs(subquery, rel, run_cond_attrs);
2616 * We can safely pass the outer tuple_fraction down to the subquery if the
2617 * outer level has no joining, aggregation, or sorting to do. Otherwise
2618 * we'd better tell the subquery to plan for full retrieval. (XXX This
2619 * could probably be made more intelligent ...)
2621 if (parse->hasAggs ||
2622 parse->groupClause ||
2623 parse->groupingSets ||
2624 root->hasHavingQual ||
2625 parse->distinctClause ||
2626 parse->sortClause ||
2627 bms_membership(root->all_baserels) == BMS_MULTIPLE)
2628 tuple_fraction = 0.0; /* default case */
2629 else
2630 tuple_fraction = root->tuple_fraction;
2632 /* plan_params should not be in use in current query level */
2633 Assert(root->plan_params == NIL);
2635 /* Generate a subroot and Paths for the subquery */
2636 rel->subroot = subquery_planner(root->glob, subquery,
2637 root,
2638 false, tuple_fraction);
2640 /* Isolate the params needed by this specific subplan */
2641 rel->subplan_params = root->plan_params;
2642 root->plan_params = NIL;
2645 * It's possible that constraint exclusion proved the subquery empty. If
2646 * so, it's desirable to produce an unadorned dummy path so that we will
2647 * recognize appropriate optimizations at this query level.
2649 sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
2651 if (IS_DUMMY_REL(sub_final_rel))
2653 set_dummy_rel_pathlist(rel);
2654 return;
2658 * Mark rel with estimated output rows, width, etc. Note that we have to
2659 * do this before generating outer-query paths, else cost_subqueryscan is
2660 * not happy.
2662 set_subquery_size_estimates(root, rel);
2665 * Also detect whether the reltarget is trivial, so that we can pass that
2666 * info to cost_subqueryscan (rather than re-deriving it multiple times).
2667 * It's trivial if it fetches all the subplan output columns in order.
2669 if (list_length(rel->reltarget->exprs) != list_length(subquery->targetList))
2670 trivial_pathtarget = false;
2671 else
2673 trivial_pathtarget = true;
2674 foreach(lc, rel->reltarget->exprs)
2676 Node *node = (Node *) lfirst(lc);
2677 Var *var;
2679 if (!IsA(node, Var))
2681 trivial_pathtarget = false;
2682 break;
2684 var = (Var *) node;
2685 if (var->varno != rti ||
2686 var->varattno != foreach_current_index(lc) + 1)
2688 trivial_pathtarget = false;
2689 break;
2695 * For each Path that subquery_planner produced, make a SubqueryScanPath
2696 * in the outer query.
2698 foreach(lc, sub_final_rel->pathlist)
2700 Path *subpath = (Path *) lfirst(lc);
2701 List *pathkeys;
2703 /* Convert subpath's pathkeys to outer representation */
2704 pathkeys = convert_subquery_pathkeys(root,
2705 rel,
2706 subpath->pathkeys,
2707 make_tlist_from_pathtarget(subpath->pathtarget));
2709 /* Generate outer path using this subpath */
2710 add_path(rel, (Path *)
2711 create_subqueryscan_path(root, rel, subpath,
2712 trivial_pathtarget,
2713 pathkeys, required_outer));
2716 /* If outer rel allows parallelism, do same for partial paths. */
2717 if (rel->consider_parallel && bms_is_empty(required_outer))
2719 /* If consider_parallel is false, there should be no partial paths. */
2720 Assert(sub_final_rel->consider_parallel ||
2721 sub_final_rel->partial_pathlist == NIL);
2723 /* Same for partial paths. */
2724 foreach(lc, sub_final_rel->partial_pathlist)
2726 Path *subpath = (Path *) lfirst(lc);
2727 List *pathkeys;
2729 /* Convert subpath's pathkeys to outer representation */
2730 pathkeys = convert_subquery_pathkeys(root,
2731 rel,
2732 subpath->pathkeys,
2733 make_tlist_from_pathtarget(subpath->pathtarget));
2735 /* Generate outer path using this subpath */
2736 add_partial_path(rel, (Path *)
2737 create_subqueryscan_path(root, rel, subpath,
2738 trivial_pathtarget,
2739 pathkeys,
2740 required_outer));
2746 * set_function_pathlist
2747 * Build the (single) access path for a function RTE
2749 static void
2750 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2752 Relids required_outer;
2753 List *pathkeys = NIL;
2756 * We don't support pushing join clauses into the quals of a function
2757 * scan, but it could still have required parameterization due to LATERAL
2758 * refs in the function expression.
2760 required_outer = rel->lateral_relids;
2763 * The result is considered unordered unless ORDINALITY was used, in which
2764 * case it is ordered by the ordinal column (the last one). See if we
2765 * care, by checking for uses of that Var in equivalence classes.
2767 if (rte->funcordinality)
2769 AttrNumber ordattno = rel->max_attr;
2770 Var *var = NULL;
2771 ListCell *lc;
2774 * Is there a Var for it in rel's targetlist? If not, the query did
2775 * not reference the ordinality column, or at least not in any way
2776 * that would be interesting for sorting.
2778 foreach(lc, rel->reltarget->exprs)
2780 Var *node = (Var *) lfirst(lc);
2782 /* checking varno/varlevelsup is just paranoia */
2783 if (IsA(node, Var) &&
2784 node->varattno == ordattno &&
2785 node->varno == rel->relid &&
2786 node->varlevelsup == 0)
2788 var = node;
2789 break;
2794 * Try to build pathkeys for this Var with int8 sorting. We tell
2795 * build_expression_pathkey not to build any new equivalence class; if
2796 * the Var isn't already mentioned in some EC, it means that nothing
2797 * cares about the ordering.
2799 if (var)
2800 pathkeys = build_expression_pathkey(root,
2801 (Expr *) var,
2802 Int8LessOperator,
2803 rel->relids,
2804 false);
2807 /* Generate appropriate path */
2808 add_path(rel, create_functionscan_path(root, rel,
2809 pathkeys, required_outer));
2813 * set_values_pathlist
2814 * Build the (single) access path for a VALUES RTE
2816 static void
2817 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2819 Relids required_outer;
2822 * We don't support pushing join clauses into the quals of a values scan,
2823 * but it could still have required parameterization due to LATERAL refs
2824 * in the values expressions.
2826 required_outer = rel->lateral_relids;
2828 /* Generate appropriate path */
2829 add_path(rel, create_valuesscan_path(root, rel, required_outer));
2833 * set_tablefunc_pathlist
2834 * Build the (single) access path for a table func RTE
2836 static void
2837 set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2839 Relids required_outer;
2842 * We don't support pushing join clauses into the quals of a tablefunc
2843 * scan, but it could still have required parameterization due to LATERAL
2844 * refs in the function expression.
2846 required_outer = rel->lateral_relids;
2848 /* Generate appropriate path */
2849 add_path(rel, create_tablefuncscan_path(root, rel,
2850 required_outer));
2854 * set_cte_pathlist
2855 * Build the (single) access path for a non-self-reference CTE RTE
2857 * There's no need for a separate set_cte_size phase, since we don't
2858 * support join-qual-parameterized paths for CTEs.
2860 static void
2861 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2863 Path *ctepath;
2864 Plan *cteplan;
2865 PlannerInfo *cteroot;
2866 Index levelsup;
2867 List *pathkeys;
2868 int ndx;
2869 ListCell *lc;
2870 int plan_id;
2871 Relids required_outer;
2874 * Find the referenced CTE, and locate the path and plan previously made
2875 * for it.
2877 levelsup = rte->ctelevelsup;
2878 cteroot = root;
2879 while (levelsup-- > 0)
2881 cteroot = cteroot->parent_root;
2882 if (!cteroot) /* shouldn't happen */
2883 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
2887 * Note: cte_plan_ids can be shorter than cteList, if we are still working
2888 * on planning the CTEs (ie, this is a side-reference from another CTE).
2889 * So we mustn't use forboth here.
2891 ndx = 0;
2892 foreach(lc, cteroot->parse->cteList)
2894 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
2896 if (strcmp(cte->ctename, rte->ctename) == 0)
2897 break;
2898 ndx++;
2900 if (lc == NULL) /* shouldn't happen */
2901 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
2902 if (ndx >= list_length(cteroot->cte_plan_ids))
2903 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
2904 plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
2905 if (plan_id <= 0)
2906 elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename);
2908 Assert(list_length(root->glob->subpaths) == list_length(root->glob->subplans));
2909 ctepath = (Path *) list_nth(root->glob->subpaths, plan_id - 1);
2910 cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
2912 /* Mark rel with estimated output rows, width, etc */
2913 set_cte_size_estimates(root, rel, cteplan->plan_rows);
2915 /* Convert the ctepath's pathkeys to outer query's representation */
2916 pathkeys = convert_subquery_pathkeys(root,
2917 rel,
2918 ctepath->pathkeys,
2919 cteplan->targetlist);
2922 * We don't support pushing join clauses into the quals of a CTE scan, but
2923 * it could still have required parameterization due to LATERAL refs in
2924 * its tlist.
2926 required_outer = rel->lateral_relids;
2928 /* Generate appropriate path */
2929 add_path(rel, create_ctescan_path(root, rel, pathkeys, required_outer));
2933 * set_namedtuplestore_pathlist
2934 * Build the (single) access path for a named tuplestore RTE
2936 * There's no need for a separate set_namedtuplestore_size phase, since we
2937 * don't support join-qual-parameterized paths for tuplestores.
2939 static void
2940 set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel,
2941 RangeTblEntry *rte)
2943 Relids required_outer;
2945 /* Mark rel with estimated output rows, width, etc */
2946 set_namedtuplestore_size_estimates(root, rel);
2949 * We don't support pushing join clauses into the quals of a tuplestore
2950 * scan, but it could still have required parameterization due to LATERAL
2951 * refs in its tlist.
2953 required_outer = rel->lateral_relids;
2955 /* Generate appropriate path */
2956 add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer));
2960 * set_result_pathlist
2961 * Build the (single) access path for an RTE_RESULT RTE
2963 * There's no need for a separate set_result_size phase, since we
2964 * don't support join-qual-parameterized paths for these RTEs.
2966 static void
2967 set_result_pathlist(PlannerInfo *root, RelOptInfo *rel,
2968 RangeTblEntry *rte)
2970 Relids required_outer;
2972 /* Mark rel with estimated output rows, width, etc */
2973 set_result_size_estimates(root, rel);
2976 * We don't support pushing join clauses into the quals of a Result scan,
2977 * but it could still have required parameterization due to LATERAL refs
2978 * in its tlist.
2980 required_outer = rel->lateral_relids;
2982 /* Generate appropriate path */
2983 add_path(rel, create_resultscan_path(root, rel, required_outer));
2987 * set_worktable_pathlist
2988 * Build the (single) access path for a self-reference CTE RTE
2990 * There's no need for a separate set_worktable_size phase, since we don't
2991 * support join-qual-parameterized paths for CTEs.
2993 static void
2994 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
2996 Path *ctepath;
2997 PlannerInfo *cteroot;
2998 Index levelsup;
2999 Relids required_outer;
3002 * We need to find the non-recursive term's path, which is in the plan
3003 * level that's processing the recursive UNION, which is one level *below*
3004 * where the CTE comes from.
3006 levelsup = rte->ctelevelsup;
3007 if (levelsup == 0) /* shouldn't happen */
3008 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
3009 levelsup--;
3010 cteroot = root;
3011 while (levelsup-- > 0)
3013 cteroot = cteroot->parent_root;
3014 if (!cteroot) /* shouldn't happen */
3015 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
3017 ctepath = cteroot->non_recursive_path;
3018 if (!ctepath) /* shouldn't happen */
3019 elog(ERROR, "could not find path for CTE \"%s\"", rte->ctename);
3021 /* Mark rel with estimated output rows, width, etc */
3022 set_cte_size_estimates(root, rel, ctepath->rows);
3025 * We don't support pushing join clauses into the quals of a worktable
3026 * scan, but it could still have required parameterization due to LATERAL
3027 * refs in its tlist. (I'm not sure this is actually possible given the
3028 * restrictions on recursive references, but it's easy enough to support.)
3030 required_outer = rel->lateral_relids;
3032 /* Generate appropriate path */
3033 add_path(rel, create_worktablescan_path(root, rel, required_outer));
3037 * generate_gather_paths
3038 * Generate parallel access paths for a relation by pushing a Gather or
3039 * Gather Merge on top of a partial path.
3041 * This must not be called until after we're done creating all partial paths
3042 * for the specified relation. (Otherwise, add_partial_path might delete a
3043 * path that some GatherPath or GatherMergePath has a reference to.)
3045 * If we're generating paths for a scan or join relation, override_rows will
3046 * be false, and we'll just use the relation's size estimate. When we're
3047 * being called for a partially-grouped or partially-distinct path, though, we
3048 * need to override the rowcount estimate. (It's not clear that the
3049 * particular value we're using here is actually best, but the underlying rel
3050 * has no estimate so we must do something.)
3052 void
3053 generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
3055 Path *cheapest_partial_path;
3056 Path *simple_gather_path;
3057 ListCell *lc;
3058 double rows;
3059 double *rowsp = NULL;
3061 /* If there are no partial paths, there's nothing to do here. */
3062 if (rel->partial_pathlist == NIL)
3063 return;
3065 /* Should we override the rel's rowcount estimate? */
3066 if (override_rows)
3067 rowsp = &rows;
3070 * The output of Gather is always unsorted, so there's only one partial
3071 * path of interest: the cheapest one. That will be the one at the front
3072 * of partial_pathlist because of the way add_partial_path works.
3074 cheapest_partial_path = linitial(rel->partial_pathlist);
3075 rows =
3076 cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
3077 simple_gather_path = (Path *)
3078 create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
3079 NULL, rowsp);
3080 add_path(rel, simple_gather_path);
3083 * For each useful ordering, we can consider an order-preserving Gather
3084 * Merge.
3086 foreach(lc, rel->partial_pathlist)
3088 Path *subpath = (Path *) lfirst(lc);
3089 GatherMergePath *path;
3091 if (subpath->pathkeys == NIL)
3092 continue;
3094 rows = subpath->rows * subpath->parallel_workers;
3095 path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
3096 subpath->pathkeys, NULL, rowsp);
3097 add_path(rel, &path->path);
3102 * get_useful_pathkeys_for_relation
3103 * Determine which orderings of a relation might be useful.
3105 * Getting data in sorted order can be useful either because the requested
3106 * order matches the final output ordering for the overall query we're
3107 * planning, or because it enables an efficient merge join. Here, we try
3108 * to figure out which pathkeys to consider.
3110 * This allows us to do incremental sort on top of an index scan under a gather
3111 * merge node, i.e. parallelized.
3113 * If the require_parallel_safe is true, we also require the expressions to
3114 * be parallel safe (which allows pushing the sort below Gather Merge).
3116 * XXX At the moment this can only ever return a list with a single element,
3117 * because it looks at query_pathkeys only. So we might return the pathkeys
3118 * directly, but it seems plausible we'll want to consider other orderings
3119 * in the future. For example, we might want to consider pathkeys useful for
3120 * merge joins.
3122 static List *
3123 get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel,
3124 bool require_parallel_safe)
3126 List *useful_pathkeys_list = NIL;
3129 * Considering query_pathkeys is always worth it, because it might allow
3130 * us to avoid a total sort when we have a partially presorted path
3131 * available or to push the total sort into the parallel portion of the
3132 * query.
3134 if (root->query_pathkeys)
3136 ListCell *lc;
3137 int npathkeys = 0; /* useful pathkeys */
3139 foreach(lc, root->query_pathkeys)
3141 PathKey *pathkey = (PathKey *) lfirst(lc);
3142 EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
3145 * We can only build a sort for pathkeys that contain a
3146 * safe-to-compute-early EC member computable from the current
3147 * relation's reltarget, so ignore the remainder of the list as
3148 * soon as we find a pathkey without such a member.
3150 * It's still worthwhile to return any prefix of the pathkeys list
3151 * that meets this requirement, as we may be able to do an
3152 * incremental sort.
3154 * If requested, ensure the sort expression is parallel-safe too.
3156 if (!relation_can_be_sorted_early(root, rel, pathkey_ec,
3157 require_parallel_safe))
3158 break;
3160 npathkeys++;
3164 * The whole query_pathkeys list matches, so append it directly, to
3165 * allow comparing pathkeys easily by comparing list pointer. If we
3166 * have to truncate the pathkeys, we gotta do a copy though.
3168 if (npathkeys == list_length(root->query_pathkeys))
3169 useful_pathkeys_list = lappend(useful_pathkeys_list,
3170 root->query_pathkeys);
3171 else if (npathkeys > 0)
3172 useful_pathkeys_list = lappend(useful_pathkeys_list,
3173 list_copy_head(root->query_pathkeys,
3174 npathkeys));
3177 return useful_pathkeys_list;
3181 * generate_useful_gather_paths
3182 * Generate parallel access paths for a relation by pushing a Gather or
3183 * Gather Merge on top of a partial path.
3185 * Unlike plain generate_gather_paths, this looks both at pathkeys of input
3186 * paths (aiming to preserve the ordering), but also considers ordering that
3187 * might be useful for nodes above the gather merge node, and tries to add
3188 * a sort (regular or incremental) to provide that.
3190 void
3191 generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
3193 ListCell *lc;
3194 double rows;
3195 double *rowsp = NULL;
3196 List *useful_pathkeys_list = NIL;
3197 Path *cheapest_partial_path = NULL;
3199 /* If there are no partial paths, there's nothing to do here. */
3200 if (rel->partial_pathlist == NIL)
3201 return;
3203 /* Should we override the rel's rowcount estimate? */
3204 if (override_rows)
3205 rowsp = &rows;
3207 /* generate the regular gather (merge) paths */
3208 generate_gather_paths(root, rel, override_rows);
3210 /* consider incremental sort for interesting orderings */
3211 useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true);
3213 /* used for explicit (full) sort paths */
3214 cheapest_partial_path = linitial(rel->partial_pathlist);
3217 * Consider sorted paths for each interesting ordering. We generate both
3218 * incremental and full sort.
3220 foreach(lc, useful_pathkeys_list)
3222 List *useful_pathkeys = lfirst(lc);
3223 ListCell *lc2;
3224 bool is_sorted;
3225 int presorted_keys;
3227 foreach(lc2, rel->partial_pathlist)
3229 Path *subpath = (Path *) lfirst(lc2);
3230 GatherMergePath *path;
3232 is_sorted = pathkeys_count_contained_in(useful_pathkeys,
3233 subpath->pathkeys,
3234 &presorted_keys);
3237 * We don't need to consider the case where a subpath is already
3238 * fully sorted because generate_gather_paths already creates a
3239 * gather merge path for every subpath that has pathkeys present.
3241 * But since the subpath is already sorted, we know we don't need
3242 * to consider adding a sort (full or incremental) on top of it,
3243 * so we can continue here.
3245 if (is_sorted)
3246 continue;
3249 * Try at least sorting the cheapest path and also try
3250 * incrementally sorting any path which is partially sorted
3251 * already (no need to deal with paths which have presorted keys
3252 * when incremental sort is disabled unless it's the cheapest
3253 * input path).
3255 if (subpath != cheapest_partial_path &&
3256 (presorted_keys == 0 || !enable_incremental_sort))
3257 continue;
3260 * Consider regular sort for any path that's not presorted or if
3261 * incremental sort is disabled. We've no need to consider both
3262 * sort and incremental sort on the same path. We assume that
3263 * incremental sort is always faster when there are presorted
3264 * keys.
3266 * This is not redundant with the gather paths created in
3267 * generate_gather_paths, because that doesn't generate ordered
3268 * output. Here we add an explicit sort to match the useful
3269 * ordering.
3271 if (presorted_keys == 0 || !enable_incremental_sort)
3273 subpath = (Path *) create_sort_path(root,
3274 rel,
3275 subpath,
3276 useful_pathkeys,
3277 -1.0);
3278 rows = subpath->rows * subpath->parallel_workers;
3280 else
3281 subpath = (Path *) create_incremental_sort_path(root,
3282 rel,
3283 subpath,
3284 useful_pathkeys,
3285 presorted_keys,
3286 -1);
3287 path = create_gather_merge_path(root, rel,
3288 subpath,
3289 rel->reltarget,
3290 subpath->pathkeys,
3291 NULL,
3292 rowsp);
3294 add_path(rel, &path->path);
3300 * make_rel_from_joinlist
3301 * Build access paths using a "joinlist" to guide the join path search.
3303 * See comments for deconstruct_jointree() for definition of the joinlist
3304 * data structure.
3306 static RelOptInfo *
3307 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
3309 int levels_needed;
3310 List *initial_rels;
3311 ListCell *jl;
3314 * Count the number of child joinlist nodes. This is the depth of the
3315 * dynamic-programming algorithm we must employ to consider all ways of
3316 * joining the child nodes.
3318 levels_needed = list_length(joinlist);
3320 if (levels_needed <= 0)
3321 return NULL; /* nothing to do? */
3324 * Construct a list of rels corresponding to the child joinlist nodes.
3325 * This may contain both base rels and rels constructed according to
3326 * sub-joinlists.
3328 initial_rels = NIL;
3329 foreach(jl, joinlist)
3331 Node *jlnode = (Node *) lfirst(jl);
3332 RelOptInfo *thisrel;
3334 if (IsA(jlnode, RangeTblRef))
3336 int varno = ((RangeTblRef *) jlnode)->rtindex;
3338 thisrel = find_base_rel(root, varno);
3340 else if (IsA(jlnode, List))
3342 /* Recurse to handle subproblem */
3343 thisrel = make_rel_from_joinlist(root, (List *) jlnode);
3345 else
3347 elog(ERROR, "unrecognized joinlist node type: %d",
3348 (int) nodeTag(jlnode));
3349 thisrel = NULL; /* keep compiler quiet */
3352 initial_rels = lappend(initial_rels, thisrel);
3355 if (levels_needed == 1)
3358 * Single joinlist node, so we're done.
3360 return (RelOptInfo *) linitial(initial_rels);
3362 else
3365 * Consider the different orders in which we could join the rels,
3366 * using a plugin, GEQO, or the regular join search code.
3368 * We put the initial_rels list into a PlannerInfo field because
3369 * has_legal_joinclause() needs to look at it (ugly :-().
3371 root->initial_rels = initial_rels;
3373 if (join_search_hook)
3374 return (*join_search_hook) (root, levels_needed, initial_rels);
3375 else if (enable_geqo && levels_needed >= geqo_threshold)
3376 return geqo(root, levels_needed, initial_rels);
3377 else
3378 return standard_join_search(root, levels_needed, initial_rels);
3383 * standard_join_search
3384 * Find possible joinpaths for a query by successively finding ways
3385 * to join component relations into join relations.
3387 * 'levels_needed' is the number of iterations needed, ie, the number of
3388 * independent jointree items in the query. This is > 1.
3390 * 'initial_rels' is a list of RelOptInfo nodes for each independent
3391 * jointree item. These are the components to be joined together.
3392 * Note that levels_needed == list_length(initial_rels).
3394 * Returns the final level of join relations, i.e., the relation that is
3395 * the result of joining all the original relations together.
3396 * At least one implementation path must be provided for this relation and
3397 * all required sub-relations.
3399 * To support loadable plugins that modify planner behavior by changing the
3400 * join searching algorithm, we provide a hook variable that lets a plugin
3401 * replace or supplement this function. Any such hook must return the same
3402 * final join relation as the standard code would, but it might have a
3403 * different set of implementation paths attached, and only the sub-joinrels
3404 * needed for these paths need have been instantiated.
3406 * Note to plugin authors: the functions invoked during standard_join_search()
3407 * modify root->join_rel_list and root->join_rel_hash. If you want to do more
3408 * than one join-order search, you'll probably need to save and restore the
3409 * original states of those data structures. See geqo_eval() for an example.
3411 RelOptInfo *
3412 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
3414 int lev;
3415 RelOptInfo *rel;
3418 * This function cannot be invoked recursively within any one planning
3419 * problem, so join_rel_level[] can't be in use already.
3421 Assert(root->join_rel_level == NULL);
3424 * We employ a simple "dynamic programming" algorithm: we first find all
3425 * ways to build joins of two jointree items, then all ways to build joins
3426 * of three items (from two-item joins and single items), then four-item
3427 * joins, and so on until we have considered all ways to join all the
3428 * items into one rel.
3430 * root->join_rel_level[j] is a list of all the j-item rels. Initially we
3431 * set root->join_rel_level[1] to represent all the single-jointree-item
3432 * relations.
3434 root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
3436 root->join_rel_level[1] = initial_rels;
3438 for (lev = 2; lev <= levels_needed; lev++)
3440 ListCell *lc;
3443 * Determine all possible pairs of relations to be joined at this
3444 * level, and build paths for making each one from every available
3445 * pair of lower-level relations.
3447 join_search_one_level(root, lev);
3450 * Run generate_partitionwise_join_paths() and
3451 * generate_useful_gather_paths() for each just-processed joinrel. We
3452 * could not do this earlier because both regular and partial paths
3453 * can get added to a particular joinrel at multiple times within
3454 * join_search_one_level.
3456 * After that, we're done creating paths for the joinrel, so run
3457 * set_cheapest().
3459 foreach(lc, root->join_rel_level[lev])
3461 rel = (RelOptInfo *) lfirst(lc);
3463 /* Create paths for partitionwise joins. */
3464 generate_partitionwise_join_paths(root, rel);
3467 * Except for the topmost scan/join rel, consider gathering
3468 * partial paths. We'll do the same for the topmost scan/join rel
3469 * once we know the final targetlist (see grouping_planner's and
3470 * its call to apply_scanjoin_target_to_paths).
3472 if (!bms_equal(rel->relids, root->all_query_rels))
3473 generate_useful_gather_paths(root, rel, false);
3475 /* Find and save the cheapest paths for this rel */
3476 set_cheapest(rel);
3478 #ifdef OPTIMIZER_DEBUG
3479 pprint(rel);
3480 #endif
3485 * We should have a single rel at the final level.
3487 if (root->join_rel_level[levels_needed] == NIL)
3488 elog(ERROR, "failed to build any %d-way joins", levels_needed);
3489 Assert(list_length(root->join_rel_level[levels_needed]) == 1);
3491 rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
3493 root->join_rel_level = NULL;
3495 return rel;
3498 /*****************************************************************************
3499 * PUSHING QUALS DOWN INTO SUBQUERIES
3500 *****************************************************************************/
3503 * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
3505 * subquery is the particular component query being checked. topquery
3506 * is the top component of a set-operations tree (the same Query if no
3507 * set-op is involved).
3509 * Conditions checked here:
3511 * 1. If the subquery has a LIMIT clause, we must not push down any quals,
3512 * since that could change the set of rows returned.
3514 * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
3515 * quals into it, because that could change the results.
3517 * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
3518 * This is because upper-level quals should semantically be evaluated only
3519 * once per distinct row, not once per original row, and if the qual is
3520 * volatile then extra evaluations could change the results. (This issue
3521 * does not apply to other forms of aggregation such as GROUP BY, because
3522 * when those are present we push into HAVING not WHERE, so that the quals
3523 * are still applied after aggregation.)
3525 * 4. If the subquery contains window functions, we cannot push volatile quals
3526 * into it. The issue here is a bit different from DISTINCT: a volatile qual
3527 * might succeed for some rows of a window partition and fail for others,
3528 * thereby changing the partition contents and thus the window functions'
3529 * results for rows that remain.
3531 * 5. If the subquery contains any set-returning functions in its targetlist,
3532 * we cannot push volatile quals into it. That would push them below the SRFs
3533 * and thereby change the number of times they are evaluated. Also, a
3534 * volatile qual could succeed for some SRF output rows and fail for others,
3535 * a behavior that cannot occur if it's evaluated before SRF expansion.
3537 * 6. If the subquery has nonempty grouping sets, we cannot push down any
3538 * quals. The concern here is that a qual referencing a "constant" grouping
3539 * column could get constant-folded, which would be improper because the value
3540 * is potentially nullable by grouping-set expansion. This restriction could
3541 * be removed if we had a parsetree representation that shows that such
3542 * grouping columns are not really constant. (There are other ideas that
3543 * could be used to relax this restriction, but that's the approach most
3544 * likely to get taken in the future. Note that there's not much to be gained
3545 * so long as subquery_planner can't move HAVING clauses to WHERE within such
3546 * a subquery.)
3548 * In addition, we make several checks on the subquery's output columns to see
3549 * if it is safe to reference them in pushed-down quals. If output column k
3550 * is found to be unsafe to reference, we set the reason for that inside
3551 * safetyInfo->unsafeFlags[k], but we don't reject the subquery overall since
3552 * column k might not be referenced by some/all quals. The unsafeFlags[]
3553 * array will be consulted later by qual_is_pushdown_safe(). It's better to
3554 * do it this way than to make the checks directly in qual_is_pushdown_safe(),
3555 * because when the subquery involves set operations we have to check the
3556 * output expressions in each arm of the set op.
3558 * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
3559 * we're effectively assuming that the quals cannot distinguish values that
3560 * the DISTINCT's equality operator sees as equal, yet there are many
3561 * counterexamples to that assumption. However use of such a qual with a
3562 * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
3563 * "equal" value will be chosen as the output value by the DISTINCT operation.
3564 * So we don't worry too much about that. Another objection is that if the
3565 * qual is expensive to evaluate, running it for each original row might cost
3566 * more than we save by eliminating rows before the DISTINCT step. But it
3567 * would be very hard to estimate that at this stage, and in practice pushdown
3568 * seldom seems to make things worse, so we ignore that problem too.
3570 * Note: likewise, pushing quals into a subquery with window functions is a
3571 * bit dubious: the quals might remove some rows of a window partition while
3572 * leaving others, causing changes in the window functions' results for the
3573 * surviving rows. We insist that such a qual reference only partitioning
3574 * columns, but again that only protects us if the qual does not distinguish
3575 * values that the partitioning equality operator sees as equal. The risks
3576 * here are perhaps larger than for DISTINCT, since no de-duplication of rows
3577 * occurs and thus there is no theoretical problem with such a qual. But
3578 * we'll do this anyway because the potential performance benefits are very
3579 * large, and we've seen no field complaints about the longstanding comparable
3580 * behavior with DISTINCT.
3582 static bool
3583 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
3584 pushdown_safety_info *safetyInfo)
3586 SetOperationStmt *topop;
3588 /* Check point 1 */
3589 if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
3590 return false;
3592 /* Check point 6 */
3593 if (subquery->groupClause && subquery->groupingSets)
3594 return false;
3596 /* Check points 3, 4, and 5 */
3597 if (subquery->distinctClause ||
3598 subquery->hasWindowFuncs ||
3599 subquery->hasTargetSRFs)
3600 safetyInfo->unsafeVolatile = true;
3603 * If we're at a leaf query, check for unsafe expressions in its target
3604 * list, and mark any reasons why they're unsafe in unsafeFlags[].
3605 * (Non-leaf nodes in setop trees have only simple Vars in their tlists,
3606 * so no need to check them.)
3608 if (subquery->setOperations == NULL)
3609 check_output_expressions(subquery, safetyInfo);
3611 /* Are we at top level, or looking at a setop component? */
3612 if (subquery == topquery)
3614 /* Top level, so check any component queries */
3615 if (subquery->setOperations != NULL)
3616 if (!recurse_pushdown_safe(subquery->setOperations, topquery,
3617 safetyInfo))
3618 return false;
3620 else
3622 /* Setop component must not have more components (too weird) */
3623 if (subquery->setOperations != NULL)
3624 return false;
3625 /* Check whether setop component output types match top level */
3626 topop = castNode(SetOperationStmt, topquery->setOperations);
3627 Assert(topop);
3628 compare_tlist_datatypes(subquery->targetList,
3629 topop->colTypes,
3630 safetyInfo);
3632 return true;
3636 * Helper routine to recurse through setOperations tree
3638 static bool
3639 recurse_pushdown_safe(Node *setOp, Query *topquery,
3640 pushdown_safety_info *safetyInfo)
3642 if (IsA(setOp, RangeTblRef))
3644 RangeTblRef *rtr = (RangeTblRef *) setOp;
3645 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
3646 Query *subquery = rte->subquery;
3648 Assert(subquery != NULL);
3649 return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
3651 else if (IsA(setOp, SetOperationStmt))
3653 SetOperationStmt *op = (SetOperationStmt *) setOp;
3655 /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
3656 if (op->op == SETOP_EXCEPT)
3657 return false;
3658 /* Else recurse */
3659 if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
3660 return false;
3661 if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
3662 return false;
3664 else
3666 elog(ERROR, "unrecognized node type: %d",
3667 (int) nodeTag(setOp));
3669 return true;
3673 * check_output_expressions - check subquery's output expressions for safety
3675 * There are several cases in which it's unsafe to push down an upper-level
3676 * qual if it references a particular output column of a subquery. We check
3677 * each output column of the subquery and set flags in unsafeFlags[k] when we
3678 * see that column is unsafe for a pushed-down qual to reference. The
3679 * conditions checked here are:
3681 * 1. We must not push down any quals that refer to subselect outputs that
3682 * return sets, else we'd introduce functions-returning-sets into the
3683 * subquery's WHERE/HAVING quals.
3685 * 2. We must not push down any quals that refer to subselect outputs that
3686 * contain volatile functions, for fear of introducing strange results due
3687 * to multiple evaluation of a volatile function.
3689 * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
3690 * refer to non-DISTINCT output columns, because that could change the set
3691 * of rows returned. (This condition is vacuous for DISTINCT, because then
3692 * there are no non-DISTINCT output columns, so we needn't check. Note that
3693 * subquery_is_pushdown_safe already reported that we can't use volatile
3694 * quals if there's DISTINCT or DISTINCT ON.)
3696 * 4. If the subquery has any window functions, we must not push down quals
3697 * that reference any output columns that are not listed in all the subquery's
3698 * window PARTITION BY clauses. We can push down quals that use only
3699 * partitioning columns because they should succeed or fail identically for
3700 * every row of any one window partition, and totally excluding some
3701 * partitions will not change a window function's results for remaining
3702 * partitions. (Again, this also requires nonvolatile quals, but
3703 * subquery_is_pushdown_safe handles that.). Subquery columns marked as
3704 * unsafe for this reason can still have WindowClause run conditions pushed
3705 * down.
3707 static void
3708 check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
3710 ListCell *lc;
3712 foreach(lc, subquery->targetList)
3714 TargetEntry *tle = (TargetEntry *) lfirst(lc);
3716 if (tle->resjunk)
3717 continue; /* ignore resjunk columns */
3719 /* Functions returning sets are unsafe (point 1) */
3720 if (subquery->hasTargetSRFs &&
3721 (safetyInfo->unsafeFlags[tle->resno] &
3722 UNSAFE_HAS_SET_FUNC) == 0 &&
3723 expression_returns_set((Node *) tle->expr))
3725 safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_HAS_SET_FUNC;
3726 continue;
3729 /* Volatile functions are unsafe (point 2) */
3730 if ((safetyInfo->unsafeFlags[tle->resno] &
3731 UNSAFE_HAS_VOLATILE_FUNC) == 0 &&
3732 contain_volatile_functions((Node *) tle->expr))
3734 safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_HAS_VOLATILE_FUNC;
3735 continue;
3738 /* If subquery uses DISTINCT ON, check point 3 */
3739 if (subquery->hasDistinctOn &&
3740 (safetyInfo->unsafeFlags[tle->resno] &
3741 UNSAFE_NOTIN_DISTINCTON_CLAUSE) == 0 &&
3742 !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
3744 /* non-DISTINCT column, so mark it unsafe */
3745 safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_NOTIN_DISTINCTON_CLAUSE;
3746 continue;
3749 /* If subquery uses window functions, check point 4 */
3750 if (subquery->hasWindowFuncs &&
3751 (safetyInfo->unsafeFlags[tle->resno] &
3752 UNSAFE_NOTIN_DISTINCTON_CLAUSE) == 0 &&
3753 !targetIsInAllPartitionLists(tle, subquery))
3755 /* not present in all PARTITION BY clauses, so mark it unsafe */
3756 safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_NOTIN_PARTITIONBY_CLAUSE;
3757 continue;
3763 * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
3764 * push quals into each component query, but the quals can only reference
3765 * subquery columns that suffer no type coercions in the set operation.
3766 * Otherwise there are possible semantic gotchas. So, we check the
3767 * component queries to see if any of them have output types different from
3768 * the top-level setop outputs. We set the UNSAFE_TYPE_MISMATCH bit in
3769 * unsafeFlags[k] if column k has different type in any component.
3771 * We don't have to care about typmods here: the only allowed difference
3772 * between set-op input and output typmods is input is a specific typmod
3773 * and output is -1, and that does not require a coercion.
3775 * tlist is a subquery tlist.
3776 * colTypes is an OID list of the top-level setop's output column types.
3777 * safetyInfo is the pushdown_safety_info to set unsafeFlags[] for.
3779 static void
3780 compare_tlist_datatypes(List *tlist, List *colTypes,
3781 pushdown_safety_info *safetyInfo)
3783 ListCell *l;
3784 ListCell *colType = list_head(colTypes);
3786 foreach(l, tlist)
3788 TargetEntry *tle = (TargetEntry *) lfirst(l);
3790 if (tle->resjunk)
3791 continue; /* ignore resjunk columns */
3792 if (colType == NULL)
3793 elog(ERROR, "wrong number of tlist entries");
3794 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
3795 safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_TYPE_MISMATCH;
3796 colType = lnext(colTypes, colType);
3798 if (colType != NULL)
3799 elog(ERROR, "wrong number of tlist entries");
3803 * targetIsInAllPartitionLists
3804 * True if the TargetEntry is listed in the PARTITION BY clause
3805 * of every window defined in the query.
3807 * It would be safe to ignore windows not actually used by any window
3808 * function, but it's not easy to get that info at this stage; and it's
3809 * unlikely to be useful to spend any extra cycles getting it, since
3810 * unreferenced window definitions are probably infrequent in practice.
3812 static bool
3813 targetIsInAllPartitionLists(TargetEntry *tle, Query *query)
3815 ListCell *lc;
3817 foreach(lc, query->windowClause)
3819 WindowClause *wc = (WindowClause *) lfirst(lc);
3821 if (!targetIsInSortList(tle, InvalidOid, wc->partitionClause))
3822 return false;
3824 return true;
3828 * qual_is_pushdown_safe - is a particular rinfo safe to push down?
3830 * rinfo is a restriction clause applying to the given subquery (whose RTE
3831 * has index rti in the parent query).
3833 * Conditions checked here:
3835 * 1. rinfo's clause must not contain any SubPlans (mainly because it's
3836 * unclear that it will work correctly: SubLinks will already have been
3837 * transformed into SubPlans in the qual, but not in the subquery). Note that
3838 * SubLinks that transform to initplans are safe, and will be accepted here
3839 * because what we'll see in the qual is just a Param referencing the initplan
3840 * output.
3842 * 2. If unsafeVolatile is set, rinfo's clause must not contain any volatile
3843 * functions.
3845 * 3. If unsafeLeaky is set, rinfo's clause must not contain any leaky
3846 * functions that are passed Var nodes, and therefore might reveal values from
3847 * the subquery as side effects.
3849 * 4. rinfo's clause must not refer to the whole-row output of the subquery
3850 * (since there is no easy way to name that within the subquery itself).
3852 * 5. rinfo's clause must not refer to any subquery output columns that were
3853 * found to be unsafe to reference by subquery_is_pushdown_safe().
3855 static pushdown_safe_type
3856 qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo,
3857 pushdown_safety_info *safetyInfo)
3859 pushdown_safe_type safe = PUSHDOWN_SAFE;
3860 Node *qual = (Node *) rinfo->clause;
3861 List *vars;
3862 ListCell *vl;
3864 /* Refuse subselects (point 1) */
3865 if (contain_subplans(qual))
3866 return PUSHDOWN_UNSAFE;
3868 /* Refuse volatile quals if we found they'd be unsafe (point 2) */
3869 if (safetyInfo->unsafeVolatile &&
3870 contain_volatile_functions((Node *) rinfo))
3871 return PUSHDOWN_UNSAFE;
3873 /* Refuse leaky quals if told to (point 3) */
3874 if (safetyInfo->unsafeLeaky &&
3875 contain_leaked_vars(qual))
3876 return PUSHDOWN_UNSAFE;
3879 * Examine all Vars used in clause. Since it's a restriction clause, all
3880 * such Vars must refer to subselect output columns ... unless this is
3881 * part of a LATERAL subquery, in which case there could be lateral
3882 * references.
3884 * By omitting the relevant flags, this also gives us a cheap sanity check
3885 * that no aggregates or window functions appear in the qual. Those would
3886 * be unsafe to push down, but at least for the moment we could never see
3887 * any in a qual anyhow.
3889 vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
3890 foreach(vl, vars)
3892 Var *var = (Var *) lfirst(vl);
3895 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
3896 * It's not clear whether a PHV could safely be pushed down, and even
3897 * less clear whether such a situation could arise in any cases of
3898 * practical interest anyway. So for the moment, just refuse to push
3899 * down.
3901 if (!IsA(var, Var))
3903 safe = PUSHDOWN_UNSAFE;
3904 break;
3908 * Punt if we find any lateral references. It would be safe to push
3909 * these down, but we'd have to convert them into outer references,
3910 * which subquery_push_qual lacks the infrastructure to do. The case
3911 * arises so seldom that it doesn't seem worth working hard on.
3913 if (var->varno != rti)
3915 safe = PUSHDOWN_UNSAFE;
3916 break;
3919 /* Subqueries have no system columns */
3920 Assert(var->varattno >= 0);
3922 /* Check point 4 */
3923 if (var->varattno == 0)
3925 safe = PUSHDOWN_UNSAFE;
3926 break;
3929 /* Check point 5 */
3930 if (safetyInfo->unsafeFlags[var->varattno] != 0)
3932 if (safetyInfo->unsafeFlags[var->varattno] &
3933 (UNSAFE_HAS_VOLATILE_FUNC | UNSAFE_HAS_SET_FUNC |
3934 UNSAFE_NOTIN_DISTINCTON_CLAUSE | UNSAFE_TYPE_MISMATCH))
3936 safe = PUSHDOWN_UNSAFE;
3937 break;
3939 else
3941 /* UNSAFE_NOTIN_PARTITIONBY_CLAUSE is ok for run conditions */
3942 safe = PUSHDOWN_WINDOWCLAUSE_RUNCOND;
3943 /* don't break, we might find another Var that's unsafe */
3948 list_free(vars);
3950 return safe;
3954 * subquery_push_qual - push down a qual that we have determined is safe
3956 static void
3957 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
3959 if (subquery->setOperations != NULL)
3961 /* Recurse to push it separately to each component query */
3962 recurse_push_qual(subquery->setOperations, subquery,
3963 rte, rti, qual);
3965 else
3968 * We need to replace Vars in the qual (which must refer to outputs of
3969 * the subquery) with copies of the subquery's targetlist expressions.
3970 * Note that at this point, any uplevel Vars in the qual should have
3971 * been replaced with Params, so they need no work.
3973 * This step also ensures that when we are pushing into a setop tree,
3974 * each component query gets its own copy of the qual.
3976 qual = ReplaceVarsFromTargetList(qual, rti, 0, rte,
3977 subquery->targetList,
3978 REPLACEVARS_REPORT_ERROR, 0,
3979 &subquery->hasSubLinks);
3982 * Now attach the qual to the proper place: normally WHERE, but if the
3983 * subquery uses grouping or aggregation, put it in HAVING (since the
3984 * qual really refers to the group-result rows).
3986 if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual)
3987 subquery->havingQual = make_and_qual(subquery->havingQual, qual);
3988 else
3989 subquery->jointree->quals =
3990 make_and_qual(subquery->jointree->quals, qual);
3993 * We need not change the subquery's hasAggs or hasSubLinks flags,
3994 * since we can't be pushing down any aggregates that weren't there
3995 * before, and we don't push down subselects at all.
4001 * Helper routine to recurse through setOperations tree
4003 static void
4004 recurse_push_qual(Node *setOp, Query *topquery,
4005 RangeTblEntry *rte, Index rti, Node *qual)
4007 if (IsA(setOp, RangeTblRef))
4009 RangeTblRef *rtr = (RangeTblRef *) setOp;
4010 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
4011 Query *subquery = subrte->subquery;
4013 Assert(subquery != NULL);
4014 subquery_push_qual(subquery, rte, rti, qual);
4016 else if (IsA(setOp, SetOperationStmt))
4018 SetOperationStmt *op = (SetOperationStmt *) setOp;
4020 recurse_push_qual(op->larg, topquery, rte, rti, qual);
4021 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
4023 else
4025 elog(ERROR, "unrecognized node type: %d",
4026 (int) nodeTag(setOp));
/*****************************************************************************
 *			SIMPLIFYING SUBQUERY TARGETLISTS
 *****************************************************************************/
4035 * remove_unused_subquery_outputs
4036 * Remove subquery targetlist items we don't need
4038 * It's possible, even likely, that the upper query does not read all the
4039 * output columns of the subquery. We can remove any such outputs that are
4040 * not needed by the subquery itself (e.g., as sort/group columns) and do not
4041 * affect semantics otherwise (e.g., volatile functions can't be removed).
4042 * This is useful not only because we might be able to remove expensive-to-
4043 * compute expressions, but because deletion of output columns might allow
4044 * optimizations such as join removal to occur within the subquery.
4046 * extra_used_attrs can be passed as non-NULL to mark any columns (offset by
4047 * FirstLowInvalidHeapAttributeNumber) that we should not remove. This
4048 * parameter is modified by the function, so callers must make a copy if they
4049 * need to use the passed in Bitmapset after calling this function.
4051 * To avoid affecting column numbering in the targetlist, we don't physically
4052 * remove unused tlist entries, but rather replace their expressions with NULL
4053 * constants. This is implemented by modifying subquery->targetList.
4055 static void
4056 remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel,
4057 Bitmapset *extra_used_attrs)
4059 Bitmapset *attrs_used;
4060 ListCell *lc;
4063 * Just point directly to extra_used_attrs. No need to bms_copy as none of
4064 * the current callers use the Bitmapset after calling this function.
4066 attrs_used = extra_used_attrs;
4069 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
4070 * could update all the child SELECTs' tlists, but it seems not worth the
4071 * trouble presently.
4073 if (subquery->setOperations)
4074 return;
4077 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
4078 * time: all its output columns must be used in the distinctClause.
4080 if (subquery->distinctClause && !subquery->hasDistinctOn)
4081 return;
4084 * Collect a bitmap of all the output column numbers used by the upper
4085 * query.
4087 * Add all the attributes needed for joins or final output. Note: we must
4088 * look at rel's targetlist, not the attr_needed data, because attr_needed
4089 * isn't computed for inheritance child rels, cf set_append_rel_size().
4090 * (XXX might be worth changing that sometime.)
4092 pull_varattnos((Node *) rel->reltarget->exprs, rel->relid, &attrs_used);
4094 /* Add all the attributes used by un-pushed-down restriction clauses. */
4095 foreach(lc, rel->baserestrictinfo)
4097 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
4099 pull_varattnos((Node *) rinfo->clause, rel->relid, &attrs_used);
4103 * If there's a whole-row reference to the subquery, we can't remove
4104 * anything.
4106 if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, attrs_used))
4107 return;
4110 * Run through the tlist and zap entries we don't need. It's okay to
4111 * modify the tlist items in-place because set_subquery_pathlist made a
4112 * copy of the subquery.
4114 foreach(lc, subquery->targetList)
4116 TargetEntry *tle = (TargetEntry *) lfirst(lc);
4117 Node *texpr = (Node *) tle->expr;
4120 * If it has a sortgroupref number, it's used in some sort/group
4121 * clause so we'd better not remove it. Also, don't remove any
4122 * resjunk columns, since their reason for being has nothing to do
4123 * with anybody reading the subquery's output. (It's likely that
4124 * resjunk columns in a sub-SELECT would always have ressortgroupref
4125 * set, but even if they don't, it seems imprudent to remove them.)
4127 if (tle->ressortgroupref || tle->resjunk)
4128 continue;
4131 * If it's used by the upper query, we can't remove it.
4133 if (bms_is_member(tle->resno - FirstLowInvalidHeapAttributeNumber,
4134 attrs_used))
4135 continue;
4138 * If it contains a set-returning function, we can't remove it since
4139 * that could change the number of rows returned by the subquery.
4141 if (subquery->hasTargetSRFs &&
4142 expression_returns_set(texpr))
4143 continue;
4146 * If it contains volatile functions, we daren't remove it for fear
4147 * that the user is expecting their side-effects to happen.
4149 if (contain_volatile_functions(texpr))
4150 continue;
4153 * OK, we don't need it. Replace the expression with a NULL constant.
4154 * Preserve the exposed type of the expression, in case something
4155 * looks at the rowtype of the subquery's result.
4157 tle->expr = (Expr *) makeNullConst(exprType(texpr),
4158 exprTypmod(texpr),
4159 exprCollation(texpr));
4164 * create_partial_bitmap_paths
4165 * Build partial bitmap heap path for the relation
4167 void
4168 create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
4169 Path *bitmapqual)
4171 int parallel_workers;
4172 double pages_fetched;
4174 /* Compute heap pages for bitmap heap scan */
4175 pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0,
4176 NULL, NULL);
4178 parallel_workers = compute_parallel_worker(rel, pages_fetched, -1,
4179 max_parallel_workers_per_gather);
4181 if (parallel_workers <= 0)
4182 return;
4184 add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel,
4185 bitmapqual, rel->lateral_relids, 1.0, parallel_workers));
4189 * Compute the number of parallel workers that should be used to scan a
4190 * relation. We compute the parallel workers based on the size of the heap to
4191 * be scanned and the size of the index to be scanned, then choose a minimum
4192 * of those.
4194 * "heap_pages" is the number of pages from the table that we expect to scan, or
4195 * -1 if we don't expect to scan any.
4197 * "index_pages" is the number of pages from the index that we expect to scan, or
4198 * -1 if we don't expect to scan any.
4200 * "max_workers" is caller's limit on the number of workers. This typically
4201 * comes from a GUC.
4204 compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages,
4205 int max_workers)
4207 int parallel_workers = 0;
4210 * If the user has set the parallel_workers reloption, use that; otherwise
4211 * select a default number of workers.
4213 if (rel->rel_parallel_workers != -1)
4214 parallel_workers = rel->rel_parallel_workers;
4215 else
4218 * If the number of pages being scanned is insufficient to justify a
4219 * parallel scan, just return zero ... unless it's an inheritance
4220 * child. In that case, we want to generate a parallel path here
4221 * anyway. It might not be worthwhile just for this relation, but
4222 * when combined with all of its inheritance siblings it may well pay
4223 * off.
4225 if (rel->reloptkind == RELOPT_BASEREL &&
4226 ((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) ||
4227 (index_pages >= 0 && index_pages < min_parallel_index_scan_size)))
4228 return 0;
4230 if (heap_pages >= 0)
4232 int heap_parallel_threshold;
4233 int heap_parallel_workers = 1;
4236 * Select the number of workers based on the log of the size of
4237 * the relation. This probably needs to be a good deal more
4238 * sophisticated, but we need something here for now. Note that
4239 * the upper limit of the min_parallel_table_scan_size GUC is
4240 * chosen to prevent overflow here.
4242 heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
4243 while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
4245 heap_parallel_workers++;
4246 heap_parallel_threshold *= 3;
4247 if (heap_parallel_threshold > INT_MAX / 3)
4248 break; /* avoid overflow */
4251 parallel_workers = heap_parallel_workers;
4254 if (index_pages >= 0)
4256 int index_parallel_workers = 1;
4257 int index_parallel_threshold;
4259 /* same calculation as for heap_pages above */
4260 index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
4261 while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
4263 index_parallel_workers++;
4264 index_parallel_threshold *= 3;
4265 if (index_parallel_threshold > INT_MAX / 3)
4266 break; /* avoid overflow */
4269 if (parallel_workers > 0)
4270 parallel_workers = Min(parallel_workers, index_parallel_workers);
4271 else
4272 parallel_workers = index_parallel_workers;
4276 /* In no case use more than caller supplied maximum number of workers */
4277 parallel_workers = Min(parallel_workers, max_workers);
4279 return parallel_workers;
4283 * generate_partitionwise_join_paths
4284 * Create paths representing partitionwise join for given partitioned
4285 * join relation.
4287 * This must not be called until after we are done adding paths for all
4288 * child-joins. Otherwise, add_path might delete a path to which some path
4289 * generated here has a reference.
4291 void
4292 generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
4294 List *live_children = NIL;
4295 int cnt_parts;
4296 int num_parts;
4297 RelOptInfo **part_rels;
4299 /* Handle only join relations here. */
4300 if (!IS_JOIN_REL(rel))
4301 return;
4303 /* We've nothing to do if the relation is not partitioned. */
4304 if (!IS_PARTITIONED_REL(rel))
4305 return;
4307 /* The relation should have consider_partitionwise_join set. */
4308 Assert(rel->consider_partitionwise_join);
4310 /* Guard against stack overflow due to overly deep partition hierarchy. */
4311 check_stack_depth();
4313 num_parts = rel->nparts;
4314 part_rels = rel->part_rels;
4316 /* Collect non-dummy child-joins. */
4317 for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++)
4319 RelOptInfo *child_rel = part_rels[cnt_parts];
4321 /* If it's been pruned entirely, it's certainly dummy. */
4322 if (child_rel == NULL)
4323 continue;
4325 /* Make partitionwise join paths for this partitioned child-join. */
4326 generate_partitionwise_join_paths(root, child_rel);
4328 /* If we failed to make any path for this child, we must give up. */
4329 if (child_rel->pathlist == NIL)
4332 * Mark the parent joinrel as unpartitioned so that later
4333 * functions treat it correctly.
4335 rel->nparts = 0;
4336 return;
4339 /* Else, identify the cheapest path for it. */
4340 set_cheapest(child_rel);
4342 /* Dummy children need not be scanned, so ignore those. */
4343 if (IS_DUMMY_REL(child_rel))
4344 continue;
4346 #ifdef OPTIMIZER_DEBUG
4347 pprint(child_rel);
4348 #endif
4350 live_children = lappend(live_children, child_rel);
4353 /* If all child-joins are dummy, parent join is also dummy. */
4354 if (!live_children)
4356 mark_dummy_rel(rel);
4357 return;
4360 /* Build additional paths for this rel from child-join paths. */
4361 add_paths_to_append_rel(root, rel, live_children);
4362 list_free(live_children);