/*-------------------------------------------------------------------------
 *
 * allpaths.c
 *	  Routines to find possible search paths for processing a query
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/path/allpaths.c
 *
 *-------------------------------------------------------------------------
 */
21 #include "access/sysattr.h"
22 #include "access/tsmapi.h"
23 #include "catalog/pg_class.h"
24 #include "catalog/pg_operator.h"
25 #include "catalog/pg_proc.h"
26 #include "foreign/fdwapi.h"
27 #include "miscadmin.h"
28 #include "nodes/makefuncs.h"
29 #include "nodes/nodeFuncs.h"
30 #include "nodes/supportnodes.h"
31 #ifdef OPTIMIZER_DEBUG
32 #include "nodes/print.h"
34 #include "optimizer/appendinfo.h"
35 #include "optimizer/clauses.h"
36 #include "optimizer/cost.h"
37 #include "optimizer/geqo.h"
38 #include "optimizer/optimizer.h"
39 #include "optimizer/pathnode.h"
40 #include "optimizer/paths.h"
41 #include "optimizer/plancat.h"
42 #include "optimizer/planner.h"
43 #include "optimizer/tlist.h"
44 #include "parser/parse_clause.h"
45 #include "parser/parsetree.h"
46 #include "partitioning/partbounds.h"
47 #include "port/pg_bitutils.h"
48 #include "rewrite/rewriteManip.h"
49 #include "utils/lsyscache.h"
/*
 * Bitmask flags for pushdown_safety_info.unsafeFlags.
 *
 * Each flag occupies its own bit so that several reasons can be recorded
 * for the same subquery output column.
 */
#define UNSAFE_HAS_VOLATILE_FUNC		(1 << 0)
#define UNSAFE_HAS_SET_FUNC				(1 << 1)
#define UNSAFE_NOTIN_DISTINCTON_CLAUSE	(1 << 2)
#define UNSAFE_NOTIN_PARTITIONBY_CLAUSE	(1 << 3)
#define UNSAFE_TYPE_MISMATCH			(1 << 4)
/* results of subquery_is_pushdown_safe */
typedef struct pushdown_safety_info
{
	unsigned char *unsafeFlags; /* bitmask of reasons why this target list
								 * column is unsafe for qual pushdown, or 0 if
								 * no reason. */
	bool		unsafeVolatile; /* don't push down volatile quals */
	bool		unsafeLeaky;	/* don't push down leaky quals */
} pushdown_safety_info;
/* Return type for qual_is_pushdown_safe */
typedef enum pushdown_safe_type
{
	PUSHDOWN_UNSAFE,			/* unsafe to push qual into subquery */
	PUSHDOWN_SAFE,				/* safe to push qual into subquery */
	PUSHDOWN_WINDOWCLAUSE_RUNCOND,	/* unsafe, but may work as WindowClause
									 * run condition */
} pushdown_safe_type;
78 /* These parameters are set by GUC */
79 bool enable_geqo
= false; /* just in case GUC doesn't set it */
81 int min_parallel_table_scan_size
;
82 int min_parallel_index_scan_size
;
84 /* Hook for plugins to get control in set_rel_pathlist() */
85 set_rel_pathlist_hook_type set_rel_pathlist_hook
= NULL
;
87 /* Hook for plugins to replace standard_join_search() */
88 join_search_hook_type join_search_hook
= NULL
;
91 static void set_base_rel_consider_startup(PlannerInfo
*root
);
92 static void set_base_rel_sizes(PlannerInfo
*root
);
93 static void set_base_rel_pathlists(PlannerInfo
*root
);
94 static void set_rel_size(PlannerInfo
*root
, RelOptInfo
*rel
,
95 Index rti
, RangeTblEntry
*rte
);
96 static void set_rel_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
97 Index rti
, RangeTblEntry
*rte
);
98 static void set_plain_rel_size(PlannerInfo
*root
, RelOptInfo
*rel
,
100 static void create_plain_partial_paths(PlannerInfo
*root
, RelOptInfo
*rel
);
101 static void set_rel_consider_parallel(PlannerInfo
*root
, RelOptInfo
*rel
,
103 static void set_plain_rel_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
105 static void set_tablesample_rel_size(PlannerInfo
*root
, RelOptInfo
*rel
,
107 static void set_tablesample_rel_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
109 static void set_foreign_size(PlannerInfo
*root
, RelOptInfo
*rel
,
111 static void set_foreign_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
113 static void set_append_rel_size(PlannerInfo
*root
, RelOptInfo
*rel
,
114 Index rti
, RangeTblEntry
*rte
);
115 static void set_append_rel_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
116 Index rti
, RangeTblEntry
*rte
);
117 static void generate_orderedappend_paths(PlannerInfo
*root
, RelOptInfo
*rel
,
118 List
*live_childrels
,
119 List
*all_child_pathkeys
);
120 static Path
*get_cheapest_parameterized_child_path(PlannerInfo
*root
,
122 Relids required_outer
);
123 static void accumulate_append_subpath(Path
*path
,
125 List
**special_subpaths
);
126 static Path
*get_singleton_append_subpath(Path
*path
);
127 static void set_dummy_rel_pathlist(RelOptInfo
*rel
);
128 static void set_subquery_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
129 Index rti
, RangeTblEntry
*rte
);
130 static void set_function_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
132 static void set_values_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
134 static void set_tablefunc_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
136 static void set_cte_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
138 static void set_namedtuplestore_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
140 static void set_result_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
142 static void set_worktable_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
144 static RelOptInfo
*make_rel_from_joinlist(PlannerInfo
*root
, List
*joinlist
);
145 static bool subquery_is_pushdown_safe(Query
*subquery
, Query
*topquery
,
146 pushdown_safety_info
*safetyInfo
);
147 static bool recurse_pushdown_safe(Node
*setOp
, Query
*topquery
,
148 pushdown_safety_info
*safetyInfo
);
149 static void check_output_expressions(Query
*subquery
,
150 pushdown_safety_info
*safetyInfo
);
151 static void compare_tlist_datatypes(List
*tlist
, List
*colTypes
,
152 pushdown_safety_info
*safetyInfo
);
153 static bool targetIsInAllPartitionLists(TargetEntry
*tle
, Query
*query
);
154 static pushdown_safe_type
qual_is_pushdown_safe(Query
*subquery
, Index rti
,
156 pushdown_safety_info
*safetyInfo
);
157 static void subquery_push_qual(Query
*subquery
,
158 RangeTblEntry
*rte
, Index rti
, Node
*qual
);
159 static void recurse_push_qual(Node
*setOp
, Query
*topquery
,
160 RangeTblEntry
*rte
, Index rti
, Node
*qual
);
161 static void remove_unused_subquery_outputs(Query
*subquery
, RelOptInfo
*rel
,
162 Bitmapset
*extra_used_attrs
);
167 * Finds all possible access paths for executing a query, returning a
168 * single rel that represents the join of all base rels in the query.
171 make_one_rel(PlannerInfo
*root
, List
*joinlist
)
177 /* Mark base rels as to whether we care about fast-start plans */
178 set_base_rel_consider_startup(root
);
181 * Compute size estimates and consider_parallel flags for each base rel.
183 set_base_rel_sizes(root
);
186 * We should now have size estimates for every actual table involved in
187 * the query, and we also know which if any have been deleted from the
188 * query by join removal, pruned by partition pruning, or eliminated by
189 * constraint exclusion. So we can now compute total_table_pages.
191 * Note that appendrels are not double-counted here, even though we don't
192 * bother to distinguish RelOptInfos for appendrel parents, because the
193 * parents will have pages = 0.
195 * XXX if a table is self-joined, we will count it once per appearance,
196 * which perhaps is the wrong thing ... but that's not completely clear,
197 * and detecting self-joins here is difficult, so ignore it for now.
200 for (rti
= 1; rti
< root
->simple_rel_array_size
; rti
++)
202 RelOptInfo
*brel
= root
->simple_rel_array
[rti
];
204 /* there may be empty slots corresponding to non-baserel RTEs */
208 Assert(brel
->relid
== rti
); /* sanity check on array */
210 if (IS_DUMMY_REL(brel
))
213 if (IS_SIMPLE_REL(brel
))
214 total_pages
+= (double) brel
->pages
;
216 root
->total_table_pages
= total_pages
;
219 * Generate access paths for each base rel.
221 set_base_rel_pathlists(root
);
224 * Generate access paths for the entire join tree.
226 rel
= make_rel_from_joinlist(root
, joinlist
);
229 * The result should join all and only the query's base + outer-join rels.
231 Assert(bms_equal(rel
->relids
, root
->all_query_rels
));
237 * set_base_rel_consider_startup
238 * Set the consider_[param_]startup flags for each base-relation entry.
240 * For the moment, we only deal with consider_param_startup here; because the
241 * logic for consider_startup is pretty trivial and is the same for every base
242 * relation, we just let build_simple_rel() initialize that flag correctly to
243 * start with. If that logic ever gets more complicated it would probably
244 * be better to move it here.
247 set_base_rel_consider_startup(PlannerInfo
*root
)
250 * Since parameterized paths can only be used on the inside of a nestloop
251 * join plan, there is usually little value in considering fast-start
252 * plans for them. However, for relations that are on the RHS of a SEMI
253 * or ANTI join, a fast-start plan can be useful because we're only going
254 * to care about fetching one tuple anyway.
256 * To minimize growth of planning time, we currently restrict this to
257 * cases where the RHS is a single base relation, not a join; there is no
258 * provision for consider_param_startup to get set at all on joinrels.
259 * Also we don't worry about appendrels. costsize.c's costing rules for
260 * nestloop semi/antijoins don't consider such cases either.
264 foreach(lc
, root
->join_info_list
)
266 SpecialJoinInfo
*sjinfo
= (SpecialJoinInfo
*) lfirst(lc
);
269 if ((sjinfo
->jointype
== JOIN_SEMI
|| sjinfo
->jointype
== JOIN_ANTI
) &&
270 bms_get_singleton_member(sjinfo
->syn_righthand
, &varno
))
272 RelOptInfo
*rel
= find_base_rel(root
, varno
);
274 rel
->consider_param_startup
= true;
281 * Set the size estimates (rows and widths) for each base-relation entry.
282 * Also determine whether to consider parallel paths for base relations.
284 * We do this in a separate pass over the base rels so that rowcount
285 * estimates are available for parameterized path generation, and also so
286 * that each rel's consider_parallel flag is set correctly before we begin to
290 set_base_rel_sizes(PlannerInfo
*root
)
294 for (rti
= 1; rti
< root
->simple_rel_array_size
; rti
++)
296 RelOptInfo
*rel
= root
->simple_rel_array
[rti
];
299 /* there may be empty slots corresponding to non-baserel RTEs */
303 Assert(rel
->relid
== rti
); /* sanity check on array */
305 /* ignore RTEs that are "other rels" */
306 if (rel
->reloptkind
!= RELOPT_BASEREL
)
309 rte
= root
->simple_rte_array
[rti
];
312 * If parallelism is allowable for this query in general, see whether
313 * it's allowable for this rel in particular. We have to do this
314 * before set_rel_size(), because (a) if this rel is an inheritance
315 * parent, set_append_rel_size() will use and perhaps change the rel's
316 * consider_parallel flag, and (b) for some RTE types, set_rel_size()
317 * goes ahead and makes paths immediately.
319 if (root
->glob
->parallelModeOK
)
320 set_rel_consider_parallel(root
, rel
, rte
);
322 set_rel_size(root
, rel
, rti
, rte
);
327 * set_base_rel_pathlists
328 * Finds all paths available for scanning each base-relation entry.
329 * Sequential scan and any available indices are considered.
330 * Each useful path is attached to its relation's 'pathlist' field.
333 set_base_rel_pathlists(PlannerInfo
*root
)
337 for (rti
= 1; rti
< root
->simple_rel_array_size
; rti
++)
339 RelOptInfo
*rel
= root
->simple_rel_array
[rti
];
341 /* there may be empty slots corresponding to non-baserel RTEs */
345 Assert(rel
->relid
== rti
); /* sanity check on array */
347 /* ignore RTEs that are "other rels" */
348 if (rel
->reloptkind
!= RELOPT_BASEREL
)
351 set_rel_pathlist(root
, rel
, rti
, root
->simple_rte_array
[rti
]);
357 * Set size estimates for a base relation
360 set_rel_size(PlannerInfo
*root
, RelOptInfo
*rel
,
361 Index rti
, RangeTblEntry
*rte
)
363 if (rel
->reloptkind
== RELOPT_BASEREL
&&
364 relation_excluded_by_constraints(root
, rel
, rte
))
367 * We proved we don't need to scan the rel via constraint exclusion,
368 * so set up a single dummy path for it. Here we only check this for
369 * regular baserels; if it's an otherrel, CE was already checked in
370 * set_append_rel_size().
372 * In this case, we go ahead and set up the relation's path right away
373 * instead of leaving it for set_rel_pathlist to do. This is because
374 * we don't have a convention for marking a rel as dummy except by
375 * assigning a dummy path to it.
377 set_dummy_rel_pathlist(rel
);
381 /* It's an "append relation", process accordingly */
382 set_append_rel_size(root
, rel
, rti
, rte
);
386 switch (rel
->rtekind
)
389 if (rte
->relkind
== RELKIND_FOREIGN_TABLE
)
392 set_foreign_size(root
, rel
, rte
);
394 else if (rte
->relkind
== RELKIND_PARTITIONED_TABLE
)
397 * We could get here if asked to scan a partitioned table
398 * with ONLY. In that case we shouldn't scan any of the
399 * partitions, so mark it as a dummy rel.
401 set_dummy_rel_pathlist(rel
);
403 else if (rte
->tablesample
!= NULL
)
405 /* Sampled relation */
406 set_tablesample_rel_size(root
, rel
, rte
);
411 set_plain_rel_size(root
, rel
, rte
);
417 * Subqueries don't support making a choice between
418 * parameterized and unparameterized paths, so just go ahead
419 * and build their paths immediately.
421 set_subquery_pathlist(root
, rel
, rti
, rte
);
424 set_function_size_estimates(root
, rel
);
427 set_tablefunc_size_estimates(root
, rel
);
430 set_values_size_estimates(root
, rel
);
435 * CTEs don't support making a choice between parameterized
436 * and unparameterized paths, so just go ahead and build their
439 if (rte
->self_reference
)
440 set_worktable_pathlist(root
, rel
, rte
);
442 set_cte_pathlist(root
, rel
, rte
);
444 case RTE_NAMEDTUPLESTORE
:
445 /* Might as well just build the path immediately */
446 set_namedtuplestore_pathlist(root
, rel
, rte
);
449 /* Might as well just build the path immediately */
450 set_result_pathlist(root
, rel
, rte
);
453 elog(ERROR
, "unexpected rtekind: %d", (int) rel
->rtekind
);
459 * We insist that all non-dummy rels have a nonzero rowcount estimate.
461 Assert(rel
->rows
> 0 || IS_DUMMY_REL(rel
));
466 * Build access paths for a base relation
469 set_rel_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
470 Index rti
, RangeTblEntry
*rte
)
472 if (IS_DUMMY_REL(rel
))
474 /* We already proved the relation empty, so nothing more to do */
478 /* It's an "append relation", process accordingly */
479 set_append_rel_pathlist(root
, rel
, rti
, rte
);
483 switch (rel
->rtekind
)
486 if (rte
->relkind
== RELKIND_FOREIGN_TABLE
)
489 set_foreign_pathlist(root
, rel
, rte
);
491 else if (rte
->tablesample
!= NULL
)
493 /* Sampled relation */
494 set_tablesample_rel_pathlist(root
, rel
, rte
);
499 set_plain_rel_pathlist(root
, rel
, rte
);
503 /* Subquery --- fully handled during set_rel_size */
507 set_function_pathlist(root
, rel
, rte
);
511 set_tablefunc_pathlist(root
, rel
, rte
);
515 set_values_pathlist(root
, rel
, rte
);
518 /* CTE reference --- fully handled during set_rel_size */
520 case RTE_NAMEDTUPLESTORE
:
521 /* tuplestore reference --- fully handled during set_rel_size */
524 /* simple Result --- fully handled during set_rel_size */
527 elog(ERROR
, "unexpected rtekind: %d", (int) rel
->rtekind
);
533 * Allow a plugin to editorialize on the set of Paths for this base
534 * relation. It could add new paths (such as CustomPaths) by calling
535 * add_path(), or add_partial_path() if parallel aware. It could also
536 * delete or modify paths added by the core code.
538 if (set_rel_pathlist_hook
)
539 (*set_rel_pathlist_hook
) (root
, rel
, rti
, rte
);
542 * If this is a baserel, we should normally consider gathering any partial
543 * paths we may have created for it. We have to do this after calling the
544 * set_rel_pathlist_hook, else it cannot add partial paths to be included
547 * However, if this is an inheritance child, skip it. Otherwise, we could
548 * end up with a very large number of gather nodes, each trying to grab
549 * its own pool of workers. Instead, we'll consider gathering partial
550 * paths for the parent appendrel.
552 * Also, if this is the topmost scan/join rel, we postpone gathering until
553 * the final scan/join targetlist is available (see grouping_planner).
555 if (rel
->reloptkind
== RELOPT_BASEREL
&&
556 !bms_equal(rel
->relids
, root
->all_query_rels
))
557 generate_useful_gather_paths(root
, rel
, false);
559 /* Now find the cheapest of the paths for this rel */
562 #ifdef OPTIMIZER_DEBUG
569 * Set size estimates for a plain relation (no subquery, no inheritance)
572 set_plain_rel_size(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
575 * Test any partial indexes of rel for applicability. We must do this
576 * first since partial unique indexes can affect size estimates.
578 check_index_predicates(root
, rel
);
580 /* Mark rel with estimated output rows, width, etc */
581 set_baserel_size_estimates(root
, rel
);
585 * If this relation could possibly be scanned from within a worker, then set
586 * its consider_parallel flag.
589 set_rel_consider_parallel(PlannerInfo
*root
, RelOptInfo
*rel
,
593 * The flag has previously been initialized to false, so we can just
594 * return if it becomes clear that we can't safely set it.
596 Assert(!rel
->consider_parallel
);
598 /* Don't call this if parallelism is disallowed for the entire query. */
599 Assert(root
->glob
->parallelModeOK
);
601 /* This should only be called for baserels and appendrel children. */
602 Assert(IS_SIMPLE_REL(rel
));
604 /* Assorted checks based on rtekind. */
605 switch (rte
->rtekind
)
610 * Currently, parallel workers can't access the leader's temporary
611 * tables. We could possibly relax this if we wrote all of its
612 * local buffers at the start of the query and made no changes
613 * thereafter (maybe we could allow hint bit changes), and if we
614 * taught the workers to read them. Writing a large number of
615 * temporary buffers could be expensive, though, and we don't have
616 * the rest of the necessary infrastructure right now anyway. So
617 * for now, bail out if we see a temporary table.
619 if (get_rel_persistence(rte
->relid
) == RELPERSISTENCE_TEMP
)
623 * Table sampling can be pushed down to workers if the sample
624 * function and its arguments are safe.
626 if (rte
->tablesample
!= NULL
)
628 char proparallel
= func_parallel(rte
->tablesample
->tsmhandler
);
630 if (proparallel
!= PROPARALLEL_SAFE
)
632 if (!is_parallel_safe(root
, (Node
*) rte
->tablesample
->args
))
637 * Ask FDWs whether they can support performing a ForeignScan
638 * within a worker. Most often, the answer will be no. For
639 * example, if the nature of the FDW is such that it opens a TCP
640 * connection with a remote server, each parallel worker would end
641 * up with a separate connection, and these connections might not
642 * be appropriately coordinated between workers and the leader.
644 if (rte
->relkind
== RELKIND_FOREIGN_TABLE
)
646 Assert(rel
->fdwroutine
);
647 if (!rel
->fdwroutine
->IsForeignScanParallelSafe
)
649 if (!rel
->fdwroutine
->IsForeignScanParallelSafe(root
, rel
, rte
))
654 * There are additional considerations for appendrels, which we'll
655 * deal with in set_append_rel_size and set_append_rel_pathlist.
656 * For now, just set consider_parallel based on the rel's own
657 * quals and targetlist.
664 * There's no intrinsic problem with scanning a subquery-in-FROM
665 * (as distinct from a SubPlan or InitPlan) in a parallel worker.
666 * If the subquery doesn't happen to have any parallel-safe paths,
667 * then flagging it as consider_parallel won't change anything,
668 * but that's true for plain tables, too. We must set
669 * consider_parallel based on the rel's own quals and targetlist,
670 * so that if a subquery path is parallel-safe but the quals and
671 * projection we're sticking onto it are not, we correctly mark
672 * the SubqueryScanPath as not parallel-safe. (Note that
673 * set_subquery_pathlist() might push some of these quals down
674 * into the subquery itself, but that doesn't change anything.)
676 * We can't push sub-select containing LIMIT/OFFSET to workers as
677 * there is no guarantee that the row order will be fully
678 * deterministic, and applying LIMIT/OFFSET will lead to
679 * inconsistent results at the top-level. (In some cases, where
680 * the result is ordered, we could relax this restriction. But it
681 * doesn't currently seem worth expending extra effort to do so.)
684 Query
*subquery
= castNode(Query
, rte
->subquery
);
686 if (limit_needed(subquery
))
692 /* Shouldn't happen; we're only considering baserels here. */
697 /* Check for parallel-restricted functions. */
698 if (!is_parallel_safe(root
, (Node
*) rte
->functions
))
703 /* not parallel safe */
707 /* Check for parallel-restricted functions. */
708 if (!is_parallel_safe(root
, (Node
*) rte
->values_lists
))
715 * CTE tuplestores aren't shared among parallel workers, so we
716 * force all CTE scans to happen in the leader. Also, populating
717 * the CTE would require executing a subplan that's not available
718 * in the worker, might be parallel-restricted, and must get
719 * executed only once.
723 case RTE_NAMEDTUPLESTORE
:
726 * tuplestore cannot be shared, at least without more
727 * infrastructure to support that.
732 /* RESULT RTEs, in themselves, are no problem. */
737 * If there's anything in baserestrictinfo that's parallel-restricted, we
738 * give up on parallelizing access to this relation. We could consider
739 * instead postponing application of the restricted quals until we're
740 * above all the parallelism in the plan tree, but it's not clear that
741 * that would be a win in very many cases, and it might be tricky to make
742 * outer join clauses work correctly. It would likely break equivalence
745 if (!is_parallel_safe(root
, (Node
*) rel
->baserestrictinfo
))
749 * Likewise, if the relation's outputs are not parallel-safe, give up.
750 * (Usually, they're just Vars, but sometimes they're not.)
752 if (!is_parallel_safe(root
, (Node
*) rel
->reltarget
->exprs
))
755 /* We have a winner. */
756 rel
->consider_parallel
= true;
760 * set_plain_rel_pathlist
761 * Build access paths for a plain relation (no subquery, no inheritance)
764 set_plain_rel_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
766 Relids required_outer
;
769 * We don't support pushing join clauses into the quals of a seqscan, but
770 * it could still have required parameterization due to LATERAL refs in
773 required_outer
= rel
->lateral_relids
;
775 /* Consider sequential scan */
776 add_path(rel
, create_seqscan_path(root
, rel
, required_outer
, 0));
778 /* If appropriate, consider parallel sequential scan */
779 if (rel
->consider_parallel
&& required_outer
== NULL
)
780 create_plain_partial_paths(root
, rel
);
782 /* Consider index scans */
783 create_index_paths(root
, rel
);
785 /* Consider TID scans */
786 create_tidscan_paths(root
, rel
);
790 * create_plain_partial_paths
791 * Build partial access paths for parallel scan of a plain relation
794 create_plain_partial_paths(PlannerInfo
*root
, RelOptInfo
*rel
)
796 int parallel_workers
;
798 parallel_workers
= compute_parallel_worker(rel
, rel
->pages
, -1,
799 max_parallel_workers_per_gather
);
801 /* If any limit was set to zero, the user doesn't want a parallel scan. */
802 if (parallel_workers
<= 0)
805 /* Add an unordered partial path based on a parallel sequential scan. */
806 add_partial_path(rel
, create_seqscan_path(root
, rel
, NULL
, parallel_workers
));
810 * set_tablesample_rel_size
811 * Set size estimates for a sampled relation
814 set_tablesample_rel_size(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
816 TableSampleClause
*tsc
= rte
->tablesample
;
822 * Test any partial indexes of rel for applicability. We must do this
823 * first since partial unique indexes can affect size estimates.
825 check_index_predicates(root
, rel
);
828 * Call the sampling method's estimation function to estimate the number
829 * of pages it will read and the number of tuples it will return. (Note:
830 * we assume the function returns sane values.)
832 tsm
= GetTsmRoutine(tsc
->tsmhandler
);
833 tsm
->SampleScanGetSampleSize(root
, rel
, tsc
->args
,
837 * For the moment, because we will only consider a SampleScan path for the
838 * rel, it's okay to just overwrite the pages and tuples estimates for the
839 * whole relation. If we ever consider multiple path types for sampled
840 * rels, we'll need more complication.
843 rel
->tuples
= tuples
;
845 /* Mark rel with estimated output rows, width, etc */
846 set_baserel_size_estimates(root
, rel
);
850 * set_tablesample_rel_pathlist
851 * Build access paths for a sampled relation
854 set_tablesample_rel_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
856 Relids required_outer
;
860 * We don't support pushing join clauses into the quals of a samplescan,
861 * but it could still have required parameterization due to LATERAL refs
862 * in its tlist or TABLESAMPLE arguments.
864 required_outer
= rel
->lateral_relids
;
866 /* Consider sampled scan */
867 path
= create_samplescan_path(root
, rel
, required_outer
);
870 * If the sampling method does not support repeatable scans, we must avoid
871 * plans that would scan the rel multiple times. Ideally, we'd simply
872 * avoid putting the rel on the inside of a nestloop join; but adding such
873 * a consideration to the planner seems like a great deal of complication
874 * to support an uncommon usage of second-rate sampling methods. Instead,
875 * if there is a risk that the query might perform an unsafe join, just
876 * wrap the SampleScan in a Materialize node. We can check for joins by
877 * counting the membership of all_query_rels (note that this correctly
878 * counts inheritance trees as single rels). If we're inside a subquery,
879 * we can't easily check whether a join might occur in the outer query, so
880 * just assume one is possible.
882 * GetTsmRoutine is relatively expensive compared to the other tests here,
883 * so check repeatable_across_scans last, even though that's a bit odd.
885 if ((root
->query_level
> 1 ||
886 bms_membership(root
->all_query_rels
) != BMS_SINGLETON
) &&
887 !(GetTsmRoutine(rte
->tablesample
->tsmhandler
)->repeatable_across_scans
))
889 path
= (Path
*) create_material_path(rel
, path
);
894 /* For the moment, at least, there are no other paths to consider */
899 * Set size estimates for a foreign table RTE
902 set_foreign_size(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
904 /* Mark rel with estimated output rows, width, etc */
905 set_foreign_size_estimates(root
, rel
);
907 /* Let FDW adjust the size estimates, if it can */
908 rel
->fdwroutine
->GetForeignRelSize(root
, rel
, rte
->relid
);
910 /* ... but do not let it set the rows estimate to zero */
911 rel
->rows
= clamp_row_est(rel
->rows
);
914 * Also, make sure rel->tuples is not insane relative to rel->rows.
915 * Notably, this ensures sanity if pg_class.reltuples contains -1 and the
916 * FDW doesn't do anything to replace that.
918 rel
->tuples
= Max(rel
->tuples
, rel
->rows
);
922 * set_foreign_pathlist
923 * Build access paths for a foreign table RTE
926 set_foreign_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
928 /* Call the FDW's GetForeignPaths function to generate path(s) */
929 rel
->fdwroutine
->GetForeignPaths(root
, rel
, rte
->relid
);
933 * set_append_rel_size
934 * Set size estimates for a simple "append relation"
936 * The passed-in rel and RTE represent the entire append relation. The
937 * relation's contents are computed by appending together the output of the
938 * individual member relations. Note that in the non-partitioned inheritance
939 * case, the first member relation is actually the same table as is mentioned
940 * in the parent RTE ... but it has a different RTE and RelOptInfo. This is
941 * a good thing because their outputs are not the same size.
944 set_append_rel_size(PlannerInfo
*root
, RelOptInfo
*rel
,
945 Index rti
, RangeTblEntry
*rte
)
947 int parentRTindex
= rti
;
948 bool has_live_children
;
951 double *parent_attrsizes
;
955 /* Guard against stack overflow due to overly deep inheritance tree. */
958 Assert(IS_SIMPLE_REL(rel
));
961 * If this is a partitioned baserel, set the consider_partitionwise_join
962 * flag; currently, we only consider partitionwise joins with the baserel
963 * if its targetlist doesn't contain a whole-row Var.
965 if (enable_partitionwise_join
&&
966 rel
->reloptkind
== RELOPT_BASEREL
&&
967 rte
->relkind
== RELKIND_PARTITIONED_TABLE
&&
968 bms_is_empty(rel
->attr_needed
[InvalidAttrNumber
- rel
->min_attr
]))
969 rel
->consider_partitionwise_join
= true;
972 * Initialize to compute size estimates for whole append relation.
974 * We handle width estimates by weighting the widths of different child
975 * rels proportionally to their number of rows. This is sensible because
976 * the use of width estimates is mainly to compute the total relation
977 * "footprint" if we have to sort or hash it. To do this, we sum the
978 * total equivalent size (in "double" arithmetic) and then divide by the
979 * total rowcount estimate. This is done separately for the total rel
980 * width and each attribute.
982 * Note: if you consider changing this logic, beware that child rels could
983 * have zero rows and/or width, if they were excluded by constraints.
985 has_live_children
= false;
988 nattrs
= rel
->max_attr
- rel
->min_attr
+ 1;
989 parent_attrsizes
= (double *) palloc0(nattrs
* sizeof(double));
991 foreach(l
, root
->append_rel_list
)
993 AppendRelInfo
*appinfo
= (AppendRelInfo
*) lfirst(l
);
995 RangeTblEntry
*childRTE
;
996 RelOptInfo
*childrel
;
998 ListCell
*parentvars
;
1002 /* append_rel_list contains all append rels; ignore others */
1003 if (appinfo
->parent_relid
!= parentRTindex
)
1006 childRTindex
= appinfo
->child_relid
;
1007 childRTE
= root
->simple_rte_array
[childRTindex
];
1010 * The child rel's RelOptInfo was already created during
1011 * add_other_rels_to_query.
1013 childrel
= find_base_rel(root
, childRTindex
);
1014 Assert(childrel
->reloptkind
== RELOPT_OTHER_MEMBER_REL
);
1016 /* We may have already proven the child to be dummy. */
1017 if (IS_DUMMY_REL(childrel
))
1021 * We have to copy the parent's targetlist and quals to the child,
1022 * with appropriate substitution of variables. However, the
1023 * baserestrictinfo quals were already copied/substituted when the
1024 * child RelOptInfo was built. So we don't need any additional setup
1025 * before applying constraint exclusion.
1027 if (relation_excluded_by_constraints(root
, childrel
, childRTE
))
1030 * This child need not be scanned, so we can omit it from the
1033 set_dummy_rel_pathlist(childrel
);
1038 * Constraint exclusion failed, so copy the parent's join quals and
1039 * targetlist to the child, with appropriate variable substitutions.
1041 * We skip join quals that came from above outer joins that can null
1042 * this rel, since they would be of no value while generating paths
1043 * for the child. This saves some effort while processing the child
1044 * rel, and it also avoids an implementation restriction in
1045 * adjust_appendrel_attrs (it can't apply nullingrels to a non-Var).
1048 foreach(lc
, rel
->joininfo
)
1050 RestrictInfo
*rinfo
= (RestrictInfo
*) lfirst(lc
);
1052 if (!bms_overlap(rinfo
->clause_relids
, rel
->nulling_relids
))
1053 childrinfos
= lappend(childrinfos
,
1054 adjust_appendrel_attrs(root
,
1058 childrel
->joininfo
= childrinfos
;
1061 * Now for the child's targetlist.
1063 * NB: the resulting childrel->reltarget->exprs may contain arbitrary
1064 * expressions, which otherwise would not occur in a rel's targetlist.
1065 * Code that might be looking at an appendrel child must cope with
1066 * such. (Normally, a rel's targetlist would only include Vars and
1067 * PlaceHolderVars.) XXX we do not bother to update the cost or width
1068 * fields of childrel->reltarget; not clear if that would be useful.
1070 childrel
->reltarget
->exprs
= (List
*)
1071 adjust_appendrel_attrs(root
,
1072 (Node
*) rel
->reltarget
->exprs
,
1076 * We have to make child entries in the EquivalenceClass data
1077 * structures as well. This is needed either if the parent
1078 * participates in some eclass joins (because we will want to consider
1079 * inner-indexscan joins on the individual children) or if the parent
1080 * has useful pathkeys (because we should try to build MergeAppend
1081 * paths that produce those sort orderings).
1083 if (rel
->has_eclass_joins
|| has_useful_pathkeys(root
, rel
))
1084 add_child_rel_equivalences(root
, appinfo
, rel
, childrel
);
1085 childrel
->has_eclass_joins
= rel
->has_eclass_joins
;
1088 * Note: we could compute appropriate attr_needed data for the child's
1089 * variables, by transforming the parent's attr_needed through the
1090 * translated_vars mapping. However, currently there's no need
1091 * because attr_needed is only examined for base relations not
1092 * otherrels. So we just leave the child's attr_needed empty.
1096 * If we consider partitionwise joins with the parent rel, do the same
1097 * for partitioned child rels.
1099 * Note: here we abuse the consider_partitionwise_join flag by setting
1100 * it for child rels that are not themselves partitioned. We do so to
1101 * tell try_partitionwise_join() that the child rel is sufficiently
1102 * valid to be used as a per-partition input, even if it later gets
1103 * proven to be dummy. (It's not usable until we've set up the
1104 * reltarget and EC entries, which we just did.)
1106 if (rel
->consider_partitionwise_join
)
1107 childrel
->consider_partitionwise_join
= true;
1110 * If parallelism is allowable for this query in general, see whether
1111 * it's allowable for this childrel in particular. But if we've
1112 * already decided the appendrel is not parallel-safe as a whole,
1113 * there's no point in considering parallelism for this child. For
1114 * consistency, do this before calling set_rel_size() for the child.
1116 if (root
->glob
->parallelModeOK
&& rel
->consider_parallel
)
1117 set_rel_consider_parallel(root
, childrel
, childRTE
);
1120 * Compute the child's size.
1122 set_rel_size(root
, childrel
, childRTindex
, childRTE
);
1125 * It is possible that constraint exclusion detected a contradiction
1126 * within a child subquery, even though we didn't prove one above. If
1127 * so, we can skip this child.
1129 if (IS_DUMMY_REL(childrel
))
1132 /* We have at least one live child. */
1133 has_live_children
= true;
1136 * If any live child is not parallel-safe, treat the whole appendrel
1137 * as not parallel-safe. In future we might be able to generate plans
1138 * in which some children are farmed out to workers while others are
1139 * not; but we don't have that today, so it's a waste to consider
1140 * partial paths anywhere in the appendrel unless it's all safe.
1141 * (Child rels visited before this one will be unmarked in
1142 * set_append_rel_pathlist().)
1144 if (!childrel
->consider_parallel
)
1145 rel
->consider_parallel
= false;
1148 * Accumulate size information from each live child.
1150 Assert(childrel
->rows
> 0);
1152 parent_rows
+= childrel
->rows
;
1153 parent_size
+= childrel
->reltarget
->width
* childrel
->rows
;
1156 * Accumulate per-column estimates too. We need not do anything for
1157 * PlaceHolderVars in the parent list. If child expression isn't a
1158 * Var, or we didn't record a width estimate for it, we have to fall
1159 * back on a datatype-based estimate.
1161 * By construction, child's targetlist is 1-to-1 with parent's.
1163 forboth(parentvars
, rel
->reltarget
->exprs
,
1164 childvars
, childrel
->reltarget
->exprs
)
1166 Var
*parentvar
= (Var
*) lfirst(parentvars
);
1167 Node
*childvar
= (Node
*) lfirst(childvars
);
1169 if (IsA(parentvar
, Var
) && parentvar
->varno
== parentRTindex
)
1171 int pndx
= parentvar
->varattno
- rel
->min_attr
;
1172 int32 child_width
= 0;
1174 if (IsA(childvar
, Var
) &&
1175 ((Var
*) childvar
)->varno
== childrel
->relid
)
1177 int cndx
= ((Var
*) childvar
)->varattno
- childrel
->min_attr
;
1179 child_width
= childrel
->attr_widths
[cndx
];
1181 if (child_width
<= 0)
1182 child_width
= get_typavgwidth(exprType(childvar
),
1183 exprTypmod(childvar
));
1184 Assert(child_width
> 0);
1185 parent_attrsizes
[pndx
] += child_width
* childrel
->rows
;
1190 if (has_live_children
)
1193 * Save the finished size estimates.
1197 Assert(parent_rows
> 0);
1198 rel
->rows
= parent_rows
;
1199 rel
->reltarget
->width
= rint(parent_size
/ parent_rows
);
1200 for (i
= 0; i
< nattrs
; i
++)
1201 rel
->attr_widths
[i
] = rint(parent_attrsizes
[i
] / parent_rows
);
1204 * Set "raw tuples" count equal to "rows" for the appendrel; needed
1205 * because some places assume rel->tuples is valid for any baserel.
1207 rel
->tuples
= parent_rows
;
1210 * Note that we leave rel->pages as zero; this is important to avoid
1211 * double-counting the appendrel tree in total_table_pages.
1217 * All children were excluded by constraints, so mark the whole
1218 * appendrel dummy. We must do this in this phase so that the rel's
1219 * dummy-ness is visible when we generate paths for other rels.
1221 set_dummy_rel_pathlist(rel
);
1224 pfree(parent_attrsizes
);
1228 * set_append_rel_pathlist
1229 * Build access paths for an "append relation"
1232 set_append_rel_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
1233 Index rti
, RangeTblEntry
*rte
)
1235 int parentRTindex
= rti
;
1236 List
*live_childrels
= NIL
;
1240 * Generate access paths for each member relation, and remember the
1241 * non-dummy children.
1243 foreach(l
, root
->append_rel_list
)
1245 AppendRelInfo
*appinfo
= (AppendRelInfo
*) lfirst(l
);
1247 RangeTblEntry
*childRTE
;
1248 RelOptInfo
*childrel
;
1250 /* append_rel_list contains all append rels; ignore others */
1251 if (appinfo
->parent_relid
!= parentRTindex
)
1254 /* Re-locate the child RTE and RelOptInfo */
1255 childRTindex
= appinfo
->child_relid
;
1256 childRTE
= root
->simple_rte_array
[childRTindex
];
1257 childrel
= root
->simple_rel_array
[childRTindex
];
1260 * If set_append_rel_size() decided the parent appendrel was
1261 * parallel-unsafe at some point after visiting this child rel, we
1262 * need to propagate the unsafety marking down to the child, so that
1263 * we don't generate useless partial paths for it.
1265 if (!rel
->consider_parallel
)
1266 childrel
->consider_parallel
= false;
1269 * Compute the child's access paths.
1271 set_rel_pathlist(root
, childrel
, childRTindex
, childRTE
);
1274 * If child is dummy, ignore it.
1276 if (IS_DUMMY_REL(childrel
))
1280 * Child is live, so add it to the live_childrels list for use below.
1282 live_childrels
= lappend(live_childrels
, childrel
);
1285 /* Add paths to the append relation. */
1286 add_paths_to_append_rel(root
, rel
, live_childrels
);
1291 * add_paths_to_append_rel
1292 * Generate paths for the given append relation given the set of non-dummy
1295 * The function collects all parameterizations and orderings supported by the
1296 * non-dummy children. For every such parameterization or ordering, it creates
1297 * an append path collecting one path from each non-dummy child with given
1298 * parameterization or ordering. Similarly it collects partial paths from
1299 * non-dummy children to create partial append paths.
1302 add_paths_to_append_rel(PlannerInfo
*root
, RelOptInfo
*rel
,
1303 List
*live_childrels
)
1305 List
*subpaths
= NIL
;
1306 bool subpaths_valid
= true;
1307 List
*startup_subpaths
= NIL
;
1308 bool startup_subpaths_valid
= true;
1309 List
*partial_subpaths
= NIL
;
1310 List
*pa_partial_subpaths
= NIL
;
1311 List
*pa_nonpartial_subpaths
= NIL
;
1312 bool partial_subpaths_valid
= true;
1313 bool pa_subpaths_valid
;
1314 List
*all_child_pathkeys
= NIL
;
1315 List
*all_child_outers
= NIL
;
1317 double partial_rows
= -1;
1319 /* If appropriate, consider parallel append */
1320 pa_subpaths_valid
= enable_parallel_append
&& rel
->consider_parallel
;
1323 * For every non-dummy child, remember the cheapest path. Also, identify
1324 * all pathkeys (orderings) and parameterizations (required_outer sets)
1325 * available for the non-dummy member relations.
1327 foreach(l
, live_childrels
)
1329 RelOptInfo
*childrel
= lfirst(l
);
1331 Path
*cheapest_partial_path
= NULL
;
1334 * If child has an unparameterized cheapest-total path, add that to
1335 * the unparameterized Append path we are constructing for the parent.
1336 * If not, there's no workable unparameterized path.
1338 * With partitionwise aggregates, the child rel's pathlist may be
1339 * empty, so don't assume that a path exists here.
1341 if (childrel
->pathlist
!= NIL
&&
1342 childrel
->cheapest_total_path
->param_info
== NULL
)
1343 accumulate_append_subpath(childrel
->cheapest_total_path
,
1346 subpaths_valid
= false;
1349 * When the planner is considering cheap startup plans, we'll also
1350 * collect all the cheapest_startup_paths (if set) and build an
1351 * AppendPath containing those as subpaths.
1353 if (rel
->consider_startup
&& childrel
->cheapest_startup_path
!= NULL
)
1355 /* cheapest_startup_path must not be a parameterized path. */
1356 Assert(childrel
->cheapest_startup_path
->param_info
== NULL
);
1357 accumulate_append_subpath(childrel
->cheapest_startup_path
,
1362 startup_subpaths_valid
= false;
1365 /* Same idea, but for a partial plan. */
1366 if (childrel
->partial_pathlist
!= NIL
)
1368 cheapest_partial_path
= linitial(childrel
->partial_pathlist
);
1369 accumulate_append_subpath(cheapest_partial_path
,
1370 &partial_subpaths
, NULL
);
1373 partial_subpaths_valid
= false;
1376 * Same idea, but for a parallel append mixing partial and non-partial
1379 if (pa_subpaths_valid
)
1381 Path
*nppath
= NULL
;
1384 get_cheapest_parallel_safe_total_inner(childrel
->pathlist
);
1386 if (cheapest_partial_path
== NULL
&& nppath
== NULL
)
1388 /* Neither a partial nor a parallel-safe path? Forget it. */
1389 pa_subpaths_valid
= false;
1391 else if (nppath
== NULL
||
1392 (cheapest_partial_path
!= NULL
&&
1393 cheapest_partial_path
->total_cost
< nppath
->total_cost
))
1395 /* Partial path is cheaper or the only option. */
1396 Assert(cheapest_partial_path
!= NULL
);
1397 accumulate_append_subpath(cheapest_partial_path
,
1398 &pa_partial_subpaths
,
1399 &pa_nonpartial_subpaths
);
1404 * Either we've got only a non-partial path, or we think that
1405 * a single backend can execute the best non-partial path
1406 * faster than all the parallel backends working together can
1407 * execute the best partial path.
1409 * It might make sense to be more aggressive here. Even if
1410 * the best non-partial path is more expensive than the best
1411 * partial path, it could still be better to choose the
1412 * non-partial path if there are several such paths that can
1413 * be given to different workers. For now, we don't try to
1416 accumulate_append_subpath(nppath
,
1417 &pa_nonpartial_subpaths
,
1423 * Collect lists of all the available path orderings and
1424 * parameterizations for all the children. We use these as a
1425 * heuristic to indicate which sort orderings and parameterizations we
1426 * should build Append and MergeAppend paths for.
1428 foreach(lcp
, childrel
->pathlist
)
1430 Path
*childpath
= (Path
*) lfirst(lcp
);
1431 List
*childkeys
= childpath
->pathkeys
;
1432 Relids childouter
= PATH_REQ_OUTER(childpath
);
1434 /* Unsorted paths don't contribute to pathkey list */
1435 if (childkeys
!= NIL
)
1440 /* Have we already seen this ordering? */
1441 foreach(lpk
, all_child_pathkeys
)
1443 List
*existing_pathkeys
= (List
*) lfirst(lpk
);
1445 if (compare_pathkeys(existing_pathkeys
,
1446 childkeys
) == PATHKEYS_EQUAL
)
1454 /* No, so add it to all_child_pathkeys */
1455 all_child_pathkeys
= lappend(all_child_pathkeys
,
1460 /* Unparameterized paths don't contribute to param-set list */
1466 /* Have we already seen this param set? */
1467 foreach(lco
, all_child_outers
)
1469 Relids existing_outers
= (Relids
) lfirst(lco
);
1471 if (bms_equal(existing_outers
, childouter
))
1479 /* No, so add it to all_child_outers */
1480 all_child_outers
= lappend(all_child_outers
,
1488 * If we found unparameterized paths for all children, build an unordered,
1489 * unparameterized Append path for the rel. (Note: this is correct even
1490 * if we have zero or one live subpath due to constraint exclusion.)
1493 add_path(rel
, (Path
*) create_append_path(root
, rel
, subpaths
, NIL
,
1494 NIL
, NULL
, 0, false,
1497 /* build an AppendPath for the cheap startup paths, if valid */
1498 if (startup_subpaths_valid
)
1499 add_path(rel
, (Path
*) create_append_path(root
, rel
, startup_subpaths
,
1500 NIL
, NIL
, NULL
, 0, false, -1));
1503 * Consider an append of unordered, unparameterized partial paths. Make
1504 * it parallel-aware if possible.
1506 if (partial_subpaths_valid
&& partial_subpaths
!= NIL
)
1508 AppendPath
*appendpath
;
1510 int parallel_workers
= 0;
1512 /* Find the highest number of workers requested for any subpath. */
1513 foreach(lc
, partial_subpaths
)
1515 Path
*path
= lfirst(lc
);
1517 parallel_workers
= Max(parallel_workers
, path
->parallel_workers
);
1519 Assert(parallel_workers
> 0);
1522 * If the use of parallel append is permitted, always request at least
1523 * log2(# of children) workers. We assume it can be useful to have
1524 * extra workers in this case because they will be spread out across
1525 * the children. The precise formula is just a guess, but we don't
1526 * want to end up with a radically different answer for a table with N
1527 * partitions vs. an unpartitioned table with the same data, so the
1528 * use of some kind of log-scaling here seems to make some sense.
1530 if (enable_parallel_append
)
1532 parallel_workers
= Max(parallel_workers
,
1533 pg_leftmost_one_pos32(list_length(live_childrels
)) + 1);
1534 parallel_workers
= Min(parallel_workers
,
1535 max_parallel_workers_per_gather
);
1537 Assert(parallel_workers
> 0);
1539 /* Generate a partial append path. */
1540 appendpath
= create_append_path(root
, rel
, NIL
, partial_subpaths
,
1541 NIL
, NULL
, parallel_workers
,
1542 enable_parallel_append
,
1546 * Make sure any subsequent partial paths use the same row count
1549 partial_rows
= appendpath
->path
.rows
;
1552 add_partial_path(rel
, (Path
*) appendpath
);
1556 * Consider a parallel-aware append using a mix of partial and non-partial
1557 * paths. (This only makes sense if there's at least one child which has
1558 * a non-partial path that is substantially cheaper than any partial path;
1559 * otherwise, we should use the append path added in the previous step.)
1561 if (pa_subpaths_valid
&& pa_nonpartial_subpaths
!= NIL
)
1563 AppendPath
*appendpath
;
1565 int parallel_workers
= 0;
1568 * Find the highest number of workers requested for any partial
1571 foreach(lc
, pa_partial_subpaths
)
1573 Path
*path
= lfirst(lc
);
1575 parallel_workers
= Max(parallel_workers
, path
->parallel_workers
);
1579 * Same formula here as above. It's even more important in this
1580 * instance because the non-partial paths won't contribute anything to
1581 * the planned number of parallel workers.
1583 parallel_workers
= Max(parallel_workers
,
1584 pg_leftmost_one_pos32(list_length(live_childrels
)) + 1);
1585 parallel_workers
= Min(parallel_workers
,
1586 max_parallel_workers_per_gather
);
1587 Assert(parallel_workers
> 0);
1589 appendpath
= create_append_path(root
, rel
, pa_nonpartial_subpaths
,
1590 pa_partial_subpaths
,
1591 NIL
, NULL
, parallel_workers
, true,
1593 add_partial_path(rel
, (Path
*) appendpath
);
1597 * Also build unparameterized ordered append paths based on the collected
1598 * list of child pathkeys.
1601 generate_orderedappend_paths(root
, rel
, live_childrels
,
1602 all_child_pathkeys
);
1605 * Build Append paths for each parameterization seen among the child rels.
1606 * (This may look pretty expensive, but in most cases of practical
1607 * interest, the child rels will expose mostly the same parameterizations,
1608 * so that not that many cases actually get considered here.)
1610 * The Append node itself cannot enforce quals, so all qual checking must
1611 * be done in the child paths. This means that to have a parameterized
1612 * Append path, we must have the exact same parameterization for each
1613 * child path; otherwise some children might be failing to check the
1614 * moved-down quals. To make them match up, we can try to increase the
1615 * parameterization of lesser-parameterized paths.
1617 foreach(l
, all_child_outers
)
1619 Relids required_outer
= (Relids
) lfirst(l
);
1622 /* Select the child paths for an Append with this parameterization */
1624 subpaths_valid
= true;
1625 foreach(lcr
, live_childrels
)
1627 RelOptInfo
*childrel
= (RelOptInfo
*) lfirst(lcr
);
1630 if (childrel
->pathlist
== NIL
)
1632 /* failed to make a suitable path for this child */
1633 subpaths_valid
= false;
1637 subpath
= get_cheapest_parameterized_child_path(root
,
1640 if (subpath
== NULL
)
1642 /* failed to make a suitable path for this child */
1643 subpaths_valid
= false;
1646 accumulate_append_subpath(subpath
, &subpaths
, NULL
);
1650 add_path(rel
, (Path
*)
1651 create_append_path(root
, rel
, subpaths
, NIL
,
1652 NIL
, required_outer
, 0, false,
1657 * When there is only a single child relation, the Append path can inherit
1658 * any ordering available for the child rel's path, so that it's useful to
1659 * consider ordered partial paths. Above we only considered the cheapest
1660 * partial path for each child, but let's also make paths using any
1661 * partial paths that have pathkeys.
1663 if (list_length(live_childrels
) == 1)
1665 RelOptInfo
*childrel
= (RelOptInfo
*) linitial(live_childrels
);
1667 /* skip the cheapest partial path, since we already used that above */
1668 for_each_from(l
, childrel
->partial_pathlist
, 1)
1670 Path
*path
= (Path
*) lfirst(l
);
1671 AppendPath
*appendpath
;
1673 /* skip paths with no pathkeys. */
1674 if (path
->pathkeys
== NIL
)
1677 appendpath
= create_append_path(root
, rel
, NIL
, list_make1(path
),
1679 path
->parallel_workers
, true,
1681 add_partial_path(rel
, (Path
*) appendpath
);
1687 * generate_orderedappend_paths
1688 * Generate ordered append paths for an append relation
1690 * Usually we generate MergeAppend paths here, but there are some special
1691 * cases where we can generate simple Append paths, because the subpaths
1692 * can provide tuples in the required order already.
1694 * We generate a path for each ordering (pathkey list) appearing in
1695 * all_child_pathkeys.
1697 * We consider both cheapest-startup and cheapest-total cases, ie, for each
1698 * interesting ordering, collect all the cheapest startup subpaths and all the
1699 * cheapest total paths, and build a suitable path for each case.
1701 * We don't currently generate any parameterized ordered paths here. While
1702 * it would not take much more code here to do so, it's very unclear that it
1703 * is worth the planning cycles to investigate such paths: there's little
1704 * use for an ordered path on the inside of a nestloop. In fact, it's likely
1705 * that the current coding of add_path would reject such paths out of hand,
1706 * because add_path gives no credit for sort ordering of parameterized paths,
1707 * and a parameterized MergeAppend is going to be more expensive than the
1708 * corresponding parameterized Append path. If we ever try harder to support
1709 * parameterized mergejoin plans, it might be worth adding support for
1710 * parameterized paths here to feed such joins. (See notes in
1711 * optimizer/README for why that might not ever happen, though.)
1714 generate_orderedappend_paths(PlannerInfo
*root
, RelOptInfo
*rel
,
1715 List
*live_childrels
,
1716 List
*all_child_pathkeys
)
1719 List
*partition_pathkeys
= NIL
;
1720 List
*partition_pathkeys_desc
= NIL
;
1721 bool partition_pathkeys_partial
= true;
1722 bool partition_pathkeys_desc_partial
= true;
1725 * Some partitioned table setups may allow us to use an Append node
1726 * instead of a MergeAppend. This is possible in cases such as RANGE
1727 * partitioned tables where it's guaranteed that an earlier partition must
1728 * contain rows which come earlier in the sort order. To detect whether
1729 * this is relevant, build pathkey descriptions of the partition ordering,
1730 * for both forward and reverse scans.
1732 if (rel
->part_scheme
!= NULL
&& IS_SIMPLE_REL(rel
) &&
1733 partitions_are_ordered(rel
->boundinfo
, rel
->live_parts
))
1735 partition_pathkeys
= build_partition_pathkeys(root
, rel
,
1736 ForwardScanDirection
,
1737 &partition_pathkeys_partial
);
1739 partition_pathkeys_desc
= build_partition_pathkeys(root
, rel
,
1740 BackwardScanDirection
,
1741 &partition_pathkeys_desc_partial
);
1744 * You might think we should truncate_useless_pathkeys here, but
1745 * allowing partition keys which are a subset of the query's pathkeys
1746 * can often be useful. For example, consider a table partitioned by
1747 * RANGE (a, b), and a query with ORDER BY a, b, c. If we have child
1748 * paths that can produce the a, b, c ordering (perhaps via indexes on
1749 * (a, b, c)) then it works to consider the appendrel output as
1750 * ordered by a, b, c.
1754 /* Now consider each interesting sort ordering */
1755 foreach(lcp
, all_child_pathkeys
)
1757 List
*pathkeys
= (List
*) lfirst(lcp
);
1758 List
*startup_subpaths
= NIL
;
1759 List
*total_subpaths
= NIL
;
1760 List
*fractional_subpaths
= NIL
;
1761 bool startup_neq_total
= false;
1762 bool match_partition_order
;
1763 bool match_partition_order_desc
;
1769 * Determine if this sort ordering matches any partition pathkeys we
1770 * have, for both ascending and descending partition order. If the
1771 * partition pathkeys happen to be contained in pathkeys then it still
1772 * works, as described above, providing that the partition pathkeys
1773 * are complete and not just a prefix of the partition keys. (In such
1774 * cases we'll be relying on the child paths to have sorted the
1775 * lower-order columns of the required pathkeys.)
1777 match_partition_order
=
1778 pathkeys_contained_in(pathkeys
, partition_pathkeys
) ||
1779 (!partition_pathkeys_partial
&&
1780 pathkeys_contained_in(partition_pathkeys
, pathkeys
));
1782 match_partition_order_desc
= !match_partition_order
&&
1783 (pathkeys_contained_in(pathkeys
, partition_pathkeys_desc
) ||
1784 (!partition_pathkeys_desc_partial
&&
1785 pathkeys_contained_in(partition_pathkeys_desc
, pathkeys
)));
1788 * When the required pathkeys match the reverse of the partition
1789 * order, we must build the list of paths in reverse starting with the
1790 * last matching partition first. We can get away without making any
1791 * special cases for this in the loop below by just looping backward
1792 * over the child relations in this case.
1794 if (match_partition_order_desc
)
1797 first_index
= list_length(live_childrels
) - 1;
1802 * Set this to true to save us having to check for
1803 * match_partition_order_desc in the loop below.
1805 match_partition_order
= true;
1809 /* for all other case, loop forward */
1811 end_index
= list_length(live_childrels
);
1815 /* Select the child paths for this ordering... */
1816 for (int i
= first_index
; i
!= end_index
; i
+= direction
)
1818 RelOptInfo
*childrel
= list_nth_node(RelOptInfo
, live_childrels
, i
);
1819 Path
*cheapest_startup
,
1821 *cheapest_fractional
= NULL
;
1823 /* Locate the right paths, if they are available. */
1825 get_cheapest_path_for_pathkeys(childrel
->pathlist
,
1831 get_cheapest_path_for_pathkeys(childrel
->pathlist
,
1838 * If we can't find any paths with the right order just use the
1839 * cheapest-total path; we'll have to sort it later.
1841 if (cheapest_startup
== NULL
|| cheapest_total
== NULL
)
1843 cheapest_startup
= cheapest_total
=
1844 childrel
->cheapest_total_path
;
1845 /* Assert we do have an unparameterized path for this child */
1846 Assert(cheapest_total
->param_info
== NULL
);
1850 * When building a fractional path, determine a cheapest
1851 * fractional path for each child relation too. Looking at startup
1852 * and total costs is not enough, because the cheapest fractional
1853 * path may be dominated by two separate paths (one for startup,
1856 * When needed (building fractional path), determine the cheapest
1857 * fractional path too.
1859 if (root
->tuple_fraction
> 0)
1861 double path_fraction
= (1.0 / root
->tuple_fraction
);
1863 cheapest_fractional
=
1864 get_cheapest_fractional_path_for_pathkeys(childrel
->pathlist
,
1870 * If we found no path with matching pathkeys, use the
1871 * cheapest total path instead.
1873 * XXX We might consider partially sorted paths too (with an
1874 * incremental sort on top). But we'd have to build all the
1875 * incremental paths, do the costing etc.
1877 if (!cheapest_fractional
)
1878 cheapest_fractional
= cheapest_total
;
1882 * Notice whether we actually have different paths for the
1883 * "cheapest" and "total" cases; frequently there will be no point
1884 * in two create_merge_append_path() calls.
1886 if (cheapest_startup
!= cheapest_total
)
1887 startup_neq_total
= true;
1890 * Collect the appropriate child paths. The required logic varies
1891 * for the Append and MergeAppend cases.
1893 if (match_partition_order
)
1896 * We're going to make a plain Append path. We don't need
1897 * most of what accumulate_append_subpath would do, but we do
1898 * want to cut out child Appends or MergeAppends if they have
1899 * just a single subpath (and hence aren't doing anything
1902 cheapest_startup
= get_singleton_append_subpath(cheapest_startup
);
1903 cheapest_total
= get_singleton_append_subpath(cheapest_total
);
1905 startup_subpaths
= lappend(startup_subpaths
, cheapest_startup
);
1906 total_subpaths
= lappend(total_subpaths
, cheapest_total
);
1908 if (cheapest_fractional
)
1910 cheapest_fractional
= get_singleton_append_subpath(cheapest_fractional
);
1911 fractional_subpaths
= lappend(fractional_subpaths
, cheapest_fractional
);
1917 * Otherwise, rely on accumulate_append_subpath to collect the
1918 * child paths for the MergeAppend.
1920 accumulate_append_subpath(cheapest_startup
,
1921 &startup_subpaths
, NULL
);
1922 accumulate_append_subpath(cheapest_total
,
1923 &total_subpaths
, NULL
);
1925 if (cheapest_fractional
)
1926 accumulate_append_subpath(cheapest_fractional
,
1927 &fractional_subpaths
, NULL
);
1931 /* ... and build the Append or MergeAppend paths */
1932 if (match_partition_order
)
1934 /* We only need Append */
1935 add_path(rel
, (Path
*) create_append_path(root
,
1944 if (startup_neq_total
)
1945 add_path(rel
, (Path
*) create_append_path(root
,
1955 if (fractional_subpaths
)
1956 add_path(rel
, (Path
*) create_append_path(root
,
1958 fractional_subpaths
,
1968 /* We need MergeAppend */
1969 add_path(rel
, (Path
*) create_merge_append_path(root
,
1974 if (startup_neq_total
)
1975 add_path(rel
, (Path
*) create_merge_append_path(root
,
1981 if (fractional_subpaths
)
1982 add_path(rel
, (Path
*) create_merge_append_path(root
,
1984 fractional_subpaths
,
1992 * get_cheapest_parameterized_child_path
1993 * Get cheapest path for this relation that has exactly the requested
1996 * Returns NULL if unable to create such a path.
1999 get_cheapest_parameterized_child_path(PlannerInfo
*root
, RelOptInfo
*rel
,
2000 Relids required_outer
)
2006 * Look up the cheapest existing path with no more than the needed
2007 * parameterization. If it has exactly the needed parameterization, we're
2010 cheapest
= get_cheapest_path_for_pathkeys(rel
->pathlist
,
2015 Assert(cheapest
!= NULL
);
2016 if (bms_equal(PATH_REQ_OUTER(cheapest
), required_outer
))
2020 * Otherwise, we can "reparameterize" an existing path to match the given
2021 * parameterization, which effectively means pushing down additional
2022 * joinquals to be checked within the path's scan. However, some existing
2023 * paths might check the available joinquals already while others don't;
2024 * therefore, it's not clear which existing path will be cheapest after
2025 * reparameterization. We have to go through them all and find out.
2028 foreach(lc
, rel
->pathlist
)
2030 Path
*path
= (Path
*) lfirst(lc
);
2032 /* Can't use it if it needs more than requested parameterization */
2033 if (!bms_is_subset(PATH_REQ_OUTER(path
), required_outer
))
2037 * Reparameterization can only increase the path's cost, so if it's
2038 * already more expensive than the current cheapest, forget it.
2040 if (cheapest
!= NULL
&&
2041 compare_path_costs(cheapest
, path
, TOTAL_COST
) <= 0)
2044 /* Reparameterize if needed, then recheck cost */
2045 if (!bms_equal(PATH_REQ_OUTER(path
), required_outer
))
2047 path
= reparameterize_path(root
, path
, required_outer
, 1.0);
2049 continue; /* failed to reparameterize this one */
2050 Assert(bms_equal(PATH_REQ_OUTER(path
), required_outer
));
2052 if (cheapest
!= NULL
&&
2053 compare_path_costs(cheapest
, path
, TOTAL_COST
) <= 0)
2057 /* We have a new best path */
2061 /* Return the best path, or NULL if we found no suitable candidate */
2066 * accumulate_append_subpath
2067 * Add a subpath to the list being built for an Append or MergeAppend.
2069 * It's possible that the child is itself an Append or MergeAppend path, in
2070 * which case we can "cut out the middleman" and just add its child paths to
2071 * our own list. (We don't try to do this earlier because we need to apply
2072 * both levels of transformation to the quals.)
2074 * Note that if we omit a child MergeAppend in this way, we are effectively
2075 * omitting a sort step, which seems fine: if the parent is to be an Append,
2076 * its result would be unsorted anyway, while if the parent is to be a
2077 * MergeAppend, there's no point in a separate sort on a child.
2079 * Normally, either path is a partial path and subpaths is a list of partial
2080 * paths, or else path is a non-partial plan and subpaths is a list of those.
2081 * However, if path is a parallel-aware Append, then we add its partial path
2082 * children to subpaths and the rest to special_subpaths. If the latter is
2083 * NULL, we don't flatten the path at all (unless it contains only partial
2087 accumulate_append_subpath(Path
*path
, List
**subpaths
, List
**special_subpaths
)
2089 if (IsA(path
, AppendPath
))
2091 AppendPath
*apath
= (AppendPath
*) path
;
2093 if (!apath
->path
.parallel_aware
|| apath
->first_partial_path
== 0)
2095 *subpaths
= list_concat(*subpaths
, apath
->subpaths
);
2098 else if (special_subpaths
!= NULL
)
2100 List
*new_special_subpaths
;
2102 /* Split Parallel Append into partial and non-partial subpaths */
2103 *subpaths
= list_concat(*subpaths
,
2104 list_copy_tail(apath
->subpaths
,
2105 apath
->first_partial_path
));
2106 new_special_subpaths
= list_copy_head(apath
->subpaths
,
2107 apath
->first_partial_path
);
2108 *special_subpaths
= list_concat(*special_subpaths
,
2109 new_special_subpaths
);
2113 else if (IsA(path
, MergeAppendPath
))
2115 MergeAppendPath
*mpath
= (MergeAppendPath
*) path
;
2117 *subpaths
= list_concat(*subpaths
, mpath
->subpaths
);
2121 *subpaths
= lappend(*subpaths
, path
);
2125 * get_singleton_append_subpath
2126 * Returns the single subpath of an Append/MergeAppend, or just
2127 * return 'path' if it's not a single sub-path Append/MergeAppend.
2129 * Note: 'path' must not be a parallel-aware path.
2132 get_singleton_append_subpath(Path
*path
)
2134 Assert(!path
->parallel_aware
);
2136 if (IsA(path
, AppendPath
))
2138 AppendPath
*apath
= (AppendPath
*) path
;
2140 if (list_length(apath
->subpaths
) == 1)
2141 return (Path
*) linitial(apath
->subpaths
);
2143 else if (IsA(path
, MergeAppendPath
))
2145 MergeAppendPath
*mpath
= (MergeAppendPath
*) path
;
2147 if (list_length(mpath
->subpaths
) == 1)
2148 return (Path
*) linitial(mpath
->subpaths
);
2155 * set_dummy_rel_pathlist
2156 * Build a dummy path for a relation that's been excluded by constraints
2158 * Rather than inventing a special "dummy" path type, we represent this as an
2159 * AppendPath with no members (see also IS_DUMMY_APPEND/IS_DUMMY_REL macros).
2161 * (See also mark_dummy_rel, which does basically the same thing, but is
2162 * typically used to change a rel into dummy state after we already made
2166 set_dummy_rel_pathlist(RelOptInfo
*rel
)
2168 /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
2170 rel
->reltarget
->width
= 0;
2172 /* Discard any pre-existing paths; no further need for them */
2173 rel
->pathlist
= NIL
;
2174 rel
->partial_pathlist
= NIL
;
2176 /* Set up the dummy path */
2177 add_path(rel
, (Path
*) create_append_path(NULL
, rel
, NIL
, NIL
,
2178 NIL
, rel
->lateral_relids
,
2182 * We set the cheapest-path fields immediately, just in case they were
2183 * pointing at some discarded path. This is redundant in current usage
2184 * because set_rel_pathlist will do it later, but it's cheap so we keep it
2185 * for safety and consistency with mark_dummy_rel.
2191 * find_window_run_conditions
2192 * Determine if 'wfunc' is really a WindowFunc and call its prosupport
2193 * function to determine the function's monotonic properties. We then
2194 * see if 'opexpr' can be used to short-circuit execution.
2196 * For example row_number() over (order by ...) always produces a value one
2197 * higher than the previous. If someone has a window function in a subquery
2198 * and has a WHERE clause in the outer query to filter rows <= 10, then we may
2199 * as well stop processing the windowagg once the row number reaches 11. Here
2200 * we check if 'opexpr' might help us to stop doing needless extra processing
2201 * in WindowAgg nodes.
2203 * '*keep_original' is set to true if the caller should also use 'opexpr' for
2204 * its original purpose. This is set to false if the caller can assume that
2205 * the run condition will handle all of the required filtering.
2207 * Returns true if 'opexpr' was found to be useful and was added to the
2208 * WindowFunc's runCondition. We also set *keep_original accordingly and add
2209 * 'attno' to *run_cond_attrs offset by FirstLowInvalidHeapAttributeNumber.
2210 * If the 'opexpr' cannot be used then we set *keep_original to true and
2214 find_window_run_conditions(Query
*subquery
, RangeTblEntry
*rte
, Index rti
,
2215 AttrNumber attno
, WindowFunc
*wfunc
, OpExpr
*opexpr
,
2216 bool wfunc_left
, bool *keep_original
,
2217 Bitmapset
**run_cond_attrs
)
2221 SupportRequestWFuncMonotonic req
;
2222 SupportRequestWFuncMonotonic
*res
;
2223 WindowClause
*wclause
;
2229 *keep_original
= true;
2231 while (IsA(wfunc
, RelabelType
))
2232 wfunc
= (WindowFunc
*) ((RelabelType
*) wfunc
)->arg
;
2234 /* we can only work with window functions */
2235 if (!IsA(wfunc
, WindowFunc
))
2238 /* can't use it if there are subplans in the WindowFunc */
2239 if (contain_subplans((Node
*) wfunc
))
2242 prosupport
= get_func_support(wfunc
->winfnoid
);
2244 /* Check if there's a support function for 'wfunc' */
2245 if (!OidIsValid(prosupport
))
2248 /* get the Expr from the other side of the OpExpr */
2250 otherexpr
= lsecond(opexpr
->args
);
2252 otherexpr
= linitial(opexpr
->args
);
2255 * The value being compared must not change during the evaluation of the
2258 if (!is_pseudo_constant_clause((Node
*) otherexpr
))
2261 /* find the window clause belonging to the window function */
2262 wclause
= (WindowClause
*) list_nth(subquery
->windowClause
,
2265 req
.type
= T_SupportRequestWFuncMonotonic
;
2266 req
.window_func
= wfunc
;
2267 req
.window_clause
= wclause
;
2269 /* call the support function */
2270 res
= (SupportRequestWFuncMonotonic
*)
2271 DatumGetPointer(OidFunctionCall1(prosupport
,
2272 PointerGetDatum(&req
)));
2275 * Nothing to do if the function is neither monotonically increasing nor
2276 * monotonically decreasing.
2278 if (res
== NULL
|| res
->monotonic
== MONOTONICFUNC_NONE
)
2282 runoperator
= InvalidOid
;
2283 opinfos
= get_op_btree_interpretation(opexpr
->opno
);
2285 foreach(lc
, opinfos
)
2287 OpBtreeInterpretation
*opinfo
= (OpBtreeInterpretation
*) lfirst(lc
);
2288 int strategy
= opinfo
->strategy
;
2291 if (strategy
== BTLessStrategyNumber
||
2292 strategy
== BTLessEqualStrategyNumber
)
2295 * < / <= is supported for monotonically increasing functions in
2296 * the form <wfunc> op <pseudoconst> and <pseudoconst> op <wfunc>
2297 * for monotonically decreasing functions.
2299 if ((wfunc_left
&& (res
->monotonic
& MONOTONICFUNC_INCREASING
)) ||
2300 (!wfunc_left
&& (res
->monotonic
& MONOTONICFUNC_DECREASING
)))
2302 *keep_original
= false;
2304 runoperator
= opexpr
->opno
;
2309 else if (strategy
== BTGreaterStrategyNumber
||
2310 strategy
== BTGreaterEqualStrategyNumber
)
2313 * > / >= is supported for monotonically decreasing functions in
2314 * the form <wfunc> op <pseudoconst> and <pseudoconst> op <wfunc>
2315 * for monotonically increasing functions.
2317 if ((wfunc_left
&& (res
->monotonic
& MONOTONICFUNC_DECREASING
)) ||
2318 (!wfunc_left
&& (res
->monotonic
& MONOTONICFUNC_INCREASING
)))
2320 *keep_original
= false;
2322 runoperator
= opexpr
->opno
;
2327 else if (strategy
== BTEqualStrategyNumber
)
2332 * When both monotonically increasing and decreasing then the
2333 * return value of the window function will be the same each time.
2334 * We can simply use 'opexpr' as the run condition without
2337 if ((res
->monotonic
& MONOTONICFUNC_BOTH
) == MONOTONICFUNC_BOTH
)
2339 *keep_original
= false;
2341 runoperator
= opexpr
->opno
;
2346 * When monotonically increasing we make a qual with <wfunc> <=
2347 * <value> or <value> >= <wfunc> in order to filter out values
2348 * which are above the value in the equality condition. For
2349 * monotonically decreasing functions we want to filter values
2350 * below the value in the equality condition.
2352 if (res
->monotonic
& MONOTONICFUNC_INCREASING
)
2353 newstrategy
= wfunc_left
? BTLessEqualStrategyNumber
: BTGreaterEqualStrategyNumber
;
2355 newstrategy
= wfunc_left
? BTGreaterEqualStrategyNumber
: BTLessEqualStrategyNumber
;
2357 /* We must keep the original equality qual */
2358 *keep_original
= true;
2361 /* determine the operator to use for the WindowFuncRunCondition */
2362 runoperator
= get_opfamily_member(opinfo
->opfamily_id
,
2364 opinfo
->oprighttype
,
2370 if (runopexpr
!= NULL
)
2372 WindowFuncRunCondition
*wfuncrc
;
2374 wfuncrc
= makeNode(WindowFuncRunCondition
);
2375 wfuncrc
->opno
= runoperator
;
2376 wfuncrc
->inputcollid
= runopexpr
->inputcollid
;
2377 wfuncrc
->wfunc_left
= wfunc_left
;
2378 wfuncrc
->arg
= copyObject(otherexpr
);
2380 wfunc
->runCondition
= lappend(wfunc
->runCondition
, wfuncrc
);
2382 /* record that this attno was used in a run condition */
2383 *run_cond_attrs
= bms_add_member(*run_cond_attrs
,
2384 attno
- FirstLowInvalidHeapAttributeNumber
);
2388 /* unsupported OpExpr */
2393 * check_and_push_window_quals
2394 * Check if 'clause' is a qual that can be pushed into a WindowFunc
2395 * as a 'runCondition' qual. These, when present, allow some unnecessary
2396 * work to be skipped during execution.
2398 * 'run_cond_attrs' will be populated with all targetlist resnos of subquery
2399 * targets (offset by FirstLowInvalidHeapAttributeNumber) that we pushed
2402 * Returns true if the caller still must keep the original qual or false if
2403 * the caller can safely ignore the original qual because the WindowAgg node
2404 * will use the runCondition to stop returning tuples.
2407 check_and_push_window_quals(Query
*subquery
, RangeTblEntry
*rte
, Index rti
,
2408 Node
*clause
, Bitmapset
**run_cond_attrs
)
2410 OpExpr
*opexpr
= (OpExpr
*) clause
;
2411 bool keep_original
= true;
2415 /* We're only able to use OpExprs with 2 operands */
2416 if (!IsA(opexpr
, OpExpr
))
2419 if (list_length(opexpr
->args
) != 2)
2423 * Currently, we restrict this optimization to strict OpExprs. The reason
2424 * for this is that during execution, once the runcondition becomes false,
2425 * we stop evaluating WindowFuncs. To avoid leaving around stale window
2426 * function result values, we set them to NULL. Having only strict
2427 * OpExprs here ensures that we properly filter out the tuples with NULLs
2428 * in the top-level WindowAgg.
2430 set_opfuncid(opexpr
);
2431 if (!func_strict(opexpr
->opfuncid
))
2435 * Check for plain Vars that reference window functions in the subquery.
2436 * If we find any, we'll ask find_window_run_conditions() if 'opexpr' can
2437 * be used as part of the run condition.
2440 /* Check the left side of the OpExpr */
2441 var1
= linitial(opexpr
->args
);
2442 if (IsA(var1
, Var
) && var1
->varattno
> 0)
2444 TargetEntry
*tle
= list_nth(subquery
->targetList
, var1
->varattno
- 1);
2445 WindowFunc
*wfunc
= (WindowFunc
*) tle
->expr
;
2447 if (find_window_run_conditions(subquery
, rte
, rti
, tle
->resno
, wfunc
,
2448 opexpr
, true, &keep_original
,
2450 return keep_original
;
2453 /* and check the right side */
2454 var2
= lsecond(opexpr
->args
);
2455 if (IsA(var2
, Var
) && var2
->varattno
> 0)
2457 TargetEntry
*tle
= list_nth(subquery
->targetList
, var2
->varattno
- 1);
2458 WindowFunc
*wfunc
= (WindowFunc
*) tle
->expr
;
2460 if (find_window_run_conditions(subquery
, rte
, rti
, tle
->resno
, wfunc
,
2461 opexpr
, false, &keep_original
,
2463 return keep_original
;
2470 * set_subquery_pathlist
2471 * Generate SubqueryScan access paths for a subquery RTE
2473 * We don't currently support generating parameterized paths for subqueries
2474 * by pushing join clauses down into them; it seems too expensive to re-plan
2475 * the subquery multiple times to consider different alternatives.
2476 * (XXX that could stand to be reconsidered, now that we use Paths.)
2477 * So the paths made here will be parameterized if the subquery contains
2478 * LATERAL references, otherwise not. As long as that's true, there's no need
2479 * for a separate set_subquery_size phase: just make the paths right away.
2482 set_subquery_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
2483 Index rti
, RangeTblEntry
*rte
)
2485 Query
*parse
= root
->parse
;
2486 Query
*subquery
= rte
->subquery
;
2487 bool trivial_pathtarget
;
2488 Relids required_outer
;
2489 pushdown_safety_info safetyInfo
;
2490 double tuple_fraction
;
2491 RelOptInfo
*sub_final_rel
;
2492 Bitmapset
*run_cond_attrs
= NULL
;
2496 * Must copy the Query so that planning doesn't mess up the RTE contents
2497 * (really really need to fix the planner to not scribble on its input,
2498 * someday ... but see remove_unused_subquery_outputs to start with).
2500 subquery
= copyObject(subquery
);
2503 * If it's a LATERAL subquery, it might contain some Vars of the current
2504 * query level, requiring it to be treated as parameterized, even though
2505 * we don't support pushing down join quals into subqueries.
2507 required_outer
= rel
->lateral_relids
;
2510 * Zero out result area for subquery_is_pushdown_safe, so that it can set
2511 * flags as needed while recursing. In particular, we need a workspace
2512 * for keeping track of the reasons why columns are unsafe to reference.
2513 * These reasons are stored in the bits inside unsafeFlags[i] when we
2514 * discover reasons that column i of the subquery is unsafe to be used in
2515 * a pushed-down qual.
2517 memset(&safetyInfo
, 0, sizeof(safetyInfo
));
2518 safetyInfo
.unsafeFlags
= (unsigned char *)
2519 palloc0((list_length(subquery
->targetList
) + 1) * sizeof(unsigned char));
2522 * If the subquery has the "security_barrier" flag, it means the subquery
2523 * originated from a view that must enforce row-level security. Then we
2524 * must not push down quals that contain leaky functions. (Ideally this
2525 * would be checked inside subquery_is_pushdown_safe, but since we don't
2526 * currently pass the RTE to that function, we must do it here.)
2528 safetyInfo
.unsafeLeaky
= rte
->security_barrier
;
2531 * If there are any restriction clauses that have been attached to the
2532 * subquery relation, consider pushing them down to become WHERE or HAVING
2533 * quals of the subquery itself. This transformation is useful because it
2534 * may allow us to generate a better plan for the subquery than evaluating
2535 * all the subquery output rows and then filtering them.
2537 * There are several cases where we cannot push down clauses. Restrictions
2538 * involving the subquery are checked by subquery_is_pushdown_safe().
2539 * Restrictions on individual clauses are checked by
2540 * qual_is_pushdown_safe(). Also, we don't want to push down
2541 * pseudoconstant clauses; better to have the gating node above the
2544 * Non-pushed-down clauses will get evaluated as qpquals of the
2545 * SubqueryScan node.
2547 * XXX Are there any cases where we want to make a policy decision not to
2548 * push down a pushable qual, because it'd result in a worse plan?
2550 if (rel
->baserestrictinfo
!= NIL
&&
2551 subquery_is_pushdown_safe(subquery
, subquery
, &safetyInfo
))
2553 /* OK to consider pushing down individual quals */
2554 List
*upperrestrictlist
= NIL
;
2557 foreach(l
, rel
->baserestrictinfo
)
2559 RestrictInfo
*rinfo
= (RestrictInfo
*) lfirst(l
);
2560 Node
*clause
= (Node
*) rinfo
->clause
;
2562 if (rinfo
->pseudoconstant
)
2564 upperrestrictlist
= lappend(upperrestrictlist
, rinfo
);
2568 switch (qual_is_pushdown_safe(subquery
, rti
, rinfo
, &safetyInfo
))
2572 subquery_push_qual(subquery
, rte
, rti
, clause
);
2575 case PUSHDOWN_WINDOWCLAUSE_RUNCOND
:
2578 * Since we can't push the qual down into the subquery,
2579 * check if it happens to reference a window function. If
2580 * so then it might be useful to use for the WindowAgg's
2583 if (!subquery
->hasWindowFuncs
||
2584 check_and_push_window_quals(subquery
, rte
, rti
, clause
,
2588 * subquery has no window funcs or the clause is not a
2589 * suitable window run condition qual or it is, but
2590 * the original must also be kept in the upper query.
2592 upperrestrictlist
= lappend(upperrestrictlist
, rinfo
);
2596 case PUSHDOWN_UNSAFE
:
2597 upperrestrictlist
= lappend(upperrestrictlist
, rinfo
);
2601 rel
->baserestrictinfo
= upperrestrictlist
;
2602 /* We don't bother recomputing baserestrict_min_security */
2605 pfree(safetyInfo
.unsafeFlags
);
2608 * The upper query might not use all the subquery's output columns; if
2609 * not, we can simplify. Pass the attributes that were pushed down into
2610 * WindowAgg run conditions to ensure we don't accidentally think those
2613 remove_unused_subquery_outputs(subquery
, rel
, run_cond_attrs
);
2616 * We can safely pass the outer tuple_fraction down to the subquery if the
2617 * outer level has no joining, aggregation, or sorting to do. Otherwise
2618 * we'd better tell the subquery to plan for full retrieval. (XXX This
2619 * could probably be made more intelligent ...)
2621 if (parse
->hasAggs
||
2622 parse
->groupClause
||
2623 parse
->groupingSets
||
2624 root
->hasHavingQual
||
2625 parse
->distinctClause
||
2626 parse
->sortClause
||
2627 bms_membership(root
->all_baserels
) == BMS_MULTIPLE
)
2628 tuple_fraction
= 0.0; /* default case */
2630 tuple_fraction
= root
->tuple_fraction
;
2632 /* plan_params should not be in use in current query level */
2633 Assert(root
->plan_params
== NIL
);
2635 /* Generate a subroot and Paths for the subquery */
2636 rel
->subroot
= subquery_planner(root
->glob
, subquery
,
2638 false, tuple_fraction
);
2640 /* Isolate the params needed by this specific subplan */
2641 rel
->subplan_params
= root
->plan_params
;
2642 root
->plan_params
= NIL
;
2645 * It's possible that constraint exclusion proved the subquery empty. If
2646 * so, it's desirable to produce an unadorned dummy path so that we will
2647 * recognize appropriate optimizations at this query level.
2649 sub_final_rel
= fetch_upper_rel(rel
->subroot
, UPPERREL_FINAL
, NULL
);
2651 if (IS_DUMMY_REL(sub_final_rel
))
2653 set_dummy_rel_pathlist(rel
);
2658 * Mark rel with estimated output rows, width, etc. Note that we have to
2659 * do this before generating outer-query paths, else cost_subqueryscan is
2662 set_subquery_size_estimates(root
, rel
);
2665 * Also detect whether the reltarget is trivial, so that we can pass that
2666 * info to cost_subqueryscan (rather than re-deriving it multiple times).
2667 * It's trivial if it fetches all the subplan output columns in order.
2669 if (list_length(rel
->reltarget
->exprs
) != list_length(subquery
->targetList
))
2670 trivial_pathtarget
= false;
2673 trivial_pathtarget
= true;
2674 foreach(lc
, rel
->reltarget
->exprs
)
2676 Node
*node
= (Node
*) lfirst(lc
);
2679 if (!IsA(node
, Var
))
2681 trivial_pathtarget
= false;
2685 if (var
->varno
!= rti
||
2686 var
->varattno
!= foreach_current_index(lc
) + 1)
2688 trivial_pathtarget
= false;
2695 * For each Path that subquery_planner produced, make a SubqueryScanPath
2696 * in the outer query.
2698 foreach(lc
, sub_final_rel
->pathlist
)
2700 Path
*subpath
= (Path
*) lfirst(lc
);
2703 /* Convert subpath's pathkeys to outer representation */
2704 pathkeys
= convert_subquery_pathkeys(root
,
2707 make_tlist_from_pathtarget(subpath
->pathtarget
));
2709 /* Generate outer path using this subpath */
2710 add_path(rel
, (Path
*)
2711 create_subqueryscan_path(root
, rel
, subpath
,
2713 pathkeys
, required_outer
));
2716 /* If outer rel allows parallelism, do same for partial paths. */
2717 if (rel
->consider_parallel
&& bms_is_empty(required_outer
))
2719 /* If consider_parallel is false, there should be no partial paths. */
2720 Assert(sub_final_rel
->consider_parallel
||
2721 sub_final_rel
->partial_pathlist
== NIL
);
2723 /* Same for partial paths. */
2724 foreach(lc
, sub_final_rel
->partial_pathlist
)
2726 Path
*subpath
= (Path
*) lfirst(lc
);
2729 /* Convert subpath's pathkeys to outer representation */
2730 pathkeys
= convert_subquery_pathkeys(root
,
2733 make_tlist_from_pathtarget(subpath
->pathtarget
));
2735 /* Generate outer path using this subpath */
2736 add_partial_path(rel
, (Path
*)
2737 create_subqueryscan_path(root
, rel
, subpath
,
2746 * set_function_pathlist
2747 * Build the (single) access path for a function RTE
2750 set_function_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
2752 Relids required_outer
;
2753 List
*pathkeys
= NIL
;
2756 * We don't support pushing join clauses into the quals of a function
2757 * scan, but it could still have required parameterization due to LATERAL
2758 * refs in the function expression.
2760 required_outer
= rel
->lateral_relids
;
2763 * The result is considered unordered unless ORDINALITY was used, in which
2764 * case it is ordered by the ordinal column (the last one). See if we
2765 * care, by checking for uses of that Var in equivalence classes.
2767 if (rte
->funcordinality
)
2769 AttrNumber ordattno
= rel
->max_attr
;
2774 * Is there a Var for it in rel's targetlist? If not, the query did
2775 * not reference the ordinality column, or at least not in any way
2776 * that would be interesting for sorting.
2778 foreach(lc
, rel
->reltarget
->exprs
)
2780 Var
*node
= (Var
*) lfirst(lc
);
2782 /* checking varno/varlevelsup is just paranoia */
2783 if (IsA(node
, Var
) &&
2784 node
->varattno
== ordattno
&&
2785 node
->varno
== rel
->relid
&&
2786 node
->varlevelsup
== 0)
2794 * Try to build pathkeys for this Var with int8 sorting. We tell
2795 * build_expression_pathkey not to build any new equivalence class; if
2796 * the Var isn't already mentioned in some EC, it means that nothing
2797 * cares about the ordering.
2800 pathkeys
= build_expression_pathkey(root
,
2807 /* Generate appropriate path */
2808 add_path(rel
, create_functionscan_path(root
, rel
,
2809 pathkeys
, required_outer
));
2813 * set_values_pathlist
2814 * Build the (single) access path for a VALUES RTE
2817 set_values_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
2819 Relids required_outer
;
2822 * We don't support pushing join clauses into the quals of a values scan,
2823 * but it could still have required parameterization due to LATERAL refs
2824 * in the values expressions.
2826 required_outer
= rel
->lateral_relids
;
2828 /* Generate appropriate path */
2829 add_path(rel
, create_valuesscan_path(root
, rel
, required_outer
));
2833 * set_tablefunc_pathlist
2834 * Build the (single) access path for a table func RTE
2837 set_tablefunc_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
2839 Relids required_outer
;
2842 * We don't support pushing join clauses into the quals of a tablefunc
2843 * scan, but it could still have required parameterization due to LATERAL
2844 * refs in the function expression.
2846 required_outer
= rel
->lateral_relids
;
2848 /* Generate appropriate path */
2849 add_path(rel
, create_tablefuncscan_path(root
, rel
,
2855 * Build the (single) access path for a non-self-reference CTE RTE
2857 * There's no need for a separate set_cte_size phase, since we don't
2858 * support join-qual-parameterized paths for CTEs.
2861 set_cte_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
2865 PlannerInfo
*cteroot
;
2871 Relids required_outer
;
2874 * Find the referenced CTE, and locate the path and plan previously made
2877 levelsup
= rte
->ctelevelsup
;
2879 while (levelsup
-- > 0)
2881 cteroot
= cteroot
->parent_root
;
2882 if (!cteroot
) /* shouldn't happen */
2883 elog(ERROR
, "bad levelsup for CTE \"%s\"", rte
->ctename
);
2887 * Note: cte_plan_ids can be shorter than cteList, if we are still working
2888 * on planning the CTEs (ie, this is a side-reference from another CTE).
2889 * So we mustn't use forboth here.
2892 foreach(lc
, cteroot
->parse
->cteList
)
2894 CommonTableExpr
*cte
= (CommonTableExpr
*) lfirst(lc
);
2896 if (strcmp(cte
->ctename
, rte
->ctename
) == 0)
2900 if (lc
== NULL
) /* shouldn't happen */
2901 elog(ERROR
, "could not find CTE \"%s\"", rte
->ctename
);
2902 if (ndx
>= list_length(cteroot
->cte_plan_ids
))
2903 elog(ERROR
, "could not find plan for CTE \"%s\"", rte
->ctename
);
2904 plan_id
= list_nth_int(cteroot
->cte_plan_ids
, ndx
);
2906 elog(ERROR
, "no plan was made for CTE \"%s\"", rte
->ctename
);
2908 Assert(list_length(root
->glob
->subpaths
) == list_length(root
->glob
->subplans
));
2909 ctepath
= (Path
*) list_nth(root
->glob
->subpaths
, plan_id
- 1);
2910 cteplan
= (Plan
*) list_nth(root
->glob
->subplans
, plan_id
- 1);
2912 /* Mark rel with estimated output rows, width, etc */
2913 set_cte_size_estimates(root
, rel
, cteplan
->plan_rows
);
2915 /* Convert the ctepath's pathkeys to outer query's representation */
2916 pathkeys
= convert_subquery_pathkeys(root
,
2919 cteplan
->targetlist
);
2922 * We don't support pushing join clauses into the quals of a CTE scan, but
2923 * it could still have required parameterization due to LATERAL refs in
2926 required_outer
= rel
->lateral_relids
;
2928 /* Generate appropriate path */
2929 add_path(rel
, create_ctescan_path(root
, rel
, pathkeys
, required_outer
));
2933 * set_namedtuplestore_pathlist
2934 * Build the (single) access path for a named tuplestore RTE
2936 * There's no need for a separate set_namedtuplestore_size phase, since we
2937 * don't support join-qual-parameterized paths for tuplestores.
2940 set_namedtuplestore_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
2943 Relids required_outer
;
2945 /* Mark rel with estimated output rows, width, etc */
2946 set_namedtuplestore_size_estimates(root
, rel
);
2949 * We don't support pushing join clauses into the quals of a tuplestore
2950 * scan, but it could still have required parameterization due to LATERAL
2951 * refs in its tlist.
2953 required_outer
= rel
->lateral_relids
;
2955 /* Generate appropriate path */
2956 add_path(rel
, create_namedtuplestorescan_path(root
, rel
, required_outer
));
2960 * set_result_pathlist
2961 * Build the (single) access path for an RTE_RESULT RTE
2963 * There's no need for a separate set_result_size phase, since we
2964 * don't support join-qual-parameterized paths for these RTEs.
2967 set_result_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
,
2970 Relids required_outer
;
2972 /* Mark rel with estimated output rows, width, etc */
2973 set_result_size_estimates(root
, rel
);
2976 * We don't support pushing join clauses into the quals of a Result scan,
2977 * but it could still have required parameterization due to LATERAL refs
2980 required_outer
= rel
->lateral_relids
;
2982 /* Generate appropriate path */
2983 add_path(rel
, create_resultscan_path(root
, rel
, required_outer
));
2987 * set_worktable_pathlist
2988 * Build the (single) access path for a self-reference CTE RTE
2990 * There's no need for a separate set_worktable_size phase, since we don't
2991 * support join-qual-parameterized paths for CTEs.
2994 set_worktable_pathlist(PlannerInfo
*root
, RelOptInfo
*rel
, RangeTblEntry
*rte
)
2997 PlannerInfo
*cteroot
;
2999 Relids required_outer
;
3002 * We need to find the non-recursive term's path, which is in the plan
3003 * level that's processing the recursive UNION, which is one level *below*
3004 * where the CTE comes from.
3006 levelsup
= rte
->ctelevelsup
;
3007 if (levelsup
== 0) /* shouldn't happen */
3008 elog(ERROR
, "bad levelsup for CTE \"%s\"", rte
->ctename
);
3011 while (levelsup
-- > 0)
3013 cteroot
= cteroot
->parent_root
;
3014 if (!cteroot
) /* shouldn't happen */
3015 elog(ERROR
, "bad levelsup for CTE \"%s\"", rte
->ctename
);
3017 ctepath
= cteroot
->non_recursive_path
;
3018 if (!ctepath
) /* shouldn't happen */
3019 elog(ERROR
, "could not find path for CTE \"%s\"", rte
->ctename
);
3021 /* Mark rel with estimated output rows, width, etc */
3022 set_cte_size_estimates(root
, rel
, ctepath
->rows
);
3025 * We don't support pushing join clauses into the quals of a worktable
3026 * scan, but it could still have required parameterization due to LATERAL
3027 * refs in its tlist. (I'm not sure this is actually possible given the
3028 * restrictions on recursive references, but it's easy enough to support.)
3030 required_outer
= rel
->lateral_relids
;
3032 /* Generate appropriate path */
3033 add_path(rel
, create_worktablescan_path(root
, rel
, required_outer
));
3037 * generate_gather_paths
3038 * Generate parallel access paths for a relation by pushing a Gather or
3039 * Gather Merge on top of a partial path.
3041 * This must not be called until after we're done creating all partial paths
3042 * for the specified relation. (Otherwise, add_partial_path might delete a
3043 * path that some GatherPath or GatherMergePath has a reference to.)
3045 * If we're generating paths for a scan or join relation, override_rows will
3046 * be false, and we'll just use the relation's size estimate. When we're
3047 * being called for a partially-grouped or partially-distinct path, though, we
3048 * need to override the rowcount estimate. (It's not clear that the
3049 * particular value we're using here is actually best, but the underlying rel
3050 * has no estimate so we must do something.)
3053 generate_gather_paths(PlannerInfo
*root
, RelOptInfo
*rel
, bool override_rows
)
3055 Path
*cheapest_partial_path
;
3056 Path
*simple_gather_path
;
3059 double *rowsp
= NULL
;
3061 /* If there are no partial paths, there's nothing to do here. */
3062 if (rel
->partial_pathlist
== NIL
)
3065 /* Should we override the rel's rowcount estimate? */
3070 * The output of Gather is always unsorted, so there's only one partial
3071 * path of interest: the cheapest one. That will be the one at the front
3072 * of partial_pathlist because of the way add_partial_path works.
3074 cheapest_partial_path
= linitial(rel
->partial_pathlist
);
3076 cheapest_partial_path
->rows
* cheapest_partial_path
->parallel_workers
;
3077 simple_gather_path
= (Path
*)
3078 create_gather_path(root
, rel
, cheapest_partial_path
, rel
->reltarget
,
3080 add_path(rel
, simple_gather_path
);
3083 * For each useful ordering, we can consider an order-preserving Gather
3086 foreach(lc
, rel
->partial_pathlist
)
3088 Path
*subpath
= (Path
*) lfirst(lc
);
3089 GatherMergePath
*path
;
3091 if (subpath
->pathkeys
== NIL
)
3094 rows
= subpath
->rows
* subpath
->parallel_workers
;
3095 path
= create_gather_merge_path(root
, rel
, subpath
, rel
->reltarget
,
3096 subpath
->pathkeys
, NULL
, rowsp
);
3097 add_path(rel
, &path
->path
);
3102 * get_useful_pathkeys_for_relation
3103 * Determine which orderings of a relation might be useful.
3105 * Getting data in sorted order can be useful either because the requested
3106 * order matches the final output ordering for the overall query we're
3107 * planning, or because it enables an efficient merge join. Here, we try
3108 * to figure out which pathkeys to consider.
3110 * This allows us to do incremental sort on top of an index scan under a gather
3111 * merge node, i.e. parallelized.
3113 * If the require_parallel_safe is true, we also require the expressions to
3114 * be parallel safe (which allows pushing the sort below Gather Merge).
3116 * XXX At the moment this can only ever return a list with a single element,
3117 * because it looks at query_pathkeys only. So we might return the pathkeys
3118 * directly, but it seems plausible we'll want to consider other orderings
3119 * in the future. For example, we might want to consider pathkeys useful for
3123 get_useful_pathkeys_for_relation(PlannerInfo
*root
, RelOptInfo
*rel
,
3124 bool require_parallel_safe
)
3126 List
*useful_pathkeys_list
= NIL
;
3129 * Considering query_pathkeys is always worth it, because it might allow
3130 * us to avoid a total sort when we have a partially presorted path
3131 * available or to push the total sort into the parallel portion of the
3134 if (root
->query_pathkeys
)
3137 int npathkeys
= 0; /* useful pathkeys */
3139 foreach(lc
, root
->query_pathkeys
)
3141 PathKey
*pathkey
= (PathKey
*) lfirst(lc
);
3142 EquivalenceClass
*pathkey_ec
= pathkey
->pk_eclass
;
3145 * We can only build a sort for pathkeys that contain a
3146 * safe-to-compute-early EC member computable from the current
3147 * relation's reltarget, so ignore the remainder of the list as
3148 * soon as we find a pathkey without such a member.
3150 * It's still worthwhile to return any prefix of the pathkeys list
3151 * that meets this requirement, as we may be able to do an
3154 * If requested, ensure the sort expression is parallel-safe too.
3156 if (!relation_can_be_sorted_early(root
, rel
, pathkey_ec
,
3157 require_parallel_safe
))
3164 * The whole query_pathkeys list matches, so append it directly, to
3165 * allow comparing pathkeys easily by comparing list pointer. If we
3166 * have to truncate the pathkeys, we gotta do a copy though.
3168 if (npathkeys
== list_length(root
->query_pathkeys
))
3169 useful_pathkeys_list
= lappend(useful_pathkeys_list
,
3170 root
->query_pathkeys
);
3171 else if (npathkeys
> 0)
3172 useful_pathkeys_list
= lappend(useful_pathkeys_list
,
3173 list_copy_head(root
->query_pathkeys
,
3177 return useful_pathkeys_list
;
3181 * generate_useful_gather_paths
3182 * Generate parallel access paths for a relation by pushing a Gather or
3183 * Gather Merge on top of a partial path.
3185 * Unlike plain generate_gather_paths, this looks both at pathkeys of input
3186 * paths (aiming to preserve the ordering), but also considers ordering that
3187 * might be useful for nodes above the gather merge node, and tries to add
3188 * a sort (regular or incremental) to provide that.
3191 generate_useful_gather_paths(PlannerInfo
*root
, RelOptInfo
*rel
, bool override_rows
)
3195 double *rowsp
= NULL
;
3196 List
*useful_pathkeys_list
= NIL
;
3197 Path
*cheapest_partial_path
= NULL
;
3199 /* If there are no partial paths, there's nothing to do here. */
3200 if (rel
->partial_pathlist
== NIL
)
3203 /* Should we override the rel's rowcount estimate? */
3207 /* generate the regular gather (merge) paths */
3208 generate_gather_paths(root
, rel
, override_rows
);
3210 /* consider incremental sort for interesting orderings */
3211 useful_pathkeys_list
= get_useful_pathkeys_for_relation(root
, rel
, true);
3213 /* used for explicit (full) sort paths */
3214 cheapest_partial_path
= linitial(rel
->partial_pathlist
);
3217 * Consider sorted paths for each interesting ordering. We generate both
3218 * incremental and full sort.
3220 foreach(lc
, useful_pathkeys_list
)
3222 List
*useful_pathkeys
= lfirst(lc
);
3227 foreach(lc2
, rel
->partial_pathlist
)
3229 Path
*subpath
= (Path
*) lfirst(lc2
);
3230 GatherMergePath
*path
;
3232 is_sorted
= pathkeys_count_contained_in(useful_pathkeys
,
3237 * We don't need to consider the case where a subpath is already
3238 * fully sorted because generate_gather_paths already creates a
3239 * gather merge path for every subpath that has pathkeys present.
3241 * But since the subpath is already sorted, we know we don't need
3242 * to consider adding a sort (full or incremental) on top of it,
3243 * so we can continue here.
3249 * Try at least sorting the cheapest path and also try
3250 * incrementally sorting any path which is partially sorted
3251 * already (no need to deal with paths which have presorted keys
3252 * when incremental sort is disabled unless it's the cheapest
3255 if (subpath
!= cheapest_partial_path
&&
3256 (presorted_keys
== 0 || !enable_incremental_sort
))
3260 * Consider regular sort for any path that's not presorted or if
3261 * incremental sort is disabled. We've no need to consider both
3262 * sort and incremental sort on the same path. We assume that
3263 * incremental sort is always faster when there are presorted
3266 * This is not redundant with the gather paths created in
3267 * generate_gather_paths, because that doesn't generate ordered
3268 * output. Here we add an explicit sort to match the useful
3271 if (presorted_keys
== 0 || !enable_incremental_sort
)
3273 subpath
= (Path
*) create_sort_path(root
,
3278 rows
= subpath
->rows
* subpath
->parallel_workers
;
3281 subpath
= (Path
*) create_incremental_sort_path(root
,
3287 path
= create_gather_merge_path(root
, rel
,
3294 add_path(rel
, &path
->path
);
3300 * make_rel_from_joinlist
3301 * Build access paths using a "joinlist" to guide the join path search.
3303 * See comments for deconstruct_jointree() for definition of the joinlist
3307 make_rel_from_joinlist(PlannerInfo
*root
, List
*joinlist
)
3314 * Count the number of child joinlist nodes. This is the depth of the
3315 * dynamic-programming algorithm we must employ to consider all ways of
3316 * joining the child nodes.
3318 levels_needed
= list_length(joinlist
);
3320 if (levels_needed
<= 0)
3321 return NULL
; /* nothing to do? */
3324 * Construct a list of rels corresponding to the child joinlist nodes.
3325 * This may contain both base rels and rels constructed according to
3329 foreach(jl
, joinlist
)
3331 Node
*jlnode
= (Node
*) lfirst(jl
);
3332 RelOptInfo
*thisrel
;
3334 if (IsA(jlnode
, RangeTblRef
))
3336 int varno
= ((RangeTblRef
*) jlnode
)->rtindex
;
3338 thisrel
= find_base_rel(root
, varno
);
3340 else if (IsA(jlnode
, List
))
3342 /* Recurse to handle subproblem */
3343 thisrel
= make_rel_from_joinlist(root
, (List
*) jlnode
);
3347 elog(ERROR
, "unrecognized joinlist node type: %d",
3348 (int) nodeTag(jlnode
));
3349 thisrel
= NULL
; /* keep compiler quiet */
3352 initial_rels
= lappend(initial_rels
, thisrel
);
3355 if (levels_needed
== 1)
3358 * Single joinlist node, so we're done.
3360 return (RelOptInfo
*) linitial(initial_rels
);
3365 * Consider the different orders in which we could join the rels,
3366 * using a plugin, GEQO, or the regular join search code.
3368 * We put the initial_rels list into a PlannerInfo field because
3369 * has_legal_joinclause() needs to look at it (ugly :-().
3371 root
->initial_rels
= initial_rels
;
3373 if (join_search_hook
)
3374 return (*join_search_hook
) (root
, levels_needed
, initial_rels
);
3375 else if (enable_geqo
&& levels_needed
>= geqo_threshold
)
3376 return geqo(root
, levels_needed
, initial_rels
);
3378 return standard_join_search(root
, levels_needed
, initial_rels
);
3383 * standard_join_search
3384 * Find possible joinpaths for a query by successively finding ways
3385 * to join component relations into join relations.
3387 * 'levels_needed' is the number of iterations needed, ie, the number of
3388 * independent jointree items in the query. This is > 1.
3390 * 'initial_rels' is a list of RelOptInfo nodes for each independent
3391 * jointree item. These are the components to be joined together.
3392 * Note that levels_needed == list_length(initial_rels).
3394 * Returns the final level of join relations, i.e., the relation that is
3395 * the result of joining all the original relations together.
3396 * At least one implementation path must be provided for this relation and
3397 * all required sub-relations.
3399 * To support loadable plugins that modify planner behavior by changing the
3400 * join searching algorithm, we provide a hook variable that lets a plugin
3401 * replace or supplement this function. Any such hook must return the same
3402 * final join relation as the standard code would, but it might have a
3403 * different set of implementation paths attached, and only the sub-joinrels
3404 * needed for these paths need have been instantiated.
3406 * Note to plugin authors: the functions invoked during standard_join_search()
3407 * modify root->join_rel_list and root->join_rel_hash. If you want to do more
3408 * than one join-order search, you'll probably need to save and restore the
3409 * original states of those data structures. See geqo_eval() for an example.
3412 standard_join_search(PlannerInfo
*root
, int levels_needed
, List
*initial_rels
)
3418 * This function cannot be invoked recursively within any one planning
3419 * problem, so join_rel_level[] can't be in use already.
3421 Assert(root
->join_rel_level
== NULL
);
3424 * We employ a simple "dynamic programming" algorithm: we first find all
3425 * ways to build joins of two jointree items, then all ways to build joins
3426 * of three items (from two-item joins and single items), then four-item
3427 * joins, and so on until we have considered all ways to join all the
3428 * items into one rel.
3430 * root->join_rel_level[j] is a list of all the j-item rels. Initially we
3431 * set root->join_rel_level[1] to represent all the single-jointree-item
3434 root
->join_rel_level
= (List
**) palloc0((levels_needed
+ 1) * sizeof(List
*));
3436 root
->join_rel_level
[1] = initial_rels
;
3438 for (lev
= 2; lev
<= levels_needed
; lev
++)
3443 * Determine all possible pairs of relations to be joined at this
3444 * level, and build paths for making each one from every available
3445 * pair of lower-level relations.
3447 join_search_one_level(root
, lev
);
3450 * Run generate_partitionwise_join_paths() and
3451 * generate_useful_gather_paths() for each just-processed joinrel. We
3452 * could not do this earlier because both regular and partial paths
3453 * can get added to a particular joinrel at multiple times within
3454 * join_search_one_level.
3456 * After that, we're done creating paths for the joinrel, so run
3459 foreach(lc
, root
->join_rel_level
[lev
])
3461 rel
= (RelOptInfo
*) lfirst(lc
);
3463 /* Create paths for partitionwise joins. */
3464 generate_partitionwise_join_paths(root
, rel
);
3467 * Except for the topmost scan/join rel, consider gathering
3468 * partial paths. We'll do the same for the topmost scan/join rel
3469 * once we know the final targetlist (see grouping_planner's and
3470 * its call to apply_scanjoin_target_to_paths).
3472 if (!bms_equal(rel
->relids
, root
->all_query_rels
))
3473 generate_useful_gather_paths(root
, rel
, false);
3475 /* Find and save the cheapest paths for this rel */
3478 #ifdef OPTIMIZER_DEBUG
3485 * We should have a single rel at the final level.
3487 if (root
->join_rel_level
[levels_needed
] == NIL
)
3488 elog(ERROR
, "failed to build any %d-way joins", levels_needed
);
3489 Assert(list_length(root
->join_rel_level
[levels_needed
]) == 1);
3491 rel
= (RelOptInfo
*) linitial(root
->join_rel_level
[levels_needed
]);
3493 root
->join_rel_level
= NULL
;
3498 /*****************************************************************************
3499 * PUSHING QUALS DOWN INTO SUBQUERIES
3500 *****************************************************************************/
3503 * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
3505 * subquery is the particular component query being checked. topquery
3506 * is the top component of a set-operations tree (the same Query if no
3507 * set-op is involved).
3509 * Conditions checked here:
3511 * 1. If the subquery has a LIMIT clause, we must not push down any quals,
3512 * since that could change the set of rows returned.
3514 * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
3515 * quals into it, because that could change the results.
3517 * 3. If the subquery uses DISTINCT, we cannot push volatile quals into it.
3518 * This is because upper-level quals should semantically be evaluated only
3519 * once per distinct row, not once per original row, and if the qual is
3520 * volatile then extra evaluations could change the results. (This issue
3521 * does not apply to other forms of aggregation such as GROUP BY, because
3522 * when those are present we push into HAVING not WHERE, so that the quals
3523 * are still applied after aggregation.)
3525 * 4. If the subquery contains window functions, we cannot push volatile quals
3526 * into it. The issue here is a bit different from DISTINCT: a volatile qual
3527 * might succeed for some rows of a window partition and fail for others,
3528 * thereby changing the partition contents and thus the window functions'
3529 * results for rows that remain.
3531 * 5. If the subquery contains any set-returning functions in its targetlist,
3532 * we cannot push volatile quals into it. That would push them below the SRFs
3533 * and thereby change the number of times they are evaluated. Also, a
3534 * volatile qual could succeed for some SRF output rows and fail for others,
3535 * a behavior that cannot occur if it's evaluated before SRF expansion.
3537 * 6. If the subquery has nonempty grouping sets, we cannot push down any
3538 * quals. The concern here is that a qual referencing a "constant" grouping
3539 * column could get constant-folded, which would be improper because the value
3540 * is potentially nullable by grouping-set expansion. This restriction could
3541 * be removed if we had a parsetree representation that shows that such
3542 * grouping columns are not really constant. (There are other ideas that
3543 * could be used to relax this restriction, but that's the approach most
3544 * likely to get taken in the future. Note that there's not much to be gained
3545 * so long as subquery_planner can't move HAVING clauses to WHERE within such
3548 * In addition, we make several checks on the subquery's output columns to see
3549 * if it is safe to reference them in pushed-down quals. If output column k
3550 * is found to be unsafe to reference, we set the reason for that inside
3551 * safetyInfo->unsafeFlags[k], but we don't reject the subquery overall since
3552 * column k might not be referenced by some/all quals. The unsafeFlags[]
3553 * array will be consulted later by qual_is_pushdown_safe(). It's better to
3554 * do it this way than to make the checks directly in qual_is_pushdown_safe(),
3555 * because when the subquery involves set operations we have to check the
3556 * output expressions in each arm of the set op.
3558 * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
3559 * we're effectively assuming that the quals cannot distinguish values that
3560 * the DISTINCT's equality operator sees as equal, yet there are many
3561 * counterexamples to that assumption. However use of such a qual with a
3562 * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
3563 * "equal" value will be chosen as the output value by the DISTINCT operation.
3564 * So we don't worry too much about that. Another objection is that if the
3565 * qual is expensive to evaluate, running it for each original row might cost
3566 * more than we save by eliminating rows before the DISTINCT step. But it
3567 * would be very hard to estimate that at this stage, and in practice pushdown
3568 * seldom seems to make things worse, so we ignore that problem too.
3570 * Note: likewise, pushing quals into a subquery with window functions is a
3571 * bit dubious: the quals might remove some rows of a window partition while
3572 * leaving others, causing changes in the window functions' results for the
3573 * surviving rows. We insist that such a qual reference only partitioning
3574 * columns, but again that only protects us if the qual does not distinguish
3575 * values that the partitioning equality operator sees as equal. The risks
3576 * here are perhaps larger than for DISTINCT, since no de-duplication of rows
3577 * occurs and thus there is no theoretical problem with such a qual. But
3578 * we'll do this anyway because the potential performance benefits are very
3579 * large, and we've seen no field complaints about the longstanding comparable
3580 * behavior with DISTINCT.
3583 subquery_is_pushdown_safe(Query
*subquery
, Query
*topquery
,
3584 pushdown_safety_info
*safetyInfo
)
3586 SetOperationStmt
*topop
;
3589 if (subquery
->limitOffset
!= NULL
|| subquery
->limitCount
!= NULL
)
3593 if (subquery
->groupClause
&& subquery
->groupingSets
)
3596 /* Check points 3, 4, and 5 */
3597 if (subquery
->distinctClause
||
3598 subquery
->hasWindowFuncs
||
3599 subquery
->hasTargetSRFs
)
3600 safetyInfo
->unsafeVolatile
= true;
3603 * If we're at a leaf query, check for unsafe expressions in its target
3604 * list, and mark any reasons why they're unsafe in unsafeFlags[].
3605 * (Non-leaf nodes in setop trees have only simple Vars in their tlists,
3606 * so no need to check them.)
3608 if (subquery
->setOperations
== NULL
)
3609 check_output_expressions(subquery
, safetyInfo
);
3611 /* Are we at top level, or looking at a setop component? */
3612 if (subquery
== topquery
)
3614 /* Top level, so check any component queries */
3615 if (subquery
->setOperations
!= NULL
)
3616 if (!recurse_pushdown_safe(subquery
->setOperations
, topquery
,
3622 /* Setop component must not have more components (too weird) */
3623 if (subquery
->setOperations
!= NULL
)
3625 /* Check whether setop component output types match top level */
3626 topop
= castNode(SetOperationStmt
, topquery
->setOperations
);
3628 compare_tlist_datatypes(subquery
->targetList
,
3636 * Helper routine to recurse through setOperations tree
3639 recurse_pushdown_safe(Node
*setOp
, Query
*topquery
,
3640 pushdown_safety_info
*safetyInfo
)
3642 if (IsA(setOp
, RangeTblRef
))
3644 RangeTblRef
*rtr
= (RangeTblRef
*) setOp
;
3645 RangeTblEntry
*rte
= rt_fetch(rtr
->rtindex
, topquery
->rtable
);
3646 Query
*subquery
= rte
->subquery
;
3648 Assert(subquery
!= NULL
);
3649 return subquery_is_pushdown_safe(subquery
, topquery
, safetyInfo
);
3651 else if (IsA(setOp
, SetOperationStmt
))
3653 SetOperationStmt
*op
= (SetOperationStmt
*) setOp
;
3655 /* EXCEPT is no good (point 2 for subquery_is_pushdown_safe) */
3656 if (op
->op
== SETOP_EXCEPT
)
3659 if (!recurse_pushdown_safe(op
->larg
, topquery
, safetyInfo
))
3661 if (!recurse_pushdown_safe(op
->rarg
, topquery
, safetyInfo
))
3666 elog(ERROR
, "unrecognized node type: %d",
3667 (int) nodeTag(setOp
));
3673 * check_output_expressions - check subquery's output expressions for safety
3675 * There are several cases in which it's unsafe to push down an upper-level
3676 * qual if it references a particular output column of a subquery. We check
3677 * each output column of the subquery and set flags in unsafeFlags[k] when we
3678 * see that column is unsafe for a pushed-down qual to reference. The
3679 * conditions checked here are:
3681 * 1. We must not push down any quals that refer to subselect outputs that
3682 * return sets, else we'd introduce functions-returning-sets into the
3683 * subquery's WHERE/HAVING quals.
3685 * 2. We must not push down any quals that refer to subselect outputs that
3686 * contain volatile functions, for fear of introducing strange results due
3687 * to multiple evaluation of a volatile function.
3689 * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
3690 * refer to non-DISTINCT output columns, because that could change the set
3691 * of rows returned. (This condition is vacuous for DISTINCT, because then
3692 * there are no non-DISTINCT output columns, so we needn't check. Note that
3693 * subquery_is_pushdown_safe already reported that we can't use volatile
3694 * quals if there's DISTINCT or DISTINCT ON.)
3696 * 4. If the subquery has any window functions, we must not push down quals
3697 * that reference any output columns that are not listed in all the subquery's
3698 * window PARTITION BY clauses. We can push down quals that use only
3699 * partitioning columns because they should succeed or fail identically for
3700 * every row of any one window partition, and totally excluding some
3701 * partitions will not change a window function's results for remaining
3702 * partitions. (Again, this also requires nonvolatile quals, but
3703 * subquery_is_pushdown_safe handles that.). Subquery columns marked as
3704 * unsafe for this reason can still have WindowClause run conditions pushed
3708 check_output_expressions(Query
*subquery
, pushdown_safety_info
*safetyInfo
)
3712 foreach(lc
, subquery
->targetList
)
3714 TargetEntry
*tle
= (TargetEntry
*) lfirst(lc
);
3717 continue; /* ignore resjunk columns */
3719 /* Functions returning sets are unsafe (point 1) */
3720 if (subquery
->hasTargetSRFs
&&
3721 (safetyInfo
->unsafeFlags
[tle
->resno
] &
3722 UNSAFE_HAS_SET_FUNC
) == 0 &&
3723 expression_returns_set((Node
*) tle
->expr
))
3725 safetyInfo
->unsafeFlags
[tle
->resno
] |= UNSAFE_HAS_SET_FUNC
;
3729 /* Volatile functions are unsafe (point 2) */
3730 if ((safetyInfo
->unsafeFlags
[tle
->resno
] &
3731 UNSAFE_HAS_VOLATILE_FUNC
) == 0 &&
3732 contain_volatile_functions((Node
*) tle
->expr
))
3734 safetyInfo
->unsafeFlags
[tle
->resno
] |= UNSAFE_HAS_VOLATILE_FUNC
;
3738 /* If subquery uses DISTINCT ON, check point 3 */
3739 if (subquery
->hasDistinctOn
&&
3740 (safetyInfo
->unsafeFlags
[tle
->resno
] &
3741 UNSAFE_NOTIN_DISTINCTON_CLAUSE
) == 0 &&
3742 !targetIsInSortList(tle
, InvalidOid
, subquery
->distinctClause
))
3744 /* non-DISTINCT column, so mark it unsafe */
3745 safetyInfo
->unsafeFlags
[tle
->resno
] |= UNSAFE_NOTIN_DISTINCTON_CLAUSE
;
3749 /* If subquery uses window functions, check point 4 */
3750 if (subquery
->hasWindowFuncs
&&
3751 (safetyInfo
->unsafeFlags
[tle
->resno
] &
3752 UNSAFE_NOTIN_DISTINCTON_CLAUSE
) == 0 &&
3753 !targetIsInAllPartitionLists(tle
, subquery
))
3755 /* not present in all PARTITION BY clauses, so mark it unsafe */
3756 safetyInfo
->unsafeFlags
[tle
->resno
] |= UNSAFE_NOTIN_PARTITIONBY_CLAUSE
;
3763 * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
3764 * push quals into each component query, but the quals can only reference
3765 * subquery columns that suffer no type coercions in the set operation.
3766 * Otherwise there are possible semantic gotchas. So, we check the
3767 * component queries to see if any of them have output types different from
3768 * the top-level setop outputs. We set the UNSAFE_TYPE_MISMATCH bit in
3769 * unsafeFlags[k] if column k has different type in any component.
3771 * We don't have to care about typmods here: the only allowed difference
3772 * between set-op input and output typmods is input is a specific typmod
3773 * and output is -1, and that does not require a coercion.
3775 * tlist is a subquery tlist.
3776 * colTypes is an OID list of the top-level setop's output column types.
3777 * safetyInfo is the pushdown_safety_info to set unsafeFlags[] for.
3780 compare_tlist_datatypes(List
*tlist
, List
*colTypes
,
3781 pushdown_safety_info
*safetyInfo
)
3784 ListCell
*colType
= list_head(colTypes
);
3788 TargetEntry
*tle
= (TargetEntry
*) lfirst(l
);
3791 continue; /* ignore resjunk columns */
3792 if (colType
== NULL
)
3793 elog(ERROR
, "wrong number of tlist entries");
3794 if (exprType((Node
*) tle
->expr
) != lfirst_oid(colType
))
3795 safetyInfo
->unsafeFlags
[tle
->resno
] |= UNSAFE_TYPE_MISMATCH
;
3796 colType
= lnext(colTypes
, colType
);
3798 if (colType
!= NULL
)
3799 elog(ERROR
, "wrong number of tlist entries");
3803 * targetIsInAllPartitionLists
3804 * True if the TargetEntry is listed in the PARTITION BY clause
3805 * of every window defined in the query.
3807 * It would be safe to ignore windows not actually used by any window
3808 * function, but it's not easy to get that info at this stage; and it's
3809 * unlikely to be useful to spend any extra cycles getting it, since
3810 * unreferenced window definitions are probably infrequent in practice.
3813 targetIsInAllPartitionLists(TargetEntry
*tle
, Query
*query
)
3817 foreach(lc
, query
->windowClause
)
3819 WindowClause
*wc
= (WindowClause
*) lfirst(lc
);
3821 if (!targetIsInSortList(tle
, InvalidOid
, wc
->partitionClause
))
3828 * qual_is_pushdown_safe - is a particular rinfo safe to push down?
3830 * rinfo is a restriction clause applying to the given subquery (whose RTE
3831 * has index rti in the parent query).
3833 * Conditions checked here:
3835 * 1. rinfo's clause must not contain any SubPlans (mainly because it's
3836 * unclear that it will work correctly: SubLinks will already have been
3837 * transformed into SubPlans in the qual, but not in the subquery). Note that
3838 * SubLinks that transform to initplans are safe, and will be accepted here
3839 * because what we'll see in the qual is just a Param referencing the initplan
3842 * 2. If unsafeVolatile is set, rinfo's clause must not contain any volatile
3845 * 3. If unsafeLeaky is set, rinfo's clause must not contain any leaky
3846 * functions that are passed Var nodes, and therefore might reveal values from
3847 * the subquery as side effects.
3849 * 4. rinfo's clause must not refer to the whole-row output of the subquery
3850 * (since there is no easy way to name that within the subquery itself).
3852 * 5. rinfo's clause must not refer to any subquery output columns that were
3853 * found to be unsafe to reference by subquery_is_pushdown_safe().
3855 static pushdown_safe_type
3856 qual_is_pushdown_safe(Query
*subquery
, Index rti
, RestrictInfo
*rinfo
,
3857 pushdown_safety_info
*safetyInfo
)
3859 pushdown_safe_type safe
= PUSHDOWN_SAFE
;
3860 Node
*qual
= (Node
*) rinfo
->clause
;
3864 /* Refuse subselects (point 1) */
3865 if (contain_subplans(qual
))
3866 return PUSHDOWN_UNSAFE
;
3868 /* Refuse volatile quals if we found they'd be unsafe (point 2) */
3869 if (safetyInfo
->unsafeVolatile
&&
3870 contain_volatile_functions((Node
*) rinfo
))
3871 return PUSHDOWN_UNSAFE
;
3873 /* Refuse leaky quals if told to (point 3) */
3874 if (safetyInfo
->unsafeLeaky
&&
3875 contain_leaked_vars(qual
))
3876 return PUSHDOWN_UNSAFE
;
3879 * Examine all Vars used in clause. Since it's a restriction clause, all
3880 * such Vars must refer to subselect output columns ... unless this is
3881 * part of a LATERAL subquery, in which case there could be lateral
3884 * By omitting the relevant flags, this also gives us a cheap sanity check
3885 * that no aggregates or window functions appear in the qual. Those would
3886 * be unsafe to push down, but at least for the moment we could never see
3887 * any in a qual anyhow.
3889 vars
= pull_var_clause(qual
, PVC_INCLUDE_PLACEHOLDERS
);
3892 Var
*var
= (Var
*) lfirst(vl
);
3895 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
3896 * It's not clear whether a PHV could safely be pushed down, and even
3897 * less clear whether such a situation could arise in any cases of
3898 * practical interest anyway. So for the moment, just refuse to push
3903 safe
= PUSHDOWN_UNSAFE
;
3908 * Punt if we find any lateral references. It would be safe to push
3909 * these down, but we'd have to convert them into outer references,
3910 * which subquery_push_qual lacks the infrastructure to do. The case
3911 * arises so seldom that it doesn't seem worth working hard on.
3913 if (var
->varno
!= rti
)
3915 safe
= PUSHDOWN_UNSAFE
;
3919 /* Subqueries have no system columns */
3920 Assert(var
->varattno
>= 0);
3923 if (var
->varattno
== 0)
3925 safe
= PUSHDOWN_UNSAFE
;
3930 if (safetyInfo
->unsafeFlags
[var
->varattno
] != 0)
3932 if (safetyInfo
->unsafeFlags
[var
->varattno
] &
3933 (UNSAFE_HAS_VOLATILE_FUNC
| UNSAFE_HAS_SET_FUNC
|
3934 UNSAFE_NOTIN_DISTINCTON_CLAUSE
| UNSAFE_TYPE_MISMATCH
))
3936 safe
= PUSHDOWN_UNSAFE
;
3941 /* UNSAFE_NOTIN_PARTITIONBY_CLAUSE is ok for run conditions */
3942 safe
= PUSHDOWN_WINDOWCLAUSE_RUNCOND
;
3943 /* don't break, we might find another Var that's unsafe */
3954 * subquery_push_qual - push down a qual that we have determined is safe
3957 subquery_push_qual(Query
*subquery
, RangeTblEntry
*rte
, Index rti
, Node
*qual
)
3959 if (subquery
->setOperations
!= NULL
)
3961 /* Recurse to push it separately to each component query */
3962 recurse_push_qual(subquery
->setOperations
, subquery
,
3968 * We need to replace Vars in the qual (which must refer to outputs of
3969 * the subquery) with copies of the subquery's targetlist expressions.
3970 * Note that at this point, any uplevel Vars in the qual should have
3971 * been replaced with Params, so they need no work.
3973 * This step also ensures that when we are pushing into a setop tree,
3974 * each component query gets its own copy of the qual.
3976 qual
= ReplaceVarsFromTargetList(qual
, rti
, 0, rte
,
3977 subquery
->targetList
,
3978 REPLACEVARS_REPORT_ERROR
, 0,
3979 &subquery
->hasSubLinks
);
3982 * Now attach the qual to the proper place: normally WHERE, but if the
3983 * subquery uses grouping or aggregation, put it in HAVING (since the
3984 * qual really refers to the group-result rows).
3986 if (subquery
->hasAggs
|| subquery
->groupClause
|| subquery
->groupingSets
|| subquery
->havingQual
)
3987 subquery
->havingQual
= make_and_qual(subquery
->havingQual
, qual
);
3989 subquery
->jointree
->quals
=
3990 make_and_qual(subquery
->jointree
->quals
, qual
);
3993 * We need not change the subquery's hasAggs or hasSubLinks flags,
3994 * since we can't be pushing down any aggregates that weren't there
3995 * before, and we don't push down subselects at all.
4001 * Helper routine to recurse through setOperations tree
4004 recurse_push_qual(Node
*setOp
, Query
*topquery
,
4005 RangeTblEntry
*rte
, Index rti
, Node
*qual
)
4007 if (IsA(setOp
, RangeTblRef
))
4009 RangeTblRef
*rtr
= (RangeTblRef
*) setOp
;
4010 RangeTblEntry
*subrte
= rt_fetch(rtr
->rtindex
, topquery
->rtable
);
4011 Query
*subquery
= subrte
->subquery
;
4013 Assert(subquery
!= NULL
);
4014 subquery_push_qual(subquery
, rte
, rti
, qual
);
4016 else if (IsA(setOp
, SetOperationStmt
))
4018 SetOperationStmt
*op
= (SetOperationStmt
*) setOp
;
4020 recurse_push_qual(op
->larg
, topquery
, rte
, rti
, qual
);
4021 recurse_push_qual(op
->rarg
, topquery
, rte
, rti
, qual
);
4025 elog(ERROR
, "unrecognized node type: %d",
4026 (int) nodeTag(setOp
));
4030 /*****************************************************************************
4031 * SIMPLIFYING SUBQUERY TARGETLISTS
4032 *****************************************************************************/
4035 * remove_unused_subquery_outputs
4036 * Remove subquery targetlist items we don't need
4038 * It's possible, even likely, that the upper query does not read all the
4039 * output columns of the subquery. We can remove any such outputs that are
4040 * not needed by the subquery itself (e.g., as sort/group columns) and do not
4041 * affect semantics otherwise (e.g., volatile functions can't be removed).
4042 * This is useful not only because we might be able to remove expensive-to-
4043 * compute expressions, but because deletion of output columns might allow
4044 * optimizations such as join removal to occur within the subquery.
4046 * extra_used_attrs can be passed as non-NULL to mark any columns (offset by
4047 * FirstLowInvalidHeapAttributeNumber) that we should not remove. This
4048 * parameter is modified by the function, so callers must make a copy if they
4049 * need to use the passed in Bitmapset after calling this function.
4051 * To avoid affecting column numbering in the targetlist, we don't physically
4052 * remove unused tlist entries, but rather replace their expressions with NULL
4053 * constants. This is implemented by modifying subquery->targetList.
4056 remove_unused_subquery_outputs(Query
*subquery
, RelOptInfo
*rel
,
4057 Bitmapset
*extra_used_attrs
)
4059 Bitmapset
*attrs_used
;
4063 * Just point directly to extra_used_attrs. No need to bms_copy as none of
4064 * the current callers use the Bitmapset after calling this function.
4066 attrs_used
= extra_used_attrs
;
4069 * Do nothing if subquery has UNION/INTERSECT/EXCEPT: in principle we
4070 * could update all the child SELECTs' tlists, but it seems not worth the
4071 * trouble presently.
4073 if (subquery
->setOperations
)
4077 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
4078 * time: all its output columns must be used in the distinctClause.
4080 if (subquery
->distinctClause
&& !subquery
->hasDistinctOn
)
4084 * Collect a bitmap of all the output column numbers used by the upper
4087 * Add all the attributes needed for joins or final output. Note: we must
4088 * look at rel's targetlist, not the attr_needed data, because attr_needed
4089 * isn't computed for inheritance child rels, cf set_append_rel_size().
4090 * (XXX might be worth changing that sometime.)
4092 pull_varattnos((Node
*) rel
->reltarget
->exprs
, rel
->relid
, &attrs_used
);
4094 /* Add all the attributes used by un-pushed-down restriction clauses. */
4095 foreach(lc
, rel
->baserestrictinfo
)
4097 RestrictInfo
*rinfo
= (RestrictInfo
*) lfirst(lc
);
4099 pull_varattnos((Node
*) rinfo
->clause
, rel
->relid
, &attrs_used
);
4103 * If there's a whole-row reference to the subquery, we can't remove
4106 if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber
, attrs_used
))
4110 * Run through the tlist and zap entries we don't need. It's okay to
4111 * modify the tlist items in-place because set_subquery_pathlist made a
4112 * copy of the subquery.
4114 foreach(lc
, subquery
->targetList
)
4116 TargetEntry
*tle
= (TargetEntry
*) lfirst(lc
);
4117 Node
*texpr
= (Node
*) tle
->expr
;
4120 * If it has a sortgroupref number, it's used in some sort/group
4121 * clause so we'd better not remove it. Also, don't remove any
4122 * resjunk columns, since their reason for being has nothing to do
4123 * with anybody reading the subquery's output. (It's likely that
4124 * resjunk columns in a sub-SELECT would always have ressortgroupref
4125 * set, but even if they don't, it seems imprudent to remove them.)
4127 if (tle
->ressortgroupref
|| tle
->resjunk
)
4131 * If it's used by the upper query, we can't remove it.
4133 if (bms_is_member(tle
->resno
- FirstLowInvalidHeapAttributeNumber
,
4138 * If it contains a set-returning function, we can't remove it since
4139 * that could change the number of rows returned by the subquery.
4141 if (subquery
->hasTargetSRFs
&&
4142 expression_returns_set(texpr
))
4146 * If it contains volatile functions, we daren't remove it for fear
4147 * that the user is expecting their side-effects to happen.
4149 if (contain_volatile_functions(texpr
))
4153 * OK, we don't need it. Replace the expression with a NULL constant.
4154 * Preserve the exposed type of the expression, in case something
4155 * looks at the rowtype of the subquery's result.
4157 tle
->expr
= (Expr
*) makeNullConst(exprType(texpr
),
4159 exprCollation(texpr
));
4164 * create_partial_bitmap_paths
4165 * Build partial bitmap heap path for the relation
4168 create_partial_bitmap_paths(PlannerInfo
*root
, RelOptInfo
*rel
,
4171 int parallel_workers
;
4172 double pages_fetched
;
4174 /* Compute heap pages for bitmap heap scan */
4175 pages_fetched
= compute_bitmap_pages(root
, rel
, bitmapqual
, 1.0,
4178 parallel_workers
= compute_parallel_worker(rel
, pages_fetched
, -1,
4179 max_parallel_workers_per_gather
);
4181 if (parallel_workers
<= 0)
4184 add_partial_path(rel
, (Path
*) create_bitmap_heap_path(root
, rel
,
4185 bitmapqual
, rel
->lateral_relids
, 1.0, parallel_workers
));
4189 * Compute the number of parallel workers that should be used to scan a
4190 * relation. We compute the parallel workers based on the size of the heap to
4191 * be scanned and the size of the index to be scanned, then choose a minimum
4194 * "heap_pages" is the number of pages from the table that we expect to scan, or
4195 * -1 if we don't expect to scan any.
4197 * "index_pages" is the number of pages from the index that we expect to scan, or
4198 * -1 if we don't expect to scan any.
4200 * "max_workers" is caller's limit on the number of workers. This typically
4204 compute_parallel_worker(RelOptInfo
*rel
, double heap_pages
, double index_pages
,
4207 int parallel_workers
= 0;
4210 * If the user has set the parallel_workers reloption, use that; otherwise
4211 * select a default number of workers.
4213 if (rel
->rel_parallel_workers
!= -1)
4214 parallel_workers
= rel
->rel_parallel_workers
;
4218 * If the number of pages being scanned is insufficient to justify a
4219 * parallel scan, just return zero ... unless it's an inheritance
4220 * child. In that case, we want to generate a parallel path here
4221 * anyway. It might not be worthwhile just for this relation, but
4222 * when combined with all of its inheritance siblings it may well pay
4225 if (rel
->reloptkind
== RELOPT_BASEREL
&&
4226 ((heap_pages
>= 0 && heap_pages
< min_parallel_table_scan_size
) ||
4227 (index_pages
>= 0 && index_pages
< min_parallel_index_scan_size
)))
4230 if (heap_pages
>= 0)
4232 int heap_parallel_threshold
;
4233 int heap_parallel_workers
= 1;
4236 * Select the number of workers based on the log of the size of
4237 * the relation. This probably needs to be a good deal more
4238 * sophisticated, but we need something here for now. Note that
4239 * the upper limit of the min_parallel_table_scan_size GUC is
4240 * chosen to prevent overflow here.
4242 heap_parallel_threshold
= Max(min_parallel_table_scan_size
, 1);
4243 while (heap_pages
>= (BlockNumber
) (heap_parallel_threshold
* 3))
4245 heap_parallel_workers
++;
4246 heap_parallel_threshold
*= 3;
4247 if (heap_parallel_threshold
> INT_MAX
/ 3)
4248 break; /* avoid overflow */
4251 parallel_workers
= heap_parallel_workers
;
4254 if (index_pages
>= 0)
4256 int index_parallel_workers
= 1;
4257 int index_parallel_threshold
;
4259 /* same calculation as for heap_pages above */
4260 index_parallel_threshold
= Max(min_parallel_index_scan_size
, 1);
4261 while (index_pages
>= (BlockNumber
) (index_parallel_threshold
* 3))
4263 index_parallel_workers
++;
4264 index_parallel_threshold
*= 3;
4265 if (index_parallel_threshold
> INT_MAX
/ 3)
4266 break; /* avoid overflow */
4269 if (parallel_workers
> 0)
4270 parallel_workers
= Min(parallel_workers
, index_parallel_workers
);
4272 parallel_workers
= index_parallel_workers
;
4276 /* In no case use more than caller supplied maximum number of workers */
4277 parallel_workers
= Min(parallel_workers
, max_workers
);
4279 return parallel_workers
;
4283 * generate_partitionwise_join_paths
4284 * Create paths representing partitionwise join for given partitioned
4287 * This must not be called until after we are done adding paths for all
4288 * child-joins. Otherwise, add_path might delete a path to which some path
4289 * generated here has a reference.
4292 generate_partitionwise_join_paths(PlannerInfo
*root
, RelOptInfo
*rel
)
4294 List
*live_children
= NIL
;
4297 RelOptInfo
**part_rels
;
4299 /* Handle only join relations here. */
4300 if (!IS_JOIN_REL(rel
))
4303 /* We've nothing to do if the relation is not partitioned. */
4304 if (!IS_PARTITIONED_REL(rel
))
4307 /* The relation should have consider_partitionwise_join set. */
4308 Assert(rel
->consider_partitionwise_join
);
4310 /* Guard against stack overflow due to overly deep partition hierarchy. */
4311 check_stack_depth();
4313 num_parts
= rel
->nparts
;
4314 part_rels
= rel
->part_rels
;
4316 /* Collect non-dummy child-joins. */
4317 for (cnt_parts
= 0; cnt_parts
< num_parts
; cnt_parts
++)
4319 RelOptInfo
*child_rel
= part_rels
[cnt_parts
];
4321 /* If it's been pruned entirely, it's certainly dummy. */
4322 if (child_rel
== NULL
)
4325 /* Make partitionwise join paths for this partitioned child-join. */
4326 generate_partitionwise_join_paths(root
, child_rel
);
4328 /* If we failed to make any path for this child, we must give up. */
4329 if (child_rel
->pathlist
== NIL
)
4332 * Mark the parent joinrel as unpartitioned so that later
4333 * functions treat it correctly.
4339 /* Else, identify the cheapest path for it. */
4340 set_cheapest(child_rel
);
4342 /* Dummy children need not be scanned, so ignore those. */
4343 if (IS_DUMMY_REL(child_rel
))
4346 #ifdef OPTIMIZER_DEBUG
4350 live_children
= lappend(live_children
, child_rel
);
4353 /* If all child-joins are dummy, parent join is also dummy. */
4356 mark_dummy_rel(rel
);
4360 /* Build additional paths for this rel from child-join paths. */
4361 add_paths_to_append_rel(root
, rel
, live_children
);
4362 list_free(live_children
);