Disallow empty passwords in LDAP authentication, the same way
[PostgreSQL.git] / src / backend / optimizer / path / joinpath.c
blob8e5767d17e016049c670de5c5b6e8607ee379820
1 /*-------------------------------------------------------------------------
3 * joinpath.c
4 * Routines to find all possible paths for processing a set of joins
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
13 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include <math.h>
19 #include "optimizer/cost.h"
20 #include "optimizer/pathnode.h"
21 #include "optimizer/paths.h"
24 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
25 RelOptInfo *outerrel, RelOptInfo *innerrel,
26 List *restrictlist, List *mergeclause_list,
27 JoinType jointype, SpecialJoinInfo *sjinfo);
28 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
29 RelOptInfo *outerrel, RelOptInfo *innerrel,
30 List *restrictlist, List *mergeclause_list,
31 JoinType jointype, SpecialJoinInfo *sjinfo);
32 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
33 RelOptInfo *outerrel, RelOptInfo *innerrel,
34 List *restrictlist,
35 JoinType jointype, SpecialJoinInfo *sjinfo);
36 static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
37 RelOptInfo *outer_rel, JoinType jointype);
38 static List *select_mergejoin_clauses(PlannerInfo *root,
39 RelOptInfo *joinrel,
40 RelOptInfo *outerrel,
41 RelOptInfo *innerrel,
42 List *restrictlist,
43 JoinType jointype);
47 * add_paths_to_joinrel
48 * Given a join relation and two component rels from which it can be made,
49 * consider all possible paths that use the two component rels as outer
50 * and inner rel respectively. Add these paths to the join rel's pathlist
51 * if they survive comparison with other paths (and remove any existing
52 * paths that are dominated by these paths).
54 * Modifies the pathlist field of the joinrel node to contain the best
55 * paths found so far.
57 * jointype is not necessarily the same as sjinfo->jointype; it might be
58 * "flipped around" if we are considering joining the rels in the opposite
59 * direction from what's indicated in sjinfo.
61 * Also, this routine and others in this module accept the special JoinTypes
62 * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
63 * unique-ify the outer or inner relation and then apply a regular inner
64 * join. These values are not allowed to propagate outside this module,
65 * however. Path cost estimation code may need to recognize that it's
66 * dealing with such a case --- the combination of nominal jointype INNER
67 * with sjinfo->jointype == JOIN_SEMI indicates that.
69 void
70 add_paths_to_joinrel(PlannerInfo *root,
71 RelOptInfo *joinrel,
72 RelOptInfo *outerrel,
73 RelOptInfo *innerrel,
74 JoinType jointype,
75 SpecialJoinInfo *sjinfo,
76 List *restrictlist)
78 List *mergeclause_list = NIL;
81 * Find potential mergejoin clauses. We can skip this if we are not
82 * interested in doing a mergejoin. However, mergejoin is currently our
83 * only way of implementing full outer joins, so override mergejoin
84 * disable if it's a full join.
86 if (enable_mergejoin || jointype == JOIN_FULL)
87 mergeclause_list = select_mergejoin_clauses(root,
88 joinrel,
89 outerrel,
90 innerrel,
91 restrictlist,
92 jointype);
95 * 1. Consider mergejoin paths where both relations must be explicitly
96 * sorted.
98 sort_inner_and_outer(root, joinrel, outerrel, innerrel,
99 restrictlist, mergeclause_list, jointype, sjinfo);
102 * 2. Consider paths where the outer relation need not be explicitly
103 * sorted. This includes both nestloops and mergejoins where the outer
104 * path is already ordered.
106 match_unsorted_outer(root, joinrel, outerrel, innerrel,
107 restrictlist, mergeclause_list, jointype, sjinfo);
109 #ifdef NOT_USED
112 * 3. Consider paths where the inner relation need not be explicitly
113 * sorted. This includes mergejoins only (nestloops were already built in
114 * match_unsorted_outer).
116 * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
117 * significant difference between the inner and outer side of a mergejoin,
118 * so match_unsorted_inner creates no paths that aren't equivalent to
119 * those made by match_unsorted_outer when add_paths_to_joinrel() is
120 * invoked with the two rels given in the other order.
122 match_unsorted_inner(root, joinrel, outerrel, innerrel,
123 restrictlist, mergeclause_list, jointype, sjinfo);
124 #endif
127 * 4. Consider paths where both outer and inner relations must be hashed
128 * before being joined.
130 if (enable_hashjoin)
131 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
132 restrictlist, jointype, sjinfo);
136 * sort_inner_and_outer
137 * Create mergejoin join paths by explicitly sorting both the outer and
138 * inner join relations on each available merge ordering.
140 * 'joinrel' is the join relation
141 * 'outerrel' is the outer join relation
142 * 'innerrel' is the inner join relation
143 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
144 * clauses that apply to this join
145 * 'mergeclause_list' is a list of RestrictInfo nodes for available
146 * mergejoin clauses in this join
147 * 'jointype' is the type of join to do
148 * 'sjinfo' is extra info about the join for selectivity estimation
150 static void
151 sort_inner_and_outer(PlannerInfo *root,
152 RelOptInfo *joinrel,
153 RelOptInfo *outerrel,
154 RelOptInfo *innerrel,
155 List *restrictlist,
156 List *mergeclause_list,
157 JoinType jointype,
158 SpecialJoinInfo *sjinfo)
160 bool useallclauses;
161 Path *outer_path;
162 Path *inner_path;
163 List *all_pathkeys;
164 ListCell *l;
167 * If we are doing a right or full join, we must use *all* the
168 * mergeclauses as join clauses, else we will not have a valid plan.
170 switch (jointype)
172 case JOIN_INNER:
173 case JOIN_LEFT:
174 case JOIN_SEMI:
175 case JOIN_ANTI:
176 case JOIN_UNIQUE_OUTER:
177 case JOIN_UNIQUE_INNER:
178 useallclauses = false;
179 break;
180 case JOIN_RIGHT:
181 case JOIN_FULL:
182 useallclauses = true;
183 break;
184 default:
185 elog(ERROR, "unrecognized join type: %d",
186 (int) jointype);
187 useallclauses = false; /* keep compiler quiet */
188 break;
192 * We only consider the cheapest-total-cost input paths, since we are
193 * assuming here that a sort is required. We will consider
194 * cheapest-startup-cost input paths later, and only if they don't need a
195 * sort.
197 * If unique-ification is requested, do it and then handle as a plain
198 * inner join.
200 outer_path = outerrel->cheapest_total_path;
201 inner_path = innerrel->cheapest_total_path;
202 if (jointype == JOIN_UNIQUE_OUTER)
204 outer_path = (Path *) create_unique_path(root, outerrel,
205 outer_path, sjinfo);
206 Assert(outer_path);
207 jointype = JOIN_INNER;
209 else if (jointype == JOIN_UNIQUE_INNER)
211 inner_path = (Path *) create_unique_path(root, innerrel,
212 inner_path, sjinfo);
213 Assert(inner_path);
214 jointype = JOIN_INNER;
218 * Each possible ordering of the available mergejoin clauses will generate
219 * a differently-sorted result path at essentially the same cost. We have
220 * no basis for choosing one over another at this level of joining, but
221 * some sort orders may be more useful than others for higher-level
222 * mergejoins, so it's worth considering multiple orderings.
224 * Actually, it's not quite true that every mergeclause ordering will
225 * generate a different path order, because some of the clauses may be
226 * partially redundant (refer to the same EquivalenceClasses). Therefore,
227 * what we do is convert the mergeclause list to a list of canonical
228 * pathkeys, and then consider different orderings of the pathkeys.
230 * Generating a path for *every* permutation of the pathkeys doesn't seem
231 * like a winning strategy; the cost in planning time is too high. For
232 * now, we generate one path for each pathkey, listing that pathkey first
233 * and the rest in random order. This should allow at least a one-clause
234 * mergejoin without re-sorting against any other possible mergejoin
235 * partner path. But if we've not guessed the right ordering of secondary
236 * keys, we may end up evaluating clauses as qpquals when they could have
237 * been done as mergeclauses. (In practice, it's rare that there's more
238 * than two or three mergeclauses, so expending a huge amount of thought
239 * on that is probably not worth it.)
241 * The pathkey order returned by select_outer_pathkeys_for_merge() has
242 * some heuristics behind it (see that function), so be sure to try it
243 * exactly as-is as well as making variants.
245 all_pathkeys = select_outer_pathkeys_for_merge(root,
246 mergeclause_list,
247 joinrel);
249 foreach(l, all_pathkeys)
251 List *front_pathkey = (List *) lfirst(l);
252 List *cur_mergeclauses;
253 List *outerkeys;
254 List *innerkeys;
255 List *merge_pathkeys;
257 /* Make a pathkey list with this guy first */
258 if (l != list_head(all_pathkeys))
259 outerkeys = lcons(front_pathkey,
260 list_delete_ptr(list_copy(all_pathkeys),
261 front_pathkey));
262 else
263 outerkeys = all_pathkeys; /* no work at first one... */
265 /* Sort the mergeclauses into the corresponding ordering */
266 cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
267 outerkeys,
268 true,
269 mergeclause_list);
271 /* Should have used them all... */
272 Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
274 /* Build sort pathkeys for the inner side */
275 innerkeys = make_inner_pathkeys_for_merge(root,
276 cur_mergeclauses,
277 outerkeys);
279 /* Build pathkeys representing output sort order */
280 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
281 outerkeys);
284 * And now we can make the path.
286 * Note: it's possible that the cheapest paths will already be sorted
287 * properly. create_mergejoin_path will detect that case and suppress
288 * an explicit sort step, so we needn't do so here.
290 add_path(joinrel, (Path *)
291 create_mergejoin_path(root,
292 joinrel,
293 jointype,
294 sjinfo,
295 outer_path,
296 inner_path,
297 restrictlist,
298 merge_pathkeys,
299 cur_mergeclauses,
300 outerkeys,
301 innerkeys));
306 * match_unsorted_outer
307 * Creates possible join paths for processing a single join relation
308 * 'joinrel' by employing either iterative substitution or
309 * mergejoining on each of its possible outer paths (considering
310 * only outer paths that are already ordered well enough for merging).
312 * We always generate a nestloop path for each available outer path.
313 * In fact we may generate as many as five: one on the cheapest-total-cost
314 * inner path, one on the same with materialization, one on the
315 * cheapest-startup-cost inner path (if different), one on the
316 * cheapest-total inner-indexscan path (if any), and one on the
317 * cheapest-startup inner-indexscan path (if different).
319 * We also consider mergejoins if mergejoin clauses are available. We have
320 * two ways to generate the inner path for a mergejoin: sort the cheapest
321 * inner path, or use an inner path that is already suitably ordered for the
322 * merge. If we have several mergeclauses, it could be that there is no inner
323 * path (or only a very expensive one) for the full list of mergeclauses, but
324 * better paths exist if we truncate the mergeclause list (thereby discarding
325 * some sort key requirements). So, we consider truncations of the
326 * mergeclause list as well as the full list. (Ideally we'd consider all
327 * subsets of the mergeclause list, but that seems way too expensive.)
329 * 'joinrel' is the join relation
330 * 'outerrel' is the outer join relation
331 * 'innerrel' is the inner join relation
332 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
333 * clauses that apply to this join
334 * 'mergeclause_list' is a list of RestrictInfo nodes for available
335 * mergejoin clauses in this join
336 * 'jointype' is the type of join to do
337 * 'sjinfo' is extra info about the join for selectivity estimation
339 static void
340 match_unsorted_outer(PlannerInfo *root,
341 RelOptInfo *joinrel,
342 RelOptInfo *outerrel,
343 RelOptInfo *innerrel,
344 List *restrictlist,
345 List *mergeclause_list,
346 JoinType jointype,
347 SpecialJoinInfo *sjinfo)
349 JoinType save_jointype = jointype;
350 bool nestjoinOK;
351 bool useallclauses;
352 Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
353 Path *inner_cheapest_total = innerrel->cheapest_total_path;
354 Path *matpath = NULL;
355 Path *index_cheapest_startup = NULL;
356 Path *index_cheapest_total = NULL;
357 ListCell *l;
360 * Nestloop only supports inner, left, semi, and anti joins. Also, if we
361 * are doing a right or full join, we must use *all* the mergeclauses as
362 * join clauses, else we will not have a valid plan. (Although these two
363 * flags are currently inverses, keep them separate for clarity and
364 * possible future changes.)
366 switch (jointype)
368 case JOIN_INNER:
369 case JOIN_LEFT:
370 case JOIN_SEMI:
371 case JOIN_ANTI:
372 nestjoinOK = true;
373 useallclauses = false;
374 break;
375 case JOIN_RIGHT:
376 case JOIN_FULL:
377 nestjoinOK = false;
378 useallclauses = true;
379 break;
380 case JOIN_UNIQUE_OUTER:
381 case JOIN_UNIQUE_INNER:
382 jointype = JOIN_INNER;
383 nestjoinOK = true;
384 useallclauses = false;
385 break;
386 default:
387 elog(ERROR, "unrecognized join type: %d",
388 (int) jointype);
389 nestjoinOK = false; /* keep compiler quiet */
390 useallclauses = false;
391 break;
395 * If we need to unique-ify the inner path, we will consider only the
396 * cheapest inner.
398 if (save_jointype == JOIN_UNIQUE_INNER)
400 inner_cheapest_total = (Path *)
401 create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
402 Assert(inner_cheapest_total);
403 inner_cheapest_startup = inner_cheapest_total;
405 else if (nestjoinOK)
408 * If the cheapest inner path is a join or seqscan, we should consider
409 * materializing it. (This is a heuristic: we could consider it
410 * always, but for inner indexscans it's probably a waste of time.)
411 * Also skip it if the inner path materializes its output anyway.
413 if (!(inner_cheapest_total->pathtype == T_IndexScan ||
414 inner_cheapest_total->pathtype == T_BitmapHeapScan ||
415 inner_cheapest_total->pathtype == T_TidScan ||
416 inner_cheapest_total->pathtype == T_Material ||
417 inner_cheapest_total->pathtype == T_FunctionScan ||
418 inner_cheapest_total->pathtype == T_CteScan ||
419 inner_cheapest_total->pathtype == T_WorkTableScan))
420 matpath = (Path *)
421 create_material_path(innerrel, inner_cheapest_total);
424 * Get the best innerjoin indexpaths (if any) for this outer rel.
425 * They're the same for all outer paths.
427 if (innerrel->reloptkind != RELOPT_JOINREL)
429 if (IsA(inner_cheapest_total, AppendPath))
430 index_cheapest_total = best_appendrel_indexscan(root,
431 innerrel,
432 outerrel,
433 jointype);
434 else if (innerrel->rtekind == RTE_RELATION)
435 best_inner_indexscan(root, innerrel, outerrel, jointype,
436 &index_cheapest_startup,
437 &index_cheapest_total);
441 foreach(l, outerrel->pathlist)
443 Path *outerpath = (Path *) lfirst(l);
444 List *merge_pathkeys;
445 List *mergeclauses;
446 List *innersortkeys;
447 List *trialsortkeys;
448 Path *cheapest_startup_inner;
449 Path *cheapest_total_inner;
450 int num_sortkeys;
451 int sortkeycnt;
454 * If we need to unique-ify the outer path, it's pointless to consider
455 * any but the cheapest outer.
457 if (save_jointype == JOIN_UNIQUE_OUTER)
459 if (outerpath != outerrel->cheapest_total_path)
460 continue;
461 outerpath = (Path *) create_unique_path(root, outerrel,
462 outerpath, sjinfo);
463 Assert(outerpath);
467 * The result will have this sort order (even if it is implemented as
468 * a nestloop, and even if some of the mergeclauses are implemented by
469 * qpquals rather than as true mergeclauses):
471 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
472 outerpath->pathkeys);
474 if (nestjoinOK)
477 * Always consider a nestloop join with this outer and
478 * cheapest-total-cost inner. When appropriate, also consider
479 * using the materialized form of the cheapest inner, the
480 * cheapest-startup-cost inner path, and the cheapest innerjoin
481 * indexpaths.
483 add_path(joinrel, (Path *)
484 create_nestloop_path(root,
485 joinrel,
486 jointype,
487 sjinfo,
488 outerpath,
489 inner_cheapest_total,
490 restrictlist,
491 merge_pathkeys));
492 if (matpath != NULL)
493 add_path(joinrel, (Path *)
494 create_nestloop_path(root,
495 joinrel,
496 jointype,
497 sjinfo,
498 outerpath,
499 matpath,
500 restrictlist,
501 merge_pathkeys));
502 if (inner_cheapest_startup != inner_cheapest_total)
503 add_path(joinrel, (Path *)
504 create_nestloop_path(root,
505 joinrel,
506 jointype,
507 sjinfo,
508 outerpath,
509 inner_cheapest_startup,
510 restrictlist,
511 merge_pathkeys));
512 if (index_cheapest_total != NULL)
513 add_path(joinrel, (Path *)
514 create_nestloop_path(root,
515 joinrel,
516 jointype,
517 sjinfo,
518 outerpath,
519 index_cheapest_total,
520 restrictlist,
521 merge_pathkeys));
522 if (index_cheapest_startup != NULL &&
523 index_cheapest_startup != index_cheapest_total)
524 add_path(joinrel, (Path *)
525 create_nestloop_path(root,
526 joinrel,
527 jointype,
528 sjinfo,
529 outerpath,
530 index_cheapest_startup,
531 restrictlist,
532 merge_pathkeys));
535 /* Can't do anything else if outer path needs to be unique'd */
536 if (save_jointype == JOIN_UNIQUE_OUTER)
537 continue;
539 /* Look for useful mergeclauses (if any) */
540 mergeclauses = find_mergeclauses_for_pathkeys(root,
541 outerpath->pathkeys,
542 true,
543 mergeclause_list);
546 * Done with this outer path if no chance for a mergejoin.
548 * Special corner case: for "x FULL JOIN y ON true", there will be no
549 * join clauses at all. Ordinarily we'd generate a clauseless
550 * nestloop path, but since mergejoin is our only join type that
551 * supports FULL JOIN, it's necessary to generate a clauseless
552 * mergejoin path instead.
554 if (mergeclauses == NIL)
556 if (jointype == JOIN_FULL)
557 /* okay to try for mergejoin */ ;
558 else
559 continue;
561 if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
562 continue;
564 /* Compute the required ordering of the inner path */
565 innersortkeys = make_inner_pathkeys_for_merge(root,
566 mergeclauses,
567 outerpath->pathkeys);
570 * Generate a mergejoin on the basis of sorting the cheapest inner.
571 * Since a sort will be needed, only cheapest total cost matters. (But
572 * create_mergejoin_path will do the right thing if
573 * inner_cheapest_total is already correctly sorted.)
575 add_path(joinrel, (Path *)
576 create_mergejoin_path(root,
577 joinrel,
578 jointype,
579 sjinfo,
580 outerpath,
581 inner_cheapest_total,
582 restrictlist,
583 merge_pathkeys,
584 mergeclauses,
585 NIL,
586 innersortkeys));
588 /* Can't do anything else if inner path needs to be unique'd */
589 if (save_jointype == JOIN_UNIQUE_INNER)
590 continue;
593 * Look for presorted inner paths that satisfy the innersortkey list
594 * --- or any truncation thereof, if we are allowed to build a
595 * mergejoin using a subset of the merge clauses. Here, we consider
596 * both cheap startup cost and cheap total cost.
598 * As we shorten the sortkey list, we should consider only paths that
599 * are strictly cheaper than (in particular, not the same as) any path
600 * found in an earlier iteration. Otherwise we'd be intentionally
601 * using fewer merge keys than a given path allows (treating the rest
602 * as plain joinquals), which is unlikely to be a good idea. Also,
603 * eliminating paths here on the basis of compare_path_costs is a lot
604 * cheaper than building the mergejoin path only to throw it away.
606 * If inner_cheapest_total is well enough sorted to have not required
607 * a sort in the path made above, we shouldn't make a duplicate path
608 * with it, either. We handle that case with the same logic that
609 * handles the previous consideration, by initializing the variables
610 * that track cheapest-so-far properly. Note that we do NOT reject
611 * inner_cheapest_total if we find it matches some shorter set of
612 * pathkeys. That case corresponds to using fewer mergekeys to avoid
613 * sorting inner_cheapest_total, whereas we did sort it above, so the
614 * plans being considered are different.
616 if (pathkeys_contained_in(innersortkeys,
617 inner_cheapest_total->pathkeys))
619 /* inner_cheapest_total didn't require a sort */
620 cheapest_startup_inner = inner_cheapest_total;
621 cheapest_total_inner = inner_cheapest_total;
623 else
625 /* it did require a sort, at least for the full set of keys */
626 cheapest_startup_inner = NULL;
627 cheapest_total_inner = NULL;
629 num_sortkeys = list_length(innersortkeys);
630 if (num_sortkeys > 1 && !useallclauses)
631 trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
632 else
633 trialsortkeys = innersortkeys; /* won't really truncate */
635 for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
637 Path *innerpath;
638 List *newclauses = NIL;
641 * Look for an inner path ordered well enough for the first
642 * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
643 * destructively, which is why we made a copy...
645 trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
646 innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
647 trialsortkeys,
648 TOTAL_COST);
649 if (innerpath != NULL &&
650 (cheapest_total_inner == NULL ||
651 compare_path_costs(innerpath, cheapest_total_inner,
652 TOTAL_COST) < 0))
654 /* Found a cheap (or even-cheaper) sorted path */
655 /* Select the right mergeclauses, if we didn't already */
656 if (sortkeycnt < num_sortkeys)
658 newclauses =
659 find_mergeclauses_for_pathkeys(root,
660 trialsortkeys,
661 false,
662 mergeclauses);
663 Assert(newclauses != NIL);
665 else
666 newclauses = mergeclauses;
667 add_path(joinrel, (Path *)
668 create_mergejoin_path(root,
669 joinrel,
670 jointype,
671 sjinfo,
672 outerpath,
673 innerpath,
674 restrictlist,
675 merge_pathkeys,
676 newclauses,
677 NIL,
678 NIL));
679 cheapest_total_inner = innerpath;
681 /* Same on the basis of cheapest startup cost ... */
682 innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
683 trialsortkeys,
684 STARTUP_COST);
685 if (innerpath != NULL &&
686 (cheapest_startup_inner == NULL ||
687 compare_path_costs(innerpath, cheapest_startup_inner,
688 STARTUP_COST) < 0))
690 /* Found a cheap (or even-cheaper) sorted path */
691 if (innerpath != cheapest_total_inner)
694 * Avoid rebuilding clause list if we already made one;
695 * saves memory in big join trees...
697 if (newclauses == NIL)
699 if (sortkeycnt < num_sortkeys)
701 newclauses =
702 find_mergeclauses_for_pathkeys(root,
703 trialsortkeys,
704 false,
705 mergeclauses);
706 Assert(newclauses != NIL);
708 else
709 newclauses = mergeclauses;
711 add_path(joinrel, (Path *)
712 create_mergejoin_path(root,
713 joinrel,
714 jointype,
715 sjinfo,
716 outerpath,
717 innerpath,
718 restrictlist,
719 merge_pathkeys,
720 newclauses,
721 NIL,
722 NIL));
724 cheapest_startup_inner = innerpath;
728 * Don't consider truncated sortkeys if we need all clauses.
730 if (useallclauses)
731 break;
737 * hash_inner_and_outer
738 * Create hashjoin join paths by explicitly hashing both the outer and
739 * inner keys of each available hash clause.
741 * 'joinrel' is the join relation
742 * 'outerrel' is the outer join relation
743 * 'innerrel' is the inner join relation
744 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
745 * clauses that apply to this join
746 * 'jointype' is the type of join to do
747 * 'sjinfo' is extra info about the join for selectivity estimation
749 static void
750 hash_inner_and_outer(PlannerInfo *root,
751 RelOptInfo *joinrel,
752 RelOptInfo *outerrel,
753 RelOptInfo *innerrel,
754 List *restrictlist,
755 JoinType jointype,
756 SpecialJoinInfo *sjinfo)
758 bool isouterjoin;
759 List *hashclauses;
760 ListCell *l;
763 * Hashjoin only supports inner, left, semi, and anti joins.
765 switch (jointype)
767 case JOIN_INNER:
768 case JOIN_SEMI:
769 case JOIN_UNIQUE_OUTER:
770 case JOIN_UNIQUE_INNER:
771 isouterjoin = false;
772 break;
773 case JOIN_LEFT:
774 case JOIN_ANTI:
775 isouterjoin = true;
776 break;
777 default:
778 return;
782 * We need to build only one hashpath for any given pair of outer and
783 * inner relations; all of the hashable clauses will be used as keys.
785 * Scan the join's restrictinfo list to find hashjoinable clauses that are
786 * usable with this pair of sub-relations.
788 hashclauses = NIL;
789 foreach(l, restrictlist)
791 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
793 if (!restrictinfo->can_join ||
794 restrictinfo->hashjoinoperator == InvalidOid)
795 continue; /* not hashjoinable */
798 * If processing an outer join, only use its own join clauses for
799 * hashing. For inner joins we need not be so picky.
801 if (isouterjoin && restrictinfo->is_pushed_down)
802 continue;
805 * Check if clause is usable with these input rels.
807 if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
808 bms_is_subset(restrictinfo->right_relids, innerrel->relids))
810 /* righthand side is inner */
812 else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
813 bms_is_subset(restrictinfo->right_relids, outerrel->relids))
815 /* lefthand side is inner */
817 else
818 continue; /* no good for these input relations */
820 hashclauses = lappend(hashclauses, restrictinfo);
823 /* If we found any usable hashclauses, make a path */
824 if (hashclauses)
827 * We consider both the cheapest-total-cost and cheapest-startup-cost
828 * outer paths. There's no need to consider any but the
829 * cheapest-total-cost inner path, however.
831 Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
832 Path *cheapest_total_outer = outerrel->cheapest_total_path;
833 Path *cheapest_total_inner = innerrel->cheapest_total_path;
835 /* Unique-ify if need be */
836 if (jointype == JOIN_UNIQUE_OUTER)
838 cheapest_total_outer = (Path *)
839 create_unique_path(root, outerrel,
840 cheapest_total_outer, sjinfo);
841 Assert(cheapest_total_outer);
842 cheapest_startup_outer = cheapest_total_outer;
843 jointype = JOIN_INNER;
845 else if (jointype == JOIN_UNIQUE_INNER)
847 cheapest_total_inner = (Path *)
848 create_unique_path(root, innerrel,
849 cheapest_total_inner, sjinfo);
850 Assert(cheapest_total_inner);
851 jointype = JOIN_INNER;
854 add_path(joinrel, (Path *)
855 create_hashjoin_path(root,
856 joinrel,
857 jointype,
858 sjinfo,
859 cheapest_total_outer,
860 cheapest_total_inner,
861 restrictlist,
862 hashclauses));
863 if (cheapest_startup_outer != cheapest_total_outer)
864 add_path(joinrel, (Path *)
865 create_hashjoin_path(root,
866 joinrel,
867 jointype,
868 sjinfo,
869 cheapest_startup_outer,
870 cheapest_total_inner,
871 restrictlist,
872 hashclauses));
877 * best_appendrel_indexscan
878 * Finds the best available set of inner indexscans for a nestloop join
879 * with the given append relation on the inside and the given outer_rel
880 * outside. Returns an AppendPath comprising the best inner scans, or
881 * NULL if there are no possible inner indexscans.
883 * Note that we currently consider only cheapest-total-cost. It's not
884 * very clear what cheapest-startup-cost might mean for an AppendPath.
886 static Path *
887 best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
888 RelOptInfo *outer_rel, JoinType jointype)
890 int parentRTindex = rel->relid;
891 List *append_paths = NIL;
892 bool found_indexscan = false;
893 ListCell *l;
895 foreach(l, root->append_rel_list)
897 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
898 int childRTindex;
899 RelOptInfo *childrel;
900 Path *index_cheapest_startup;
901 Path *index_cheapest_total;
903 /* append_rel_list contains all append rels; ignore others */
904 if (appinfo->parent_relid != parentRTindex)
905 continue;
907 childRTindex = appinfo->child_relid;
908 childrel = find_base_rel(root, childRTindex);
909 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
912 * Check to see if child was rejected by constraint exclusion. If so,
913 * it will have a cheapest_total_path that's a "dummy" path.
915 if (IS_DUMMY_PATH(childrel->cheapest_total_path))
916 continue; /* OK, we can ignore it */
919 * Get the best innerjoin indexpaths (if any) for this child rel.
921 best_inner_indexscan(root, childrel, outer_rel, jointype,
922 &index_cheapest_startup, &index_cheapest_total);
925 * If no luck on an indexpath for this rel, we'll still consider an
926 * Append substituting the cheapest-total inner path. However we must
927 * find at least one indexpath, else there's not going to be any
928 * improvement over the base path for the appendrel.
930 if (index_cheapest_total)
931 found_indexscan = true;
932 else
933 index_cheapest_total = childrel->cheapest_total_path;
935 append_paths = lappend(append_paths, index_cheapest_total);
938 if (!found_indexscan)
939 return NULL;
941 /* Form and return the completed Append path. */
942 return (Path *) create_append_path(rel, append_paths);
946 * select_mergejoin_clauses
947 * Select mergejoin clauses that are usable for a particular join.
948 * Returns a list of RestrictInfo nodes for those clauses.
950 * We also mark each selected RestrictInfo to show which side is currently
951 * being considered as outer. These are transient markings that are only
952 * good for the duration of the current add_paths_to_joinrel() call!
954 * We examine each restrictinfo clause known for the join to see
955 * if it is mergejoinable and involves vars from the two sub-relations
956 * currently of interest.
958 static List *
959 select_mergejoin_clauses(PlannerInfo *root,
960 RelOptInfo *joinrel,
961 RelOptInfo *outerrel,
962 RelOptInfo *innerrel,
963 List *restrictlist,
964 JoinType jointype)
966 List *result_list = NIL;
967 bool isouterjoin = IS_OUTER_JOIN(jointype);
968 bool have_nonmergeable_joinclause = false;
969 ListCell *l;
971 foreach(l, restrictlist)
973 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
976 * If processing an outer join, only use its own join clauses in the
977 * merge. For inner joins we can use pushed-down clauses too. (Note:
978 * we don't set have_nonmergeable_joinclause here because pushed-down
979 * clauses will become otherquals not joinquals.)
981 if (isouterjoin && restrictinfo->is_pushed_down)
982 continue;
984 if (!restrictinfo->can_join ||
985 restrictinfo->mergeopfamilies == NIL)
987 have_nonmergeable_joinclause = true;
988 continue; /* not mergejoinable */
992 * Check if clause is usable with these input rels. All the vars
993 * needed on each side of the clause must be available from one or the
994 * other of the input rels.
996 if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
997 bms_is_subset(restrictinfo->right_relids, innerrel->relids))
999 /* righthand side is inner */
1000 restrictinfo->outer_is_left = true;
1002 else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
1003 bms_is_subset(restrictinfo->right_relids, outerrel->relids))
1005 /* lefthand side is inner */
1006 restrictinfo->outer_is_left = false;
1008 else
1010 have_nonmergeable_joinclause = true;
1011 continue; /* no good for these input relations */
1015 * Insist that each side have a non-redundant eclass. This
1016 * restriction is needed because various bits of the planner expect
1017 * that each clause in a merge be associatable with some pathkey in a
1018 * canonical pathkey list, but redundant eclasses can't appear in
1019 * canonical sort orderings. (XXX it might be worth relaxing this,
1020 * but not enough time to address it for 8.3.)
1022 * Note: it would be bad if this condition failed for an otherwise
1023 * mergejoinable FULL JOIN clause, since that would result in
1024 * undesirable planner failure. I believe that is not possible
1025 * however; a variable involved in a full join could only appear in
1026 * below_outer_join eclasses, which aren't considered redundant.
1028 * This case *can* happen for left/right join clauses: the outer-side
1029 * variable could be equated to a constant. Because we will propagate
1030 * that constant across the join clause, the loss of ability to do a
1031 * mergejoin is not really all that big a deal, and so it's not clear
1032 * that improving this is important.
1034 cache_mergeclause_eclasses(root, restrictinfo);
1036 if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
1037 EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
1039 have_nonmergeable_joinclause = true;
1040 continue; /* can't handle redundant eclasses */
1043 result_list = lappend(result_list, restrictinfo);
1047 * If it is a right/full join then *all* the explicit join clauses must be
1048 * mergejoinable, else the executor will fail. If we are asked for a right
1049 * join then just return NIL to indicate no mergejoin is possible (we can
1050 * handle it as a left join instead). If we are asked for a full join then
1051 * emit an error, because there is no fallback.
1053 if (have_nonmergeable_joinclause)
1055 switch (jointype)
1057 case JOIN_RIGHT:
1058 return NIL; /* not mergejoinable */
1059 case JOIN_FULL:
1060 ereport(ERROR,
1061 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1062 errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
1063 break;
1064 default:
1065 /* otherwise, it's OK to have nonmergeable join quals */
1066 break;
1070 return result_list;