src/backend/optimizer/path/joinpath.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * joinpath.c
   4  *        Routines to find all possible paths for processing a set of joins
   5  *
   6  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL$
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <math.h>
  18
  19 #include "optimizer/cost.h"
  20 #include "optimizer/pathnode.h"
  21 #include "optimizer/paths.h"
  22
  23
  24 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
  25                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
  26                                          List *restrictlist, List *mergeclause_list,
  27                                          JoinType jointype, SpecialJoinInfo *sjinfo);
  28 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
  29                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
  30                                          List *restrictlist, List *mergeclause_list,
  31                                          JoinType jointype, SpecialJoinInfo *sjinfo);
  32 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
  33                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
  34                                          List *restrictlist,
  35                                          JoinType jointype, SpecialJoinInfo *sjinfo);
  36 static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
  37                                                  RelOptInfo *outer_rel, JoinType jointype);
  38 static List *select_mergejoin_clauses(PlannerInfo *root,
  39                                                  RelOptInfo *joinrel,
  40                                                  RelOptInfo *outerrel,
  41                                                  RelOptInfo *innerrel,
  42                                                  List *restrictlist,
  43                                                  JoinType jointype);
  44
  45
  46 /*
  47  * add_paths_to_joinrel
  48  *        Given a join relation and two component rels from which it can be made,
  49  *        consider all possible paths that use the two component rels as outer
  50  *        and inner rel respectively.  Add these paths to the join rel's pathlist
  51  *        if they survive comparison with other paths (and remove any existing
  52  *        paths that are dominated by these paths).
  53  *
  54  * Modifies the pathlist field of the joinrel node to contain the best
  55  * paths found so far.
  56  *
  57  * jointype is not necessarily the same as sjinfo->jointype; it might be
  58  * "flipped around" if we are considering joining the rels in the opposite
  59  * direction from what's indicated in sjinfo.
  60  *
  61  * Also, this routine and others in this module accept the special JoinTypes
  62  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
  63  * unique-ify the outer or inner relation and then apply a regular inner
  64  * join.  These values are not allowed to propagate outside this module,
  65  * however.  Path cost estimation code may need to recognize that it's
  66  * dealing with such a case --- the combination of nominal jointype INNER
  67  * with sjinfo->jointype == JOIN_SEMI indicates that.
  68  */
  69 void
  70 add_paths_to_joinrel(PlannerInfo *root,
  71                                          RelOptInfo *joinrel,
  72                                          RelOptInfo *outerrel,
  73                                          RelOptInfo *innerrel,
  74                                          JoinType jointype,
  75                                          SpecialJoinInfo *sjinfo,
  76                                          List *restrictlist)
  77 {
  78         List       *mergeclause_list = NIL;
  79
  80         /*
  81          * Find potential mergejoin clauses.  We can skip this if we are not
  82          * interested in doing a mergejoin.  However, mergejoin is currently our
  83          * only way of implementing full outer joins, so override mergejoin
  84          * disable if it's a full join.
  85          */
  86         if (enable_mergejoin || jointype == JOIN_FULL)
  87                 mergeclause_list = select_mergejoin_clauses(root,
  88                                                                                                         joinrel,
  89                                                                                                         outerrel,
  90                                                                                                         innerrel,
  91                                                                                                         restrictlist,
  92                                                                                                         jointype);
  93
  94         /*
  95          * 1. Consider mergejoin paths where both relations must be explicitly
  96          * sorted.
  97          */
  98         sort_inner_and_outer(root, joinrel, outerrel, innerrel,
  99                                                  restrictlist, mergeclause_list, jointype, sjinfo);
 100
 101         /*
 102          * 2. Consider paths where the outer relation need not be explicitly
 103          * sorted. This includes both nestloops and mergejoins where the outer
 104          * path is already ordered.
 105          */
 106         match_unsorted_outer(root, joinrel, outerrel, innerrel,
 107                                                  restrictlist, mergeclause_list, jointype, sjinfo);
 108
 109 #ifdef NOT_USED
 110
 111         /*
 112          * 3. Consider paths where the inner relation need not be explicitly
 113          * sorted.      This includes mergejoins only (nestloops were already built in
 114          * match_unsorted_outer).
 115          *
 116          * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
 117          * significant difference between the inner and outer side of a mergejoin,
 118          * so match_unsorted_inner creates no paths that aren't equivalent to
 119          * those made by match_unsorted_outer when add_paths_to_joinrel() is
 120          * invoked with the two rels given in the other order.
 121          */
 122         match_unsorted_inner(root, joinrel, outerrel, innerrel,
 123                                                  restrictlist, mergeclause_list, jointype, sjinfo);
 124 #endif
 125
 126         /*
 127          * 4. Consider paths where both outer and inner relations must be hashed
 128          * before being joined.
 129          */
 130         if (enable_hashjoin)
 131                 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
 132                                                          restrictlist, jointype, sjinfo);
 133 }
 134
 135 /*
 136  * sort_inner_and_outer
 137  *        Create mergejoin join paths by explicitly sorting both the outer and
 138  *        inner join relations on each available merge ordering.
 139  *
 140  * 'joinrel' is the join relation
 141  * 'outerrel' is the outer join relation
 142  * 'innerrel' is the inner join relation
 143  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
 144  *              clauses that apply to this join
 145  * 'mergeclause_list' is a list of RestrictInfo nodes for available
 146  *              mergejoin clauses in this join
 147  * 'jointype' is the type of join to do
 148  * 'sjinfo' is extra info about the join for selectivity estimation
 149  */
 150 static void
 151 sort_inner_and_outer(PlannerInfo *root,
 152                                          RelOptInfo *joinrel,
 153                                          RelOptInfo *outerrel,
 154                                          RelOptInfo *innerrel,
 155                                          List *restrictlist,
 156                                          List *mergeclause_list,
 157                                          JoinType jointype,
 158                                          SpecialJoinInfo *sjinfo)
 159 {
 160         bool            useallclauses;
 161         Path       *outer_path;
 162         Path       *inner_path;
 163         List       *all_pathkeys;
 164         ListCell   *l;
 165
 166         /*
 167          * If we are doing a right or full join, we must use *all* the
 168          * mergeclauses as join clauses, else we will not have a valid plan.
 169          */
 170         switch (jointype)
 171         {
 172                 case JOIN_INNER:
 173                 case JOIN_LEFT:
 174                 case JOIN_SEMI:
 175                 case JOIN_ANTI:
 176                 case JOIN_UNIQUE_OUTER:
 177                 case JOIN_UNIQUE_INNER:
 178                         useallclauses = false;
 179                         break;
 180                 case JOIN_RIGHT:
 181                 case JOIN_FULL:
 182                         useallclauses = true;
 183                         break;
 184                 default:
 185                         elog(ERROR, "unrecognized join type: %d",
 186                                  (int) jointype);
 187                         useallclauses = false;          /* keep compiler quiet */
 188                         break;
 189         }
 190
 191         /*
 192          * We only consider the cheapest-total-cost input paths, since we are
 193          * assuming here that a sort is required.  We will consider
 194          * cheapest-startup-cost input paths later, and only if they don't need a
 195          * sort.
 196          *
 197          * If unique-ification is requested, do it and then handle as a plain
 198          * inner join.
 199          */
 200         outer_path = outerrel->cheapest_total_path;
 201         inner_path = innerrel->cheapest_total_path;
 202         if (jointype == JOIN_UNIQUE_OUTER)
 203         {
 204                 outer_path = (Path *) create_unique_path(root, outerrel,
 205                                                                                                  outer_path, sjinfo);
 206                 Assert(outer_path);
 207                 jointype = JOIN_INNER;
 208         }
 209         else if (jointype == JOIN_UNIQUE_INNER)
 210         {
 211                 inner_path = (Path *) create_unique_path(root, innerrel,
 212                                                                                                  inner_path, sjinfo);
 213                 Assert(inner_path);
 214                 jointype = JOIN_INNER;
 215         }
 216
 217         /*
 218          * Each possible ordering of the available mergejoin clauses will generate
 219          * a differently-sorted result path at essentially the same cost.  We have
 220          * no basis for choosing one over another at this level of joining, but
 221          * some sort orders may be more useful than others for higher-level
 222          * mergejoins, so it's worth considering multiple orderings.
 223          *
 224          * Actually, it's not quite true that every mergeclause ordering will
 225          * generate a different path order, because some of the clauses may be
 226          * partially redundant (refer to the same EquivalenceClasses).  Therefore,
 227          * what we do is convert the mergeclause list to a list of canonical
 228          * pathkeys, and then consider different orderings of the pathkeys.
 229          *
 230          * Generating a path for *every* permutation of the pathkeys doesn't seem
 231          * like a winning strategy; the cost in planning time is too high. For
 232          * now, we generate one path for each pathkey, listing that pathkey first
 233          * and the rest in random order.  This should allow at least a one-clause
 234          * mergejoin without re-sorting against any other possible mergejoin
 235          * partner path.  But if we've not guessed the right ordering of secondary
 236          * keys, we may end up evaluating clauses as qpquals when they could have
 237          * been done as mergeclauses.  (In practice, it's rare that there's more
 238          * than two or three mergeclauses, so expending a huge amount of thought
 239          * on that is probably not worth it.)
 240          *
 241          * The pathkey order returned by select_outer_pathkeys_for_merge() has
 242          * some heuristics behind it (see that function), so be sure to try it
 243          * exactly as-is as well as making variants.
 244          */
 245         all_pathkeys = select_outer_pathkeys_for_merge(root,
 246                                                                                                    mergeclause_list,
 247                                                                                                    joinrel);
 248
 249         foreach(l, all_pathkeys)
 250         {
 251                 List       *front_pathkey = (List *) lfirst(l);
 252                 List       *cur_mergeclauses;
 253                 List       *outerkeys;
 254                 List       *innerkeys;
 255                 List       *merge_pathkeys;
 256
 257                 /* Make a pathkey list with this guy first */
 258                 if (l != list_head(all_pathkeys))
 259                         outerkeys = lcons(front_pathkey,
 260                                                           list_delete_ptr(list_copy(all_pathkeys),
 261                                                                                           front_pathkey));
 262                 else
 263                         outerkeys = all_pathkeys;       /* no work at first one... */
 264
 265                 /* Sort the mergeclauses into the corresponding ordering */
 266                 cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
 267                                                                                                                   outerkeys,
 268                                                                                                                   true,
 269                                                                                                                   mergeclause_list);
 270
 271                 /* Should have used them all... */
 272                 Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
 273
 274                 /* Build sort pathkeys for the inner side */
 275                 innerkeys = make_inner_pathkeys_for_merge(root,
 276                                                                                                   cur_mergeclauses,
 277                                                                                                   outerkeys);
 278
 279                 /* Build pathkeys representing output sort order */
 280                 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
 281                                                                                          outerkeys);
 282
 283                 /*
 284                  * And now we can make the path.
 285                  *
 286                  * Note: it's possible that the cheapest paths will already be sorted
 287                  * properly.  create_mergejoin_path will detect that case and suppress
 288                  * an explicit sort step, so we needn't do so here.
 289                  */
 290                 add_path(joinrel, (Path *)
 291                                  create_mergejoin_path(root,
 292                                                                            joinrel,
 293                                                                            jointype,
 294                                                                            sjinfo,
 295                                                                            outer_path,
 296                                                                            inner_path,
 297                                                                            restrictlist,
 298                                                                            merge_pathkeys,
 299                                                                            cur_mergeclauses,
 300                                                                            outerkeys,
 301                                                                            innerkeys));
 302         }
 303 }
 304
 305 /*
 306  * match_unsorted_outer
 307  *        Creates possible join paths for processing a single join relation
 308  *        'joinrel' by employing either iterative substitution or
 309  *        mergejoining on each of its possible outer paths (considering
 310  *        only outer paths that are already ordered well enough for merging).
 311  *
 312  * We always generate a nestloop path for each available outer path.
 313  * In fact we may generate as many as five: one on the cheapest-total-cost
 314  * inner path, one on the same with materialization, one on the
 315  * cheapest-startup-cost inner path (if different), one on the
 316  * cheapest-total inner-indexscan path (if any), and one on the
 317  * cheapest-startup inner-indexscan path (if different).
 318  *
 319  * We also consider mergejoins if mergejoin clauses are available.      We have
 320  * two ways to generate the inner path for a mergejoin: sort the cheapest
 321  * inner path, or use an inner path that is already suitably ordered for the
 322  * merge.  If we have several mergeclauses, it could be that there is no inner
 323  * path (or only a very expensive one) for the full list of mergeclauses, but
 324  * better paths exist if we truncate the mergeclause list (thereby discarding
 325  * some sort key requirements).  So, we consider truncations of the
 326  * mergeclause list as well as the full list.  (Ideally we'd consider all
 327  * subsets of the mergeclause list, but that seems way too expensive.)
 328  *
 329  * 'joinrel' is the join relation
 330  * 'outerrel' is the outer join relation
 331  * 'innerrel' is the inner join relation
 332  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
 333  *              clauses that apply to this join
 334  * 'mergeclause_list' is a list of RestrictInfo nodes for available
 335  *              mergejoin clauses in this join
 336  * 'jointype' is the type of join to do
 337  * 'sjinfo' is extra info about the join for selectivity estimation
 338  */
 339 static void
 340 match_unsorted_outer(PlannerInfo *root,
 341                                          RelOptInfo *joinrel,
 342                                          RelOptInfo *outerrel,
 343                                          RelOptInfo *innerrel,
 344                                          List *restrictlist,
 345                                          List *mergeclause_list,
 346                                          JoinType jointype,
 347                                          SpecialJoinInfo *sjinfo)
 348 {
 349         JoinType        save_jointype = jointype;
 350         bool            nestjoinOK;
 351         bool            useallclauses;
 352         Path       *inner_cheapest_startup = innerrel->cheapest_startup_path;
 353         Path       *inner_cheapest_total = innerrel->cheapest_total_path;
 354         Path       *matpath = NULL;
 355         Path       *index_cheapest_startup = NULL;
 356         Path       *index_cheapest_total = NULL;
 357         ListCell   *l;
 358
 359         /*
 360          * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
 361          * are doing a right or full join, we must use *all* the mergeclauses as
 362          * join clauses, else we will not have a valid plan.  (Although these two
 363          * flags are currently inverses, keep them separate for clarity and
 364          * possible future changes.)
 365          */
 366         switch (jointype)
 367         {
 368                 case JOIN_INNER:
 369                 case JOIN_LEFT:
 370                 case JOIN_SEMI:
 371                 case JOIN_ANTI:
 372                         nestjoinOK = true;
 373                         useallclauses = false;
 374                         break;
 375                 case JOIN_RIGHT:
 376                 case JOIN_FULL:
 377                         nestjoinOK = false;
 378                         useallclauses = true;
 379                         break;
 380                 case JOIN_UNIQUE_OUTER:
 381                 case JOIN_UNIQUE_INNER:
 382                         jointype = JOIN_INNER;
 383                         nestjoinOK = true;
 384                         useallclauses = false;
 385                         break;
 386                 default:
 387                         elog(ERROR, "unrecognized join type: %d",
 388                                  (int) jointype);
 389                         nestjoinOK = false; /* keep compiler quiet */
 390                         useallclauses = false;
 391                         break;
 392         }
 393
 394         /*
 395          * If we need to unique-ify the inner path, we will consider only the
 396          * cheapest inner.
 397          */
 398         if (save_jointype == JOIN_UNIQUE_INNER)
 399         {
 400                 inner_cheapest_total = (Path *)
 401                         create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
 402                 Assert(inner_cheapest_total);
 403                 inner_cheapest_startup = inner_cheapest_total;
 404         }
 405         else if (nestjoinOK)
 406         {
 407                 /*
 408                  * If the cheapest inner path is a join or seqscan, we should consider
 409                  * materializing it.  (This is a heuristic: we could consider it
 410                  * always, but for inner indexscans it's probably a waste of time.)
 411                  * Also skip it if the inner path materializes its output anyway.
 412                  */
 413                 if (!(inner_cheapest_total->pathtype == T_IndexScan ||
 414                           inner_cheapest_total->pathtype == T_BitmapHeapScan ||
 415                           inner_cheapest_total->pathtype == T_TidScan ||
 416                           inner_cheapest_total->pathtype == T_Material ||
 417                           inner_cheapest_total->pathtype == T_FunctionScan ||
 418                           inner_cheapest_total->pathtype == T_CteScan ||
 419                           inner_cheapest_total->pathtype == T_WorkTableScan))
 420                         matpath = (Path *)
 421                                 create_material_path(innerrel, inner_cheapest_total);
 422
 423                 /*
 424                  * Get the best innerjoin indexpaths (if any) for this outer rel.
 425                  * They're the same for all outer paths.
 426                  */
 427                 if (innerrel->reloptkind != RELOPT_JOINREL)
 428                 {
 429                         if (IsA(inner_cheapest_total, AppendPath))
 430                                 index_cheapest_total = best_appendrel_indexscan(root,
 431                                                                                                                                 innerrel,
 432                                                                                                                                 outerrel,
 433                                                                                                                                 jointype);
 434                         else if (innerrel->rtekind == RTE_RELATION)
 435                                 best_inner_indexscan(root, innerrel, outerrel, jointype,
 436                                                                          &index_cheapest_startup,
 437                                                                          &index_cheapest_total);
 438                 }
 439         }
 440
 441         foreach(l, outerrel->pathlist)
 442         {
 443                 Path       *outerpath = (Path *) lfirst(l);
 444                 List       *merge_pathkeys;
 445                 List       *mergeclauses;
 446                 List       *innersortkeys;
 447                 List       *trialsortkeys;
 448                 Path       *cheapest_startup_inner;
 449                 Path       *cheapest_total_inner;
 450                 int                     num_sortkeys;
 451                 int                     sortkeycnt;
 452
 453                 /*
 454                  * If we need to unique-ify the outer path, it's pointless to consider
 455                  * any but the cheapest outer.
 456                  */
 457                 if (save_jointype == JOIN_UNIQUE_OUTER)
 458                 {
 459                         if (outerpath != outerrel->cheapest_total_path)
 460                                 continue;
 461                         outerpath = (Path *) create_unique_path(root, outerrel,
 462                                                                                                         outerpath, sjinfo);
 463                         Assert(outerpath);
 464                 }
 465
 466                 /*
 467                  * The result will have this sort order (even if it is implemented as
 468                  * a nestloop, and even if some of the mergeclauses are implemented by
 469                  * qpquals rather than as true mergeclauses):
 470                  */
 471                 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
 472                                                                                          outerpath->pathkeys);
 473
 474                 if (nestjoinOK)
 475                 {
 476                         /*
 477                          * Always consider a nestloop join with this outer and
 478                          * cheapest-total-cost inner.  When appropriate, also consider
 479                          * using the materialized form of the cheapest inner, the
 480                          * cheapest-startup-cost inner path, and the cheapest innerjoin
 481                          * indexpaths.
 482                          */
 483                         add_path(joinrel, (Path *)
 484                                          create_nestloop_path(root,
 485                                                                                   joinrel,
 486                                                                                   jointype,
 487                                                                                   sjinfo,
 488                                                                                   outerpath,
 489                                                                                   inner_cheapest_total,
 490                                                                                   restrictlist,
 491                                                                                   merge_pathkeys));
 492                         if (matpath != NULL)
 493                                 add_path(joinrel, (Path *)
 494                                                  create_nestloop_path(root,
 495                                                                                           joinrel,
 496                                                                                           jointype,
 497                                                                                           sjinfo,
 498                                                                                           outerpath,
 499                                                                                           matpath,
 500                                                                                           restrictlist,
 501                                                                                           merge_pathkeys));
 502                         if (inner_cheapest_startup != inner_cheapest_total)
 503                                 add_path(joinrel, (Path *)
 504                                                  create_nestloop_path(root,
 505                                                                                           joinrel,
 506                                                                                           jointype,
 507                                                                                           sjinfo,
 508                                                                                           outerpath,
 509                                                                                           inner_cheapest_startup,
 510                                                                                           restrictlist,
 511                                                                                           merge_pathkeys));
 512                         if (index_cheapest_total != NULL)
 513                                 add_path(joinrel, (Path *)
 514                                                  create_nestloop_path(root,
 515                                                                                           joinrel,
 516                                                                                           jointype,
 517                                                                                           sjinfo,
 518                                                                                           outerpath,
 519                                                                                           index_cheapest_total,
 520                                                                                           restrictlist,
 521                                                                                           merge_pathkeys));
 522                         if (index_cheapest_startup != NULL &&
 523                                 index_cheapest_startup != index_cheapest_total)
 524                                 add_path(joinrel, (Path *)
 525                                                  create_nestloop_path(root,
 526                                                                                           joinrel,
 527                                                                                           jointype,
 528                                                                                           sjinfo,
 529                                                                                           outerpath,
 530                                                                                           index_cheapest_startup,
 531                                                                                           restrictlist,
 532                                                                                           merge_pathkeys));
 533                 }
 534
 535                 /* Can't do anything else if outer path needs to be unique'd */
 536                 if (save_jointype == JOIN_UNIQUE_OUTER)
 537                         continue;
 538
 539                 /* Look for useful mergeclauses (if any) */
 540                 mergeclauses = find_mergeclauses_for_pathkeys(root,
 541                                                                                                           outerpath->pathkeys,
 542                                                                                                           true,
 543                                                                                                           mergeclause_list);
 544
 545                 /*
 546                  * Done with this outer path if no chance for a mergejoin.
 547                  *
 548                  * Special corner case: for "x FULL JOIN y ON true", there will be no
 549                  * join clauses at all.  Ordinarily we'd generate a clauseless
 550                  * nestloop path, but since mergejoin is our only join type that
 551                  * supports FULL JOIN, it's necessary to generate a clauseless
 552                  * mergejoin path instead.
 553                  */
 554                 if (mergeclauses == NIL)
 555                 {
 556                         if (jointype == JOIN_FULL)
 557                                  /* okay to try for mergejoin */ ;
 558                         else
 559                                 continue;
 560                 }
 561                 if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
 562                         continue;
 563
 564                 /* Compute the required ordering of the inner path */
 565                 innersortkeys = make_inner_pathkeys_for_merge(root,
 566                                                                                                           mergeclauses,
 567                                                                                                           outerpath->pathkeys);
 568
 569                 /*
 570                  * Generate a mergejoin on the basis of sorting the cheapest inner.
 571                  * Since a sort will be needed, only cheapest total cost matters. (But
 572                  * create_mergejoin_path will do the right thing if
 573                  * inner_cheapest_total is already correctly sorted.)
 574                  */
 575                 add_path(joinrel, (Path *)
 576                                  create_mergejoin_path(root,
 577                                                                            joinrel,
 578                                                                            jointype,
 579                                                                            sjinfo,
 580                                                                            outerpath,
 581                                                                            inner_cheapest_total,
 582                                                                            restrictlist,
 583                                                                            merge_pathkeys,
 584                                                                            mergeclauses,
 585                                                                            NIL,
 586                                                                            innersortkeys));
 587
 588                 /* Can't do anything else if inner path needs to be unique'd */
 589                 if (save_jointype == JOIN_UNIQUE_INNER)
 590                         continue;
 591
 592                 /*
 593                  * Look for presorted inner paths that satisfy the innersortkey list
 594                  * --- or any truncation thereof, if we are allowed to build a
 595                  * mergejoin using a subset of the merge clauses.  Here, we consider
 596                  * both cheap startup cost and cheap total cost.
 597                  *
 598                  * As we shorten the sortkey list, we should consider only paths that
 599                  * are strictly cheaper than (in particular, not the same as) any path
 600                  * found in an earlier iteration.  Otherwise we'd be intentionally
 601                  * using fewer merge keys than a given path allows (treating the rest
 602                  * as plain joinquals), which is unlikely to be a good idea.  Also,
 603                  * eliminating paths here on the basis of compare_path_costs is a lot
 604                  * cheaper than building the mergejoin path only to throw it away.
 605                  *
 606                  * If inner_cheapest_total is well enough sorted to have not required
 607                  * a sort in the path made above, we shouldn't make a duplicate path
 608                  * with it, either.  We handle that case with the same logic that
 609                  * handles the previous consideration, by initializing the variables
 610                  * that track cheapest-so-far properly.  Note that we do NOT reject
 611                  * inner_cheapest_total if we find it matches some shorter set of
 612                  * pathkeys.  That case corresponds to using fewer mergekeys to avoid
 613                  * sorting inner_cheapest_total, whereas we did sort it above, so the
 614                  * plans being considered are different.
 615                  */
 616                 if (pathkeys_contained_in(innersortkeys,
 617                                                                   inner_cheapest_total->pathkeys))
 618                 {
 619                         /* inner_cheapest_total didn't require a sort */
 620                         cheapest_startup_inner = inner_cheapest_total;
 621                         cheapest_total_inner = inner_cheapest_total;
 622                 }
 623                 else
 624                 {
 625                         /* it did require a sort, at least for the full set of keys */
 626                         cheapest_startup_inner = NULL;
 627                         cheapest_total_inner = NULL;
 628                 }
 629                 num_sortkeys = list_length(innersortkeys);
 630                 if (num_sortkeys > 1 && !useallclauses)
 631                         trialsortkeys = list_copy(innersortkeys);       /* need modifiable copy */
 632                 else
 633                         trialsortkeys = innersortkeys;          /* won't really truncate */
 634
 635                 for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
 636                 {
 637                         Path       *innerpath;
 638                         List       *newclauses = NIL;
 639
 640                         /*
 641                          * Look for an inner path ordered well enough for the first
 642                          * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
 643                          * destructively, which is why we made a copy...
 644                          */
 645                         trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
 646                         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
 647                                                                                                            trialsortkeys,
 648                                                                                                            TOTAL_COST);
 649                         if (innerpath != NULL &&
 650                                 (cheapest_total_inner == NULL ||
 651                                  compare_path_costs(innerpath, cheapest_total_inner,
 652                                                                         TOTAL_COST) < 0))
 653                         {
 654                                 /* Found a cheap (or even-cheaper) sorted path */
 655                                 /* Select the right mergeclauses, if we didn't already */
 656                                 if (sortkeycnt < num_sortkeys)
 657                                 {
 658                                         newclauses =
 659                                                 find_mergeclauses_for_pathkeys(root,
 660                                                                                                            trialsortkeys,
 661                                                                                                            false,
 662                                                                                                            mergeclauses);
 663                                         Assert(newclauses != NIL);
 664                                 }
 665                                 else
 666                                         newclauses = mergeclauses;
 667                                 add_path(joinrel, (Path *)
 668                                                  create_mergejoin_path(root,
 669                                                                                            joinrel,
 670                                                                                            jointype,
 671                                                                                            sjinfo,
 672                                                                                            outerpath,
 673                                                                                            innerpath,
 674                                                                                            restrictlist,
 675                                                                                            merge_pathkeys,
 676                                                                                            newclauses,
 677                                                                                            NIL,
 678                                                                                            NIL));
 679                                 cheapest_total_inner = innerpath;
 680                         }
 681                         /* Same on the basis of cheapest startup cost ... */
 682                         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
 683                                                                                                            trialsortkeys,
 684                                                                                                            STARTUP_COST);
 685                         if (innerpath != NULL &&
 686                                 (cheapest_startup_inner == NULL ||
 687                                  compare_path_costs(innerpath, cheapest_startup_inner,
 688                                                                         STARTUP_COST) < 0))
 689                         {
 690                                 /* Found a cheap (or even-cheaper) sorted path */
 691                                 if (innerpath != cheapest_total_inner)
 692                                 {
 693                                         /*
 694                                          * Avoid rebuilding clause list if we already made one;
 695                                          * saves memory in big join trees...
 696                                          */
 697                                         if (newclauses == NIL)
 698                                         {
 699                                                 if (sortkeycnt < num_sortkeys)
 700                                                 {
 701                                                         newclauses =
 702                                                                 find_mergeclauses_for_pathkeys(root,
 703                                                                                                                            trialsortkeys,
 704                                                                                                                            false,
 705                                                                                                                            mergeclauses);
 706                                                         Assert(newclauses != NIL);
 707                                                 }
 708                                                 else
 709                                                         newclauses = mergeclauses;
 710                                         }
 711                                         add_path(joinrel, (Path *)
 712                                                          create_mergejoin_path(root,
 713                                                                                                    joinrel,
 714                                                                                                    jointype,
 715                                                                                                    sjinfo,
 716                                                                                                    outerpath,
 717                                                                                                    innerpath,
 718                                                                                                    restrictlist,
 719                                                                                                    merge_pathkeys,
 720                                                                                                    newclauses,
 721                                                                                                    NIL,
 722                                                                                                    NIL));
 723                                 }
 724                                 cheapest_startup_inner = innerpath;
 725                         }
 726
 727                         /*
 728                          * Don't consider truncated sortkeys if we need all clauses.
 729                          */
 730                         if (useallclauses)
 731                                 break;
 732                 }
 733         }
 734 }
 735
 736 /*
 737  * hash_inner_and_outer
 738  *        Create hashjoin join paths by explicitly hashing both the outer and
 739  *        inner keys of each available hash clause.
 740  *
 741  * 'joinrel' is the join relation
 742  * 'outerrel' is the outer join relation
 743  * 'innerrel' is the inner join relation
 744  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
 745  *              clauses that apply to this join
 746  * 'jointype' is the type of join to do
 747  * 'sjinfo' is extra info about the join for selectivity estimation
 748  */
 749 static void
 750 hash_inner_and_outer(PlannerInfo *root,
 751                                          RelOptInfo *joinrel,
 752                                          RelOptInfo *outerrel,
 753                                          RelOptInfo *innerrel,
 754                                          List *restrictlist,
 755                                          JoinType jointype,
 756                                          SpecialJoinInfo *sjinfo)
 757 {
 758         bool            isouterjoin;
 759         List       *hashclauses;
 760         ListCell   *l;
 761
 762         /*
 763          * Hashjoin only supports inner, left, semi, and anti joins.
 764          */
 765         switch (jointype)
 766         {
 767                 case JOIN_INNER:
 768                 case JOIN_SEMI:
 769                 case JOIN_UNIQUE_OUTER:
 770                 case JOIN_UNIQUE_INNER:
 771                         isouterjoin = false;
 772                         break;
 773                 case JOIN_LEFT:
 774                 case JOIN_ANTI:
 775                         isouterjoin = true;
 776                         break;
 777                 default:
 778                         return;
 779         }
 780
 781         /*
 782          * We need to build only one hashpath for any given pair of outer and
 783          * inner relations; all of the hashable clauses will be used as keys.
 784          *
 785          * Scan the join's restrictinfo list to find hashjoinable clauses that are
 786          * usable with this pair of sub-relations.
 787          */
 788         hashclauses = NIL;
 789         foreach(l, restrictlist)
 790         {
 791                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
 792
 793                 if (!restrictinfo->can_join ||
 794                         restrictinfo->hashjoinoperator == InvalidOid)
 795                         continue;                       /* not hashjoinable */
 796
 797                 /*
 798                  * If processing an outer join, only use its own join clauses for
 799                  * hashing.  For inner joins we need not be so picky.
 800                  */
 801                 if (isouterjoin && restrictinfo->is_pushed_down)
 802                         continue;
 803
 804                 /*
 805                  * Check if clause is usable with these input rels.
 806                  */
 807                 if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
 808                         bms_is_subset(restrictinfo->right_relids, innerrel->relids))
 809                 {
 810                         /* righthand side is inner */
 811                 }
 812                 else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
 813                                  bms_is_subset(restrictinfo->right_relids, outerrel->relids))
 814                 {
 815                         /* lefthand side is inner */
 816                 }
 817                 else
 818                         continue;                       /* no good for these input relations */
 819
 820                 hashclauses = lappend(hashclauses, restrictinfo);
 821         }
 822
 823         /* If we found any usable hashclauses, make a path */
 824         if (hashclauses)
 825         {
 826                 /*
 827                  * We consider both the cheapest-total-cost and cheapest-startup-cost
 828                  * outer paths.  There's no need to consider any but the
 829                  * cheapest-total-cost inner path, however.
 830                  */
 831                 Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
 832                 Path       *cheapest_total_outer = outerrel->cheapest_total_path;
 833                 Path       *cheapest_total_inner = innerrel->cheapest_total_path;
 834
 835                 /* Unique-ify if need be */
 836                 if (jointype == JOIN_UNIQUE_OUTER)
 837                 {
 838                         cheapest_total_outer = (Path *)
 839                                 create_unique_path(root, outerrel,
 840                                                                    cheapest_total_outer, sjinfo);
 841                         Assert(cheapest_total_outer);
 842                         cheapest_startup_outer = cheapest_total_outer;
 843                         jointype = JOIN_INNER;
 844                 }
 845                 else if (jointype == JOIN_UNIQUE_INNER)
 846                 {
 847                         cheapest_total_inner = (Path *)
 848                                 create_unique_path(root, innerrel,
 849                                                                    cheapest_total_inner, sjinfo);
 850                         Assert(cheapest_total_inner);
 851                         jointype = JOIN_INNER;
 852                 }
 853
 854                 add_path(joinrel, (Path *)
 855                                  create_hashjoin_path(root,
 856                                                                           joinrel,
 857                                                                           jointype,
 858                                                                           sjinfo,
 859                                                                           cheapest_total_outer,
 860                                                                           cheapest_total_inner,
 861                                                                           restrictlist,
 862                                                                           hashclauses));
 863                 if (cheapest_startup_outer != cheapest_total_outer)
 864                         add_path(joinrel, (Path *)
 865                                          create_hashjoin_path(root,
 866                                                                                   joinrel,
 867                                                                                   jointype,
 868                                                                                   sjinfo,
 869                                                                                   cheapest_startup_outer,
 870                                                                                   cheapest_total_inner,
 871                                                                                   restrictlist,
 872                                                                                   hashclauses));
 873         }
 874 }
 875
 876 /*
 877  * best_appendrel_indexscan
 878  *        Finds the best available set of inner indexscans for a nestloop join
 879  *        with the given append relation on the inside and the given outer_rel
 880  *        outside.      Returns an AppendPath comprising the best inner scans, or
 881  *        NULL if there are no possible inner indexscans.
 882  *
 883  * Note that we currently consider only cheapest-total-cost.  It's not
 884  * very clear what cheapest-startup-cost might mean for an AppendPath.
 885  */
 886 static Path *
 887 best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
 888                                                  RelOptInfo *outer_rel, JoinType jointype)
 889 {
 890         int                     parentRTindex = rel->relid;
 891         List       *append_paths = NIL;
 892         bool            found_indexscan = false;
 893         ListCell   *l;
 894
 895         foreach(l, root->append_rel_list)
 896         {
 897                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
 898                 int                     childRTindex;
 899                 RelOptInfo *childrel;
 900                 Path       *index_cheapest_startup;
 901                 Path       *index_cheapest_total;
 902
 903                 /* append_rel_list contains all append rels; ignore others */
 904                 if (appinfo->parent_relid != parentRTindex)
 905                         continue;
 906
 907                 childRTindex = appinfo->child_relid;
 908                 childrel = find_base_rel(root, childRTindex);
 909                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
 910
 911                 /*
 912                  * Check to see if child was rejected by constraint exclusion. If so,
 913                  * it will have a cheapest_total_path that's a "dummy" path.
 914                  */
 915                 if (IS_DUMMY_PATH(childrel->cheapest_total_path))
 916                         continue;                       /* OK, we can ignore it */
 917
 918                 /*
 919                  * Get the best innerjoin indexpaths (if any) for this child rel.
 920                  */
 921                 best_inner_indexscan(root, childrel, outer_rel, jointype,
 922                                                          &index_cheapest_startup, &index_cheapest_total);
 923
 924                 /*
 925                  * If no luck on an indexpath for this rel, we'll still consider an
 926                  * Append substituting the cheapest-total inner path.  However we must
 927                  * find at least one indexpath, else there's not going to be any
 928                  * improvement over the base path for the appendrel.
 929                  */
 930                 if (index_cheapest_total)
 931                         found_indexscan = true;
 932                 else
 933                         index_cheapest_total = childrel->cheapest_total_path;
 934
 935                 append_paths = lappend(append_paths, index_cheapest_total);
 936         }
 937
 938         if (!found_indexscan)
 939                 return NULL;
 940
 941         /* Form and return the completed Append path. */
 942         return (Path *) create_append_path(rel, append_paths);
 943 }
 944
 945 /*
 946  * select_mergejoin_clauses
 947  *        Select mergejoin clauses that are usable for a particular join.
 948  *        Returns a list of RestrictInfo nodes for those clauses.
 949  *
 950  * We also mark each selected RestrictInfo to show which side is currently
 951  * being considered as outer.  These are transient markings that are only
 952  * good for the duration of the current add_paths_to_joinrel() call!
 953  *
 954  * We examine each restrictinfo clause known for the join to see
 955  * if it is mergejoinable and involves vars from the two sub-relations
 956  * currently of interest.
 957  */
 958 static List *
 959 select_mergejoin_clauses(PlannerInfo *root,
 960                                                  RelOptInfo *joinrel,
 961                                                  RelOptInfo *outerrel,
 962                                                  RelOptInfo *innerrel,
 963                                                  List *restrictlist,
 964                                                  JoinType jointype)
 965 {
 966         List       *result_list = NIL;
 967         bool            isouterjoin = IS_OUTER_JOIN(jointype);
 968         bool            have_nonmergeable_joinclause = false;
 969         ListCell   *l;
 970
 971         foreach(l, restrictlist)
 972         {
 973                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
 974
 975                 /*
 976                  * If processing an outer join, only use its own join clauses in the
 977                  * merge.  For inner joins we can use pushed-down clauses too. (Note:
 978                  * we don't set have_nonmergeable_joinclause here because pushed-down
 979                  * clauses will become otherquals not joinquals.)
 980                  */
 981                 if (isouterjoin && restrictinfo->is_pushed_down)
 982                         continue;
 983
 984                 if (!restrictinfo->can_join ||
 985                         restrictinfo->mergeopfamilies == NIL)
 986                 {
 987                         have_nonmergeable_joinclause = true;
 988                         continue;                       /* not mergejoinable */
 989                 }
 990
 991                 /*
 992                  * Check if clause is usable with these input rels.  All the vars
 993                  * needed on each side of the clause must be available from one or the
 994                  * other of the input rels.
 995                  */
 996                 if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
 997                         bms_is_subset(restrictinfo->right_relids, innerrel->relids))
 998                 {
 999                         /* righthand side is inner */
1000                         restrictinfo->outer_is_left = true;
1001                 }
1002                 else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
1003                                  bms_is_subset(restrictinfo->right_relids, outerrel->relids))
1004                 {
1005                         /* lefthand side is inner */
1006                         restrictinfo->outer_is_left = false;
1007                 }
1008                 else
1009                 {
1010                         have_nonmergeable_joinclause = true;
1011                         continue;                       /* no good for these input relations */
1012                 }
1013
1014                 /*
1015                  * Insist that each side have a non-redundant eclass.  This
1016                  * restriction is needed because various bits of the planner expect
1017                  * that each clause in a merge be associatable with some pathkey in a
1018                  * canonical pathkey list, but redundant eclasses can't appear in
1019                  * canonical sort orderings.  (XXX it might be worth relaxing this,
1020                  * but not enough time to address it for 8.3.)
1021                  *
1022                  * Note: it would be bad if this condition failed for an otherwise
1023                  * mergejoinable FULL JOIN clause, since that would result in
1024                  * undesirable planner failure.  I believe that is not possible
1025                  * however; a variable involved in a full join could only appear in
1026                  * below_outer_join eclasses, which aren't considered redundant.
1027                  *
1028                  * This case *can* happen for left/right join clauses: the outer-side
1029                  * variable could be equated to a constant.  Because we will propagate
1030                  * that constant across the join clause, the loss of ability to do a
1031                  * mergejoin is not really all that big a deal, and so it's not clear
1032                  * that improving this is important.
1033                  */
1034                 cache_mergeclause_eclasses(root, restrictinfo);
1035
1036                 if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
1037                         EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
1038                 {
1039                         have_nonmergeable_joinclause = true;
1040                         continue;                       /* can't handle redundant eclasses */
1041                 }
1042
1043                 result_list = lappend(result_list, restrictinfo);
1044         }
1045
1046         /*
1047          * If it is a right/full join then *all* the explicit join clauses must be
1048          * mergejoinable, else the executor will fail. If we are asked for a right
1049          * join then just return NIL to indicate no mergejoin is possible (we can
1050          * handle it as a left join instead). If we are asked for a full join then
1051          * emit an error, because there is no fallback.
1052          */
1053         if (have_nonmergeable_joinclause)
1054         {
1055                 switch (jointype)
1056                 {
1057                         case JOIN_RIGHT:
1058                                 return NIL;             /* not mergejoinable */
1059                         case JOIN_FULL:
1060                                 ereport(ERROR,
1061                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1062                                                  errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
1063                                 break;
1064                         default:
1065                                 /* otherwise, it's OK to have nonmergeable join quals */
1066                                 break;
1067                 }
1068         }
1069
1070         return result_list;
1071 }