src/backend/optimizer/path/pathkeys.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * pathkeys.c
   4  *        Utilities for matching and building path keys
   5  *
   6  * See src/backend/optimizer/README for a great deal of information about
   7  * the nature and use of path keys.
   8  *
   9  *
  10  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  11  * Portions Copyright (c) 1994, Regents of the University of California
  12  *
  13  * IDENTIFICATION
  14  *        $PostgreSQL$
  15  *
  16  *-------------------------------------------------------------------------
  17  */
  18 #include "postgres.h"
  19
  20 #include "access/skey.h"
  21 #include "catalog/pg_type.h"
  22 #include "nodes/makefuncs.h"
  23 #include "nodes/nodeFuncs.h"
  24 #include "nodes/plannodes.h"
  25 #include "optimizer/clauses.h"
  26 #include "optimizer/pathnode.h"
  27 #include "optimizer/paths.h"
  28 #include "optimizer/tlist.h"
  29 #include "parser/parsetree.h"
  30 #include "utils/lsyscache.h"
  31
  32
  33 static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily,
  34                         int strategy, bool nulls_first);
  35 static PathKey *make_canonical_pathkey(PlannerInfo *root,
  36                                            EquivalenceClass *eclass, Oid opfamily,
  37                                            int strategy, bool nulls_first);
  38 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
  39 static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root,
  40                                                    Expr *expr, Oid ordering_op,
  41                                                    bool nulls_first,
  42                                                    Index sortref,
  43                                                    bool canonicalize);
  44 static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel,
  45                                   AttrNumber varattno);
  46 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
  47
  48
  49 /****************************************************************************
  50  *              PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
  51  ****************************************************************************/
  52
  53 /*
  54  * makePathKey
  55  *              create a PathKey node
  56  *
  57  * This does not promise to create a canonical PathKey, it's merely a
  58  * convenience routine to build the specified node.
  59  */
  60 static PathKey *
  61 makePathKey(EquivalenceClass *eclass, Oid opfamily,
  62                         int strategy, bool nulls_first)
  63 {
  64         PathKey    *pk = makeNode(PathKey);
  65
  66         pk->pk_eclass = eclass;
  67         pk->pk_opfamily = opfamily;
  68         pk->pk_strategy = strategy;
  69         pk->pk_nulls_first = nulls_first;
  70
  71         return pk;
  72 }
  73
  74 /*
  75  * make_canonical_pathkey
  76  *        Given the parameters for a PathKey, find any pre-existing matching
  77  *        pathkey in the query's list of "canonical" pathkeys.  Make a new
  78  *        entry if there's not one already.
  79  *
  80  * Note that this function must not be used until after we have completed
  81  * merging EquivalenceClasses.
  82  */
  83 static PathKey *
  84 make_canonical_pathkey(PlannerInfo *root,
  85                                            EquivalenceClass *eclass, Oid opfamily,
  86                                            int strategy, bool nulls_first)
  87 {
  88         PathKey    *pk;
  89         ListCell   *lc;
  90         MemoryContext oldcontext;
  91
  92         /* The passed eclass might be non-canonical, so chase up to the top */
  93         while (eclass->ec_merged)
  94                 eclass = eclass->ec_merged;
  95
  96         foreach(lc, root->canon_pathkeys)
  97         {
  98                 pk = (PathKey *) lfirst(lc);
  99                 if (eclass == pk->pk_eclass &&
 100                         opfamily == pk->pk_opfamily &&
 101                         strategy == pk->pk_strategy &&
 102                         nulls_first == pk->pk_nulls_first)
 103                         return pk;
 104         }
 105
 106         /*
 107          * Be sure canonical pathkeys are allocated in the main planning context.
 108          * Not an issue in normal planning, but it is for GEQO.
 109          */
 110         oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 111
 112         pk = makePathKey(eclass, opfamily, strategy, nulls_first);
 113         root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
 114
 115         MemoryContextSwitchTo(oldcontext);
 116
 117         return pk;
 118 }
 119
 120 /*
 121  * pathkey_is_redundant
 122  *         Is a pathkey redundant with one already in the given list?
 123  *
 124  * Both the given pathkey and the list members must be canonical for this
 125  * to work properly.  We detect two cases:
 126  *
 127  * 1. If the new pathkey's equivalence class contains a constant, and isn't
 128  * below an outer join, then we can disregard it as a sort key.  An example:
 129  *                      SELECT ... WHERE x = 42 ORDER BY x, y;
 130  * We may as well just sort by y.  Note that because of opfamily matching,
 131  * this is semantically correct: we know that the equality constraint is one
 132  * that actually binds the variable to a single value in the terms of any
 133  * ordering operator that might go with the eclass.  This rule not only lets
 134  * us simplify (or even skip) explicit sorts, but also allows matching index
 135  * sort orders to a query when there are don't-care index columns.
 136  *
 137  * 2. If the new pathkey's equivalence class is the same as that of any
 138  * existing member of the pathkey list, then it is redundant.  Some examples:
 139  *                      SELECT ... ORDER BY x, x;
 140  *                      SELECT ... ORDER BY x, x DESC;
 141  *                      SELECT ... WHERE x = y ORDER BY x, y;
 142  * In all these cases the second sort key cannot distinguish values that are
 143  * considered equal by the first, and so there's no point in using it.
 144  * Note in particular that we need not compare opfamily (all the opfamilies
 145  * of the EC have the same notion of equality) nor sort direction.
 146  *
 147  * Because the equivclass.c machinery forms only one copy of any EC per query,
 148  * pointer comparison is enough to decide whether canonical ECs are the same.
 149  */
 150 static bool
 151 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
 152 {
 153         EquivalenceClass *new_ec = new_pathkey->pk_eclass;
 154         ListCell   *lc;
 155
 156         /* Assert we've been given canonical pathkeys */
 157         Assert(!new_ec->ec_merged);
 158
 159         /* Check for EC containing a constant --- unconditionally redundant */
 160         if (EC_MUST_BE_REDUNDANT(new_ec))
 161                 return true;
 162
 163         /* If same EC already used in list, then redundant */
 164         foreach(lc, pathkeys)
 165         {
 166                 PathKey    *old_pathkey = (PathKey *) lfirst(lc);
 167
 168                 /* Assert we've been given canonical pathkeys */
 169                 Assert(!old_pathkey->pk_eclass->ec_merged);
 170
 171                 if (new_ec == old_pathkey->pk_eclass)
 172                         return true;
 173         }
 174
 175         return false;
 176 }
 177
 178 /*
 179  * canonicalize_pathkeys
 180  *         Convert a not-necessarily-canonical pathkeys list to canonical form.
 181  *
 182  * Note that this function must not be used until after we have completed
 183  * merging EquivalenceClasses.
 184  */
 185 List *
 186 canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
 187 {
 188         List       *new_pathkeys = NIL;
 189         ListCell   *l;
 190
 191         foreach(l, pathkeys)
 192         {
 193                 PathKey    *pathkey = (PathKey *) lfirst(l);
 194                 EquivalenceClass *eclass;
 195                 PathKey    *cpathkey;
 196
 197                 /* Find the canonical (merged) EquivalenceClass */
 198                 eclass = pathkey->pk_eclass;
 199                 while (eclass->ec_merged)
 200                         eclass = eclass->ec_merged;
 201
 202                 /*
 203                  * If we can tell it's redundant just from the EC, skip.
 204                  * pathkey_is_redundant would notice that, but we needn't even bother
 205                  * constructing the node...
 206                  */
 207                 if (EC_MUST_BE_REDUNDANT(eclass))
 208                         continue;
 209
 210                 /* OK, build a canonicalized PathKey struct */
 211                 cpathkey = make_canonical_pathkey(root,
 212                                                                                   eclass,
 213                                                                                   pathkey->pk_opfamily,
 214                                                                                   pathkey->pk_strategy,
 215                                                                                   pathkey->pk_nulls_first);
 216
 217                 /* Add to list unless redundant */
 218                 if (!pathkey_is_redundant(cpathkey, new_pathkeys))
 219                         new_pathkeys = lappend(new_pathkeys, cpathkey);
 220         }
 221         return new_pathkeys;
 222 }
 223
 224 /*
 225  * make_pathkey_from_sortinfo
 226  *        Given an expression, a sortop, and a nulls-first flag, create
 227  *        a PathKey.  If canonicalize = true, the result is a "canonical"
 228  *        PathKey, otherwise not.  (But note it might be redundant anyway.)
 229  *
 230  * If the PathKey is being generated from a SortGroupClause, sortref should be
 231  * the SortGroupClause's SortGroupRef; otherwise zero.
 232  *
 233  * canonicalize should always be TRUE after EquivalenceClass merging has
 234  * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
 235  */
 236 static PathKey *
 237 make_pathkey_from_sortinfo(PlannerInfo *root,
 238                                                    Expr *expr, Oid ordering_op,
 239                                                    bool nulls_first,
 240                                                    Index sortref,
 241                                                    bool canonicalize)
 242 {
 243         Oid                     opfamily,
 244                                 opcintype;
 245         int16           strategy;
 246         Oid                     equality_op;
 247         List       *opfamilies;
 248         EquivalenceClass *eclass;
 249
 250         /*
 251          * An ordering operator fully determines the behavior of its opfamily, so
 252          * could only meaningfully appear in one family --- or perhaps two if one
 253          * builds a reverse-sort opfamily, but there's not much point in that
 254          * anymore.  But EquivalenceClasses need to contain opfamily lists based
 255          * on the family membership of equality operators, which could easily be
 256          * bigger.      So, look up the equality operator that goes with the ordering
 257          * operator (this should be unique) and get its membership.
 258          */
 259
 260         /* Find the operator in pg_amop --- failure shouldn't happen */
 261         if (!get_ordering_op_properties(ordering_op,
 262                                                                         &opfamily, &opcintype, &strategy))
 263                 elog(ERROR, "operator %u is not a valid ordering operator",
 264                          ordering_op);
 265         /* Get matching equality operator */
 266         equality_op = get_opfamily_member(opfamily,
 267                                                                           opcintype,
 268                                                                           opcintype,
 269                                                                           BTEqualStrategyNumber);
 270         if (!OidIsValid(equality_op))           /* shouldn't happen */
 271                 elog(ERROR, "could not find equality operator for ordering operator %u",
 272                          ordering_op);
 273         opfamilies = get_mergejoin_opfamilies(equality_op);
 274         if (!opfamilies)                        /* certainly should find some */
 275                 elog(ERROR, "could not find opfamilies for ordering operator %u",
 276                          ordering_op);
 277
 278         /*
 279          * When dealing with binary-compatible opclasses, we have to ensure that
 280          * the exposed type of the expression tree matches the declared input type
 281          * of the opclass, except when that is a polymorphic type (compare the
 282          * behavior of parse_coerce.c).  This ensures that we can correctly match
 283          * the indexkey or sortclause expression to other expressions we find in
 284          * the query, because arguments of ordinary operator expressions will be
 285          * cast that way.  (We have to do this for indexkeys because they are
 286          * represented without any explicit relabel in pg_index, and for sort
 287          * clauses because the parser is likewise cavalier about putting relabels
 288          * on them.)
 289          */
 290         if (exprType((Node *) expr) != opcintype &&
 291                 !IsPolymorphicType(opcintype))
 292         {
 293                 /* Strip any existing RelabelType, and add a new one if needed */
 294                 while (expr && IsA(expr, RelabelType))
 295                         expr = (Expr *) ((RelabelType *) expr)->arg;
 296                 if (exprType((Node *) expr) != opcintype)
 297                         expr = (Expr *) makeRelabelType(expr,
 298                                                                                         opcintype,
 299                                                                                         -1,
 300                                                                                         COERCE_DONTCARE);
 301         }
 302
 303         /* Now find or create a matching EquivalenceClass */
 304         eclass = get_eclass_for_sort_expr(root, expr, opcintype, opfamilies,
 305                                                                           sortref);
 306
 307         /* And finally we can find or create a PathKey node */
 308         if (canonicalize)
 309                 return make_canonical_pathkey(root, eclass, opfamily,
 310                                                                           strategy, nulls_first);
 311         else
 312                 return makePathKey(eclass, opfamily, strategy, nulls_first);
 313 }
 314
 315
 316 /****************************************************************************
 317  *              PATHKEY COMPARISONS
 318  ****************************************************************************/
 319
 320 /*
 321  * compare_pathkeys
 322  *        Compare two pathkeys to see if they are equivalent, and if not whether
 323  *        one is "better" than the other.
 324  *
 325  *        This function may only be applied to canonicalized pathkey lists.
 326  *        In the canonical representation, pathkeys can be checked for equality
 327  *        by simple pointer comparison.
 328  */
 329 PathKeysComparison
 330 compare_pathkeys(List *keys1, List *keys2)
 331 {
 332         ListCell   *key1,
 333                            *key2;
 334
 335         /*
 336          * Fall out quickly if we are passed two identical lists.  This mostly
 337          * catches the case where both are NIL, but that's common enough to
 338          * warrant the test.
 339          */
 340         if (keys1 == keys2)
 341                 return PATHKEYS_EQUAL;
 342
 343         forboth(key1, keys1, key2, keys2)
 344         {
 345                 PathKey    *pathkey1 = (PathKey *) lfirst(key1);
 346                 PathKey    *pathkey2 = (PathKey *) lfirst(key2);
 347
 348                 /*
 349                  * XXX would like to check that we've been given canonicalized input,
 350                  * but PlannerInfo not accessible here...
 351                  */
 352 #ifdef NOT_USED
 353                 Assert(list_member_ptr(root->canon_pathkeys, pathkey1));
 354                 Assert(list_member_ptr(root->canon_pathkeys, pathkey2));
 355 #endif
 356
 357                 if (pathkey1 != pathkey2)
 358                         return PATHKEYS_DIFFERENT;      /* no need to keep looking */
 359         }
 360
 361         /*
 362          * If we reached the end of only one list, the other is longer and
 363          * therefore not a subset.
 364          */
 365         if (key1 != NULL)
 366                 return PATHKEYS_BETTER1;        /* key1 is longer */
 367         if (key2 != NULL)
 368                 return PATHKEYS_BETTER2;        /* key2 is longer */
 369         return PATHKEYS_EQUAL;
 370 }
 371
 372 /*
 373  * pathkeys_contained_in
 374  *        Common special case of compare_pathkeys: we just want to know
 375  *        if keys2 are at least as well sorted as keys1.
 376  */
 377 bool
 378 pathkeys_contained_in(List *keys1, List *keys2)
 379 {
 380         switch (compare_pathkeys(keys1, keys2))
 381         {
 382                 case PATHKEYS_EQUAL:
 383                 case PATHKEYS_BETTER2:
 384                         return true;
 385                 default:
 386                         break;
 387         }
 388         return false;
 389 }
 390
 391 /*
 392  * get_cheapest_path_for_pathkeys
 393  *        Find the cheapest path (according to the specified criterion) that
 394  *        satisfies the given pathkeys.  Return NULL if no such path.
 395  *
 396  * 'paths' is a list of possible paths that all generate the same relation
 397  * 'pathkeys' represents a required ordering (already canonicalized!)
 398  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
 399  */
 400 Path *
 401 get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 402                                                            CostSelector cost_criterion)
 403 {
 404         Path       *matched_path = NULL;
 405         ListCell   *l;
 406
 407         foreach(l, paths)
 408         {
 409                 Path       *path = (Path *) lfirst(l);
 410
 411                 /*
 412                  * Since cost comparison is a lot cheaper than pathkey comparison, do
 413                  * that first.  (XXX is that still true?)
 414                  */
 415                 if (matched_path != NULL &&
 416                         compare_path_costs(matched_path, path, cost_criterion) <= 0)
 417                         continue;
 418
 419                 if (pathkeys_contained_in(pathkeys, path->pathkeys))
 420                         matched_path = path;
 421         }
 422         return matched_path;
 423 }
 424
 425 /*
 426  * get_cheapest_fractional_path_for_pathkeys
 427  *        Find the cheapest path (for retrieving a specified fraction of all
 428  *        the tuples) that satisfies the given pathkeys.
 429  *        Return NULL if no such path.
 430  *
 431  * See compare_fractional_path_costs() for the interpretation of the fraction
 432  * parameter.
 433  *
 434  * 'paths' is a list of possible paths that all generate the same relation
 435  * 'pathkeys' represents a required ordering (already canonicalized!)
 436  * 'fraction' is the fraction of the total tuples expected to be retrieved
 437  */
 438 Path *
 439 get_cheapest_fractional_path_for_pathkeys(List *paths,
 440                                                                                   List *pathkeys,
 441                                                                                   double fraction)
 442 {
 443         Path       *matched_path = NULL;
 444         ListCell   *l;
 445
 446         foreach(l, paths)
 447         {
 448                 Path       *path = (Path *) lfirst(l);
 449
 450                 /*
 451                  * Since cost comparison is a lot cheaper than pathkey comparison, do
 452                  * that first.
 453                  */
 454                 if (matched_path != NULL &&
 455                         compare_fractional_path_costs(matched_path, path, fraction) <= 0)
 456                         continue;
 457
 458                 if (pathkeys_contained_in(pathkeys, path->pathkeys))
 459                         matched_path = path;
 460         }
 461         return matched_path;
 462 }
 463
 464 /****************************************************************************
 465  *              NEW PATHKEY FORMATION
 466  ****************************************************************************/
 467
 468 /*
 469  * build_index_pathkeys
 470  *        Build a pathkeys list that describes the ordering induced by an index
 471  *        scan using the given index.  (Note that an unordered index doesn't
 472  *        induce any ordering; such an index will have no sortop OIDS in
 473  *        its sortops arrays, and we will return NIL.)
 474  *
 475  * If 'scandir' is BackwardScanDirection, attempt to build pathkeys
 476  * representing a backwards scan of the index.  Return NIL if can't do it.
 477  *
 478  * The result is canonical, meaning that redundant pathkeys are removed;
 479  * it may therefore have fewer entries than there are index columns.
 480  *
 481  * We generate the full pathkeys list whether or not all are useful for the
 482  * current query.  Caller should do truncate_useless_pathkeys().
 483  */
 484 List *
 485 build_index_pathkeys(PlannerInfo *root,
 486                                          IndexOptInfo *index,
 487                                          ScanDirection scandir)
 488 {
 489         List       *retval = NIL;
 490         ListCell   *indexprs_item = list_head(index->indexprs);
 491         int                     i;
 492
 493         for (i = 0; i < index->ncolumns; i++)
 494         {
 495                 Oid                     sortop;
 496                 bool            nulls_first;
 497                 int                     ikey;
 498                 Expr       *indexkey;
 499                 PathKey    *cpathkey;
 500
 501                 if (ScanDirectionIsBackward(scandir))
 502                 {
 503                         sortop = index->revsortop[i];
 504                         nulls_first = !index->nulls_first[i];
 505                 }
 506                 else
 507                 {
 508                         sortop = index->fwdsortop[i];
 509                         nulls_first = index->nulls_first[i];
 510                 }
 511
 512                 if (!OidIsValid(sortop))
 513                         break;                          /* no more orderable columns */
 514
 515                 ikey = index->indexkeys[i];
 516                 if (ikey != 0)
 517                 {
 518                         /* simple index column */
 519                         indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey);
 520                 }
 521                 else
 522                 {
 523                         /* expression --- assume we need not copy it */
 524                         if (indexprs_item == NULL)
 525                                 elog(ERROR, "wrong number of index expressions");
 526                         indexkey = (Expr *) lfirst(indexprs_item);
 527                         indexprs_item = lnext(indexprs_item);
 528                 }
 529
 530                 /* OK, make a canonical pathkey for this sort key */
 531                 cpathkey = make_pathkey_from_sortinfo(root,
 532                                                                                           indexkey,
 533                                                                                           sortop,
 534                                                                                           nulls_first,
 535                                                                                           0,
 536                                                                                           true);
 537
 538                 /* Add to list unless redundant */
 539                 if (!pathkey_is_redundant(cpathkey, retval))
 540                         retval = lappend(retval, cpathkey);
 541         }
 542
 543         return retval;
 544 }
 545
 546 /*
 547  * Find or make a Var node for the specified attribute of the rel.
 548  *
 549  * We first look for the var in the rel's target list, because that's
 550  * easy and fast.  But the var might not be there (this should normally
 551  * only happen for vars that are used in WHERE restriction clauses,
 552  * but not in join clauses or in the SELECT target list).  In that case,
 553  * gin up a Var node the hard way.
 554  */
 555 static Var *
 556 find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, AttrNumber varattno)
 557 {
 558         ListCell   *temp;
 559         Index           relid;
 560         Oid                     reloid,
 561                                 vartypeid;
 562         int32           type_mod;
 563
 564         foreach(temp, rel->reltargetlist)
 565         {
 566                 Var                *var = (Var *) lfirst(temp);
 567
 568                 if (IsA(var, Var) &&
 569                         var->varattno == varattno)
 570                         return var;
 571         }
 572
 573         relid = rel->relid;
 574         reloid = getrelid(relid, root->parse->rtable);
 575         get_atttypetypmod(reloid, varattno, &vartypeid, &type_mod);
 576
 577         return makeVar(relid, varattno, vartypeid, type_mod, 0);
 578 }
 579
 580 /*
 581  * convert_subquery_pathkeys
 582  *        Build a pathkeys list that describes the ordering of a subquery's
 583  *        result, in the terms of the outer query.      This is essentially a
 584  *        task of conversion.
 585  *
 586  * 'rel': outer query's RelOptInfo for the subquery relation.
 587  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
 588  *
 589  * It is not necessary for caller to do truncate_useless_pathkeys(),
 590  * because we select keys in a way that takes usefulness of the keys into
 591  * account.
 592  */
 593 List *
 594 convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
 595                                                   List *subquery_pathkeys)
 596 {
 597         List       *retval = NIL;
 598         int                     retvallen = 0;
 599         int                     outer_query_keys = list_length(root->query_pathkeys);
 600         List       *sub_tlist = rel->subplan->targetlist;
 601         ListCell   *i;
 602
 603         foreach(i, subquery_pathkeys)
 604         {
 605                 PathKey    *sub_pathkey = (PathKey *) lfirst(i);
 606                 EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
 607                 PathKey    *best_pathkey = NULL;
 608
 609                 if (sub_eclass->ec_has_volatile)
 610                 {
 611                         /*
 612                          * If the sub_pathkey's EquivalenceClass is volatile, then it must
 613                          * have come from an ORDER BY clause, and we have to match it to
 614                          * that same targetlist entry.
 615                          */
 616                         TargetEntry *tle;
 617
 618                         if (sub_eclass->ec_sortref == 0)        /* can't happen */
 619                                 elog(ERROR, "volatile EquivalenceClass has no sortref");
 620                         tle = get_sortgroupref_tle(sub_eclass->ec_sortref, sub_tlist);
 621                         Assert(tle);
 622                         /* resjunk items aren't visible to outer query */
 623                         if (!tle->resjunk)
 624                         {
 625                                 /* We can represent this sub_pathkey */
 626                                 EquivalenceMember *sub_member;
 627                                 Expr       *outer_expr;
 628                                 EquivalenceClass *outer_ec;
 629
 630                                 Assert(list_length(sub_eclass->ec_members) == 1);
 631                                 sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
 632                                 outer_expr = (Expr *)
 633                                         makeVar(rel->relid,
 634                                                         tle->resno,
 635                                                         exprType((Node *) tle->expr),
 636                                                         exprTypmod((Node *) tle->expr),
 637                                                         0);
 638                                 outer_ec =
 639                                         get_eclass_for_sort_expr(root,
 640                                                                                          outer_expr,
 641                                                                                          sub_member->em_datatype,
 642                                                                                          sub_eclass->ec_opfamilies,
 643                                                                                          0);
 644                                 best_pathkey =
 645                                         make_canonical_pathkey(root,
 646                                                                                    outer_ec,
 647                                                                                    sub_pathkey->pk_opfamily,
 648                                                                                    sub_pathkey->pk_strategy,
 649                                                                                    sub_pathkey->pk_nulls_first);
 650                         }
 651                 }
 652                 else
 653                 {
 654                         /*
 655                          * Otherwise, the sub_pathkey's EquivalenceClass could contain
 656                          * multiple elements (representing knowledge that multiple items
 657                          * are effectively equal).      Each element might match none, one, or
 658                          * more of the output columns that are visible to the outer query.
 659                          * This means we may have multiple possible representations of the
 660                          * sub_pathkey in the context of the outer query.  Ideally we
 661                          * would generate them all and put them all into an EC of the
 662                          * outer query, thereby propagating equality knowledge up to the
 663                          * outer query.  Right now we cannot do so, because the outer
 664                          * query's EquivalenceClasses are already frozen when this is
 665                          * called. Instead we prefer the one that has the highest "score"
 666                          * (number of EC peers, plus one if it matches the outer
 667                          * query_pathkeys). This is the most likely to be useful in the
 668                          * outer query.
 669                          */
 670                         int                     best_score = -1;
 671                         ListCell   *j;
 672
 673                         foreach(j, sub_eclass->ec_members)
 674                         {
 675                                 EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
 676                                 Expr       *sub_expr = sub_member->em_expr;
 677                                 Expr       *sub_stripped;
 678                                 ListCell   *k;
 679
 680                                 /*
 681                                  * We handle two cases: the sub_pathkey key can be either an
 682                                  * exact match for a targetlist entry, or it could match after
 683                                  * stripping RelabelType nodes.  (We need that case since
 684                                  * make_pathkey_from_sortinfo could add or remove
 685                                  * RelabelType.)
 686                                  */
 687                                 sub_stripped = sub_expr;
 688                                 while (sub_stripped && IsA(sub_stripped, RelabelType))
 689                                         sub_stripped = ((RelabelType *) sub_stripped)->arg;
 690
 691                                 foreach(k, sub_tlist)
 692                                 {
 693                                         TargetEntry *tle = (TargetEntry *) lfirst(k);
 694                                         Expr       *outer_expr;
 695                                         EquivalenceClass *outer_ec;
 696                                         PathKey    *outer_pk;
 697                                         int                     score;
 698
 699                                         /* resjunk items aren't visible to outer query */
 700                                         if (tle->resjunk)
 701                                                 continue;
 702
 703                                         if (equal(tle->expr, sub_expr))
 704                                         {
 705                                                 /* Exact match */
 706                                                 outer_expr = (Expr *)
 707                                                         makeVar(rel->relid,
 708                                                                         tle->resno,
 709                                                                         exprType((Node *) tle->expr),
 710                                                                         exprTypmod((Node *) tle->expr),
 711                                                                         0);
 712                                         }
 713                                         else
 714                                         {
 715                                                 Expr       *tle_stripped;
 716
 717                                                 tle_stripped = tle->expr;
 718                                                 while (tle_stripped && IsA(tle_stripped, RelabelType))
 719                                                         tle_stripped = ((RelabelType *) tle_stripped)->arg;
 720
 721                                                 if (equal(tle_stripped, sub_stripped))
 722                                                 {
 723                                                         /* Match after discarding RelabelType */
 724                                                         outer_expr = (Expr *)
 725                                                                 makeVar(rel->relid,
 726                                                                                 tle->resno,
 727                                                                                 exprType((Node *) tle->expr),
 728                                                                                 exprTypmod((Node *) tle->expr),
 729                                                                                 0);
 730                                                         if (exprType((Node *) outer_expr) !=
 731                                                                 exprType((Node *) sub_expr))
 732                                                                 outer_expr = (Expr *)
 733                                                                         makeRelabelType(outer_expr,
 734                                                                                                  exprType((Node *) sub_expr),
 735                                                                                                         -1,
 736                                                                                                         COERCE_DONTCARE);
 737                                                 }
 738                                                 else
 739                                                         continue;
 740                                         }
 741
 742                                         /* Found a representation for this sub_pathkey */
 743                                         outer_ec = get_eclass_for_sort_expr(root,
 744                                                                                                                 outer_expr,
 745                                                                                                          sub_member->em_datatype,
 746                                                                                                    sub_eclass->ec_opfamilies,
 747                                                                                                                 0);
 748                                         outer_pk = make_canonical_pathkey(root,
 749                                                                                                           outer_ec,
 750                                                                                                         sub_pathkey->pk_opfamily,
 751                                                                                                         sub_pathkey->pk_strategy,
 752                                                                                                 sub_pathkey->pk_nulls_first);
 753                                         /* score = # of equivalence peers */
 754                                         score = list_length(outer_ec->ec_members) - 1;
 755                                         /* +1 if it matches the proper query_pathkeys item */
 756                                         if (retvallen < outer_query_keys &&
 757                                                 list_nth(root->query_pathkeys, retvallen) == outer_pk)
 758                                                 score++;
 759                                         if (score > best_score)
 760                                         {
 761                                                 best_pathkey = outer_pk;
 762                                                 best_score = score;
 763                                         }
 764                                 }
 765                         }
 766                 }
 767
 768                 /*
 769                  * If we couldn't find a representation of this sub_pathkey, we're
 770                  * done (we can't use the ones to its right, either).
 771                  */
 772                 if (!best_pathkey)
 773                         break;
 774
 775                 /*
 776                  * Eliminate redundant ordering info; could happen if outer query
 777                  * equivalences subquery keys...
 778                  */
 779                 if (!pathkey_is_redundant(best_pathkey, retval))
 780                 {
 781                         retval = lappend(retval, best_pathkey);
 782                         retvallen++;
 783                 }
 784         }
 785
 786         return retval;
 787 }
 788
 789 /*
 790  * build_join_pathkeys
 791  *        Build the path keys for a join relation constructed by mergejoin or
 792  *        nestloop join.  This is normally the same as the outer path's keys.
 793  *
 794  *        EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
 795  *        having the outer path's path keys, because null lefthand rows may be
 796  *        inserted at random points.  It must be treated as unsorted.
 797  *
 798  *        We truncate away any pathkeys that are uninteresting for higher joins.
 799  *
 800  * 'joinrel' is the join relation that paths are being formed for
 801  * 'jointype' is the join type (inner, left, full, etc)
 802  * 'outer_pathkeys' is the list of the current outer path's path keys
 803  *
 804  * Returns the list of new path keys.
 805  */
 806 List *
 807 build_join_pathkeys(PlannerInfo *root,
 808                                         RelOptInfo *joinrel,
 809                                         JoinType jointype,
 810                                         List *outer_pathkeys)
 811 {
 812         if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
 813                 return NIL;
 814
 815         /*
 816          * This used to be quite a complex bit of code, but now that all pathkey
 817          * sublists start out life canonicalized, we don't have to do a darn thing
 818          * here!
 819          *
 820          * We do, however, need to truncate the pathkeys list, since it may
 821          * contain pathkeys that were useful for forming this joinrel but are
 822          * uninteresting to higher levels.
 823          */
 824         return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
 825 }
 826
 827 /****************************************************************************
 828  *              PATHKEYS AND SORT CLAUSES
 829  ****************************************************************************/
 830
 831 /*
 832  * make_pathkeys_for_sortclauses
 833  *              Generate a pathkeys list that represents the sort order specified
 834  *              by a list of SortGroupClauses
 835  *
 836  * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
 837  * otherwise not.  canonicalize should always be TRUE after EquivalenceClass
 838  * merging has been performed, but FALSE if we haven't done EquivalenceClass
 839  * merging yet.  (We provide this option because grouping_planner() needs to
 840  * be able to represent requested pathkeys before the equivalence classes have
 841  * been created for the query.)
 842  *
 843  * 'sortclauses' is a list of SortGroupClause nodes
 844  * 'tlist' is the targetlist to find the referenced tlist entries in
 845  */
 846 List *
 847 make_pathkeys_for_sortclauses(PlannerInfo *root,
 848                                                           List *sortclauses,
 849                                                           List *tlist,
 850                                                           bool canonicalize)
 851 {
 852         List       *pathkeys = NIL;
 853         ListCell   *l;
 854
 855         foreach(l, sortclauses)
 856         {
 857                 SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 858                 Expr       *sortkey;
 859                 PathKey    *pathkey;
 860
 861                 sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
 862                 Assert(OidIsValid(sortcl->sortop));
 863                 pathkey = make_pathkey_from_sortinfo(root,
 864                                                                                          sortkey,
 865                                                                                          sortcl->sortop,
 866                                                                                          sortcl->nulls_first,
 867                                                                                          sortcl->tleSortGroupRef,
 868                                                                                          canonicalize);
 869
 870                 /* Canonical form eliminates redundant ordering keys */
 871                 if (canonicalize)
 872                 {
 873                         if (!pathkey_is_redundant(pathkey, pathkeys))
 874                                 pathkeys = lappend(pathkeys, pathkey);
 875                 }
 876                 else
 877                         pathkeys = lappend(pathkeys, pathkey);
 878         }
 879         return pathkeys;
 880 }
 881
 882 /****************************************************************************
 883  *              PATHKEYS AND MERGECLAUSES
 884  ****************************************************************************/
 885
 886 /*
 887  * cache_mergeclause_eclasses
 888  *              Make the cached EquivalenceClass links valid in a mergeclause
 889  *              restrictinfo.
 890  *
 891  * RestrictInfo contains fields in which we may cache pointers to
 892  * EquivalenceClasses for the left and right inputs of the mergeclause.
 893  * (If the mergeclause is a true equivalence clause these will be the
 894  * same EquivalenceClass, otherwise not.)
 895  */
 896 void
 897 cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
 898 {
 899         Assert(restrictinfo->mergeopfamilies != NIL);
 900
 901         /* the cached values should be either both set or both not */
 902         if (restrictinfo->left_ec == NULL)
 903         {
 904                 Expr       *clause = restrictinfo->clause;
 905                 Oid                     lefttype,
 906                                         righttype;
 907
 908                 /* Need the declared input types of the operator */
 909                 op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
 910
 911                 /* Find or create a matching EquivalenceClass for each side */
 912                 restrictinfo->left_ec =
 913                         get_eclass_for_sort_expr(root,
 914                                                                          (Expr *) get_leftop(clause),
 915                                                                          lefttype,
 916                                                                          restrictinfo->mergeopfamilies,
 917                                                                          0);
 918                 restrictinfo->right_ec =
 919                         get_eclass_for_sort_expr(root,
 920                                                                          (Expr *) get_rightop(clause),
 921                                                                          righttype,
 922                                                                          restrictinfo->mergeopfamilies,
 923                                                                          0);
 924         }
 925         else
 926                 Assert(restrictinfo->right_ec != NULL);
 927 }
 928
 929 /*
 930  * find_mergeclauses_for_pathkeys
 931  *        This routine attempts to find a set of mergeclauses that can be
 932  *        used with a specified ordering for one of the input relations.
 933  *        If successful, it returns a list of mergeclauses.
 934  *
 935  * 'pathkeys' is a pathkeys list showing the ordering of an input path.
 936  * 'outer_keys' is TRUE if these keys are for the outer input path,
 937  *                      FALSE if for inner.
 938  * 'restrictinfos' is a list of mergejoinable restriction clauses for the
 939  *                      join relation being formed.
 940  *
 941  * The restrictinfos must be marked (via outer_is_left) to show which side
 942  * of each clause is associated with the current outer path.  (See
 943  * select_mergejoin_clauses())
 944  *
 945  * The result is NIL if no merge can be done, else a maximal list of
 946  * usable mergeclauses (represented as a list of their restrictinfo nodes).
 947  */
 948 List *
 949 find_mergeclauses_for_pathkeys(PlannerInfo *root,
 950                                                            List *pathkeys,
 951                                                            bool outer_keys,
 952                                                            List *restrictinfos)
 953 {
 954         List       *mergeclauses = NIL;
 955         ListCell   *i;
 956
 957         /* make sure we have eclasses cached in the clauses */
 958         foreach(i, restrictinfos)
 959         {
 960                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
 961
 962                 cache_mergeclause_eclasses(root, rinfo);
 963         }
 964
 965         foreach(i, pathkeys)
 966         {
 967                 PathKey    *pathkey = (PathKey *) lfirst(i);
 968                 EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
 969                 List       *matched_restrictinfos = NIL;
 970                 ListCell   *j;
 971
 972                 /*----------
 973                  * A mergejoin clause matches a pathkey if it has the same EC.
 974                  * If there are multiple matching clauses, take them all.  In plain
 975                  * inner-join scenarios we expect only one match, because
 976                  * equivalence-class processing will have removed any redundant
 977                  * mergeclauses.  However, in outer-join scenarios there might be
 978                  * multiple matches.  An example is
 979                  *
 980                  *      select * from a full join b
 981                  *              on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
 982                  *
 983                  * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
 984                  * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
 985                  * we *must* do so or we will be unable to form a valid plan.
 986                  *
 987                  * We expect that the given pathkeys list is canonical, which means
 988                  * no two members have the same EC, so it's not possible for this
 989                  * code to enter the same mergeclause into the result list twice.
 990                  *
 991                  * XXX it's possible that multiple matching clauses might have
 992                  * different ECs on the other side, in which case the order we put
 993                  * them into our result makes a difference in the pathkeys required
 994                  * for the other input path.  However this routine hasn't got any info
 995                  * about which order would be best, so for now we disregard that case
 996                  * (which is probably a corner case anyway).
 997                  *----------
 998                  */
 999                 foreach(j, restrictinfos)
1000                 {
1001                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1002                         EquivalenceClass *clause_ec;
1003
1004                         if (outer_keys)
1005                                 clause_ec = rinfo->outer_is_left ?
1006                                         rinfo->left_ec : rinfo->right_ec;
1007                         else
1008                                 clause_ec = rinfo->outer_is_left ?
1009                                         rinfo->right_ec : rinfo->left_ec;
1010                         if (clause_ec == pathkey_ec)
1011                                 matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1012                 }
1013
1014                 /*
1015                  * If we didn't find a mergeclause, we're done --- any additional
1016                  * sort-key positions in the pathkeys are useless.      (But we can still
1017                  * mergejoin if we found at least one mergeclause.)
1018                  */
1019                 if (matched_restrictinfos == NIL)
1020                         break;
1021
1022                 /*
1023                  * If we did find usable mergeclause(s) for this sort-key position,
1024                  * add them to result list.
1025                  */
1026                 mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1027         }
1028
1029         return mergeclauses;
1030 }
1031
1032 /*
1033  * select_outer_pathkeys_for_merge
1034  *        Builds a pathkey list representing a possible sort ordering
1035  *        that can be used with the given mergeclauses.
1036  *
1037  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1038  *                      that will be used in a merge join.
1039  * 'joinrel' is the join relation we are trying to construct.
1040  *
1041  * The restrictinfos must be marked (via outer_is_left) to show which side
1042  * of each clause is associated with the current outer path.  (See
1043  * select_mergejoin_clauses())
1044  *
1045  * Returns a pathkeys list that can be applied to the outer relation.
1046  *
1047  * Since we assume here that a sort is required, there is no particular use
1048  * in matching any available ordering of the outerrel.  (joinpath.c has an
1049  * entirely separate code path for considering sort-free mergejoins.)  Rather,
1050  * it's interesting to try to match the requested query_pathkeys so that a
1051  * second output sort may be avoided; and failing that, we try to list "more
1052  * popular" keys (those with the most unmatched EquivalenceClass peers)
1053  * earlier, in hopes of making the resulting ordering useful for as many
1054  * higher-level mergejoins as possible.
1055  */
1056 List *
1057 select_outer_pathkeys_for_merge(PlannerInfo *root,
1058                                                                 List *mergeclauses,
1059                                                                 RelOptInfo *joinrel)
1060 {
1061         List       *pathkeys = NIL;
1062         int                     nClauses = list_length(mergeclauses);
1063         EquivalenceClass **ecs;
1064         int                *scores;
1065         int                     necs;
1066         ListCell   *lc;
1067         int                     j;
1068
1069         /* Might have no mergeclauses */
1070         if (nClauses == 0)
1071                 return NIL;
1072
1073         /*
1074          * Make arrays of the ECs used by the mergeclauses (dropping any
1075          * duplicates) and their "popularity" scores.
1076          */
1077         ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1078         scores = (int *) palloc(nClauses * sizeof(int));
1079         necs = 0;
1080
1081         foreach(lc, mergeclauses)
1082         {
1083                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1084                 EquivalenceClass *oeclass;
1085                 int                     score;
1086                 ListCell   *lc2;
1087
1088                 /* get the outer eclass */
1089                 cache_mergeclause_eclasses(root, rinfo);
1090
1091                 if (rinfo->outer_is_left)
1092                         oeclass = rinfo->left_ec;
1093                 else
1094                         oeclass = rinfo->right_ec;
1095
1096                 /* reject duplicates */
1097                 for (j = 0; j < necs; j++)
1098                 {
1099                         if (ecs[j] == oeclass)
1100                                 break;
1101                 }
1102                 if (j < necs)
1103                         continue;
1104
1105                 /* compute score */
1106                 score = 0;
1107                 foreach(lc2, oeclass->ec_members)
1108                 {
1109                         EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
1110
1111                         /* Potential future join partner? */
1112                         if (!em->em_is_const && !em->em_is_child &&
1113                                 !bms_overlap(em->em_relids, joinrel->relids))
1114                                 score++;
1115                 }
1116
1117                 ecs[necs] = oeclass;
1118                 scores[necs] = score;
1119                 necs++;
1120         }
1121
1122         /*
1123          * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1124          * can generate a sort order that's also useful for final output. There is
1125          * no percentage in a partial match, though, so we have to have 'em all.
1126          */
1127         if (root->query_pathkeys)
1128         {
1129                 foreach(lc, root->query_pathkeys)
1130                 {
1131                         PathKey    *query_pathkey = (PathKey *) lfirst(lc);
1132                         EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1133
1134                         for (j = 0; j < necs; j++)
1135                         {
1136                                 if (ecs[j] == query_ec)
1137                                         break;          /* found match */
1138                         }
1139                         if (j >= necs)
1140                                 break;                  /* didn't find match */
1141                 }
1142                 /* if we got to the end of the list, we have them all */
1143                 if (lc == NULL)
1144                 {
1145                         /* copy query_pathkeys as starting point for our output */
1146                         pathkeys = list_copy(root->query_pathkeys);
1147                         /* mark their ECs as already-emitted */
1148                         foreach(lc, root->query_pathkeys)
1149                         {
1150                                 PathKey    *query_pathkey = (PathKey *) lfirst(lc);
1151                                 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1152
1153                                 for (j = 0; j < necs; j++)
1154                                 {
1155                                         if (ecs[j] == query_ec)
1156                                         {
1157                                                 scores[j] = -1;
1158                                                 break;
1159                                         }
1160                                 }
1161                         }
1162                 }
1163         }
1164
1165         /*
1166          * Add remaining ECs to the list in popularity order, using a default sort
1167          * ordering.  (We could use qsort() here, but the list length is usually
1168          * so small it's not worth it.)
1169          */
1170         for (;;)
1171         {
1172                 int                     best_j;
1173                 int                     best_score;
1174                 EquivalenceClass *ec;
1175                 PathKey    *pathkey;
1176
1177                 best_j = 0;
1178                 best_score = scores[0];
1179                 for (j = 1; j < necs; j++)
1180                 {
1181                         if (scores[j] > best_score)
1182                         {
1183                                 best_j = j;
1184                                 best_score = scores[j];
1185                         }
1186                 }
1187                 if (best_score < 0)
1188                         break;                          /* all done */
1189                 ec = ecs[best_j];
1190                 scores[best_j] = -1;
1191                 pathkey = make_canonical_pathkey(root,
1192                                                                                  ec,
1193                                                                                  linitial_oid(ec->ec_opfamilies),
1194                                                                                  BTLessStrategyNumber,
1195                                                                                  false);
1196                 /* can't be redundant because no duplicate ECs */
1197                 Assert(!pathkey_is_redundant(pathkey, pathkeys));
1198                 pathkeys = lappend(pathkeys, pathkey);
1199         }
1200
1201         pfree(ecs);
1202         pfree(scores);
1203
1204         return pathkeys;
1205 }
1206
1207 /*
1208  * make_inner_pathkeys_for_merge
1209  *        Builds a pathkey list representing the explicit sort order that
1210  *        must be applied to an inner path to make it usable with the
1211  *        given mergeclauses.
1212  *
1213  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1214  *                      that will be used in a merge join.
1215  * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1216  *                      side of the join.
1217  *
1218  * The restrictinfos must be marked (via outer_is_left) to show which side
1219  * of each clause is associated with the current outer path.  (See
1220  * select_mergejoin_clauses())
1221  *
1222  * Returns a pathkeys list that can be applied to the inner relation.
1223  *
1224  * Note that it is not this routine's job to decide whether sorting is
1225  * actually needed for a particular input path.  Assume a sort is necessary;
1226  * just make the keys, eh?
1227  */
1228 List *
1229 make_inner_pathkeys_for_merge(PlannerInfo *root,
1230                                                           List *mergeclauses,
1231                                                           List *outer_pathkeys)
1232 {
1233         List       *pathkeys = NIL;
1234         EquivalenceClass *lastoeclass;
1235         PathKey    *opathkey;
1236         ListCell   *lc;
1237         ListCell   *lop;
1238
1239         lastoeclass = NULL;
1240         opathkey = NULL;
1241         lop = list_head(outer_pathkeys);
1242
1243         foreach(lc, mergeclauses)
1244         {
1245                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1246                 EquivalenceClass *oeclass;
1247                 EquivalenceClass *ieclass;
1248                 PathKey    *pathkey;
1249
1250                 cache_mergeclause_eclasses(root, rinfo);
1251
1252                 if (rinfo->outer_is_left)
1253                 {
1254                         oeclass = rinfo->left_ec;
1255                         ieclass = rinfo->right_ec;
1256                 }
1257                 else
1258                 {
1259                         oeclass = rinfo->right_ec;
1260                         ieclass = rinfo->left_ec;
1261                 }
1262
1263                 /* outer eclass should match current or next pathkeys */
1264                 /* we check this carefully for debugging reasons */
1265                 if (oeclass != lastoeclass)
1266                 {
1267                         if (!lop)
1268                                 elog(ERROR, "too few pathkeys for mergeclauses");
1269                         opathkey = (PathKey *) lfirst(lop);
1270                         lop = lnext(lop);
1271                         lastoeclass = opathkey->pk_eclass;
1272                         if (oeclass != lastoeclass)
1273                                 elog(ERROR, "outer pathkeys do not match mergeclause");
1274                 }
1275
1276                 /*
1277                  * Often, we'll have same EC on both sides, in which case the outer
1278                  * pathkey is also canonical for the inner side, and we can skip a
1279                  * useless search.
1280                  */
1281                 if (ieclass == oeclass)
1282                         pathkey = opathkey;
1283                 else
1284                         pathkey = make_canonical_pathkey(root,
1285                                                                                          ieclass,
1286                                                                                          opathkey->pk_opfamily,
1287                                                                                          opathkey->pk_strategy,
1288                                                                                          opathkey->pk_nulls_first);
1289
1290                 /*
1291                  * Don't generate redundant pathkeys (can happen if multiple
1292                  * mergeclauses refer to same EC).
1293                  */
1294                 if (!pathkey_is_redundant(pathkey, pathkeys))
1295                         pathkeys = lappend(pathkeys, pathkey);
1296         }
1297
1298         return pathkeys;
1299 }
1300
1301 /****************************************************************************
1302  *              PATHKEY USEFULNESS CHECKS
1303  *
1304  * We only want to remember as many of the pathkeys of a path as have some
1305  * potential use, either for subsequent mergejoins or for meeting the query's
1306  * requested output ordering.  This ensures that add_path() won't consider
1307  * a path to have a usefully different ordering unless it really is useful.
1308  * These routines check for usefulness of given pathkeys.
1309  ****************************************************************************/
1310
1311 /*
1312  * pathkeys_useful_for_merging
1313  *              Count the number of pathkeys that may be useful for mergejoins
1314  *              above the given relation.
1315  *
1316  * We consider a pathkey potentially useful if it corresponds to the merge
1317  * ordering of either side of any joinclause for the rel.  This might be
1318  * overoptimistic, since joinclauses that require different other relations
1319  * might never be usable at the same time, but trying to be exact is likely
1320  * to be more trouble than it's worth.
1321  *
1322  * To avoid doubling the number of mergejoin paths considered, we would like
1323  * to consider only one of the two scan directions (ASC or DESC) as useful
1324  * for merging for any given target column.  The choice is arbitrary unless
1325  * one of the directions happens to match an ORDER BY key, in which case
1326  * that direction should be preferred, in hopes of avoiding a final sort step.
1327  * right_merge_direction() implements this heuristic.
1328  */
1329 int
1330 pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
1331 {
1332         int                     useful = 0;
1333         ListCell   *i;
1334
1335         foreach(i, pathkeys)
1336         {
1337                 PathKey    *pathkey = (PathKey *) lfirst(i);
1338                 bool            matched = false;
1339                 ListCell   *j;
1340
1341                 /* If "wrong" direction, not useful for merging */
1342                 if (!right_merge_direction(root, pathkey))
1343                         break;
1344
1345                 /*
1346                  * First look into the EquivalenceClass of the pathkey, to see if
1347                  * there are any members not yet joined to the rel.  If so, it's
1348                  * surely possible to generate a mergejoin clause using them.
1349                  */
1350                 if (rel->has_eclass_joins &&
1351                         eclass_useful_for_merging(pathkey->pk_eclass, rel))
1352                         matched = true;
1353                 else
1354                 {
1355                         /*
1356                          * Otherwise search the rel's joininfo list, which contains
1357                          * non-EquivalenceClass-derivable join clauses that might
1358                          * nonetheless be mergejoinable.
1359                          */
1360                         foreach(j, rel->joininfo)
1361                         {
1362                                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
1363
1364                                 if (restrictinfo->mergeopfamilies == NIL)
1365                                         continue;
1366                                 cache_mergeclause_eclasses(root, restrictinfo);
1367
1368                                 if (pathkey->pk_eclass == restrictinfo->left_ec ||
1369                                         pathkey->pk_eclass == restrictinfo->right_ec)
1370                                 {
1371                                         matched = true;
1372                                         break;
1373                                 }
1374                         }
1375                 }
1376
1377                 /*
1378                  * If we didn't find a mergeclause, we're done --- any additional
1379                  * sort-key positions in the pathkeys are useless.      (But we can still
1380                  * mergejoin if we found at least one mergeclause.)
1381                  */
1382                 if (matched)
1383                         useful++;
1384                 else
1385                         break;
1386         }
1387
1388         return useful;
1389 }
1390
1391 /*
1392  * right_merge_direction
1393  *              Check whether the pathkey embodies the preferred sort direction
1394  *              for merging its target column.
1395  */
1396 static bool
1397 right_merge_direction(PlannerInfo *root, PathKey *pathkey)
1398 {
1399         ListCell   *l;
1400
1401         foreach(l, root->query_pathkeys)
1402         {
1403                 PathKey    *query_pathkey = (PathKey *) lfirst(l);
1404
1405                 if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
1406                         pathkey->pk_opfamily == query_pathkey->pk_opfamily)
1407                 {
1408                         /*
1409                          * Found a matching query sort column.  Prefer this pathkey's
1410                          * direction iff it matches.  Note that we ignore pk_nulls_first,
1411                          * which means that a sort might be needed anyway ... but we still
1412                          * want to prefer only one of the two possible directions, and we
1413                          * might as well use this one.
1414                          */
1415                         return (pathkey->pk_strategy == query_pathkey->pk_strategy);
1416                 }
1417         }
1418
1419         /* If no matching ORDER BY request, prefer the ASC direction */
1420         return (pathkey->pk_strategy == BTLessStrategyNumber);
1421 }
1422
1423 /*
1424  * pathkeys_useful_for_ordering
1425  *              Count the number of pathkeys that are useful for meeting the
1426  *              query's requested output ordering.
1427  *
1428  * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
1429  * no good to order by just the first key(s) of the requested ordering.
1430  * So the result is always either 0 or list_length(root->query_pathkeys).
1431  */
1432 int
1433 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
1434 {
1435         if (root->query_pathkeys == NIL)
1436                 return 0;                               /* no special ordering requested */
1437
1438         if (pathkeys == NIL)
1439                 return 0;                               /* unordered path */
1440
1441         if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
1442         {
1443                 /* It's useful ... or at least the first N keys are */
1444                 return list_length(root->query_pathkeys);
1445         }
1446
1447         return 0;                                       /* path ordering not useful */
1448 }
1449
1450 /*
1451  * truncate_useless_pathkeys
1452  *              Shorten the given pathkey list to just the useful pathkeys.
1453  */
1454 List *
1455 truncate_useless_pathkeys(PlannerInfo *root,
1456                                                   RelOptInfo *rel,
1457                                                   List *pathkeys)
1458 {
1459         int                     nuseful;
1460         int                     nuseful2;
1461
1462         nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
1463         nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
1464         if (nuseful2 > nuseful)
1465                 nuseful = nuseful2;
1466
1467         /*
1468          * Note: not safe to modify input list destructively, but we can avoid
1469          * copying the list if we're not actually going to change it
1470          */
1471         if (nuseful == 0)
1472                 return NIL;
1473         else if (nuseful == list_length(pathkeys))
1474                 return pathkeys;
1475         else
1476                 return list_truncate(list_copy(pathkeys), nuseful);
1477 }
1478
1479 /*
1480  * has_useful_pathkeys
1481  *              Detect whether the specified rel could have any pathkeys that are
1482  *              useful according to truncate_useless_pathkeys().
1483  *
1484  * This is a cheap test that lets us skip building pathkeys at all in very
1485  * simple queries.      It's OK to err in the direction of returning "true" when
1486  * there really aren't any usable pathkeys, but erring in the other direction
1487  * is bad --- so keep this in sync with the routines above!
1488  *
1489  * We could make the test more complex, for example checking to see if any of
1490  * the joinclauses are really mergejoinable, but that likely wouldn't win
1491  * often enough to repay the extra cycles.      Queries with neither a join nor
1492  * a sort are reasonably common, though, so this much work seems worthwhile.
1493  */
1494 bool
1495 has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
1496 {
1497         if (rel->joininfo != NIL || rel->has_eclass_joins)
1498                 return true;                    /* might be able to use pathkeys for merging */
1499         if (root->query_pathkeys != NIL)
1500                 return true;                    /* might be able to use them for ordering */
1501         return false;                           /* definitely useless */
1502 }