src/backend/optimizer/path/pathkeys.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * pathkeys.c
   4  *        Utilities for matching and building path keys
   5  *
   6  * See src/backend/optimizer/README for a great deal of information about
   7  * the nature and use of path keys.
   8  *
   9  *
  10  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  11  * Portions Copyright (c) 1994, Regents of the University of California
  12  *
  13  * IDENTIFICATION
  14  *        $PostgreSQL$
  15  *
  16  *-------------------------------------------------------------------------
  17  */
  18 #include "postgres.h"
  19
  20 #include "access/skey.h"
  21 #include "catalog/pg_type.h"
  22 #include "nodes/makefuncs.h"
  23 #include "nodes/nodeFuncs.h"
  24 #include "nodes/plannodes.h"
  25 #include "optimizer/clauses.h"
  26 #include "optimizer/pathnode.h"
  27 #include "optimizer/paths.h"
  28 #include "optimizer/tlist.h"
  29 #include "parser/parsetree.h"
  30 #include "utils/lsyscache.h"
  31
  32
  33 static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily,
  34                         int strategy, bool nulls_first);
  35 static PathKey *make_canonical_pathkey(PlannerInfo *root,
  36                                            EquivalenceClass *eclass, Oid opfamily,
  37                                            int strategy, bool nulls_first);
  38 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
  39 static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root,
  40                                                    Expr *expr, Oid ordering_op,
  41                                                    bool nulls_first,
  42                                                    Index sortref,
  43                                                    bool canonicalize);
  44 static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel,
  45                                   AttrNumber varattno);
  46 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
  47
  48
  49 /****************************************************************************
  50  *              PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
  51  ****************************************************************************/
  52
  53 /*
  54  * makePathKey
  55  *              create a PathKey node
  56  *
  57  * This does not promise to create a canonical PathKey, it's merely a
  58  * convenience routine to build the specified node.
  59  */
  60 static PathKey *
  61 makePathKey(EquivalenceClass *eclass, Oid opfamily,
  62                         int strategy, bool nulls_first)
  63 {
  64         PathKey    *pk = makeNode(PathKey);
  65
  66         pk->pk_eclass = eclass;
  67         pk->pk_opfamily = opfamily;
  68         pk->pk_strategy = strategy;
  69         pk->pk_nulls_first = nulls_first;
  70
  71         return pk;
  72 }
  73
  74 /*
  75  * make_canonical_pathkey
  76  *        Given the parameters for a PathKey, find any pre-existing matching
  77  *        pathkey in the query's list of "canonical" pathkeys.  Make a new
  78  *        entry if there's not one already.
  79  *
  80  * Note that this function must not be used until after we have completed
  81  * merging EquivalenceClasses.
  82  */
  83 static PathKey *
  84 make_canonical_pathkey(PlannerInfo *root,
  85                                            EquivalenceClass *eclass, Oid opfamily,
  86                                            int strategy, bool nulls_first)
  87 {
  88         PathKey    *pk;
  89         ListCell   *lc;
  90         MemoryContext oldcontext;
  91
  92         /* The passed eclass might be non-canonical, so chase up to the top */
  93         while (eclass->ec_merged)
  94                 eclass = eclass->ec_merged;
  95
  96         foreach(lc, root->canon_pathkeys)
  97         {
  98                 pk = (PathKey *) lfirst(lc);
  99                 if (eclass == pk->pk_eclass &&
 100                         opfamily == pk->pk_opfamily &&
 101                         strategy == pk->pk_strategy &&
 102                         nulls_first == pk->pk_nulls_first)
 103                         return pk;
 104         }
 105
 106         /*
 107          * Be sure canonical pathkeys are allocated in the main planning context.
 108          * Not an issue in normal planning, but it is for GEQO.
 109          */
 110         oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 111
 112         pk = makePathKey(eclass, opfamily, strategy, nulls_first);
 113         root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
 114
 115         MemoryContextSwitchTo(oldcontext);
 116
 117         return pk;
 118 }
 119
 120 /*
 121  * pathkey_is_redundant
 122  *         Is a pathkey redundant with one already in the given list?
 123  *
 124  * Both the given pathkey and the list members must be canonical for this
 125  * to work properly.  We detect two cases:
 126  *
 127  * 1. If the new pathkey's equivalence class contains a constant, and isn't
 128  * below an outer join, then we can disregard it as a sort key.  An example:
 129  *                      SELECT ... WHERE x = 42 ORDER BY x, y;
 130  * We may as well just sort by y.  Note that because of opfamily matching,
 131  * this is semantically correct: we know that the equality constraint is one
 132  * that actually binds the variable to a single value in the terms of any
 133  * ordering operator that might go with the eclass.  This rule not only lets
 134  * us simplify (or even skip) explicit sorts, but also allows matching index
 135  * sort orders to a query when there are don't-care index columns.
 136  *
 137  * 2. If the new pathkey's equivalence class is the same as that of any
 138  * existing member of the pathkey list, then it is redundant.  Some examples:
 139  *                      SELECT ... ORDER BY x, x;
 140  *                      SELECT ... ORDER BY x, x DESC;
 141  *                      SELECT ... WHERE x = y ORDER BY x, y;
 142  * In all these cases the second sort key cannot distinguish values that are
 143  * considered equal by the first, and so there's no point in using it.
 144  * Note in particular that we need not compare opfamily (all the opfamilies
 145  * of the EC have the same notion of equality) nor sort direction.
 146  *
 147  * Because the equivclass.c machinery forms only one copy of any EC per query,
 148  * pointer comparison is enough to decide whether canonical ECs are the same.
 149  */
 150 static bool
 151 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
 152 {
 153         EquivalenceClass *new_ec = new_pathkey->pk_eclass;
 154         ListCell   *lc;
 155
 156         /* Assert we've been given canonical pathkeys */
 157         Assert(!new_ec->ec_merged);
 158
 159         /* Check for EC containing a constant --- unconditionally redundant */
 160         if (EC_MUST_BE_REDUNDANT(new_ec))
 161                 return true;
 162
 163         /* If same EC already used in list, then redundant */
 164         foreach(lc, pathkeys)
 165         {
 166                 PathKey    *old_pathkey = (PathKey *) lfirst(lc);
 167
 168                 /* Assert we've been given canonical pathkeys */
 169                 Assert(!old_pathkey->pk_eclass->ec_merged);
 170
 171                 if (new_ec == old_pathkey->pk_eclass)
 172                         return true;
 173         }
 174
 175         return false;
 176 }
 177
 178 /*
 179  * canonicalize_pathkeys
 180  *         Convert a not-necessarily-canonical pathkeys list to canonical form.
 181  *
 182  * Note that this function must not be used until after we have completed
 183  * merging EquivalenceClasses.
 184  */
 185 List *
 186 canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
 187 {
 188         List       *new_pathkeys = NIL;
 189         ListCell   *l;
 190
 191         foreach(l, pathkeys)
 192         {
 193                 PathKey    *pathkey = (PathKey *) lfirst(l);
 194                 EquivalenceClass *eclass;
 195                 PathKey    *cpathkey;
 196
 197                 /* Find the canonical (merged) EquivalenceClass */
 198                 eclass = pathkey->pk_eclass;
 199                 while (eclass->ec_merged)
 200                         eclass = eclass->ec_merged;
 201
 202                 /*
 203                  * If we can tell it's redundant just from the EC, skip.
 204                  * pathkey_is_redundant would notice that, but we needn't even bother
 205                  * constructing the node...
 206                  */
 207                 if (EC_MUST_BE_REDUNDANT(eclass))
 208                         continue;
 209
 210                 /* OK, build a canonicalized PathKey struct */
 211                 cpathkey = make_canonical_pathkey(root,
 212                                                                                   eclass,
 213                                                                                   pathkey->pk_opfamily,
 214                                                                                   pathkey->pk_strategy,
 215                                                                                   pathkey->pk_nulls_first);
 216
 217                 /* Add to list unless redundant */
 218                 if (!pathkey_is_redundant(cpathkey, new_pathkeys))
 219                         new_pathkeys = lappend(new_pathkeys, cpathkey);
 220         }
 221         return new_pathkeys;
 222 }
 223
 224 /*
 225  * make_pathkey_from_sortinfo
 226  *        Given an expression, a sortop, and a nulls-first flag, create
 227  *        a PathKey.  If canonicalize = true, the result is a "canonical"
 228  *        PathKey, otherwise not.  (But note it might be redundant anyway.)
 229  *
 230  * If the PathKey is being generated from a SortGroupClause, sortref should be
 231  * the SortGroupClause's SortGroupRef; otherwise zero.
 232  *
 233  * canonicalize should always be TRUE after EquivalenceClass merging has
 234  * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
 235  */
 236 static PathKey *
 237 make_pathkey_from_sortinfo(PlannerInfo *root,
 238                                                    Expr *expr, Oid ordering_op,
 239                                                    bool nulls_first,
 240                                                    Index sortref,
 241                                                    bool canonicalize)
 242 {
 243         Oid                     opfamily,
 244                                 opcintype;
 245         int16           strategy;
 246         Oid                     equality_op;
 247         List       *opfamilies;
 248         EquivalenceClass *eclass;
 249
 250         /*
 251          * An ordering operator fully determines the behavior of its opfamily, so
 252          * could only meaningfully appear in one family --- or perhaps two if one
 253          * builds a reverse-sort opfamily, but there's not much point in that
 254          * anymore.  But EquivalenceClasses need to contain opfamily lists based
 255          * on the family membership of equality operators, which could easily be
 256          * bigger.      So, look up the equality operator that goes with the ordering
 257          * operator (this should be unique) and get its membership.
 258          */
 259
 260         /* Find the operator in pg_amop --- failure shouldn't happen */
 261         if (!get_ordering_op_properties(ordering_op,
 262                                                                         &opfamily, &opcintype, &strategy))
 263                 elog(ERROR, "operator %u is not a valid ordering operator",
 264                          ordering_op);
 265         /* Get matching equality operator */
 266         equality_op = get_opfamily_member(opfamily,
 267                                                                           opcintype,
 268                                                                           opcintype,
 269                                                                           BTEqualStrategyNumber);
 270         if (!OidIsValid(equality_op))           /* shouldn't happen */
 271                 elog(ERROR, "could not find equality operator for ordering operator %u",
 272                          ordering_op);
 273         opfamilies = get_mergejoin_opfamilies(equality_op);
 274         if (!opfamilies)                        /* certainly should find some */
 275                 elog(ERROR, "could not find opfamilies for ordering operator %u",
 276                          ordering_op);
 277
 278         /*
 279          * When dealing with binary-compatible opclasses, we have to ensure that
 280          * the exposed type of the expression tree matches the declared input type
 281          * of the opclass, except when that is a polymorphic type (compare the
 282          * behavior of parse_coerce.c).  This ensures that we can correctly match
 283          * the indexkey or sortclause expression to other expressions we find in
 284          * the query, because arguments of ordinary operator expressions will be
 285          * cast that way.  (We have to do this for indexkeys because they are
 286          * represented without any explicit relabel in pg_index, and for sort
 287          * clauses because the parser is likewise cavalier about putting relabels
 288          * on them.)
 289          */
 290         if (exprType((Node *) expr) != opcintype &&
 291                 !IsPolymorphicType(opcintype))
 292         {
 293                 /* Strip any existing RelabelType, and add a new one if needed */
 294                 while (expr && IsA(expr, RelabelType))
 295                         expr = (Expr *) ((RelabelType *) expr)->arg;
 296                 if (exprType((Node *) expr) != opcintype)
 297                         expr = (Expr *) makeRelabelType(expr,
 298                                                                                         opcintype,
 299                                                                                         -1,
 300                                                                                         COERCE_DONTCARE);
 301         }
 302
 303         /* Now find or create a matching EquivalenceClass */
 304         eclass = get_eclass_for_sort_expr(root, expr, opcintype, opfamilies,
 305                                                                           sortref);
 306
 307         /* And finally we can find or create a PathKey node */
 308         if (canonicalize)
 309                 return make_canonical_pathkey(root, eclass, opfamily,
 310                                                                           strategy, nulls_first);
 311         else
 312                 return makePathKey(eclass, opfamily, strategy, nulls_first);
 313 }
 314
 315
 316 /****************************************************************************
 317  *              PATHKEY COMPARISONS
 318  ****************************************************************************/
 319
 320 /*
 321  * compare_pathkeys
 322  *        Compare two pathkeys to see if they are equivalent, and if not whether
 323  *        one is "better" than the other.
 324  *
 325  *        This function may only be applied to canonicalized pathkey lists.
 326  *        In the canonical representation, pathkeys can be checked for equality
 327  *        by simple pointer comparison.
 328  */
 329 PathKeysComparison
 330 compare_pathkeys(List *keys1, List *keys2)
 331 {
 332         ListCell   *key1,
 333                            *key2;
 334
 335         forboth(key1, keys1, key2, keys2)
 336         {
 337                 PathKey    *pathkey1 = (PathKey *) lfirst(key1);
 338                 PathKey    *pathkey2 = (PathKey *) lfirst(key2);
 339
 340                 /*
 341                  * XXX would like to check that we've been given canonicalized input,
 342                  * but PlannerInfo not accessible here...
 343                  */
 344 #ifdef NOT_USED
 345                 Assert(list_member_ptr(root->canon_pathkeys, pathkey1));
 346                 Assert(list_member_ptr(root->canon_pathkeys, pathkey2));
 347 #endif
 348
 349                 if (pathkey1 != pathkey2)
 350                         return PATHKEYS_DIFFERENT;      /* no need to keep looking */
 351         }
 352
 353         /*
 354          * If we reached the end of only one list, the other is longer and
 355          * therefore not a subset.
 356          */
 357         if (key1 == NULL && key2 == NULL)
 358                 return PATHKEYS_EQUAL;
 359         if (key1 != NULL)
 360                 return PATHKEYS_BETTER1;        /* key1 is longer */
 361         return PATHKEYS_BETTER2;        /* key2 is longer */
 362 }
 363
 364 /*
 365  * pathkeys_contained_in
 366  *        Common special case of compare_pathkeys: we just want to know
 367  *        if keys2 are at least as well sorted as keys1.
 368  */
 369 bool
 370 pathkeys_contained_in(List *keys1, List *keys2)
 371 {
 372         switch (compare_pathkeys(keys1, keys2))
 373         {
 374                 case PATHKEYS_EQUAL:
 375                 case PATHKEYS_BETTER2:
 376                         return true;
 377                 default:
 378                         break;
 379         }
 380         return false;
 381 }
 382
 383 /*
 384  * get_cheapest_path_for_pathkeys
 385  *        Find the cheapest path (according to the specified criterion) that
 386  *        satisfies the given pathkeys.  Return NULL if no such path.
 387  *
 388  * 'paths' is a list of possible paths that all generate the same relation
 389  * 'pathkeys' represents a required ordering (already canonicalized!)
 390  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
 391  */
 392 Path *
 393 get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 394                                                            CostSelector cost_criterion)
 395 {
 396         Path       *matched_path = NULL;
 397         ListCell   *l;
 398
 399         foreach(l, paths)
 400         {
 401                 Path       *path = (Path *) lfirst(l);
 402
 403                 /*
 404                  * Since cost comparison is a lot cheaper than pathkey comparison, do
 405                  * that first.  (XXX is that still true?)
 406                  */
 407                 if (matched_path != NULL &&
 408                         compare_path_costs(matched_path, path, cost_criterion) <= 0)
 409                         continue;
 410
 411                 if (pathkeys_contained_in(pathkeys, path->pathkeys))
 412                         matched_path = path;
 413         }
 414         return matched_path;
 415 }
 416
 417 /*
 418  * get_cheapest_fractional_path_for_pathkeys
 419  *        Find the cheapest path (for retrieving a specified fraction of all
 420  *        the tuples) that satisfies the given pathkeys.
 421  *        Return NULL if no such path.
 422  *
 423  * See compare_fractional_path_costs() for the interpretation of the fraction
 424  * parameter.
 425  *
 426  * 'paths' is a list of possible paths that all generate the same relation
 427  * 'pathkeys' represents a required ordering (already canonicalized!)
 428  * 'fraction' is the fraction of the total tuples expected to be retrieved
 429  */
 430 Path *
 431 get_cheapest_fractional_path_for_pathkeys(List *paths,
 432                                                                                   List *pathkeys,
 433                                                                                   double fraction)
 434 {
 435         Path       *matched_path = NULL;
 436         ListCell   *l;
 437
 438         foreach(l, paths)
 439         {
 440                 Path       *path = (Path *) lfirst(l);
 441
 442                 /*
 443                  * Since cost comparison is a lot cheaper than pathkey comparison, do
 444                  * that first.
 445                  */
 446                 if (matched_path != NULL &&
 447                         compare_fractional_path_costs(matched_path, path, fraction) <= 0)
 448                         continue;
 449
 450                 if (pathkeys_contained_in(pathkeys, path->pathkeys))
 451                         matched_path = path;
 452         }
 453         return matched_path;
 454 }
 455
 456 /****************************************************************************
 457  *              NEW PATHKEY FORMATION
 458  ****************************************************************************/
 459
 460 /*
 461  * build_index_pathkeys
 462  *        Build a pathkeys list that describes the ordering induced by an index
 463  *        scan using the given index.  (Note that an unordered index doesn't
 464  *        induce any ordering; such an index will have no sortop OIDS in
 465  *        its sortops arrays, and we will return NIL.)
 466  *
 467  * If 'scandir' is BackwardScanDirection, attempt to build pathkeys
 468  * representing a backwards scan of the index.  Return NIL if can't do it.
 469  *
 470  * The result is canonical, meaning that redundant pathkeys are removed;
 471  * it may therefore have fewer entries than there are index columns.
 472  *
 473  * We generate the full pathkeys list whether or not all are useful for the
 474  * current query.  Caller should do truncate_useless_pathkeys().
 475  */
 476 List *
 477 build_index_pathkeys(PlannerInfo *root,
 478                                          IndexOptInfo *index,
 479                                          ScanDirection scandir)
 480 {
 481         List       *retval = NIL;
 482         ListCell   *indexprs_item = list_head(index->indexprs);
 483         int                     i;
 484
 485         for (i = 0; i < index->ncolumns; i++)
 486         {
 487                 Oid                     sortop;
 488                 bool            nulls_first;
 489                 int                     ikey;
 490                 Expr       *indexkey;
 491                 PathKey    *cpathkey;
 492
 493                 if (ScanDirectionIsBackward(scandir))
 494                 {
 495                         sortop = index->revsortop[i];
 496                         nulls_first = !index->nulls_first[i];
 497                 }
 498                 else
 499                 {
 500                         sortop = index->fwdsortop[i];
 501                         nulls_first = index->nulls_first[i];
 502                 }
 503
 504                 if (!OidIsValid(sortop))
 505                         break;                          /* no more orderable columns */
 506
 507                 ikey = index->indexkeys[i];
 508                 if (ikey != 0)
 509                 {
 510                         /* simple index column */
 511                         indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey);
 512                 }
 513                 else
 514                 {
 515                         /* expression --- assume we need not copy it */
 516                         if (indexprs_item == NULL)
 517                                 elog(ERROR, "wrong number of index expressions");
 518                         indexkey = (Expr *) lfirst(indexprs_item);
 519                         indexprs_item = lnext(indexprs_item);
 520                 }
 521
 522                 /* OK, make a canonical pathkey for this sort key */
 523                 cpathkey = make_pathkey_from_sortinfo(root,
 524                                                                                           indexkey,
 525                                                                                           sortop,
 526                                                                                           nulls_first,
 527                                                                                           0,
 528                                                                                           true);
 529
 530                 /* Add to list unless redundant */
 531                 if (!pathkey_is_redundant(cpathkey, retval))
 532                         retval = lappend(retval, cpathkey);
 533         }
 534
 535         return retval;
 536 }
 537
 538 /*
 539  * Find or make a Var node for the specified attribute of the rel.
 540  *
 541  * We first look for the var in the rel's target list, because that's
 542  * easy and fast.  But the var might not be there (this should normally
 543  * only happen for vars that are used in WHERE restriction clauses,
 544  * but not in join clauses or in the SELECT target list).  In that case,
 545  * gin up a Var node the hard way.
 546  */
 547 static Var *
 548 find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, AttrNumber varattno)
 549 {
 550         ListCell   *temp;
 551         Index           relid;
 552         Oid                     reloid,
 553                                 vartypeid;
 554         int32           type_mod;
 555
 556         foreach(temp, rel->reltargetlist)
 557         {
 558                 Var                *var = (Var *) lfirst(temp);
 559
 560                 if (IsA(var, Var) &&
 561                         var->varattno == varattno)
 562                         return var;
 563         }
 564
 565         relid = rel->relid;
 566         reloid = getrelid(relid, root->parse->rtable);
 567         get_atttypetypmod(reloid, varattno, &vartypeid, &type_mod);
 568
 569         return makeVar(relid, varattno, vartypeid, type_mod, 0);
 570 }
 571
 572 /*
 573  * convert_subquery_pathkeys
 574  *        Build a pathkeys list that describes the ordering of a subquery's
 575  *        result, in the terms of the outer query.      This is essentially a
 576  *        task of conversion.
 577  *
 578  * 'rel': outer query's RelOptInfo for the subquery relation.
 579  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
 580  *
 581  * It is not necessary for caller to do truncate_useless_pathkeys(),
 582  * because we select keys in a way that takes usefulness of the keys into
 583  * account.
 584  */
 585 List *
 586 convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
 587                                                   List *subquery_pathkeys)
 588 {
 589         List       *retval = NIL;
 590         int                     retvallen = 0;
 591         int                     outer_query_keys = list_length(root->query_pathkeys);
 592         List       *sub_tlist = rel->subplan->targetlist;
 593         ListCell   *i;
 594
 595         foreach(i, subquery_pathkeys)
 596         {
 597                 PathKey    *sub_pathkey = (PathKey *) lfirst(i);
 598                 EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
 599                 PathKey    *best_pathkey = NULL;
 600
 601                 if (sub_eclass->ec_has_volatile)
 602                 {
 603                         /*
 604                          * If the sub_pathkey's EquivalenceClass is volatile, then it must
 605                          * have come from an ORDER BY clause, and we have to match it to
 606                          * that same targetlist entry.
 607                          */
 608                         TargetEntry *tle;
 609
 610                         if (sub_eclass->ec_sortref == 0)        /* can't happen */
 611                                 elog(ERROR, "volatile EquivalenceClass has no sortref");
 612                         tle = get_sortgroupref_tle(sub_eclass->ec_sortref, sub_tlist);
 613                         Assert(tle);
 614                         /* resjunk items aren't visible to outer query */
 615                         if (!tle->resjunk)
 616                         {
 617                                 /* We can represent this sub_pathkey */
 618                                 EquivalenceMember *sub_member;
 619                                 Expr       *outer_expr;
 620                                 EquivalenceClass *outer_ec;
 621
 622                                 Assert(list_length(sub_eclass->ec_members) == 1);
 623                                 sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
 624                                 outer_expr = (Expr *)
 625                                         makeVar(rel->relid,
 626                                                         tle->resno,
 627                                                         exprType((Node *) tle->expr),
 628                                                         exprTypmod((Node *) tle->expr),
 629                                                         0);
 630                                 outer_ec =
 631                                         get_eclass_for_sort_expr(root,
 632                                                                                          outer_expr,
 633                                                                                          sub_member->em_datatype,
 634                                                                                          sub_eclass->ec_opfamilies,
 635                                                                                          0);
 636                                 best_pathkey =
 637                                         make_canonical_pathkey(root,
 638                                                                                    outer_ec,
 639                                                                                    sub_pathkey->pk_opfamily,
 640                                                                                    sub_pathkey->pk_strategy,
 641                                                                                    sub_pathkey->pk_nulls_first);
 642                         }
 643                 }
 644                 else
 645                 {
 646                         /*
 647                          * Otherwise, the sub_pathkey's EquivalenceClass could contain
 648                          * multiple elements (representing knowledge that multiple items
 649                          * are effectively equal).      Each element might match none, one, or
 650                          * more of the output columns that are visible to the outer query.
 651                          * This means we may have multiple possible representations of the
 652                          * sub_pathkey in the context of the outer query.  Ideally we
 653                          * would generate them all and put them all into an EC of the
 654                          * outer query, thereby propagating equality knowledge up to the
 655                          * outer query.  Right now we cannot do so, because the outer
 656                          * query's EquivalenceClasses are already frozen when this is
 657                          * called. Instead we prefer the one that has the highest "score"
 658                          * (number of EC peers, plus one if it matches the outer
 659                          * query_pathkeys). This is the most likely to be useful in the
 660                          * outer query.
 661                          */
 662                         int                     best_score = -1;
 663                         ListCell   *j;
 664
 665                         foreach(j, sub_eclass->ec_members)
 666                         {
 667                                 EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
 668                                 Expr       *sub_expr = sub_member->em_expr;
 669                                 Expr       *sub_stripped;
 670                                 ListCell   *k;
 671
 672                                 /*
 673                                  * We handle two cases: the sub_pathkey key can be either an
 674                                  * exact match for a targetlist entry, or it could match after
 675                                  * stripping RelabelType nodes.  (We need that case since
 676                                  * make_pathkey_from_sortinfo could add or remove
 677                                  * RelabelType.)
 678                                  */
 679                                 sub_stripped = sub_expr;
 680                                 while (sub_stripped && IsA(sub_stripped, RelabelType))
 681                                         sub_stripped = ((RelabelType *) sub_stripped)->arg;
 682
 683                                 foreach(k, sub_tlist)
 684                                 {
 685                                         TargetEntry *tle = (TargetEntry *) lfirst(k);
 686                                         Expr       *outer_expr;
 687                                         EquivalenceClass *outer_ec;
 688                                         PathKey    *outer_pk;
 689                                         int                     score;
 690
 691                                         /* resjunk items aren't visible to outer query */
 692                                         if (tle->resjunk)
 693                                                 continue;
 694
 695                                         if (equal(tle->expr, sub_expr))
 696                                         {
 697                                                 /* Exact match */
 698                                                 outer_expr = (Expr *)
 699                                                         makeVar(rel->relid,
 700                                                                         tle->resno,
 701                                                                         exprType((Node *) tle->expr),
 702                                                                         exprTypmod((Node *) tle->expr),
 703                                                                         0);
 704                                         }
 705                                         else
 706                                         {
 707                                                 Expr       *tle_stripped;
 708
 709                                                 tle_stripped = tle->expr;
 710                                                 while (tle_stripped && IsA(tle_stripped, RelabelType))
 711                                                         tle_stripped = ((RelabelType *) tle_stripped)->arg;
 712
 713                                                 if (equal(tle_stripped, sub_stripped))
 714                                                 {
 715                                                         /* Match after discarding RelabelType */
 716                                                         outer_expr = (Expr *)
 717                                                                 makeVar(rel->relid,
 718                                                                                 tle->resno,
 719                                                                                 exprType((Node *) tle->expr),
 720                                                                                 exprTypmod((Node *) tle->expr),
 721                                                                                 0);
 722                                                         if (exprType((Node *) outer_expr) !=
 723                                                                 exprType((Node *) sub_expr))
 724                                                                 outer_expr = (Expr *)
 725                                                                         makeRelabelType(outer_expr,
 726                                                                                                  exprType((Node *) sub_expr),
 727                                                                                                         -1,
 728                                                                                                         COERCE_DONTCARE);
 729                                                 }
 730                                                 else
 731                                                         continue;
 732                                         }
 733
 734                                         /* Found a representation for this sub_pathkey */
 735                                         outer_ec = get_eclass_for_sort_expr(root,
 736                                                                                                                 outer_expr,
 737                                                                                                          sub_member->em_datatype,
 738                                                                                                    sub_eclass->ec_opfamilies,
 739                                                                                                                 0);
 740                                         outer_pk = make_canonical_pathkey(root,
 741                                                                                                           outer_ec,
 742                                                                                                         sub_pathkey->pk_opfamily,
 743                                                                                                         sub_pathkey->pk_strategy,
 744                                                                                                 sub_pathkey->pk_nulls_first);
 745                                         /* score = # of equivalence peers */
 746                                         score = list_length(outer_ec->ec_members) - 1;
 747                                         /* +1 if it matches the proper query_pathkeys item */
 748                                         if (retvallen < outer_query_keys &&
 749                                                 list_nth(root->query_pathkeys, retvallen) == outer_pk)
 750                                                 score++;
 751                                         if (score > best_score)
 752                                         {
 753                                                 best_pathkey = outer_pk;
 754                                                 best_score = score;
 755                                         }
 756                                 }
 757                         }
 758                 }
 759
 760                 /*
 761                  * If we couldn't find a representation of this sub_pathkey, we're
 762                  * done (we can't use the ones to its right, either).
 763                  */
 764                 if (!best_pathkey)
 765                         break;
 766
 767                 /*
 768                  * Eliminate redundant ordering info; could happen if outer query
 769                  * equivalences subquery keys...
 770                  */
 771                 if (!pathkey_is_redundant(best_pathkey, retval))
 772                 {
 773                         retval = lappend(retval, best_pathkey);
 774                         retvallen++;
 775                 }
 776         }
 777
 778         return retval;
 779 }
 780
 781 /*
 782  * build_join_pathkeys
 783  *        Build the path keys for a join relation constructed by mergejoin or
 784  *        nestloop join.  This is normally the same as the outer path's keys.
 785  *
 786  *        EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
 787  *        having the outer path's path keys, because null lefthand rows may be
 788  *        inserted at random points.  It must be treated as unsorted.
 789  *
 790  *        We truncate away any pathkeys that are uninteresting for higher joins.
 791  *
 792  * 'joinrel' is the join relation that paths are being formed for
 793  * 'jointype' is the join type (inner, left, full, etc)
 794  * 'outer_pathkeys' is the list of the current outer path's path keys
 795  *
 796  * Returns the list of new path keys.
 797  */
 798 List *
 799 build_join_pathkeys(PlannerInfo *root,
 800                                         RelOptInfo *joinrel,
 801                                         JoinType jointype,
 802                                         List *outer_pathkeys)
 803 {
 804         if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
 805                 return NIL;
 806
 807         /*
 808          * This used to be quite a complex bit of code, but now that all pathkey
 809          * sublists start out life canonicalized, we don't have to do a darn thing
 810          * here!
 811          *
 812          * We do, however, need to truncate the pathkeys list, since it may
 813          * contain pathkeys that were useful for forming this joinrel but are
 814          * uninteresting to higher levels.
 815          */
 816         return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
 817 }
 818
 819 /****************************************************************************
 820  *              PATHKEYS AND SORT CLAUSES
 821  ****************************************************************************/
 822
 823 /*
 824  * make_pathkeys_for_sortclauses
 825  *              Generate a pathkeys list that represents the sort order specified
 826  *              by a list of SortGroupClauses
 827  *
 828  * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
 829  * otherwise not.  canonicalize should always be TRUE after EquivalenceClass
 830  * merging has been performed, but FALSE if we haven't done EquivalenceClass
 831  * merging yet.  (We provide this option because grouping_planner() needs to
 832  * be able to represent requested pathkeys before the equivalence classes have
 833  * been created for the query.)
 834  *
 835  * 'sortclauses' is a list of SortGroupClause nodes
 836  * 'tlist' is the targetlist to find the referenced tlist entries in
 837  */
 838 List *
 839 make_pathkeys_for_sortclauses(PlannerInfo *root,
 840                                                           List *sortclauses,
 841                                                           List *tlist,
 842                                                           bool canonicalize)
 843 {
 844         List       *pathkeys = NIL;
 845         ListCell   *l;
 846
 847         foreach(l, sortclauses)
 848         {
 849                 SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 850                 Expr       *sortkey;
 851                 PathKey    *pathkey;
 852
 853                 sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
 854                 Assert(OidIsValid(sortcl->sortop));
 855                 pathkey = make_pathkey_from_sortinfo(root,
 856                                                                                          sortkey,
 857                                                                                          sortcl->sortop,
 858                                                                                          sortcl->nulls_first,
 859                                                                                          sortcl->tleSortGroupRef,
 860                                                                                          canonicalize);
 861
 862                 /* Canonical form eliminates redundant ordering keys */
 863                 if (canonicalize)
 864                 {
 865                         if (!pathkey_is_redundant(pathkey, pathkeys))
 866                                 pathkeys = lappend(pathkeys, pathkey);
 867                 }
 868                 else
 869                         pathkeys = lappend(pathkeys, pathkey);
 870         }
 871         return pathkeys;
 872 }
 873
 874 /****************************************************************************
 875  *              PATHKEYS AND MERGECLAUSES
 876  ****************************************************************************/
 877
 878 /*
 879  * cache_mergeclause_eclasses
 880  *              Make the cached EquivalenceClass links valid in a mergeclause
 881  *              restrictinfo.
 882  *
 883  * RestrictInfo contains fields in which we may cache pointers to
 884  * EquivalenceClasses for the left and right inputs of the mergeclause.
 885  * (If the mergeclause is a true equivalence clause these will be the
 886  * same EquivalenceClass, otherwise not.)
 887  */
 888 void
 889 cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
 890 {
 891         Assert(restrictinfo->mergeopfamilies != NIL);
 892
 893         /* the cached values should be either both set or both not */
 894         if (restrictinfo->left_ec == NULL)
 895         {
 896                 Expr       *clause = restrictinfo->clause;
 897                 Oid                     lefttype,
 898                                         righttype;
 899
 900                 /* Need the declared input types of the operator */
 901                 op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
 902
 903                 /* Find or create a matching EquivalenceClass for each side */
 904                 restrictinfo->left_ec =
 905                         get_eclass_for_sort_expr(root,
 906                                                                          (Expr *) get_leftop(clause),
 907                                                                          lefttype,
 908                                                                          restrictinfo->mergeopfamilies,
 909                                                                          0);
 910                 restrictinfo->right_ec =
 911                         get_eclass_for_sort_expr(root,
 912                                                                          (Expr *) get_rightop(clause),
 913                                                                          righttype,
 914                                                                          restrictinfo->mergeopfamilies,
 915                                                                          0);
 916         }
 917         else
 918                 Assert(restrictinfo->right_ec != NULL);
 919 }
 920
 921 /*
 922  * find_mergeclauses_for_pathkeys
 923  *        This routine attempts to find a set of mergeclauses that can be
 924  *        used with a specified ordering for one of the input relations.
 925  *        If successful, it returns a list of mergeclauses.
 926  *
 927  * 'pathkeys' is a pathkeys list showing the ordering of an input path.
 928  * 'outer_keys' is TRUE if these keys are for the outer input path,
 929  *                      FALSE if for inner.
 930  * 'restrictinfos' is a list of mergejoinable restriction clauses for the
 931  *                      join relation being formed.
 932  *
 933  * The restrictinfos must be marked (via outer_is_left) to show which side
 934  * of each clause is associated with the current outer path.  (See
 935  * select_mergejoin_clauses())
 936  *
 937  * The result is NIL if no merge can be done, else a maximal list of
 938  * usable mergeclauses (represented as a list of their restrictinfo nodes).
 939  */
 940 List *
 941 find_mergeclauses_for_pathkeys(PlannerInfo *root,
 942                                                            List *pathkeys,
 943                                                            bool outer_keys,
 944                                                            List *restrictinfos)
 945 {
 946         List       *mergeclauses = NIL;
 947         ListCell   *i;
 948
 949         /* make sure we have eclasses cached in the clauses */
 950         foreach(i, restrictinfos)
 951         {
 952                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
 953
 954                 cache_mergeclause_eclasses(root, rinfo);
 955         }
 956
 957         foreach(i, pathkeys)
 958         {
 959                 PathKey    *pathkey = (PathKey *) lfirst(i);
 960                 EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
 961                 List       *matched_restrictinfos = NIL;
 962                 ListCell   *j;
 963
 964                 /*----------
 965                  * A mergejoin clause matches a pathkey if it has the same EC.
 966                  * If there are multiple matching clauses, take them all.  In plain
 967                  * inner-join scenarios we expect only one match, because
 968                  * equivalence-class processing will have removed any redundant
 969                  * mergeclauses.  However, in outer-join scenarios there might be
 970                  * multiple matches.  An example is
 971                  *
 972                  *      select * from a full join b
 973                  *              on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
 974                  *
 975                  * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
 976                  * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
 977                  * we *must* do so or we will be unable to form a valid plan.
 978                  *
 979                  * We expect that the given pathkeys list is canonical, which means
 980                  * no two members have the same EC, so it's not possible for this
 981                  * code to enter the same mergeclause into the result list twice.
 982                  *
 983                  * XXX it's possible that multiple matching clauses might have
 984                  * different ECs on the other side, in which case the order we put
 985                  * them into our result makes a difference in the pathkeys required
 986                  * for the other input path.  However this routine hasn't got any info
 987                  * about which order would be best, so for now we disregard that case
 988                  * (which is probably a corner case anyway).
 989                  *----------
 990                  */
 991                 foreach(j, restrictinfos)
 992                 {
 993                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
 994                         EquivalenceClass *clause_ec;
 995
 996                         if (outer_keys)
 997                                 clause_ec = rinfo->outer_is_left ?
 998                                         rinfo->left_ec : rinfo->right_ec;
 999                         else
1000                                 clause_ec = rinfo->outer_is_left ?
1001                                         rinfo->right_ec : rinfo->left_ec;
1002                         if (clause_ec == pathkey_ec)
1003                                 matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1004                 }
1005
1006                 /*
1007                  * If we didn't find a mergeclause, we're done --- any additional
1008                  * sort-key positions in the pathkeys are useless.      (But we can still
1009                  * mergejoin if we found at least one mergeclause.)
1010                  */
1011                 if (matched_restrictinfos == NIL)
1012                         break;
1013
1014                 /*
1015                  * If we did find usable mergeclause(s) for this sort-key position,
1016                  * add them to result list.
1017                  */
1018                 mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1019         }
1020
1021         return mergeclauses;
1022 }
1023
1024 /*
1025  * select_outer_pathkeys_for_merge
1026  *        Builds a pathkey list representing a possible sort ordering
1027  *        that can be used with the given mergeclauses.
1028  *
1029  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1030  *                      that will be used in a merge join.
1031  * 'joinrel' is the join relation we are trying to construct.
1032  *
1033  * The restrictinfos must be marked (via outer_is_left) to show which side
1034  * of each clause is associated with the current outer path.  (See
1035  * select_mergejoin_clauses())
1036  *
1037  * Returns a pathkeys list that can be applied to the outer relation.
1038  *
1039  * Since we assume here that a sort is required, there is no particular use
1040  * in matching any available ordering of the outerrel.  (joinpath.c has an
1041  * entirely separate code path for considering sort-free mergejoins.)  Rather,
1042  * it's interesting to try to match the requested query_pathkeys so that a
1043  * second output sort may be avoided; and failing that, we try to list "more
1044  * popular" keys (those with the most unmatched EquivalenceClass peers)
1045  * earlier, in hopes of making the resulting ordering useful for as many
1046  * higher-level mergejoins as possible.
1047  */
1048 List *
1049 select_outer_pathkeys_for_merge(PlannerInfo *root,
1050                                                                 List *mergeclauses,
1051                                                                 RelOptInfo *joinrel)
1052 {
1053         List       *pathkeys = NIL;
1054         int                     nClauses = list_length(mergeclauses);
1055         EquivalenceClass **ecs;
1056         int                *scores;
1057         int                     necs;
1058         ListCell   *lc;
1059         int                     j;
1060
1061         /* Might have no mergeclauses */
1062         if (nClauses == 0)
1063                 return NIL;
1064
1065         /*
1066          * Make arrays of the ECs used by the mergeclauses (dropping any
1067          * duplicates) and their "popularity" scores.
1068          */
1069         ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1070         scores = (int *) palloc(nClauses * sizeof(int));
1071         necs = 0;
1072
1073         foreach(lc, mergeclauses)
1074         {
1075                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1076                 EquivalenceClass *oeclass;
1077                 int                     score;
1078                 ListCell   *lc2;
1079
1080                 /* get the outer eclass */
1081                 cache_mergeclause_eclasses(root, rinfo);
1082
1083                 if (rinfo->outer_is_left)
1084                         oeclass = rinfo->left_ec;
1085                 else
1086                         oeclass = rinfo->right_ec;
1087
1088                 /* reject duplicates */
1089                 for (j = 0; j < necs; j++)
1090                 {
1091                         if (ecs[j] == oeclass)
1092                                 break;
1093                 }
1094                 if (j < necs)
1095                         continue;
1096
1097                 /* compute score */
1098                 score = 0;
1099                 foreach(lc2, oeclass->ec_members)
1100                 {
1101                         EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
1102
1103                         /* Potential future join partner? */
1104                         if (!em->em_is_const && !em->em_is_child &&
1105                                 !bms_overlap(em->em_relids, joinrel->relids))
1106                                 score++;
1107                 }
1108
1109                 ecs[necs] = oeclass;
1110                 scores[necs] = score;
1111                 necs++;
1112         }
1113
1114         /*
1115          * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1116          * can generate a sort order that's also useful for final output. There is
1117          * no percentage in a partial match, though, so we have to have 'em all.
1118          */
1119         if (root->query_pathkeys)
1120         {
1121                 foreach(lc, root->query_pathkeys)
1122                 {
1123                         PathKey    *query_pathkey = (PathKey *) lfirst(lc);
1124                         EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1125
1126                         for (j = 0; j < necs; j++)
1127                         {
1128                                 if (ecs[j] == query_ec)
1129                                         break;          /* found match */
1130                         }
1131                         if (j >= necs)
1132                                 break;                  /* didn't find match */
1133                 }
1134                 /* if we got to the end of the list, we have them all */
1135                 if (lc == NULL)
1136                 {
1137                         /* copy query_pathkeys as starting point for our output */
1138                         pathkeys = list_copy(root->query_pathkeys);
1139                         /* mark their ECs as already-emitted */
1140                         foreach(lc, root->query_pathkeys)
1141                         {
1142                                 PathKey    *query_pathkey = (PathKey *) lfirst(lc);
1143                                 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1144
1145                                 for (j = 0; j < necs; j++)
1146                                 {
1147                                         if (ecs[j] == query_ec)
1148                                         {
1149                                                 scores[j] = -1;
1150                                                 break;
1151                                         }
1152                                 }
1153                         }
1154                 }
1155         }
1156
1157         /*
1158          * Add remaining ECs to the list in popularity order, using a default sort
1159          * ordering.  (We could use qsort() here, but the list length is usually
1160          * so small it's not worth it.)
1161          */
1162         for (;;)
1163         {
1164                 int                     best_j;
1165                 int                     best_score;
1166                 EquivalenceClass *ec;
1167                 PathKey    *pathkey;
1168
1169                 best_j = 0;
1170                 best_score = scores[0];
1171                 for (j = 1; j < necs; j++)
1172                 {
1173                         if (scores[j] > best_score)
1174                         {
1175                                 best_j = j;
1176                                 best_score = scores[j];
1177                         }
1178                 }
1179                 if (best_score < 0)
1180                         break;                          /* all done */
1181                 ec = ecs[best_j];
1182                 scores[best_j] = -1;
1183                 pathkey = make_canonical_pathkey(root,
1184                                                                                  ec,
1185                                                                                  linitial_oid(ec->ec_opfamilies),
1186                                                                                  BTLessStrategyNumber,
1187                                                                                  false);
1188                 /* can't be redundant because no duplicate ECs */
1189                 Assert(!pathkey_is_redundant(pathkey, pathkeys));
1190                 pathkeys = lappend(pathkeys, pathkey);
1191         }
1192
1193         pfree(ecs);
1194         pfree(scores);
1195
1196         return pathkeys;
1197 }
1198
1199 /*
1200  * make_inner_pathkeys_for_merge
1201  *        Builds a pathkey list representing the explicit sort order that
1202  *        must be applied to an inner path to make it usable with the
1203  *        given mergeclauses.
1204  *
1205  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1206  *                      that will be used in a merge join.
1207  * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1208  *                      side of the join.
1209  *
1210  * The restrictinfos must be marked (via outer_is_left) to show which side
1211  * of each clause is associated with the current outer path.  (See
1212  * select_mergejoin_clauses())
1213  *
1214  * Returns a pathkeys list that can be applied to the inner relation.
1215  *
1216  * Note that it is not this routine's job to decide whether sorting is
1217  * actually needed for a particular input path.  Assume a sort is necessary;
1218  * just make the keys, eh?
1219  */
1220 List *
1221 make_inner_pathkeys_for_merge(PlannerInfo *root,
1222                                                           List *mergeclauses,
1223                                                           List *outer_pathkeys)
1224 {
1225         List       *pathkeys = NIL;
1226         EquivalenceClass *lastoeclass;
1227         PathKey    *opathkey;
1228         ListCell   *lc;
1229         ListCell   *lop;
1230
1231         lastoeclass = NULL;
1232         opathkey = NULL;
1233         lop = list_head(outer_pathkeys);
1234
1235         foreach(lc, mergeclauses)
1236         {
1237                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1238                 EquivalenceClass *oeclass;
1239                 EquivalenceClass *ieclass;
1240                 PathKey    *pathkey;
1241
1242                 cache_mergeclause_eclasses(root, rinfo);
1243
1244                 if (rinfo->outer_is_left)
1245                 {
1246                         oeclass = rinfo->left_ec;
1247                         ieclass = rinfo->right_ec;
1248                 }
1249                 else
1250                 {
1251                         oeclass = rinfo->right_ec;
1252                         ieclass = rinfo->left_ec;
1253                 }
1254
1255                 /* outer eclass should match current or next pathkeys */
1256                 /* we check this carefully for debugging reasons */
1257                 if (oeclass != lastoeclass)
1258                 {
1259                         if (!lop)
1260                                 elog(ERROR, "too few pathkeys for mergeclauses");
1261                         opathkey = (PathKey *) lfirst(lop);
1262                         lop = lnext(lop);
1263                         lastoeclass = opathkey->pk_eclass;
1264                         if (oeclass != lastoeclass)
1265                                 elog(ERROR, "outer pathkeys do not match mergeclause");
1266                 }
1267
1268                 /*
1269                  * Often, we'll have same EC on both sides, in which case the outer
1270                  * pathkey is also canonical for the inner side, and we can skip a
1271                  * useless search.
1272                  */
1273                 if (ieclass == oeclass)
1274                         pathkey = opathkey;
1275                 else
1276                         pathkey = make_canonical_pathkey(root,
1277                                                                                          ieclass,
1278                                                                                          opathkey->pk_opfamily,
1279                                                                                          opathkey->pk_strategy,
1280                                                                                          opathkey->pk_nulls_first);
1281
1282                 /*
1283                  * Don't generate redundant pathkeys (can happen if multiple
1284                  * mergeclauses refer to same EC).
1285                  */
1286                 if (!pathkey_is_redundant(pathkey, pathkeys))
1287                         pathkeys = lappend(pathkeys, pathkey);
1288         }
1289
1290         return pathkeys;
1291 }
1292
1293 /****************************************************************************
1294  *              PATHKEY USEFULNESS CHECKS
1295  *
1296  * We only want to remember as many of the pathkeys of a path as have some
1297  * potential use, either for subsequent mergejoins or for meeting the query's
1298  * requested output ordering.  This ensures that add_path() won't consider
1299  * a path to have a usefully different ordering unless it really is useful.
1300  * These routines check for usefulness of given pathkeys.
1301  ****************************************************************************/
1302
1303 /*
1304  * pathkeys_useful_for_merging
1305  *              Count the number of pathkeys that may be useful for mergejoins
1306  *              above the given relation.
1307  *
1308  * We consider a pathkey potentially useful if it corresponds to the merge
1309  * ordering of either side of any joinclause for the rel.  This might be
1310  * overoptimistic, since joinclauses that require different other relations
1311  * might never be usable at the same time, but trying to be exact is likely
1312  * to be more trouble than it's worth.
1313  *
1314  * To avoid doubling the number of mergejoin paths considered, we would like
1315  * to consider only one of the two scan directions (ASC or DESC) as useful
1316  * for merging for any given target column.  The choice is arbitrary unless
1317  * one of the directions happens to match an ORDER BY key, in which case
1318  * that direction should be preferred, in hopes of avoiding a final sort step.
1319  * right_merge_direction() implements this heuristic.
1320  */
1321 int
1322 pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
1323 {
1324         int                     useful = 0;
1325         ListCell   *i;
1326
1327         foreach(i, pathkeys)
1328         {
1329                 PathKey    *pathkey = (PathKey *) lfirst(i);
1330                 bool            matched = false;
1331                 ListCell   *j;
1332
1333                 /* If "wrong" direction, not useful for merging */
1334                 if (!right_merge_direction(root, pathkey))
1335                         break;
1336
1337                 /*
1338                  * First look into the EquivalenceClass of the pathkey, to see if
1339                  * there are any members not yet joined to the rel.  If so, it's
1340                  * surely possible to generate a mergejoin clause using them.
1341                  */
1342                 if (rel->has_eclass_joins &&
1343                         eclass_useful_for_merging(pathkey->pk_eclass, rel))
1344                         matched = true;
1345                 else
1346                 {
1347                         /*
1348                          * Otherwise search the rel's joininfo list, which contains
1349                          * non-EquivalenceClass-derivable join clauses that might
1350                          * nonetheless be mergejoinable.
1351                          */
1352                         foreach(j, rel->joininfo)
1353                         {
1354                                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
1355
1356                                 if (restrictinfo->mergeopfamilies == NIL)
1357                                         continue;
1358                                 cache_mergeclause_eclasses(root, restrictinfo);
1359
1360                                 if (pathkey->pk_eclass == restrictinfo->left_ec ||
1361                                         pathkey->pk_eclass == restrictinfo->right_ec)
1362                                 {
1363                                         matched = true;
1364                                         break;
1365                                 }
1366                         }
1367                 }
1368
1369                 /*
1370                  * If we didn't find a mergeclause, we're done --- any additional
1371                  * sort-key positions in the pathkeys are useless.      (But we can still
1372                  * mergejoin if we found at least one mergeclause.)
1373                  */
1374                 if (matched)
1375                         useful++;
1376                 else
1377                         break;
1378         }
1379
1380         return useful;
1381 }
1382
1383 /*
1384  * right_merge_direction
1385  *              Check whether the pathkey embodies the preferred sort direction
1386  *              for merging its target column.
1387  */
1388 static bool
1389 right_merge_direction(PlannerInfo *root, PathKey *pathkey)
1390 {
1391         ListCell   *l;
1392
1393         foreach(l, root->query_pathkeys)
1394         {
1395                 PathKey    *query_pathkey = (PathKey *) lfirst(l);
1396
1397                 if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
1398                         pathkey->pk_opfamily == query_pathkey->pk_opfamily)
1399                 {
1400                         /*
1401                          * Found a matching query sort column.  Prefer this pathkey's
1402                          * direction iff it matches.  Note that we ignore pk_nulls_first,
1403                          * which means that a sort might be needed anyway ... but we still
1404                          * want to prefer only one of the two possible directions, and we
1405                          * might as well use this one.
1406                          */
1407                         return (pathkey->pk_strategy == query_pathkey->pk_strategy);
1408                 }
1409         }
1410
1411         /* If no matching ORDER BY request, prefer the ASC direction */
1412         return (pathkey->pk_strategy == BTLessStrategyNumber);
1413 }
1414
1415 /*
1416  * pathkeys_useful_for_ordering
1417  *              Count the number of pathkeys that are useful for meeting the
1418  *              query's requested output ordering.
1419  *
1420  * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
1421  * no good to order by just the first key(s) of the requested ordering.
1422  * So the result is always either 0 or list_length(root->query_pathkeys).
1423  */
1424 int
1425 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
1426 {
1427         if (root->query_pathkeys == NIL)
1428                 return 0;                               /* no special ordering requested */
1429
1430         if (pathkeys == NIL)
1431                 return 0;                               /* unordered path */
1432
1433         if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
1434         {
1435                 /* It's useful ... or at least the first N keys are */
1436                 return list_length(root->query_pathkeys);
1437         }
1438
1439         return 0;                                       /* path ordering not useful */
1440 }
1441
1442 /*
1443  * truncate_useless_pathkeys
1444  *              Shorten the given pathkey list to just the useful pathkeys.
1445  */
1446 List *
1447 truncate_useless_pathkeys(PlannerInfo *root,
1448                                                   RelOptInfo *rel,
1449                                                   List *pathkeys)
1450 {
1451         int                     nuseful;
1452         int                     nuseful2;
1453
1454         nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
1455         nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
1456         if (nuseful2 > nuseful)
1457                 nuseful = nuseful2;
1458
1459         /*
1460          * Note: not safe to modify input list destructively, but we can avoid
1461          * copying the list if we're not actually going to change it
1462          */
1463         if (nuseful == 0)
1464                 return NIL;
1465         else if (nuseful == list_length(pathkeys))
1466                 return pathkeys;
1467         else
1468                 return list_truncate(list_copy(pathkeys), nuseful);
1469 }
1470
1471 /*
1472  * has_useful_pathkeys
1473  *              Detect whether the specified rel could have any pathkeys that are
1474  *              useful according to truncate_useless_pathkeys().
1475  *
1476  * This is a cheap test that lets us skip building pathkeys at all in very
1477  * simple queries.      It's OK to err in the direction of returning "true" when
1478  * there really aren't any usable pathkeys, but erring in the other direction
1479  * is bad --- so keep this in sync with the routines above!
1480  *
1481  * We could make the test more complex, for example checking to see if any of
1482  * the joinclauses are really mergejoinable, but that likely wouldn't win
1483  * often enough to repay the extra cycles.      Queries with neither a join nor
1484  * a sort are reasonably common, though, so this much work seems worthwhile.
1485  */
1486 bool
1487 has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
1488 {
1489         if (rel->joininfo != NIL || rel->has_eclass_joins)
1490                 return true;                    /* might be able to use pathkeys for merging */
1491         if (root->query_pathkeys != NIL)
1492                 return true;                    /* might be able to use them for ordering */
1493         return false;                           /* definitely useless */
1494 }