src/backend/optimizer/prep/prepqual.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * prepqual.c
   4  *        Routines for preprocessing qualification expressions
   5  *
   6  *
   7  * The parser regards AND and OR as purely binary operators, so a qual like
   8  *              (A = 1) OR (A = 2) OR (A = 3) ...
   9  * will produce a nested parsetree
  10  *              (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
  11  * In reality, the optimizer and executor regard AND and OR as N-argument
  12  * operators, so this tree can be flattened to
  13  *              (OR (A = 1) (A = 2) (A = 3) ...)
  14  *
  15  * Formerly, this module was responsible for doing the initial flattening,
  16  * but now we leave it to eval_const_expressions to do that since it has to
  17  * make a complete pass over the expression tree anyway.  Instead, we just
  18  * have to ensure that our manipulations preserve AND/OR flatness.
  19  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
  20  * tree after local transformations that might introduce nested AND/ORs.
  21  *
  22  *
  23  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  24  * Portions Copyright (c) 1994, Regents of the University of California
  25  *
  26  *
  27  * IDENTIFICATION
  28  *        $PostgreSQL$
  29  *
  30  *-------------------------------------------------------------------------
  31  */
  32
  33 #include "postgres.h"
  34
  35 #include "optimizer/clauses.h"
  36 #include "optimizer/prep.h"
  37 #include "utils/lsyscache.h"
  38
  39
  40 static List *pull_ands(List *andlist);
  41 static List *pull_ors(List *orlist);
  42 static Expr *find_nots(Expr *qual);
  43 static Expr *push_nots(Expr *qual);
  44 static Expr *find_duplicate_ors(Expr *qual);
  45 static Expr *process_duplicate_ors(List *orlist);
  46
  47
  48 /*
  49  * canonicalize_qual
  50  *        Convert a qualification expression to the most useful form.
  51  *
  52  * The name of this routine is a holdover from a time when it would try to
  53  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
  54  * Eventually, we recognized that that had more theoretical purity than
  55  * actual usefulness, and so now the transformation doesn't involve any
  56  * notion of reaching a canonical form.
  57  *
  58  * NOTE: we assume the input has already been through eval_const_expressions
  59  * and therefore possesses AND/OR flatness.  Formerly this function included
  60  * its own flattening logic, but that requires a useless extra pass over the
  61  * tree.
  62  *
  63  * Returns the modified qualification.
  64  */
  65 Expr *
  66 canonicalize_qual(Expr *qual)
  67 {
  68         Expr       *newqual;
  69
  70         /* Quick exit for empty qual */
  71         if (qual == NULL)
  72                 return NULL;
  73
  74         /*
  75          * Push down NOTs.      We do this only in the top-level boolean expression,
  76          * without examining arguments of operators/functions. The main reason for
  77          * doing this is to expose as much top-level AND/OR structure as we can,
  78          * so there's no point in descending further.
  79          */
  80         newqual = find_nots(qual);
  81
  82         /*
  83          * Pull up redundant subclauses in OR-of-AND trees.  Again, we do this
  84          * only within the top-level AND/OR structure.
  85          */
  86         newqual = find_duplicate_ors(newqual);
  87
  88         return newqual;
  89 }
  90
  91
  92 /*
  93  * pull_ands
  94  *        Recursively flatten nested AND clauses into a single and-clause list.
  95  *
  96  * Input is the arglist of an AND clause.
  97  * Returns the rebuilt arglist (note original list structure is not touched).
  98  */
  99 static List *
 100 pull_ands(List *andlist)
 101 {
 102         List       *out_list = NIL;
 103         ListCell   *arg;
 104
 105         foreach(arg, andlist)
 106         {
 107                 Node       *subexpr = (Node *) lfirst(arg);
 108
 109                 /*
 110                  * Note: we can destructively concat the subexpression's arglist
 111                  * because we know the recursive invocation of pull_ands will have
 112                  * built a new arglist not shared with any other expr. Otherwise we'd
 113                  * need a list_copy here.
 114                  */
 115                 if (and_clause(subexpr))
 116                         out_list = list_concat(out_list,
 117                                                                    pull_ands(((BoolExpr *) subexpr)->args));
 118                 else
 119                         out_list = lappend(out_list, subexpr);
 120         }
 121         return out_list;
 122 }
 123
 124 /*
 125  * pull_ors
 126  *        Recursively flatten nested OR clauses into a single or-clause list.
 127  *
 128  * Input is the arglist of an OR clause.
 129  * Returns the rebuilt arglist (note original list structure is not touched).
 130  */
 131 static List *
 132 pull_ors(List *orlist)
 133 {
 134         List       *out_list = NIL;
 135         ListCell   *arg;
 136
 137         foreach(arg, orlist)
 138         {
 139                 Node       *subexpr = (Node *) lfirst(arg);
 140
 141                 /*
 142                  * Note: we can destructively concat the subexpression's arglist
 143                  * because we know the recursive invocation of pull_ors will have
 144                  * built a new arglist not shared with any other expr. Otherwise we'd
 145                  * need a list_copy here.
 146                  */
 147                 if (or_clause(subexpr))
 148                         out_list = list_concat(out_list,
 149                                                                    pull_ors(((BoolExpr *) subexpr)->args));
 150                 else
 151                         out_list = lappend(out_list, subexpr);
 152         }
 153         return out_list;
 154 }
 155
 156
 157 /*
 158  * find_nots
 159  *        Traverse the qualification, looking for NOTs to take care of.
 160  *        For NOT clauses, apply push_nots() to try to push down the NOT.
 161  *        For AND and OR clause types, simply recurse.  Otherwise stop
 162  *        recursing (we do not worry about structure below the top AND/OR tree).
 163  *
 164  * Returns the modified qualification.  AND/OR flatness is preserved.
 165  */
 166 static Expr *
 167 find_nots(Expr *qual)
 168 {
 169         if (and_clause((Node *) qual))
 170         {
 171                 List       *t_list = NIL;
 172                 ListCell   *temp;
 173
 174                 foreach(temp, ((BoolExpr *) qual)->args)
 175                         t_list = lappend(t_list, find_nots(lfirst(temp)));
 176                 return make_andclause(pull_ands(t_list));
 177         }
 178         else if (or_clause((Node *) qual))
 179         {
 180                 List       *t_list = NIL;
 181                 ListCell   *temp;
 182
 183                 foreach(temp, ((BoolExpr *) qual)->args)
 184                         t_list = lappend(t_list, find_nots(lfirst(temp)));
 185                 return make_orclause(pull_ors(t_list));
 186         }
 187         else if (not_clause((Node *) qual))
 188                 return push_nots(get_notclausearg(qual));
 189         else
 190                 return qual;
 191 }
 192
 193 /*
 194  * push_nots
 195  *        Push down a NOT as far as possible.
 196  *
 197  * Input is an expression to be negated (e.g., the argument of a NOT clause).
 198  * Returns a new qual equivalent to the negation of the given qual.
 199  */
 200 static Expr *
 201 push_nots(Expr *qual)
 202 {
 203         if (is_opclause(qual))
 204         {
 205                 /*
 206                  * Negate an operator clause if possible: (NOT (< A B)) => (>= A B)
 207                  * Otherwise, retain the clause as it is (the NOT can't be pushed down
 208                  * any farther).
 209                  */
 210                 OpExpr     *opexpr = (OpExpr *) qual;
 211                 Oid                     negator = get_negator(opexpr->opno);
 212
 213                 if (negator)
 214                 {
 215                         OpExpr     *newopexpr = makeNode(OpExpr);
 216
 217                         newopexpr->opno = negator;
 218                         newopexpr->opfuncid = InvalidOid;
 219                         newopexpr->opresulttype = opexpr->opresulttype;
 220                         newopexpr->opretset = opexpr->opretset;
 221                         newopexpr->args = opexpr->args;
 222                         newopexpr->location = opexpr->location;
 223                         return (Expr *) newopexpr;
 224                 }
 225                 else
 226                         return make_notclause(qual);
 227         }
 228         else if (qual && IsA(qual, ScalarArrayOpExpr))
 229         {
 230                 /*
 231                  * Negate a ScalarArrayOpExpr if there is a negator for its operator;
 232                  * for example x = ANY (list) becomes x <> ALL (list). Otherwise,
 233                  * retain the clause as it is (the NOT can't be pushed down any
 234                  * farther).
 235                  */
 236                 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) qual;
 237                 Oid                     negator = get_negator(saopexpr->opno);
 238
 239                 if (negator)
 240                 {
 241                         ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
 242
 243                         newopexpr->opno = negator;
 244                         newopexpr->opfuncid = InvalidOid;
 245                         newopexpr->useOr = !saopexpr->useOr;
 246                         newopexpr->args = saopexpr->args;
 247                         newopexpr->location = saopexpr->location;
 248                         return (Expr *) newopexpr;
 249                 }
 250                 else
 251                         return make_notclause(qual);
 252         }
 253         else if (and_clause((Node *) qual))
 254         {
 255                 /*--------------------
 256                  * Apply DeMorgan's Laws:
 257                  *              (NOT (AND A B)) => (OR (NOT A) (NOT B))
 258                  *              (NOT (OR A B))  => (AND (NOT A) (NOT B))
 259                  * i.e., swap AND for OR and negate all the subclauses.
 260                  *--------------------
 261                  */
 262                 List       *t_list = NIL;
 263                 ListCell   *temp;
 264
 265                 foreach(temp, ((BoolExpr *) qual)->args)
 266                         t_list = lappend(t_list, push_nots(lfirst(temp)));
 267                 return make_orclause(pull_ors(t_list));
 268         }
 269         else if (or_clause((Node *) qual))
 270         {
 271                 List       *t_list = NIL;
 272                 ListCell   *temp;
 273
 274                 foreach(temp, ((BoolExpr *) qual)->args)
 275                         t_list = lappend(t_list, push_nots(lfirst(temp)));
 276                 return make_andclause(pull_ands(t_list));
 277         }
 278         else if (not_clause((Node *) qual))
 279         {
 280                 /*
 281                  * Another NOT cancels this NOT, so eliminate the NOT and stop
 282                  * negating this branch.  But search the subexpression for more NOTs
 283                  * to simplify.
 284                  */
 285                 return find_nots(get_notclausearg(qual));
 286         }
 287         else
 288         {
 289                 /*
 290                  * We don't know how to negate anything else, place a NOT at this
 291                  * level.  No point in recursing deeper, either.
 292                  */
 293                 return make_notclause(qual);
 294         }
 295 }
 296
 297
 298 /*--------------------
 299  * The following code attempts to apply the inverse OR distributive law:
 300  *              ((A AND B) OR (A AND C))  =>  (A AND (B OR C))
 301  * That is, locate OR clauses in which every subclause contains an
 302  * identical term, and pull out the duplicated terms.
 303  *
 304  * This may seem like a fairly useless activity, but it turns out to be
 305  * applicable to many machine-generated queries, and there are also queries
 306  * in some of the TPC benchmarks that need it.  This was in fact almost the
 307  * sole useful side-effect of the old prepqual code that tried to force
 308  * the query into canonical AND-of-ORs form: the canonical equivalent of
 309  *              ((A AND B) OR (A AND C))
 310  * is
 311  *              ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
 312  * which the code was able to simplify to
 313  *              (A AND (A OR C) AND (B OR A) AND (B OR C))
 314  * thus successfully extracting the common condition A --- but at the cost
 315  * of cluttering the qual with many redundant clauses.
 316  *--------------------
 317  */
 318
 319 /*
 320  * find_duplicate_ors
 321  *        Given a qualification tree with the NOTs pushed down, search for
 322  *        OR clauses to which the inverse OR distributive law might apply.
 323  *        Only the top-level AND/OR structure is searched.
 324  *
 325  * Returns the modified qualification.  AND/OR flatness is preserved.
 326  */
 327 static Expr *
 328 find_duplicate_ors(Expr *qual)
 329 {
 330         if (or_clause((Node *) qual))
 331         {
 332                 List       *orlist = NIL;
 333                 ListCell   *temp;
 334
 335                 /* Recurse */
 336                 foreach(temp, ((BoolExpr *) qual)->args)
 337                         orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
 338
 339                 /*
 340                  * Don't need pull_ors() since this routine will never introduce an OR
 341                  * where there wasn't one before.
 342                  */
 343                 return process_duplicate_ors(orlist);
 344         }
 345         else if (and_clause((Node *) qual))
 346         {
 347                 List       *andlist = NIL;
 348                 ListCell   *temp;
 349
 350                 /* Recurse */
 351                 foreach(temp, ((BoolExpr *) qual)->args)
 352                         andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
 353                 /* Flatten any ANDs introduced just below here */
 354                 andlist = pull_ands(andlist);
 355                 /* The AND list can't get shorter, so result is always an AND */
 356                 return make_andclause(andlist);
 357         }
 358         else
 359                 return qual;
 360 }
 361
 362 /*
 363  * process_duplicate_ors
 364  *        Given a list of exprs which are ORed together, try to apply
 365  *        the inverse OR distributive law.
 366  *
 367  * Returns the resulting expression (could be an AND clause, an OR
 368  * clause, or maybe even a single subexpression).
 369  */
 370 static Expr *
 371 process_duplicate_ors(List *orlist)
 372 {
 373         List       *reference = NIL;
 374         int                     num_subclauses = 0;
 375         List       *winners;
 376         List       *neworlist;
 377         ListCell   *temp;
 378
 379         if (orlist == NIL)
 380                 return NULL;                    /* probably can't happen */
 381         if (list_length(orlist) == 1)           /* single-expression OR (can this
 382                                                                                  * happen?) */
 383                 return linitial(orlist);
 384
 385         /*
 386          * Choose the shortest AND clause as the reference list --- obviously, any
 387          * subclause not in this clause isn't in all the clauses. If we find a
 388          * clause that's not an AND, we can treat it as a one-element AND clause,
 389          * which necessarily wins as shortest.
 390          */
 391         foreach(temp, orlist)
 392         {
 393                 Expr       *clause = (Expr *) lfirst(temp);
 394
 395                 if (and_clause((Node *) clause))
 396                 {
 397                         List       *subclauses = ((BoolExpr *) clause)->args;
 398                         int                     nclauses = list_length(subclauses);
 399
 400                         if (reference == NIL || nclauses < num_subclauses)
 401                         {
 402                                 reference = subclauses;
 403                                 num_subclauses = nclauses;
 404                         }
 405                 }
 406                 else
 407                 {
 408                         reference = list_make1(clause);
 409                         break;
 410                 }
 411         }
 412
 413         /*
 414          * Just in case, eliminate any duplicates in the reference list.
 415          */
 416         reference = list_union(NIL, reference);
 417
 418         /*
 419          * Check each element of the reference list to see if it's in all the OR
 420          * clauses.  Build a new list of winning clauses.
 421          */
 422         winners = NIL;
 423         foreach(temp, reference)
 424         {
 425                 Expr       *refclause = (Expr *) lfirst(temp);
 426                 bool            win = true;
 427                 ListCell   *temp2;
 428
 429                 foreach(temp2, orlist)
 430                 {
 431                         Expr       *clause = (Expr *) lfirst(temp2);
 432
 433                         if (and_clause((Node *) clause))
 434                         {
 435                                 if (!list_member(((BoolExpr *) clause)->args, refclause))
 436                                 {
 437                                         win = false;
 438                                         break;
 439                                 }
 440                         }
 441                         else
 442                         {
 443                                 if (!equal(refclause, clause))
 444                                 {
 445                                         win = false;
 446                                         break;
 447                                 }
 448                         }
 449                 }
 450
 451                 if (win)
 452                         winners = lappend(winners, refclause);
 453         }
 454
 455         /*
 456          * If no winners, we can't transform the OR
 457          */
 458         if (winners == NIL)
 459                 return make_orclause(orlist);
 460
 461         /*
 462          * Generate new OR list consisting of the remaining sub-clauses.
 463          *
 464          * If any clause degenerates to empty, then we have a situation like (A
 465          * AND B) OR (A), which can be reduced to just A --- that is, the
 466          * additional conditions in other arms of the OR are irrelevant.
 467          *
 468          * Note that because we use list_difference, any multiple occurrences of a
 469          * winning clause in an AND sub-clause will be removed automatically.
 470          */
 471         neworlist = NIL;
 472         foreach(temp, orlist)
 473         {
 474                 Expr       *clause = (Expr *) lfirst(temp);
 475
 476                 if (and_clause((Node *) clause))
 477                 {
 478                         List       *subclauses = ((BoolExpr *) clause)->args;
 479
 480                         subclauses = list_difference(subclauses, winners);
 481                         if (subclauses != NIL)
 482                         {
 483                                 if (list_length(subclauses) == 1)
 484                                         neworlist = lappend(neworlist, linitial(subclauses));
 485                                 else
 486                                         neworlist = lappend(neworlist, make_andclause(subclauses));
 487                         }
 488                         else
 489                         {
 490                                 neworlist = NIL;        /* degenerate case, see above */
 491                                 break;
 492                         }
 493                 }
 494                 else
 495                 {
 496                         if (!list_member(winners, clause))
 497                                 neworlist = lappend(neworlist, clause);
 498                         else
 499                         {
 500                                 neworlist = NIL;        /* degenerate case, see above */
 501                                 break;
 502                         }
 503                 }
 504         }
 505
 506         /*
 507          * Append reduced OR to the winners list, if it's not degenerate, handling
 508          * the special case of one element correctly (can that really happen?).
 509          * Also be careful to maintain AND/OR flatness in case we pulled up a
 510          * sub-sub-OR-clause.
 511          */
 512         if (neworlist != NIL)
 513         {
 514                 if (list_length(neworlist) == 1)
 515                         winners = lappend(winners, linitial(neworlist));
 516                 else
 517                         winners = lappend(winners, make_orclause(pull_ors(neworlist)));
 518         }
 519
 520         /*
 521          * And return the constructed AND clause, again being wary of a single
 522          * element and AND/OR flatness.
 523          */
 524         if (list_length(winners) == 1)
 525                 return (Expr *) linitial(winners);
 526         else
 527                 return make_andclause(pull_ands(winners));
 528 }