1 /*-------------------------------------------------------------------------
4 * Routines for preprocessing qualification expressions
7 * The parser regards AND and OR as purely binary operators, so a qual like
8 * (A = 1) OR (A = 2) OR (A = 3) ...
9 * will produce a nested parsetree
10 * (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
11 * In reality, the optimizer and executor regard AND and OR as N-argument
12 * operators, so this tree can be flattened to
13 * (OR (A = 1) (A = 2) (A = 3) ...)
15 * Formerly, this module was responsible for doing the initial flattening,
16 * but now we leave it to eval_const_expressions to do that since it has to
17 * make a complete pass over the expression tree anyway. Instead, we just
18 * have to ensure that our manipulations preserve AND/OR flatness.
19 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
20 * tree after local transformations that might introduce nested AND/ORs.
23 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
30 *-------------------------------------------------------------------------
35 #include "optimizer/clauses.h"
36 #include "optimizer/prep.h"
37 #include "utils/lsyscache.h"
40 static List
*pull_ands(List
*andlist
);
41 static List
*pull_ors(List
*orlist
);
42 static Expr
*find_nots(Expr
*qual
);
43 static Expr
*push_nots(Expr
*qual
);
44 static Expr
*find_duplicate_ors(Expr
*qual
);
45 static Expr
*process_duplicate_ors(List
*orlist
);
50 * Convert a qualification expression to the most useful form.
52 * The name of this routine is a holdover from a time when it would try to
53 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
54 * Eventually, we recognized that that had more theoretical purity than
55 * actual usefulness, and so now the transformation doesn't involve any
56 * notion of reaching a canonical form.
58 * NOTE: we assume the input has already been through eval_const_expressions
59 * and therefore possesses AND/OR flatness. Formerly this function included
60 * its own flattening logic, but that requires a useless extra pass over the
63 * Returns the modified qualification.
66 canonicalize_qual(Expr
*qual
)
70 /* Quick exit for empty qual */
75 * Push down NOTs. We do this only in the top-level boolean expression,
76 * without examining arguments of operators/functions. The main reason for
77 * doing this is to expose as much top-level AND/OR structure as we can,
78 * so there's no point in descending further.
80 newqual
= find_nots(qual
);
83 * Pull up redundant subclauses in OR-of-AND trees. Again, we do this
84 * only within the top-level AND/OR structure.
86 newqual
= find_duplicate_ors(newqual
);
94 * Recursively flatten nested AND clauses into a single and-clause list.
96 * Input is the arglist of an AND clause.
97 * Returns the rebuilt arglist (note original list structure is not touched).
100 pull_ands(List
*andlist
)
102 List
*out_list
= NIL
;
105 foreach(arg
, andlist
)
107 Node
*subexpr
= (Node
*) lfirst(arg
);
110 * Note: we can destructively concat the subexpression's arglist
111 * because we know the recursive invocation of pull_ands will have
112 * built a new arglist not shared with any other expr. Otherwise we'd
113 * need a list_copy here.
115 if (and_clause(subexpr
))
116 out_list
= list_concat(out_list
,
117 pull_ands(((BoolExpr
*) subexpr
)->args
));
119 out_list
= lappend(out_list
, subexpr
);
126 * Recursively flatten nested OR clauses into a single or-clause list.
128 * Input is the arglist of an OR clause.
129 * Returns the rebuilt arglist (note original list structure is not touched).
132 pull_ors(List
*orlist
)
134 List
*out_list
= NIL
;
139 Node
*subexpr
= (Node
*) lfirst(arg
);
142 * Note: we can destructively concat the subexpression's arglist
143 * because we know the recursive invocation of pull_ors will have
144 * built a new arglist not shared with any other expr. Otherwise we'd
145 * need a list_copy here.
147 if (or_clause(subexpr
))
148 out_list
= list_concat(out_list
,
149 pull_ors(((BoolExpr
*) subexpr
)->args
));
151 out_list
= lappend(out_list
, subexpr
);
159 * Traverse the qualification, looking for NOTs to take care of.
160 * For NOT clauses, apply push_nots() to try to push down the NOT.
161 * For AND and OR clause types, simply recurse. Otherwise stop
162 * recursing (we do not worry about structure below the top AND/OR tree).
164 * Returns the modified qualification. AND/OR flatness is preserved.
167 find_nots(Expr
*qual
)
169 if (and_clause((Node
*) qual
))
174 foreach(temp
, ((BoolExpr
*) qual
)->args
)
175 t_list
= lappend(t_list
, find_nots(lfirst(temp
)));
176 return make_andclause(pull_ands(t_list
));
178 else if (or_clause((Node
*) qual
))
183 foreach(temp
, ((BoolExpr
*) qual
)->args
)
184 t_list
= lappend(t_list
, find_nots(lfirst(temp
)));
185 return make_orclause(pull_ors(t_list
));
187 else if (not_clause((Node
*) qual
))
188 return push_nots(get_notclausearg(qual
));
195 * Push down a NOT as far as possible.
197 * Input is an expression to be negated (e.g., the argument of a NOT clause).
198 * Returns a new qual equivalent to the negation of the given qual.
201 push_nots(Expr
*qual
)
203 if (is_opclause(qual
))
206 * Negate an operator clause if possible: (NOT (< A B)) => (>= A B)
207 * Otherwise, retain the clause as it is (the NOT can't be pushed down
210 OpExpr
*opexpr
= (OpExpr
*) qual
;
211 Oid negator
= get_negator(opexpr
->opno
);
215 OpExpr
*newopexpr
= makeNode(OpExpr
);
217 newopexpr
->opno
= negator
;
218 newopexpr
->opfuncid
= InvalidOid
;
219 newopexpr
->opresulttype
= opexpr
->opresulttype
;
220 newopexpr
->opretset
= opexpr
->opretset
;
221 newopexpr
->args
= opexpr
->args
;
222 newopexpr
->location
= opexpr
->location
;
223 return (Expr
*) newopexpr
;
226 return make_notclause(qual
);
228 else if (qual
&& IsA(qual
, ScalarArrayOpExpr
))
231 * Negate a ScalarArrayOpExpr if there is a negator for its operator;
232 * for example x = ANY (list) becomes x <> ALL (list). Otherwise,
233 * retain the clause as it is (the NOT can't be pushed down any
236 ScalarArrayOpExpr
*saopexpr
= (ScalarArrayOpExpr
*) qual
;
237 Oid negator
= get_negator(saopexpr
->opno
);
241 ScalarArrayOpExpr
*newopexpr
= makeNode(ScalarArrayOpExpr
);
243 newopexpr
->opno
= negator
;
244 newopexpr
->opfuncid
= InvalidOid
;
245 newopexpr
->useOr
= !saopexpr
->useOr
;
246 newopexpr
->args
= saopexpr
->args
;
247 newopexpr
->location
= saopexpr
->location
;
248 return (Expr
*) newopexpr
;
251 return make_notclause(qual
);
253 else if (and_clause((Node
*) qual
))
255 /*--------------------
256 * Apply DeMorgan's Laws:
257 * (NOT (AND A B)) => (OR (NOT A) (NOT B))
258 * (NOT (OR A B)) => (AND (NOT A) (NOT B))
259 * i.e., swap AND for OR and negate all the subclauses.
260 *--------------------
265 foreach(temp
, ((BoolExpr
*) qual
)->args
)
266 t_list
= lappend(t_list
, push_nots(lfirst(temp
)));
267 return make_orclause(pull_ors(t_list
));
269 else if (or_clause((Node
*) qual
))
274 foreach(temp
, ((BoolExpr
*) qual
)->args
)
275 t_list
= lappend(t_list
, push_nots(lfirst(temp
)));
276 return make_andclause(pull_ands(t_list
));
278 else if (not_clause((Node
*) qual
))
281 * Another NOT cancels this NOT, so eliminate the NOT and stop
282 * negating this branch. But search the subexpression for more NOTs
285 return find_nots(get_notclausearg(qual
));
290 * We don't know how to negate anything else, place a NOT at this
291 * level. No point in recursing deeper, either.
293 return make_notclause(qual
);
298 /*--------------------
299 * The following code attempts to apply the inverse OR distributive law:
300 * ((A AND B) OR (A AND C)) => (A AND (B OR C))
301 * That is, locate OR clauses in which every subclause contains an
302 * identical term, and pull out the duplicated terms.
304 * This may seem like a fairly useless activity, but it turns out to be
305 * applicable to many machine-generated queries, and there are also queries
306 * in some of the TPC benchmarks that need it. This was in fact almost the
307 * sole useful side-effect of the old prepqual code that tried to force
308 * the query into canonical AND-of-ORs form: the canonical equivalent of
309 * ((A AND B) OR (A AND C))
311 * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
312 * which the code was able to simplify to
313 * (A AND (A OR C) AND (B OR A) AND (B OR C))
314 * thus successfully extracting the common condition A --- but at the cost
315 * of cluttering the qual with many redundant clauses.
316 *--------------------
321 * Given a qualification tree with the NOTs pushed down, search for
322 * OR clauses to which the inverse OR distributive law might apply.
323 * Only the top-level AND/OR structure is searched.
325 * Returns the modified qualification. AND/OR flatness is preserved.
328 find_duplicate_ors(Expr
*qual
)
330 if (or_clause((Node
*) qual
))
336 foreach(temp
, ((BoolExpr
*) qual
)->args
)
337 orlist
= lappend(orlist
, find_duplicate_ors(lfirst(temp
)));
340 * Don't need pull_ors() since this routine will never introduce an OR
341 * where there wasn't one before.
343 return process_duplicate_ors(orlist
);
345 else if (and_clause((Node
*) qual
))
351 foreach(temp
, ((BoolExpr
*) qual
)->args
)
352 andlist
= lappend(andlist
, find_duplicate_ors(lfirst(temp
)));
353 /* Flatten any ANDs introduced just below here */
354 andlist
= pull_ands(andlist
);
355 /* The AND list can't get shorter, so result is always an AND */
356 return make_andclause(andlist
);
363 * process_duplicate_ors
364 * Given a list of exprs which are ORed together, try to apply
365 * the inverse OR distributive law.
367 * Returns the resulting expression (could be an AND clause, an OR
368 * clause, or maybe even a single subexpression).
371 process_duplicate_ors(List
*orlist
)
373 List
*reference
= NIL
;
374 int num_subclauses
= 0;
380 return NULL
; /* probably can't happen */
381 if (list_length(orlist
) == 1) /* single-expression OR (can this
383 return linitial(orlist
);
386 * Choose the shortest AND clause as the reference list --- obviously, any
387 * subclause not in this clause isn't in all the clauses. If we find a
388 * clause that's not an AND, we can treat it as a one-element AND clause,
389 * which necessarily wins as shortest.
391 foreach(temp
, orlist
)
393 Expr
*clause
= (Expr
*) lfirst(temp
);
395 if (and_clause((Node
*) clause
))
397 List
*subclauses
= ((BoolExpr
*) clause
)->args
;
398 int nclauses
= list_length(subclauses
);
400 if (reference
== NIL
|| nclauses
< num_subclauses
)
402 reference
= subclauses
;
403 num_subclauses
= nclauses
;
408 reference
= list_make1(clause
);
414 * Just in case, eliminate any duplicates in the reference list.
416 reference
= list_union(NIL
, reference
);
419 * Check each element of the reference list to see if it's in all the OR
420 * clauses. Build a new list of winning clauses.
423 foreach(temp
, reference
)
425 Expr
*refclause
= (Expr
*) lfirst(temp
);
429 foreach(temp2
, orlist
)
431 Expr
*clause
= (Expr
*) lfirst(temp2
);
433 if (and_clause((Node
*) clause
))
435 if (!list_member(((BoolExpr
*) clause
)->args
, refclause
))
443 if (!equal(refclause
, clause
))
452 winners
= lappend(winners
, refclause
);
456 * If no winners, we can't transform the OR
459 return make_orclause(orlist
);
462 * Generate new OR list consisting of the remaining sub-clauses.
464 * If any clause degenerates to empty, then we have a situation like (A
465 * AND B) OR (A), which can be reduced to just A --- that is, the
466 * additional conditions in other arms of the OR are irrelevant.
468 * Note that because we use list_difference, any multiple occurrences of a
469 * winning clause in an AND sub-clause will be removed automatically.
472 foreach(temp
, orlist
)
474 Expr
*clause
= (Expr
*) lfirst(temp
);
476 if (and_clause((Node
*) clause
))
478 List
*subclauses
= ((BoolExpr
*) clause
)->args
;
480 subclauses
= list_difference(subclauses
, winners
);
481 if (subclauses
!= NIL
)
483 if (list_length(subclauses
) == 1)
484 neworlist
= lappend(neworlist
, linitial(subclauses
));
486 neworlist
= lappend(neworlist
, make_andclause(subclauses
));
490 neworlist
= NIL
; /* degenerate case, see above */
496 if (!list_member(winners
, clause
))
497 neworlist
= lappend(neworlist
, clause
);
500 neworlist
= NIL
; /* degenerate case, see above */
507 * Append reduced OR to the winners list, if it's not degenerate, handling
508 * the special case of one element correctly (can that really happen?).
509 * Also be careful to maintain AND/OR flatness in case we pulled up a
512 if (neworlist
!= NIL
)
514 if (list_length(neworlist
) == 1)
515 winners
= lappend(winners
, linitial(neworlist
));
517 winners
= lappend(winners
, make_orclause(pull_ors(neworlist
)));
521 * And return the constructed AND clause, again being wary of a single
522 * element and AND/OR flatness.
524 if (list_length(winners
) == 1)
525 return (Expr
*) linitial(winners
);
527 return make_andclause(pull_ands(winners
));