Fix oversight in previous error-reporting patch; mustn't pfree path string
[PostgreSQL.git] / src / backend / optimizer / prep / prepqual.c
blob3e069005bf38abfeb285ab8b9277574d4c42f810
1 /*-------------------------------------------------------------------------
3 * prepqual.c
4 * Routines for preprocessing qualification expressions
7 * The parser regards AND and OR as purely binary operators, so a qual like
8 * (A = 1) OR (A = 2) OR (A = 3) ...
9 * will produce a nested parsetree
10 * (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
11 * In reality, the optimizer and executor regard AND and OR as N-argument
12 * operators, so this tree can be flattened to
13 * (OR (A = 1) (A = 2) (A = 3) ...)
15 * Formerly, this module was responsible for doing the initial flattening,
16 * but now we leave it to eval_const_expressions to do that since it has to
17 * make a complete pass over the expression tree anyway. Instead, we just
18 * have to ensure that our manipulations preserve AND/OR flatness.
19 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
20 * tree after local transformations that might introduce nested AND/ORs.
23 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
27 * IDENTIFICATION
28 * $PostgreSQL$
30 *-------------------------------------------------------------------------
33 #include "postgres.h"
35 #include "optimizer/clauses.h"
36 #include "optimizer/prep.h"
37 #include "utils/lsyscache.h"
40 static List *pull_ands(List *andlist);
41 static List *pull_ors(List *orlist);
42 static Expr *find_nots(Expr *qual);
43 static Expr *push_nots(Expr *qual);
44 static Expr *find_duplicate_ors(Expr *qual);
45 static Expr *process_duplicate_ors(List *orlist);
49 * canonicalize_qual
50 * Convert a qualification expression to the most useful form.
52 * The name of this routine is a holdover from a time when it would try to
53 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
54 * Eventually, we recognized that that had more theoretical purity than
55 * actual usefulness, and so now the transformation doesn't involve any
56 * notion of reaching a canonical form.
58 * NOTE: we assume the input has already been through eval_const_expressions
59 * and therefore possesses AND/OR flatness. Formerly this function included
60 * its own flattening logic, but that requires a useless extra pass over the
61 * tree.
63 * Returns the modified qualification.
65 Expr *
66 canonicalize_qual(Expr *qual)
68 Expr *newqual;
70 /* Quick exit for empty qual */
71 if (qual == NULL)
72 return NULL;
75 * Push down NOTs. We do this only in the top-level boolean expression,
76 * without examining arguments of operators/functions. The main reason for
77 * doing this is to expose as much top-level AND/OR structure as we can,
78 * so there's no point in descending further.
80 newqual = find_nots(qual);
83 * Pull up redundant subclauses in OR-of-AND trees. Again, we do this
84 * only within the top-level AND/OR structure.
86 newqual = find_duplicate_ors(newqual);
88 return newqual;
93 * pull_ands
94 * Recursively flatten nested AND clauses into a single and-clause list.
96 * Input is the arglist of an AND clause.
97 * Returns the rebuilt arglist (note original list structure is not touched).
99 static List *
100 pull_ands(List *andlist)
102 List *out_list = NIL;
103 ListCell *arg;
105 foreach(arg, andlist)
107 Node *subexpr = (Node *) lfirst(arg);
110 * Note: we can destructively concat the subexpression's arglist
111 * because we know the recursive invocation of pull_ands will have
112 * built a new arglist not shared with any other expr. Otherwise we'd
113 * need a list_copy here.
115 if (and_clause(subexpr))
116 out_list = list_concat(out_list,
117 pull_ands(((BoolExpr *) subexpr)->args));
118 else
119 out_list = lappend(out_list, subexpr);
121 return out_list;
125 * pull_ors
126 * Recursively flatten nested OR clauses into a single or-clause list.
128 * Input is the arglist of an OR clause.
129 * Returns the rebuilt arglist (note original list structure is not touched).
131 static List *
132 pull_ors(List *orlist)
134 List *out_list = NIL;
135 ListCell *arg;
137 foreach(arg, orlist)
139 Node *subexpr = (Node *) lfirst(arg);
142 * Note: we can destructively concat the subexpression's arglist
143 * because we know the recursive invocation of pull_ors will have
144 * built a new arglist not shared with any other expr. Otherwise we'd
145 * need a list_copy here.
147 if (or_clause(subexpr))
148 out_list = list_concat(out_list,
149 pull_ors(((BoolExpr *) subexpr)->args));
150 else
151 out_list = lappend(out_list, subexpr);
153 return out_list;
158 * find_nots
159 * Traverse the qualification, looking for NOTs to take care of.
160 * For NOT clauses, apply push_nots() to try to push down the NOT.
161 * For AND and OR clause types, simply recurse. Otherwise stop
162 * recursing (we do not worry about structure below the top AND/OR tree).
164 * Returns the modified qualification. AND/OR flatness is preserved.
166 static Expr *
167 find_nots(Expr *qual)
169 if (and_clause((Node *) qual))
171 List *t_list = NIL;
172 ListCell *temp;
174 foreach(temp, ((BoolExpr *) qual)->args)
175 t_list = lappend(t_list, find_nots(lfirst(temp)));
176 return make_andclause(pull_ands(t_list));
178 else if (or_clause((Node *) qual))
180 List *t_list = NIL;
181 ListCell *temp;
183 foreach(temp, ((BoolExpr *) qual)->args)
184 t_list = lappend(t_list, find_nots(lfirst(temp)));
185 return make_orclause(pull_ors(t_list));
187 else if (not_clause((Node *) qual))
188 return push_nots(get_notclausearg(qual));
189 else
190 return qual;
194 * push_nots
195 * Push down a NOT as far as possible.
197 * Input is an expression to be negated (e.g., the argument of a NOT clause).
198 * Returns a new qual equivalent to the negation of the given qual.
200 static Expr *
201 push_nots(Expr *qual)
203 if (is_opclause(qual))
206 * Negate an operator clause if possible: (NOT (< A B)) => (>= A B)
207 * Otherwise, retain the clause as it is (the NOT can't be pushed down
208 * any farther).
210 OpExpr *opexpr = (OpExpr *) qual;
211 Oid negator = get_negator(opexpr->opno);
213 if (negator)
215 OpExpr *newopexpr = makeNode(OpExpr);
217 newopexpr->opno = negator;
218 newopexpr->opfuncid = InvalidOid;
219 newopexpr->opresulttype = opexpr->opresulttype;
220 newopexpr->opretset = opexpr->opretset;
221 newopexpr->args = opexpr->args;
222 newopexpr->location = opexpr->location;
223 return (Expr *) newopexpr;
225 else
226 return make_notclause(qual);
228 else if (qual && IsA(qual, ScalarArrayOpExpr))
231 * Negate a ScalarArrayOpExpr if there is a negator for its operator;
232 * for example x = ANY (list) becomes x <> ALL (list). Otherwise,
233 * retain the clause as it is (the NOT can't be pushed down any
234 * farther).
236 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) qual;
237 Oid negator = get_negator(saopexpr->opno);
239 if (negator)
241 ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
243 newopexpr->opno = negator;
244 newopexpr->opfuncid = InvalidOid;
245 newopexpr->useOr = !saopexpr->useOr;
246 newopexpr->args = saopexpr->args;
247 newopexpr->location = saopexpr->location;
248 return (Expr *) newopexpr;
250 else
251 return make_notclause(qual);
253 else if (and_clause((Node *) qual))
255 /*--------------------
256 * Apply DeMorgan's Laws:
257 * (NOT (AND A B)) => (OR (NOT A) (NOT B))
258 * (NOT (OR A B)) => (AND (NOT A) (NOT B))
259 * i.e., swap AND for OR and negate all the subclauses.
260 *--------------------
262 List *t_list = NIL;
263 ListCell *temp;
265 foreach(temp, ((BoolExpr *) qual)->args)
266 t_list = lappend(t_list, push_nots(lfirst(temp)));
267 return make_orclause(pull_ors(t_list));
269 else if (or_clause((Node *) qual))
271 List *t_list = NIL;
272 ListCell *temp;
274 foreach(temp, ((BoolExpr *) qual)->args)
275 t_list = lappend(t_list, push_nots(lfirst(temp)));
276 return make_andclause(pull_ands(t_list));
278 else if (not_clause((Node *) qual))
281 * Another NOT cancels this NOT, so eliminate the NOT and stop
282 * negating this branch. But search the subexpression for more NOTs
283 * to simplify.
285 return find_nots(get_notclausearg(qual));
287 else
290 * We don't know how to negate anything else, place a NOT at this
291 * level. No point in recursing deeper, either.
293 return make_notclause(qual);
298 /*--------------------
299 * The following code attempts to apply the inverse OR distributive law:
300 * ((A AND B) OR (A AND C)) => (A AND (B OR C))
301 * That is, locate OR clauses in which every subclause contains an
302 * identical term, and pull out the duplicated terms.
304 * This may seem like a fairly useless activity, but it turns out to be
305 * applicable to many machine-generated queries, and there are also queries
306 * in some of the TPC benchmarks that need it. This was in fact almost the
307 * sole useful side-effect of the old prepqual code that tried to force
308 * the query into canonical AND-of-ORs form: the canonical equivalent of
309 * ((A AND B) OR (A AND C))
310 * is
311 * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
312 * which the code was able to simplify to
313 * (A AND (A OR C) AND (B OR A) AND (B OR C))
314 * thus successfully extracting the common condition A --- but at the cost
315 * of cluttering the qual with many redundant clauses.
316 *--------------------
320 * find_duplicate_ors
321 * Given a qualification tree with the NOTs pushed down, search for
322 * OR clauses to which the inverse OR distributive law might apply.
323 * Only the top-level AND/OR structure is searched.
325 * Returns the modified qualification. AND/OR flatness is preserved.
327 static Expr *
328 find_duplicate_ors(Expr *qual)
330 if (or_clause((Node *) qual))
332 List *orlist = NIL;
333 ListCell *temp;
335 /* Recurse */
336 foreach(temp, ((BoolExpr *) qual)->args)
337 orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
340 * Don't need pull_ors() since this routine will never introduce an OR
341 * where there wasn't one before.
343 return process_duplicate_ors(orlist);
345 else if (and_clause((Node *) qual))
347 List *andlist = NIL;
348 ListCell *temp;
350 /* Recurse */
351 foreach(temp, ((BoolExpr *) qual)->args)
352 andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
353 /* Flatten any ANDs introduced just below here */
354 andlist = pull_ands(andlist);
355 /* The AND list can't get shorter, so result is always an AND */
356 return make_andclause(andlist);
358 else
359 return qual;
363 * process_duplicate_ors
364 * Given a list of exprs which are ORed together, try to apply
365 * the inverse OR distributive law.
367 * Returns the resulting expression (could be an AND clause, an OR
368 * clause, or maybe even a single subexpression).
370 static Expr *
371 process_duplicate_ors(List *orlist)
373 List *reference = NIL;
374 int num_subclauses = 0;
375 List *winners;
376 List *neworlist;
377 ListCell *temp;
379 if (orlist == NIL)
380 return NULL; /* probably can't happen */
381 if (list_length(orlist) == 1) /* single-expression OR (can this
382 * happen?) */
383 return linitial(orlist);
386 * Choose the shortest AND clause as the reference list --- obviously, any
387 * subclause not in this clause isn't in all the clauses. If we find a
388 * clause that's not an AND, we can treat it as a one-element AND clause,
389 * which necessarily wins as shortest.
391 foreach(temp, orlist)
393 Expr *clause = (Expr *) lfirst(temp);
395 if (and_clause((Node *) clause))
397 List *subclauses = ((BoolExpr *) clause)->args;
398 int nclauses = list_length(subclauses);
400 if (reference == NIL || nclauses < num_subclauses)
402 reference = subclauses;
403 num_subclauses = nclauses;
406 else
408 reference = list_make1(clause);
409 break;
414 * Just in case, eliminate any duplicates in the reference list.
416 reference = list_union(NIL, reference);
419 * Check each element of the reference list to see if it's in all the OR
420 * clauses. Build a new list of winning clauses.
422 winners = NIL;
423 foreach(temp, reference)
425 Expr *refclause = (Expr *) lfirst(temp);
426 bool win = true;
427 ListCell *temp2;
429 foreach(temp2, orlist)
431 Expr *clause = (Expr *) lfirst(temp2);
433 if (and_clause((Node *) clause))
435 if (!list_member(((BoolExpr *) clause)->args, refclause))
437 win = false;
438 break;
441 else
443 if (!equal(refclause, clause))
445 win = false;
446 break;
451 if (win)
452 winners = lappend(winners, refclause);
456 * If no winners, we can't transform the OR
458 if (winners == NIL)
459 return make_orclause(orlist);
462 * Generate new OR list consisting of the remaining sub-clauses.
464 * If any clause degenerates to empty, then we have a situation like (A
465 * AND B) OR (A), which can be reduced to just A --- that is, the
466 * additional conditions in other arms of the OR are irrelevant.
468 * Note that because we use list_difference, any multiple occurrences of a
469 * winning clause in an AND sub-clause will be removed automatically.
471 neworlist = NIL;
472 foreach(temp, orlist)
474 Expr *clause = (Expr *) lfirst(temp);
476 if (and_clause((Node *) clause))
478 List *subclauses = ((BoolExpr *) clause)->args;
480 subclauses = list_difference(subclauses, winners);
481 if (subclauses != NIL)
483 if (list_length(subclauses) == 1)
484 neworlist = lappend(neworlist, linitial(subclauses));
485 else
486 neworlist = lappend(neworlist, make_andclause(subclauses));
488 else
490 neworlist = NIL; /* degenerate case, see above */
491 break;
494 else
496 if (!list_member(winners, clause))
497 neworlist = lappend(neworlist, clause);
498 else
500 neworlist = NIL; /* degenerate case, see above */
501 break;
507 * Append reduced OR to the winners list, if it's not degenerate, handling
508 * the special case of one element correctly (can that really happen?).
509 * Also be careful to maintain AND/OR flatness in case we pulled up a
510 * sub-sub-OR-clause.
512 if (neworlist != NIL)
514 if (list_length(neworlist) == 1)
515 winners = lappend(winners, linitial(neworlist));
516 else
517 winners = lappend(winners, make_orclause(pull_ors(neworlist)));
521 * And return the constructed AND clause, again being wary of a single
522 * element and AND/OR flatness.
524 if (list_length(winners) == 1)
525 return (Expr *) linitial(winners);
526 else
527 return make_andclause(pull_ands(winners));