Code review for patch to show definition of index columns in \d on index.
[PostgreSQL.git] / src / backend / optimizer / path / orindxpath.c
blob03b571e3206af21caf356f3655ecc836e0ac140a
1 /*-------------------------------------------------------------------------
3 * orindxpath.c
4 * Routines to find index paths that match a set of OR clauses
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
13 *-------------------------------------------------------------------------
16 #include "postgres.h"
18 #include "optimizer/cost.h"
19 #include "optimizer/paths.h"
20 #include "optimizer/restrictinfo.h"
23 /*----------
24 * create_or_index_quals
25 * Examine join OR-of-AND quals to see if any useful restriction OR
26 * clauses can be extracted. If so, add them to the query.
28 * Although a join clause must reference other relations overall,
29 * an OR of ANDs clause might contain sub-clauses that reference just this
30 * relation and can be used to build a restriction clause.
31 * For example consider
32 * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
33 * We can transform this into
34 * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
35 * AND (a.x = 42 OR a.x = 44)
36 * AND (b.y = 43 OR b.z = 45);
37 * which opens the potential to build OR indexscans on a and b. In essence
38 * this is a partial transformation to CNF (AND of ORs format). It is not
39 * complete, however, because we do not unravel the original OR --- doing so
40 * would usually bloat the qualification expression to little gain.
42 * The added quals are partially redundant with the original OR, and therefore
43 * will cause the size of the joinrel to be underestimated when it is finally
44 * formed. (This would be true of a full transformation to CNF as well; the
45 * fault is not really in the transformation, but in clauselist_selectivity's
46 * inability to recognize redundant conditions.) To minimize the collateral
47 * damage, we want to minimize the number of quals added. Therefore we do
48 * not add every possible extracted restriction condition to the query.
49 * Instead, we search for the single restriction condition that generates
50 * the most useful (cheapest) OR indexscan, and add only that condition.
51 * This is a pretty ad-hoc heuristic, but quite useful.
53 * We can then compensate for the redundancy of the added qual by poking
54 * the recorded selectivity of the original OR clause, thereby ensuring
55 * the added qual doesn't change the estimated size of the joinrel when
56 * it is finally formed. This is a MAJOR HACK: it depends on the fact
57 * that clause selectivities are cached and on the fact that the same
58 * RestrictInfo node will appear in every joininfo list that might be used
59 * when the joinrel is formed. And it probably isn't right in cases where
60 * the size estimation is nonlinear (i.e., outer and IN joins). But it
61 * beats not doing anything.
63 * NOTE: one might think this messiness could be worked around by generating
64 * the indexscan path with a small path->rows value, and not touching the
65 * rel's baserestrictinfo or rel->rows. However, that does not work.
66 * The optimizer's fundamental design assumes that every general-purpose
67 * Path for a given relation generates the same number of rows. Without
68 * this assumption we'd not be able to optimize solely on the cost of Paths,
69 * but would have to take number of output rows into account as well.
70 * (Perhaps someday that'd be worth doing, but it's a pretty big change...)
72 * 'rel' is the relation entry for which quals are to be created
74 * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
75 * If no quals available, returns FALSE and doesn't change rel.
77 * Note: check_partial_indexes() must have been run previously.
78 *----------
80 bool
81 create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
83 BitmapOrPath *bestpath = NULL;
84 RestrictInfo *bestrinfo = NULL;
85 List *newrinfos;
86 RestrictInfo *or_rinfo;
87 Selectivity or_selec,
88 orig_selec;
89 ListCell *i;
92 * Find potentially interesting OR joinclauses.
94 * We must ignore clauses for which the target rel is in nullable_relids;
95 * that means there's an outer join below the clause and so it can't be
96 * enforced at the relation scan level.
98 * We must also ignore clauses that are marked !is_pushed_down (ie they
99 * are themselves outer-join clauses). It would be safe to extract an
100 * index condition from such a clause if we are within the nullable rather
101 * than the non-nullable side of its join, but we haven't got enough
102 * context here to tell which applies. OR clauses in outer-join quals
103 * aren't exactly common, so we'll let that case go unoptimized for now.
105 foreach(i, rel->joininfo)
107 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
109 if (restriction_is_or_clause(rinfo) &&
110 rinfo->is_pushed_down &&
111 !bms_is_member(rel->relid, rinfo->nullable_relids))
114 * Use the generate_bitmap_or_paths() machinery to estimate the
115 * value of each OR clause. We can use regular restriction
116 * clauses along with the OR clause contents to generate
117 * indexquals. We pass outer_rel = NULL so that sub-clauses that
118 * are actually joins will be ignored.
120 List *orpaths;
121 ListCell *k;
123 orpaths = generate_bitmap_or_paths(root, rel,
124 list_make1(rinfo),
125 rel->baserestrictinfo,
126 NULL);
128 /* Locate the cheapest OR path */
129 foreach(k, orpaths)
131 BitmapOrPath *path = (BitmapOrPath *) lfirst(k);
133 Assert(IsA(path, BitmapOrPath));
134 if (bestpath == NULL ||
135 path->path.total_cost < bestpath->path.total_cost)
137 bestpath = path;
138 bestrinfo = rinfo;
144 /* Fail if no suitable clauses found */
145 if (bestpath == NULL)
146 return false;
149 * Convert the path's indexclauses structure to a RestrictInfo tree. We
150 * include any partial-index predicates so as to get a reasonable
151 * representation of what the path is actually scanning.
153 newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath,
154 true, true);
156 /* It's possible we get back something other than a single OR clause */
157 if (list_length(newrinfos) != 1)
158 return false;
159 or_rinfo = (RestrictInfo *) linitial(newrinfos);
160 Assert(IsA(or_rinfo, RestrictInfo));
161 if (!restriction_is_or_clause(or_rinfo))
162 return false;
165 * OK, add it to the rel's restriction list.
167 rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos);
170 * Adjust the original OR clause's cached selectivity to compensate for
171 * the selectivity of the added (but redundant) lower-level qual. This
172 * should result in the join rel getting approximately the same rows
173 * estimate as it would have gotten without all these shenanigans. (XXX
174 * major hack alert ... this depends on the assumption that the
175 * selectivity will stay cached ...)
177 or_selec = clause_selectivity(root, (Node *) or_rinfo,
178 0, JOIN_INNER, NULL);
179 if (or_selec > 0 && or_selec < 1)
181 orig_selec = clause_selectivity(root, (Node *) bestrinfo,
182 0, JOIN_INNER, NULL);
183 bestrinfo->norm_selec = orig_selec / or_selec;
184 /* clamp result to sane range */
185 if (bestrinfo->norm_selec > 1)
186 bestrinfo->norm_selec = 1;
187 /* It isn't an outer join clause, so no need to adjust outer_selec */
190 /* Tell caller to recompute partial index status and rowcount estimate */
191 return true;