1 -- Generic extended statistics support
4 -- Note: tables for which we check estimated row counts should be created
5 -- with autovacuum_enabled = off, so that we don't have unstable results
6 -- from auto-analyze happening when we didn't expect it.
9 -- check the number of estimated/actual rows in the top node
10 create function check_estimated_rows(text) returns table (estimated int, actual int)
16 first_row bool := true;
19 execute format('explain analyze %s', $1)
23 tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
24 return query select tmp[1]::int, tmp[2]::int;
31 CREATE TABLE ext_stats_test (x text, y int, z int);
32 CREATE STATISTICS tst;
33 CREATE STATISTICS tst ON a, b;
34 CREATE STATISTICS tst FROM sometab;
35 CREATE STATISTICS tst ON a, b FROM nonexistent;
36 CREATE STATISTICS tst ON a, b FROM ext_stats_test;
37 CREATE STATISTICS tst ON x, x, y FROM ext_stats_test;
38 CREATE STATISTICS tst ON x, x, y, x, x, y, x, x, y FROM ext_stats_test;
39 CREATE STATISTICS tst ON x, x, y, x, x, (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;
40 CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;
41 CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test;
42 CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
43 -- incorrect expressions
44 CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
45 CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
46 CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
47 DROP TABLE ext_stats_test;
49 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
50 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
51 CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
52 COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment';
53 CREATE ROLE regress_stats_ext;
54 SET SESSION AUTHORIZATION regress_stats_ext;
55 COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment';
56 DROP STATISTICS ab1_a_b_stats;
57 ALTER STATISTICS ab1_a_b_stats RENAME TO ab1_a_b_stats_new;
58 RESET SESSION AUTHORIZATION;
59 DROP ROLE regress_stats_ext;
61 CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
62 DROP STATISTICS ab1_a_b_stats;
64 CREATE SCHEMA regress_schema_2;
65 CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1;
67 -- Let's also verify the pg_get_statisticsobjdef output looks sane.
68 SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats';
70 DROP STATISTICS regress_schema_2.ab1_a_b_stats;
72 -- Ensure statistics are dropped when columns are
73 CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1;
74 CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1;
75 CREATE STATISTICS ab1_b_a_stats ON b, a FROM ab1;
76 ALTER TABLE ab1 DROP COLUMN a;
78 -- Ensure statistics are dropped when table is
79 SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
81 SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
83 -- Ensure things work sanely with SET STATISTICS 0
84 CREATE TABLE ab1 (a INTEGER, b INTEGER);
85 ALTER TABLE ab1 ALTER a SET STATISTICS 0;
86 INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
87 CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
89 ALTER TABLE ab1 ALTER a SET STATISTICS -1;
90 -- setting statistics target 0 skips the statistics, without printing any message, so check catalog
91 ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0;
94 SELECT stxname, stxdndistinct, stxddependencies, stxdmcv
95 FROM pg_statistic_ext s, pg_statistic_ext_data d
96 WHERE s.stxname = 'ab1_a_b_stats'
98 ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1;
100 -- partial analyze doesn't build stats either
104 ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0;
105 ALTER STATISTICS IF EXISTS ab1_a_b_stats SET STATISTICS 0;
107 -- Ensure we can build statistics for tables with inheritance.
108 CREATE TABLE ab1 (a INTEGER, b INTEGER);
109 CREATE TABLE ab1c () INHERITS (ab1);
110 INSERT INTO ab1 VALUES (1,1);
111 CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
113 DROP TABLE ab1 CASCADE;
115 -- Tests for stats with inheritance
116 CREATE TABLE stxdinh(a int, b int);
117 CREATE TABLE stxdinh1() INHERITS(stxdinh);
118 CREATE TABLE stxdinh2() INHERITS(stxdinh);
119 INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
120 INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
121 INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
122 VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
123 -- Ensure non-inherited stats are not applied to inherited query
124 -- Without stats object, it looks like this
125 SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
126 SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
127 CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
128 VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
129 -- Since the stats object does not include inherited stats, it should not
130 -- affect the estimates
131 SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
132 -- Dependencies are applied at individual relations (within append), so
133 -- this estimate changes a bit because we improve estimates for the parent
134 SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
135 DROP TABLE stxdinh, stxdinh1, stxdinh2;
137 -- basic test for statistics on expressions
138 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
140 -- expression stats may be built on a single expression column
141 CREATE STATISTICS ab1_exprstat_1 ON (a+b) FROM ab1;
143 -- with a single expression, we only enable expression statistics
144 CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1;
145 SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_2';
147 -- adding anything to the expression builds all statistics kinds
148 CREATE STATISTICS ab1_exprstat_3 ON (a+b), a FROM ab1;
149 SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3';
151 -- date_trunc on timestamptz is not immutable, but that should not matter
152 CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
154 -- date_trunc on timestamp is immutable
155 CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
157 -- insert some data and run analyze, to test that these cases build properly
160 generate_series(1,10),
161 generate_series(1,10),
162 generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
163 generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
167 -- Verify supported object types for extended statistics
168 CREATE schema tststats;
170 CREATE TABLE tststats.t (a int, b int, c text);
171 CREATE INDEX ti ON tststats.t (a, b);
172 CREATE SEQUENCE tststats.s;
173 CREATE VIEW tststats.v AS SELECT * FROM tststats.t;
174 CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t;
175 CREATE TYPE tststats.ty AS (a int, b int, c text);
176 CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw;
177 CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw;
178 CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv;
179 CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b);
180 CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10);
182 CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t;
183 CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti;
184 CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s;
185 CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v;
186 CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv;
187 CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty;
188 CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f;
189 CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt;
190 CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1;
193 relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass;
195 EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname;
196 EXCEPTION WHEN wrong_object_type THEN
197 RAISE NOTICE 'stats on toast table not created';
201 DROP SCHEMA tststats CASCADE;
202 DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE;
205 CREATE TABLE ndistinct (
214 WITH (autovacuum_enabled = off);
216 -- over-estimates when using only per-column statistics
217 INSERT INTO ndistinct (a, b, c, filler1)
218 SELECT i/100, i/100, i/100, cash_words((i/100)::money)
219 FROM generate_series(1,1000) s(i);
223 -- Group Aggregate, due to over-estimate of the number of groups
224 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
226 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
228 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
230 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
232 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
234 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
236 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
238 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
240 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
243 CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
247 SELECT s.stxkind, d.stxdndistinct
248 FROM pg_statistic_ext s, pg_statistic_ext_data d
249 WHERE s.stxrelid = 'ndistinct'::regclass
250 AND d.stxoid = s.oid;
252 -- minor improvement, make sure the ctid does not break the matching
253 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY ctid, a, b');
255 -- Hash Aggregate, thanks to estimates improved by the statistic
256 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
258 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
260 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
262 -- partial improvement (match on attributes)
263 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
265 -- expressions - no improvement
266 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
268 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
270 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
272 -- last two plans keep using Group Aggregate, because 'd' is not covered
273 -- by the statistic and while it's NULL-only we assume 200 values for it
274 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
276 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
278 TRUNCATE TABLE ndistinct;
280 -- under-estimates when using only per-column statistics
281 INSERT INTO ndistinct (a, b, c, filler1)
282 SELECT mod(i,13), mod(i,17), mod(i,19),
283 cash_words(mod(i,23)::int::money)
284 FROM generate_series(1,1000) s(i);
288 SELECT s.stxkind, d.stxdndistinct
289 FROM pg_statistic_ext s, pg_statistic_ext_data d
290 WHERE s.stxrelid = 'ndistinct'::regclass
291 AND d.stxoid = s.oid;
294 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
296 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
298 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
300 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
302 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
304 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
306 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
308 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
310 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
314 SELECT s.stxkind, d.stxdndistinct
315 FROM pg_statistic_ext s, pg_statistic_ext_data d
316 WHERE s.stxrelid = 'ndistinct'::regclass
317 AND d.stxoid = s.oid;
319 -- dropping the statistics results in under-estimates
320 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
322 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
324 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
326 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
328 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
330 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
332 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
334 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
336 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
338 -- ndistinct estimates with statistics on expressions
339 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
341 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
343 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
345 CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
349 SELECT s.stxkind, d.stxdndistinct
350 FROM pg_statistic_ext s, pg_statistic_ext_data d
351 WHERE s.stxrelid = 'ndistinct'::regclass
352 AND d.stxoid = s.oid;
354 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
356 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
358 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
362 -- a mix of attributes and expressions
363 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
365 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)');
367 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)');
369 CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
373 SELECT s.stxkind, d.stxdndistinct
374 FROM pg_statistic_ext s, pg_statistic_ext_data d
375 WHERE s.stxrelid = 'ndistinct'::regclass
376 AND d.stxoid = s.oid;
378 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
380 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)');
382 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)');
386 -- combination of multiple ndistinct statistics, with/without expressions
389 -- two mostly independent groups of columns
390 INSERT INTO ndistinct (a, b, c, d)
391 SELECT mod(i,3), mod(i,9), mod(i,5), mod(i,20)
392 FROM generate_series(1,1000) s(i);
396 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
398 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
400 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
402 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
404 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
406 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
408 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
410 -- basic statistics on both attributes (no expressions)
411 CREATE STATISTICS s11 (ndistinct) ON a, b FROM ndistinct;
413 CREATE STATISTICS s12 (ndistinct) ON c, d FROM ndistinct;
417 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
419 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
421 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
423 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
425 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
427 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
429 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
432 -- replace the second statistics by statistics on expressions
436 CREATE STATISTICS s12 (ndistinct) ON (c * 10), (d - 1) FROM ndistinct;
440 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
442 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
444 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
446 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
448 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
450 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
452 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
455 -- replace the second statistics by statistics on both attributes and expressions
459 CREATE STATISTICS s12 (ndistinct) ON c, d, (c * 10), (d - 1) FROM ndistinct;
463 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
465 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
467 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
469 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
471 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
473 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
475 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
478 -- replace the other statistics by statistics on both attributes and expressions
482 CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct;
486 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
488 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
490 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
492 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
494 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
496 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
498 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
501 -- replace statistics by somewhat overlapping ones (this expected to get worse estimate
502 -- because the first statistics shall be applied to 3 columns, and the second one can't
503 -- be really applied)
508 CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct;
509 CREATE STATISTICS s12 (ndistinct) ON a, (b+1), (c * 10) FROM ndistinct;
513 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
515 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
517 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
519 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
521 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
523 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
525 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
530 -- functional dependencies tests
531 CREATE TABLE functional_dependencies (
540 WITH (autovacuum_enabled = off);
542 CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
543 CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
545 -- random data (no functional dependencies)
546 INSERT INTO functional_dependencies (a, b, c, filler1)
547 SELECT mod(i, 5), mod(i, 7), mod(i, 11), i FROM generate_series(1,1000) s(i);
549 ANALYZE functional_dependencies;
551 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
553 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
556 CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
558 ANALYZE functional_dependencies;
560 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
562 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
564 -- a => b, a => c, b => c
565 TRUNCATE functional_dependencies;
566 DROP STATISTICS func_deps_stat;
568 -- now do the same thing, but with expressions
569 INSERT INTO functional_dependencies (a, b, c, filler1)
570 SELECT i, i, i, i FROM generate_series(1,5000) s(i);
572 ANALYZE functional_dependencies;
574 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
576 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
579 CREATE STATISTICS func_deps_stat (dependencies) ON (mod(a,11)), (mod(b::int, 13)), (mod(c, 7)) FROM functional_dependencies;
581 ANALYZE functional_dependencies;
583 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
585 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
587 -- a => b, a => c, b => c
588 TRUNCATE functional_dependencies;
589 DROP STATISTICS func_deps_stat;
591 INSERT INTO functional_dependencies (a, b, c, filler1)
592 SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
594 ANALYZE functional_dependencies;
596 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
598 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
601 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1''');
603 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')');
605 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')');
607 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ''1''');
609 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');
611 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)');
613 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
615 -- OR clauses referencing the same attribute
616 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
618 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
620 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
622 -- OR clauses referencing different attributes
623 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
626 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
628 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])');
630 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
632 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1');
634 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])');
636 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])');
638 -- ANY with inequalities should not benefit from functional dependencies
639 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a < ANY (ARRAY[1, 51]) AND b > ''1''');
641 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a >= ANY (ARRAY[1, 51]) AND b <= ANY (ARRAY[''1'', ''2''])');
643 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a <= ANY (ARRAY[1, 2, 51, 52]) AND b >= ANY (ARRAY[''1'', ''2''])');
645 -- ALL (should not benefit from functional dependencies)
646 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1''])');
648 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1'', ''2''])');
650 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])');
653 CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
655 ANALYZE functional_dependencies;
657 -- print the detected dependencies
658 SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat';
660 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
662 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
665 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1''');
667 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')');
669 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')');
671 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ''1''');
673 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');
675 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)');
677 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
679 -- OR clauses referencing the same attribute
680 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
682 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
684 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
686 -- OR clauses referencing different attributes are incompatible
687 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
690 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
692 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])');
694 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
696 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1');
698 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])');
700 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])');
702 -- ANY with inequalities should not benefit from functional dependencies
703 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a < ANY (ARRAY[1, 51]) AND b > ''1''');
705 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a >= ANY (ARRAY[1, 51]) AND b <= ANY (ARRAY[''1'', ''2''])');
707 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a <= ANY (ARRAY[1, 2, 51, 52]) AND b >= ANY (ARRAY[''1'', ''2''])');
709 -- ALL (should not benefit from functional dependencies)
710 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1''])');
712 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1'', ''2''])');
714 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])');
716 -- changing the type of column c causes all its stats to be dropped, reverting
717 -- to default estimates without any statistics, i.e. 0.5% selectivity for each
719 ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
721 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
723 ANALYZE functional_dependencies;
725 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
727 DROP STATISTICS func_deps_stat;
729 -- now try functional dependencies with expressions
731 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1''');
733 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'' AND (c + 1) = 2');
736 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ''1''');
738 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) IN (''1'', ''2'')');
740 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) IN (''1'', ''2'')');
742 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) = ''1''');
744 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND upper(b) IN (''1'', ''26'') AND (c + 1) = 2');
746 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND upper(b) IN (''1'', ''26'') AND (c + 1) IN (2)');
748 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND upper(b) IN (''1'', ''2'', ''26'', ''27'') AND (c + 1) IN (2, 3)');
750 -- OR clauses referencing the same attribute
751 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND upper(b) = ''1''');
753 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (upper(b) = ''1'' OR upper(b) = ''2'')');
755 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND (upper(b) = ''1'' OR upper(b) = ''2'')');
757 -- OR clauses referencing different attributes
758 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR upper(b) = ''1'') AND upper(b) = ''1''');
761 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ''1''');
763 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ANY (ARRAY[''1'', ''2''])');
765 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND upper(b) = ANY (ARRAY[''1'', ''2''])');
767 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND upper(b) = ANY (ARRAY[''1'', ''26'']) AND (c + 1) = 2');
769 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND upper(b) = ANY (ARRAY[''1'', ''26'']) AND (c + 1) = ANY (ARRAY[2])');
771 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND upper(b) = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND (c + 1) = ANY (ARRAY[2, 3])');
773 -- ANY with inequalities should not benefit from functional dependencies
774 -- the estimates however improve thanks to having expression statistics
775 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND upper(b) > ''1''');
777 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND upper(b) <= ANY (ARRAY[''1'', ''2''])');
779 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND upper(b) >= ANY (ARRAY[''1'', ''2''])');
781 -- ALL (should not benefit from functional dependencies)
782 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ALL (ARRAY[''1''])');
784 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ALL (ARRAY[''1'', ''2''])');
786 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) = ALL (ARRAY[''1'', ''2''])');
788 -- create statistics on expressions
789 CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), upper(b), (c + 1) FROM functional_dependencies;
791 ANALYZE functional_dependencies;
793 -- print the detected dependencies
794 SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat';
796 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1''');
798 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'' AND (c + 1) = 2');
801 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ''1''');
803 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) IN (''1'', ''2'')');
805 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) IN (''1'', ''2'')');
807 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) = ''1''');
809 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND upper(b) IN (''1'', ''26'') AND (c + 1) = 2');
811 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND upper(b) IN (''1'', ''26'') AND (c + 1) IN (2)');
813 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND upper(b) IN (''1'', ''2'', ''26'', ''27'') AND (c + 1) IN (2, 3)');
815 -- OR clauses referencing the same attribute
816 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND upper(b) = ''1''');
818 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (upper(b) = ''1'' OR upper(b) = ''2'')');
820 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND (upper(b) = ''1'' OR upper(b) = ''2'')');
822 -- OR clauses referencing different attributes
823 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR upper(b) = ''1'') AND upper(b) = ''1''');
826 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ''1''');
828 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND upper(b) = ANY (ARRAY[''1'', ''2''])');
830 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND upper(b) = ANY (ARRAY[''1'', ''2''])');
832 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND upper(b) = ANY (ARRAY[''1'', ''26'']) AND (c + 1) = 2');
834 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND upper(b) = ANY (ARRAY[''1'', ''26'']) AND (c + 1) = ANY (ARRAY[2])');
836 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND upper(b) = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND (c + 1) = ANY (ARRAY[2, 3])');
838 -- ANY with inequalities should not benefit from functional dependencies
839 -- the estimates however improve thanks to having expression statistics
840 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND upper(b) > ''1''');
842 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND upper(b) <= ANY (ARRAY[''1'', ''2''])');
844 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND upper(b) >= ANY (ARRAY[''1'', ''2''])');
846 -- ALL (should not benefit from functional dependencies)
847 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ALL (ARRAY[''1''])');
849 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND upper(b) = ALL (ARRAY[''1'', ''2''])');
851 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND upper(b) = ALL (ARRAY[''1'', ''2''])');
853 -- check the ability to use multiple functional dependencies
854 CREATE TABLE functional_dependencies_multi (
860 WITH (autovacuum_enabled = off);
862 INSERT INTO functional_dependencies_multi (a, b, c, d)
868 FROM generate_series(1,5000) s(i);
870 ANALYZE functional_dependencies_multi;
872 -- estimates without any functional dependencies
873 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
874 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE 0 = a AND 0 = b');
875 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
876 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
877 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE 0 = a AND b = 0 AND 0 = c AND d = 0');
879 -- create separate functional dependencies
880 CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi;
881 CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi;
883 ANALYZE functional_dependencies_multi;
885 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
886 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE 0 = a AND 0 = b');
887 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
888 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
889 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE 0 = a AND b = 0 AND 0 = c AND d = 0');
891 DROP TABLE functional_dependencies_multi;
894 CREATE TABLE mcv_lists (
903 WITH (autovacuum_enabled = off);
905 -- random data (no MCV list)
906 INSERT INTO mcv_lists (a, b, c, filler1)
907 SELECT mod(i,37), mod(i,41), mod(i,43), mod(i,47) FROM generate_series(1,5000) s(i);
911 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
913 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
916 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
920 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
922 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
925 DROP STATISTICS mcv_lists_stats;
927 -- random data (no MCV list), but with expression
928 INSERT INTO mcv_lists (a, b, c, filler1)
929 SELECT i, i, i, i FROM generate_series(1,1000) s(i);
933 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
935 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
938 CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,7)), (mod(b::int,11)), (mod(c,13)) FROM mcv_lists;
942 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
944 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
946 -- 100 distinct combinations, all in the MCV list
948 DROP STATISTICS mcv_lists_stats;
950 INSERT INTO mcv_lists (a, b, c, filler1)
951 SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
955 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
957 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = a AND ''1'' = b');
959 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');
961 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > a AND ''1'' > b');
963 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0''');
965 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= a AND ''0'' >= b');
967 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
969 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5');
971 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND ''1'' > b AND 5 > c');
973 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
975 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0'' >= b AND 4 >= c');
977 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
979 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
981 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
983 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
985 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
987 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2'', NULL])');
989 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
991 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, NULL, 2, 3]) AND b IN (''1'', ''2'', NULL, ''3'')');
993 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
995 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3, NULL])');
997 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
999 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])');
1001 -- create statistics
1002 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
1006 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
1008 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = a AND ''1'' = b');
1010 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');
1012 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > a AND ''1'' > b');
1014 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0''');
1016 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 0 >= a AND ''0'' >= b');
1018 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
1020 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5');
1022 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND ''1'' > b AND 5 > c');
1024 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
1026 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0'' >= b AND 4 >= c');
1028 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
1030 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
1032 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
1034 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
1036 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
1038 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
1040 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[NULL, 1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2'', NULL])');
1042 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
1044 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, NULL, 2, 3]) AND b IN (''1'', ''2'', NULL, ''3'')');
1046 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
1048 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3, NULL])');
1050 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
1052 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])');
1054 -- check change of unrelated column type does not reset the MCV statistics
1055 ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
1057 SELECT d.stxdmcv IS NOT NULL
1058 FROM pg_statistic_ext s, pg_statistic_ext_data d
1059 WHERE s.stxname = 'mcv_lists_stats'
1060 AND d.stxoid = s.oid;
1062 -- check change of column type resets the MCV statistics
1063 ALTER TABLE mcv_lists ALTER COLUMN c TYPE numeric;
1065 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
1069 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
1072 -- 100 distinct combinations, all in the MCV list, but with expressions
1074 DROP STATISTICS mcv_lists_stats;
1076 INSERT INTO mcv_lists (a, b, c, filler1)
1077 SELECT i, i, i, i FROM generate_series(1,1000) s(i);
1081 -- without any stats on the expressions, we have to use default selectivities, which
1082 -- is why the estimates here are different from the pre-computed case above
1084 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
1086 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
1088 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
1090 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
1092 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
1094 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
1096 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
1098 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
1100 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
1102 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
1104 -- create statistics with expressions only (we create three separate stats, in order not to build more complex extended stats)
1105 CREATE STATISTICS mcv_lists_stats_1 ON (mod(a,20)) FROM mcv_lists;
1106 CREATE STATISTICS mcv_lists_stats_2 ON (mod(b::int,10)) FROM mcv_lists;
1107 CREATE STATISTICS mcv_lists_stats_3 ON (mod(c,5)) FROM mcv_lists;
1111 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
1113 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
1115 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
1117 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
1119 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
1121 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
1123 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
1125 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
1127 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
1129 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
1131 DROP STATISTICS mcv_lists_stats_1;
1132 DROP STATISTICS mcv_lists_stats_2;
1133 DROP STATISTICS mcv_lists_stats_3;
1135 -- create statistics with both MCV and expressions
1136 CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,20)), (mod(b::int,10)), (mod(c,5)) FROM mcv_lists;
1140 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
1142 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
1144 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
1146 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
1148 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
1150 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
1152 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
1154 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
1156 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
1158 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
1160 -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
1161 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,5) = 1 OR d IS NOT NULL');
1163 -- 100 distinct combinations with NULL values, all in the MCV list
1165 DROP STATISTICS mcv_lists_stats;
1167 INSERT INTO mcv_lists (a, b, c, filler1)
1169 (CASE WHEN mod(i,100) = 1 THEN NULL ELSE mod(i,100) END),
1170 (CASE WHEN mod(i,50) = 1 THEN NULL ELSE mod(i,50) END),
1171 (CASE WHEN mod(i,25) = 1 THEN NULL ELSE mod(i,25) END),
1173 FROM generate_series(1,5000) s(i);
1177 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL');
1179 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL');
1181 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NOT NULL');
1183 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NOT NULL AND b IS NULL AND c IS NOT NULL');
1185 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (0, 1) AND b IN (''0'', ''1'')');
1187 -- create statistics
1188 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
1192 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL');
1194 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL');
1196 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NOT NULL');
1198 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NOT NULL AND b IS NULL AND c IS NOT NULL');
1200 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (0, 1) AND b IN (''0'', ''1'')');
1202 -- test pg_mcv_list_items with a very simple (single item) MCV list
1204 INSERT INTO mcv_lists (a, b, c) SELECT 1, 2, 3 FROM generate_series(1,1000) s(i);
1208 FROM pg_statistic_ext s, pg_statistic_ext_data d,
1209 pg_mcv_list_items(d.stxdmcv) m
1210 WHERE s.stxname = 'mcv_lists_stats'
1211 AND d.stxoid = s.oid;
1213 -- 2 distinct combinations with NULL values, all in the MCV list
1215 DROP STATISTICS mcv_lists_stats;
1217 INSERT INTO mcv_lists (a, b, c, d)
1219 NULL, -- always NULL
1220 (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 'x' END),
1221 (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 0 END),
1222 (CASE WHEN mod(i,2) = 0 THEN NULL ELSE 'x' END)
1223 FROM generate_series(1,5000) s(i);
1227 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR d = ''x''');
1229 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''x'' OR d = ''x''');
1231 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')');
1233 -- create statistics
1234 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists;
1238 -- test pg_mcv_list_items with MCV list containing variable-length data and NULLs
1240 FROM pg_statistic_ext s, pg_statistic_ext_data d,
1241 pg_mcv_list_items(d.stxdmcv) m
1242 WHERE s.stxname = 'mcv_lists_stats'
1243 AND d.stxoid = s.oid;
1245 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR d = ''x''');
1247 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''x'' OR d = ''x''');
1249 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')');
1251 -- mcv with pass-by-ref fixlen types, e.g. uuid
1252 CREATE TABLE mcv_lists_uuid (
1257 WITH (autovacuum_enabled = off);
1259 INSERT INTO mcv_lists_uuid (a, b, c)
1261 md5(mod(i,100)::text)::uuid,
1262 md5(mod(i,50)::text)::uuid,
1263 md5(mod(i,25)::text)::uuid
1264 FROM generate_series(1,5000) s(i);
1266 ANALYZE mcv_lists_uuid;
1268 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc''');
1270 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND c = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc''');
1272 CREATE STATISTICS mcv_lists_uuid_stats (mcv) ON a, b, c
1273 FROM mcv_lists_uuid;
1275 ANALYZE mcv_lists_uuid;
1277 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc''');
1279 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_uuid WHERE a = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND b = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc'' AND c = ''1679091c-5a88-0faf-6fb5-e6087eb1b2dc''');
1281 DROP TABLE mcv_lists_uuid;
1284 CREATE TABLE mcv_lists_arrays (
1289 WITH (autovacuum_enabled = off);
1291 INSERT INTO mcv_lists_arrays (a, b, c)
1293 ARRAY[md5((i/100)::text), md5((i/100-1)::text), md5((i/100+1)::text)],
1294 ARRAY[(i/100-1)::numeric/1000, (i/100)::numeric/1000, (i/100+1)::numeric/1000],
1295 ARRAY[(i/100-1), i/100, (i/100+1)]
1296 FROM generate_series(1,5000) s(i);
1298 CREATE STATISTICS mcv_lists_arrays_stats (mcv) ON a, b, c
1299 FROM mcv_lists_arrays;
1301 ANALYZE mcv_lists_arrays;
1304 CREATE TABLE mcv_lists_bool (
1309 WITH (autovacuum_enabled = off);
1311 INSERT INTO mcv_lists_bool (a, b, c)
1313 (mod(i,2) = 0), (mod(i,4) = 0), (mod(i,8) = 0)
1314 FROM generate_series(1,10000) s(i);
1316 ANALYZE mcv_lists_bool;
1318 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c');
1320 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c');
1322 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c');
1324 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
1326 CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
1327 FROM mcv_lists_bool;
1329 ANALYZE mcv_lists_bool;
1331 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c');
1333 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c');
1335 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c');
1337 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
1339 -- mcv covering just a small fraction of data
1340 CREATE TABLE mcv_lists_partial (
1346 -- 10 frequent groups, each with 100 elements
1347 INSERT INTO mcv_lists_partial (a, b, c)
1352 FROM generate_series(0,999) s(i);
1354 -- 100 groups that will make it to the MCV list (includes the 10 frequent ones)
1355 INSERT INTO mcv_lists_partial (a, b, c)
1360 FROM generate_series(0,99) s(i);
1362 -- 4000 groups in total, most of which won't make it (just a single item)
1363 INSERT INTO mcv_lists_partial (a, b, c)
1368 FROM generate_series(0,3999) s(i);
1370 ANALYZE mcv_lists_partial;
1372 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 0');
1374 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');
1376 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');
1378 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 OR b = 10 OR c = 10');
1380 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 10');
1382 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10');
1384 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)');
1386 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
1388 CREATE STATISTICS mcv_lists_partial_stats (mcv) ON a, b, c
1389 FROM mcv_lists_partial;
1391 ANALYZE mcv_lists_partial;
1393 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 0');
1395 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');
1397 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');
1399 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 OR b = 10 OR c = 10');
1401 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 10');
1403 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10');
1405 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)');
1407 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
1409 DROP TABLE mcv_lists_partial;
1411 -- check the ability to use multiple MCV lists
1412 CREATE TABLE mcv_lists_multi (
1418 WITH (autovacuum_enabled = off);
1420 INSERT INTO mcv_lists_multi (a, b, c, d)
1426 FROM generate_series(1,5000) s(i);
1428 ANALYZE mcv_lists_multi;
1430 -- estimates without any mcv statistics
1431 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0');
1432 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0');
1433 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 AND c = 0');
1434 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR c = 0');
1435 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
1436 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
1437 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
1439 -- create separate MCV statistics
1440 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
1441 CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
1443 ANALYZE mcv_lists_multi;
1445 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0');
1446 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0');
1447 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 AND c = 0');
1448 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR c = 0');
1449 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
1450 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
1451 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
1453 DROP TABLE mcv_lists_multi;
1456 -- statistics on integer expressions
1457 CREATE TABLE expr_stats (a int, b int, c int);
1458 INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i);
1461 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
1462 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
1464 CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats;
1467 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
1468 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
1470 DROP STATISTICS expr_stats_1;
1471 DROP TABLE expr_stats;
1473 -- statistics on a mix columns and expressions
1474 CREATE TABLE expr_stats (a int, b int, c int);
1475 INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i);
1478 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
1479 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
1480 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
1482 CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats;
1485 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
1486 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
1487 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
1489 DROP TABLE expr_stats;
1491 -- statistics on expressions with different data types
1492 CREATE TABLE expr_stats (a int, b name, c text);
1493 INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,1000) s(i);
1496 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
1498 CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats;
1501 SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
1503 DROP TABLE expr_stats;
1505 -- test handling of a mix of compatible and incompatible expressions
1506 CREATE TABLE expr_stats_incompatible_test (
1507 c0 double precision,
1511 CREATE STATISTICS expr_stat_comp_1 ON c0, c1 FROM expr_stats_incompatible_test;
1513 INSERT INTO expr_stats_incompatible_test VALUES (1234,false), (5678,true);
1514 ANALYZE expr_stats_incompatible_test;
1516 SELECT c0 FROM ONLY expr_stats_incompatible_test WHERE
1518 upper('x') LIKE ('x'||('[0,1]'::int4range))
1520 (c0 IN (0, 1) OR c1)
1523 DROP TABLE expr_stats_incompatible_test;
1525 -- Permission tests. Users should not be able to see specific data values in
1526 -- the extended statistics, if they lack permission to see those values in
1527 -- the underlying table.
1529 -- Currently this is only relevant for MCV stats.
1530 CREATE SCHEMA tststats;
1532 CREATE TABLE tststats.priv_test_tbl (
1537 INSERT INTO tststats.priv_test_tbl
1538 SELECT mod(i,5), mod(i,10) FROM generate_series(1,100) s(i);
1540 CREATE STATISTICS tststats.priv_test_stats (mcv) ON a, b
1541 FROM tststats.priv_test_tbl;
1543 ANALYZE tststats.priv_test_tbl;
1545 -- Check printing info about extended statistics by \dX
1546 create table stts_t1 (a int, b int);
1547 create statistics stts_1 (ndistinct) on a, b from stts_t1;
1548 create statistics stts_2 (ndistinct, dependencies) on a, b from stts_t1;
1549 create statistics stts_3 (ndistinct, dependencies, mcv) on a, b from stts_t1;
1551 create table stts_t2 (a int, b int, c int);
1552 create statistics stts_4 on b, c from stts_t2;
1554 create table stts_t3 (col1 int, col2 int, col3 int);
1555 create statistics stts_hoge on col1, col2, col3 from stts_t3;
1557 create schema stts_s1;
1558 create schema stts_s2;
1559 create statistics stts_s1.stts_foo on col1, col2 from stts_t3;
1560 create statistics stts_s2.stts_yama (dependencies, mcv) on col1, col3 from stts_t3;
1562 insert into stts_t1 select i,i from generate_series(1,100) i;
1564 set search_path to public, stts_s1, stts_s2, tststats;
1572 \dX+ stts_s2.stts_yama
1574 set search_path to public, stts_s1;
1577 create role regress_stats_ext nosuperuser;
1578 set role regress_stats_ext;
1582 drop table stts_t1, stts_t2, stts_t3;
1583 drop schema stts_s1, stts_s2 cascade;
1584 drop user regress_stats_ext;
1587 -- User with no access
1588 CREATE USER regress_stats_user1;
1589 GRANT USAGE ON SCHEMA tststats TO regress_stats_user1;
1590 SET SESSION AUTHORIZATION regress_stats_user1;
1591 SELECT * FROM tststats.priv_test_tbl; -- Permission denied
1593 -- Attempt to gain access using a leaky operator
1594 CREATE FUNCTION op_leak(int, int) RETURNS bool
1595 AS 'BEGIN RAISE NOTICE ''op_leak => %, %'', $1, $2; RETURN $1 < $2; END'
1597 CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int,
1598 restrict = scalarltsel);
1599 SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied
1600 DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied
1602 -- Grant access via a security barrier view, but hide all data
1603 RESET SESSION AUTHORIZATION;
1604 CREATE VIEW tststats.priv_test_view WITH (security_barrier=true)
1605 AS SELECT * FROM tststats.priv_test_tbl WHERE false;
1606 GRANT SELECT, DELETE ON tststats.priv_test_view TO regress_stats_user1;
1608 -- Should now have access via the view, but see nothing and leak nothing
1609 SET SESSION AUTHORIZATION regress_stats_user1;
1610 SELECT * FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak
1611 DELETE FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak
1613 -- Grant table access, but hide all data with RLS
1614 RESET SESSION AUTHORIZATION;
1615 ALTER TABLE tststats.priv_test_tbl ENABLE ROW LEVEL SECURITY;
1616 GRANT SELECT, DELETE ON tststats.priv_test_tbl TO regress_stats_user1;
1618 -- Should now have direct table access, but see nothing and leak nothing
1619 SET SESSION AUTHORIZATION regress_stats_user1;
1620 SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak
1621 DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak
1624 DROP OPERATOR <<< (int, int);
1625 DROP FUNCTION op_leak(int, int);
1626 RESET SESSION AUTHORIZATION;
1627 DROP SCHEMA tststats CASCADE;
1628 DROP USER regress_stats_user1;