5 -- awk '{print $3;}' onek.data | sort -n | uniq
7 SELECT DISTINCT two FROM onek ORDER BY 1;
15 -- awk '{print $5;}' onek.data | sort -n | uniq
17 SELECT DISTINCT ten FROM onek ORDER BY 1;
33 -- awk '{print $16;}' onek.data | sort -d | uniq
35 SELECT DISTINCT string4 FROM onek ORDER BY 1;
45 -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
46 -- sort +0n -1 +1d -2 +2n -3
48 SELECT DISTINCT two, string4, ten
50 ORDER BY two using <, string4 using <, ten using <;
96 -- awk '{print $2;}' person.data |
97 -- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
98 -- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
99 -- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
102 SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
128 -- Check mentioning same column more than once
130 EXPLAIN (VERBOSE, COSTS OFF)
132 (SELECT DISTINCT two, four, two FROM tenk1) ss;
134 --------------------------------------------------------
138 Output: tenk1.two, tenk1.four, tenk1.two
139 Group Key: tenk1.two, tenk1.four
140 -> Seq Scan on public.tenk1
141 Output: tenk1.two, tenk1.four, tenk1.two
145 (SELECT DISTINCT two, four, two FROM tenk1) ss;
152 -- Compare results between plans using sorting and plans using hash
153 -- aggregation. Force spilling in both cases by setting work_mem low.
156 -- Produce results with sorting.
157 SET enable_hashagg=FALSE;
158 SET jit_above_cost=0;
160 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
162 ------------------------------------------------
165 Sort Key: ((g % 1000))
166 -> Function Scan on generate_series g
169 CREATE TABLE distinct_group_1 AS
170 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
171 SET jit_above_cost TO DEFAULT;
172 CREATE TABLE distinct_group_2 AS
173 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
174 SET enable_seqscan = 0;
175 -- Check to see we get an incremental sort plan
177 SELECT DISTINCT hundred, two FROM tenk1;
179 -----------------------------------------------------
182 Sort Key: hundred, two
183 Presorted Key: hundred
184 -> Index Scan using tenk1_hundred on tenk1
187 RESET enable_seqscan;
188 SET enable_hashagg=TRUE;
189 -- Produce results with hash aggregation.
190 SET enable_sort=FALSE;
191 SET jit_above_cost=0;
193 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
195 ------------------------------------------
197 Group Key: (g % 1000)
198 -> Function Scan on generate_series g
201 CREATE TABLE distinct_hash_1 AS
202 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
203 SET jit_above_cost TO DEFAULT;
204 CREATE TABLE distinct_hash_2 AS
205 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
206 SET enable_sort=TRUE;
207 SET work_mem TO DEFAULT;
209 (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
211 (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
216 (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
218 (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
223 DROP TABLE distinct_hash_1;
224 DROP TABLE distinct_hash_2;
225 DROP TABLE distinct_group_1;
226 DROP TABLE distinct_group_2;
227 -- Test parallel DISTINCT
228 SET parallel_tuple_cost=0;
229 SET parallel_setup_cost=0;
230 SET min_parallel_table_scan_size=0;
231 SET max_parallel_workers_per_gather=2;
232 -- Ensure we get a parallel plan
234 SELECT DISTINCT four FROM tenk1;
236 ----------------------------------------------------
244 -> Parallel Seq Scan on tenk1
247 -- Ensure the parallel plan produces the correct results
248 SELECT DISTINCT four FROM tenk1;
257 CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
261 $$ LANGUAGE plpgsql PARALLEL UNSAFE;
262 -- Ensure we don't do parallel distinct with a parallel unsafe function
264 SELECT DISTINCT distinct_func(1) FROM tenk1;
266 ----------------------------------------------------------
269 Sort Key: (distinct_func(1))
270 -> Index Only Scan using tenk1_hundred on tenk1
273 -- make the function parallel safe
274 CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
278 $$ LANGUAGE plpgsql PARALLEL SAFE;
279 -- Ensure we do parallel distinct now that the function is parallel safe
281 SELECT DISTINCT distinct_func(1) FROM tenk1;
283 ----------------------------------------------------
289 Sort Key: (distinct_func(1))
290 -> Parallel Seq Scan on tenk1
293 RESET max_parallel_workers_per_gather;
294 RESET min_parallel_table_scan_size;
295 RESET parallel_setup_cost;
296 RESET parallel_tuple_cost;
298 -- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
299 -- all of the distinct_pathkeys have been marked as redundant
301 -- Ensure we get a plan with a Limit 1
303 SELECT DISTINCT four FROM tenk1 WHERE four = 0;
305 ----------------------------
311 -- Ensure the above gives us the correct result
312 SELECT DISTINCT four FROM tenk1 WHERE four = 0;
318 -- Ensure we get a plan with a Limit 1
320 SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
322 ---------------------------------------------
325 Filter: ((two <> 0) AND (four = 0))
328 -- Ensure no rows are returned
329 SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
334 -- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
336 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
338 ----------------------------
344 -- Ensure we only get 1 row
345 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
346 four | ?column? | ?column? | ?column?
347 ------+----------+----------+----------
351 SET parallel_setup_cost=0;
352 SET min_parallel_table_scan_size=0;
353 SET max_parallel_workers_per_gather=2;
354 -- Ensure we get a plan with a Limit 1 in both partial distinct and final
357 SELECT DISTINCT four FROM tenk1 WHERE four = 10;
359 ----------------------------------------------
364 -> Parallel Seq Scan on tenk1
368 RESET max_parallel_workers_per_gather;
369 RESET min_parallel_table_scan_size;
370 RESET parallel_setup_cost;
372 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
373 -- very own regression file.
375 CREATE TEMP TABLE disttable (f1 integer);
376 INSERT INTO DISTTABLE VALUES(1);
377 INSERT INTO DISTTABLE VALUES(2);
378 INSERT INTO DISTTABLE VALUES(3);
379 INSERT INTO DISTTABLE VALUES(NULL);
381 SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
390 SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
399 SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
408 SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;
417 -- check that optimizer constant-folds it properly
418 SELECT 1 IS DISTINCT FROM 2 as "yes";
424 SELECT 2 IS DISTINCT FROM 2 as "no";
430 SELECT 2 IS DISTINCT FROM null as "yes";
436 SELECT null IS DISTINCT FROM null as "no";
443 SELECT 1 IS NOT DISTINCT FROM 2 as "no";
449 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
455 SELECT 2 IS NOT DISTINCT FROM null as "no";
461 SELECT null IS NOT DISTINCT FROM null as "yes";
468 -- Test the planner's ability to reorder the distinctClause Pathkeys to match
469 -- the input path's ordering
471 CREATE TABLE distinct_tbl (x int, y int);
472 INSERT INTO distinct_tbl SELECT i%10, i%10 FROM generate_series(1, 1000) AS i;
473 CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
474 ANALYZE distinct_tbl;
475 -- Produce results with sorting.
476 SET enable_hashagg TO OFF;
477 -- Ensure we avoid the need to re-sort by reordering the distinctClause
478 -- Pathkeys to match the ordering of the input path
480 SELECT DISTINCT y, x FROM distinct_tbl;
482 ------------------------------------------------------------------
484 -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
487 SELECT DISTINCT y, x FROM distinct_tbl;
502 -- Ensure we leverage incremental-sort by reordering the distinctClause
503 -- Pathkeys to partially match the ordering of the input path
505 SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
507 ------------------------------------------------------------------------------
512 -> Subquery Scan on s
513 -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
516 SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
531 -- Ensure we avoid the need to re-sort in partial distinct by reordering the
532 -- distinctClause Pathkeys to match the ordering of the input path
533 SET parallel_tuple_cost=0;
534 SET parallel_setup_cost=0;
535 SET min_parallel_table_scan_size=0;
536 SET min_parallel_index_scan_size=0;
537 SET max_parallel_workers_per_gather=2;
539 SELECT DISTINCT y, x FROM distinct_tbl limit 10;
541 ---------------------------------------------------------------------------------------------
547 -> Parallel Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
550 SELECT DISTINCT y, x FROM distinct_tbl limit 10;
565 RESET max_parallel_workers_per_gather;
566 RESET min_parallel_index_scan_size;
567 RESET min_parallel_table_scan_size;
568 RESET parallel_setup_cost;
569 RESET parallel_tuple_cost;
570 -- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
571 -- input path even if there is ORDER BY clause
573 SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
575 ------------------------------------------------------------------------
579 -> Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
582 SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
597 RESET enable_hashagg;
598 DROP TABLE distinct_tbl;