src/test/regress/expected/select_distinct.out

   1 --
   2 -- SELECT_DISTINCT
   3 --
   4 --
   5 -- awk '{print $3;}' onek.data | sort -n | uniq
   6 --
   7 SELECT DISTINCT two FROM onek ORDER BY 1;
   8  two
   9 -----
  10    0
  11    1
  12 (2 rows)
  13
  14 --
  15 -- awk '{print $5;}' onek.data | sort -n | uniq
  16 --
  17 SELECT DISTINCT ten FROM onek ORDER BY 1;
  18  ten
  19 -----
  20    0
  21    1
  22    2
  23    3
  24    4
  25    5
  26    6
  27    7
  28    8
  29    9
  30 (10 rows)
  31
  32 --
  33 -- awk '{print $16;}' onek.data | sort -d | uniq
  34 --
  35 SELECT DISTINCT string4 FROM onek ORDER BY 1;
  36  string4
  37 ---------
  38  AAAAxx
  39  HHHHxx
  40  OOOOxx
  41  VVVVxx
  42 (4 rows)
  43
  44 --
  45 -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
  46 -- sort +0n -1 +1d -2 +2n -3
  47 --
  48 SELECT DISTINCT two, string4, ten
  49    FROM onek
  50    ORDER BY two using <, string4 using <, ten using <;
  51  two | string4 | ten
  52 -----+---------+-----
  53    0 | AAAAxx  |   0
  54    0 | AAAAxx  |   2
  55    0 | AAAAxx  |   4
  56    0 | AAAAxx  |   6
  57    0 | AAAAxx  |   8
  58    0 | HHHHxx  |   0
  59    0 | HHHHxx  |   2
  60    0 | HHHHxx  |   4
  61    0 | HHHHxx  |   6
  62    0 | HHHHxx  |   8
  63    0 | OOOOxx  |   0
  64    0 | OOOOxx  |   2
  65    0 | OOOOxx  |   4
  66    0 | OOOOxx  |   6
  67    0 | OOOOxx  |   8
  68    0 | VVVVxx  |   0
  69    0 | VVVVxx  |   2
  70    0 | VVVVxx  |   4
  71    0 | VVVVxx  |   6
  72    0 | VVVVxx  |   8
  73    1 | AAAAxx  |   1
  74    1 | AAAAxx  |   3
  75    1 | AAAAxx  |   5
  76    1 | AAAAxx  |   7
  77    1 | AAAAxx  |   9
  78    1 | HHHHxx  |   1
  79    1 | HHHHxx  |   3
  80    1 | HHHHxx  |   5
  81    1 | HHHHxx  |   7
  82    1 | HHHHxx  |   9
  83    1 | OOOOxx  |   1
  84    1 | OOOOxx  |   3
  85    1 | OOOOxx  |   5
  86    1 | OOOOxx  |   7
  87    1 | OOOOxx  |   9
  88    1 | VVVVxx  |   1
  89    1 | VVVVxx  |   3
  90    1 | VVVVxx  |   5
  91    1 | VVVVxx  |   7
  92    1 | VVVVxx  |   9
  93 (40 rows)
  94
  95 --
  96 -- awk '{print $2;}' person.data |
  97 -- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
  98 -- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
  99 -- awk 'BEGIN{FS="      ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
 100 -- sort -n -r | uniq
 101 --
 102 SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
 103  age
 104 -----
 105   98
 106   88
 107   78
 108   68
 109   60
 110   58
 111   50
 112   48
 113   40
 114   38
 115   34
 116   30
 117   28
 118   25
 119   24
 120   23
 121   20
 122   19
 123   18
 124    8
 125 (20 rows)
 126
 127 --
 128 -- Check mentioning same column more than once
 129 --
 130 EXPLAIN (VERBOSE, COSTS OFF)
 131 SELECT count(*) FROM
 132   (SELECT DISTINCT two, four, two FROM tenk1) ss;
 133                        QUERY PLAN
 134 --------------------------------------------------------
 135  Aggregate
 136    Output: count(*)
 137    ->  HashAggregate
 138          Output: tenk1.two, tenk1.four, tenk1.two
 139          Group Key: tenk1.two, tenk1.four
 140          ->  Seq Scan on public.tenk1
 141                Output: tenk1.two, tenk1.four, tenk1.two
 142 (7 rows)
 143
 144 SELECT count(*) FROM
 145   (SELECT DISTINCT two, four, two FROM tenk1) ss;
 146  count
 147 -------
 148      4
 149 (1 row)
 150
 151 --
 152 -- Compare results between plans using sorting and plans using hash
 153 -- aggregation. Force spilling in both cases by setting work_mem low.
 154 --
 155 SET work_mem='64kB';
 156 -- Produce results with sorting.
 157 SET enable_hashagg=FALSE;
 158 SET jit_above_cost=0;
 159 EXPLAIN (costs off)
 160 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
 161                    QUERY PLAN
 162 ------------------------------------------------
 163  Unique
 164    ->  Sort
 165          Sort Key: ((g % 1000))
 166          ->  Function Scan on generate_series g
 167 (4 rows)
 168
 169 CREATE TABLE distinct_group_1 AS
 170 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
 171 SET jit_above_cost TO DEFAULT;
 172 CREATE TABLE distinct_group_2 AS
 173 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
 174 SET enable_seqscan = 0;
 175 -- Check to see we get an incremental sort plan
 176 EXPLAIN (costs off)
 177 SELECT DISTINCT hundred, two FROM tenk1;
 178                      QUERY PLAN
 179 -----------------------------------------------------
 180  Unique
 181    ->  Incremental Sort
 182          Sort Key: hundred, two
 183          Presorted Key: hundred
 184          ->  Index Scan using tenk1_hundred on tenk1
 185 (5 rows)
 186
 187 RESET enable_seqscan;
 188 SET enable_hashagg=TRUE;
 189 -- Produce results with hash aggregation.
 190 SET enable_sort=FALSE;
 191 SET jit_above_cost=0;
 192 EXPLAIN (costs off)
 193 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
 194                 QUERY PLAN
 195 ------------------------------------------
 196  HashAggregate
 197    Group Key: (g % 1000)
 198    ->  Function Scan on generate_series g
 199 (3 rows)
 200
 201 CREATE TABLE distinct_hash_1 AS
 202 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
 203 SET jit_above_cost TO DEFAULT;
 204 CREATE TABLE distinct_hash_2 AS
 205 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
 206 SET enable_sort=TRUE;
 207 SET work_mem TO DEFAULT;
 208 -- Compare results: each hash-aggregation table must equal its sort-based counterpart (0 rows expected)
 209 (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
 210   UNION ALL
 211 (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
 212  ?column?
 213 ----------
 214 (0 rows)
 215
 216 (SELECT * FROM distinct_hash_2 EXCEPT SELECT * FROM distinct_group_2)
 217   UNION ALL
 218 (SELECT * FROM distinct_group_2 EXCEPT SELECT * FROM distinct_hash_2);
 219  text
 220 ------
 221 (0 rows)
 222
 223 DROP TABLE distinct_hash_1;
 224 DROP TABLE distinct_hash_2;
 225 DROP TABLE distinct_group_1;
 226 DROP TABLE distinct_group_2;
 227 -- Test parallel DISTINCT
 228 SET parallel_tuple_cost=0;
 229 SET parallel_setup_cost=0;
 230 SET min_parallel_table_scan_size=0;
 231 SET max_parallel_workers_per_gather=2;
 232 -- Ensure we get a parallel plan
 233 EXPLAIN (costs off)
 234 SELECT DISTINCT four FROM tenk1;
 235                      QUERY PLAN
 236 ----------------------------------------------------
 237  Unique
 238    ->  Gather Merge
 239          Workers Planned: 2
 240          ->  Sort
 241                Sort Key: four
 242                ->  HashAggregate
 243                      Group Key: four
 244                      ->  Parallel Seq Scan on tenk1
 245 (8 rows)
 246
 247 -- Ensure the parallel plan produces the correct results
 248 SELECT DISTINCT four FROM tenk1;
 249  four
 250 ------
 251     0
 252     1
 253     2
 254     3
 255 (4 rows)
 256
 257 CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
 258   BEGIN
 259     RETURN a;
 260   END;
 261 $$ LANGUAGE plpgsql PARALLEL UNSAFE;
 262 -- Ensure we don't do parallel distinct with a parallel unsafe function
 263 EXPLAIN (COSTS OFF)
 264 SELECT DISTINCT distinct_func(1) FROM tenk1;
 265                         QUERY PLAN
 266 ----------------------------------------------------------
 267  Unique
 268    ->  Sort
 269          Sort Key: (distinct_func(1))
 270          ->  Index Only Scan using tenk1_hundred on tenk1
 271 (4 rows)
 272
 273 -- make the function parallel safe
 274 CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
 275   BEGIN
 276     RETURN a;
 277   END;
 278 $$ LANGUAGE plpgsql PARALLEL SAFE;
 279 -- Ensure we do parallel distinct now that the function is parallel safe
 280 EXPLAIN (COSTS OFF)
 281 SELECT DISTINCT distinct_func(1) FROM tenk1;
 282                      QUERY PLAN
 283 ----------------------------------------------------
 284  Unique
 285    ->  Gather Merge
 286          Workers Planned: 2
 287          ->  Unique
 288                ->  Sort
 289                      Sort Key: (distinct_func(1))
 290                      ->  Parallel Seq Scan on tenk1
 291 (7 rows)
 292
 293 RESET max_parallel_workers_per_gather;
 294 RESET min_parallel_table_scan_size;
 295 RESET parallel_setup_cost;
 296 RESET parallel_tuple_cost;
 297 --
 298 -- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
 299 -- all of the distinct_pathkeys have been marked as redundant
 300 --
 301 -- Ensure we get a plan with a Limit 1
 302 EXPLAIN (COSTS OFF)
 303 SELECT DISTINCT four FROM tenk1 WHERE four = 0;
 304          QUERY PLAN
 305 ----------------------------
 306  Limit
 307    ->  Seq Scan on tenk1
 308          Filter: (four = 0)
 309 (3 rows)
 310
 311 -- Ensure the above gives us the correct result
 312 SELECT DISTINCT four FROM tenk1 WHERE four = 0;
 313  four
 314 ------
 315     0
 316 (1 row)
 317
 318 -- Ensure we get a plan with a Limit 1
 319 EXPLAIN (COSTS OFF)
 320 SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
 321                  QUERY PLAN
 322 ---------------------------------------------
 323  Limit
 324    ->  Seq Scan on tenk1
 325          Filter: ((two <> 0) AND (four = 0))
 326 (3 rows)
 327
 328 -- Ensure no rows are returned
 329 SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
 330  four
 331 ------
 332 (0 rows)
 333
 334 -- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
 335 EXPLAIN (COSTS OFF)
 336 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
 337          QUERY PLAN
 338 ----------------------------
 339  Limit
 340    ->  Seq Scan on tenk1
 341          Filter: (four = 0)
 342 (3 rows)
 343
 344 -- Ensure we only get 1 row
 345 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
 346  four | ?column? | ?column? | ?column?
 347 ------+----------+----------+----------
 348     0 |        1 |        2 |        3
 349 (1 row)
 350
 351 SET parallel_setup_cost=0;
 352 SET min_parallel_table_scan_size=0;
 353 SET max_parallel_workers_per_gather=2;
 354 -- Ensure we get a plan with a Limit 1 in both partial distinct and final
 355 -- distinct
 356 EXPLAIN (COSTS OFF)
 357 SELECT DISTINCT four FROM tenk1 WHERE four = 10;
 358                   QUERY PLAN
 359 ----------------------------------------------
 360  Limit
 361    ->  Gather
 362          Workers Planned: 2
 363          ->  Limit
 364                ->  Parallel Seq Scan on tenk1
 365                      Filter: (four = 10)
 366 (6 rows)
 367
 368 RESET max_parallel_workers_per_gather;
 369 RESET min_parallel_table_scan_size;
 370 RESET parallel_setup_cost;
 371 --
 372 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 373 -- very own regression file.
 374 --
 375 CREATE TEMP TABLE disttable (f1 integer);
 376 INSERT INTO DISTTABLE VALUES(1);
 377 INSERT INTO DISTTABLE VALUES(2);
 378 INSERT INTO DISTTABLE VALUES(3);
 379 INSERT INTO DISTTABLE VALUES(NULL);
 380 -- basic cases
 381 SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
 382  f1 | not 2
 383 ----+-------
 384   1 | t
 385   2 | f
 386   3 | t
 387     | t
 388 (4 rows)
 389
 390 SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
 391  f1 | not null
 392 ----+----------
 393   1 | t
 394   2 | t
 395   3 | t
 396     | f
 397 (4 rows)
 398
 399 SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
 400  f1 | false
 401 ----+-------
 402   1 | f
 403   2 | f
 404   3 | f
 405     | f
 406 (4 rows)
 407
 408 SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;
 409  f1 | not null
 410 ----+----------
 411   1 | t
 412   2 | t
 413   3 | t
 414     | f
 415 (4 rows)
 416
 417 -- check that optimizer constant-folds it properly
 418 SELECT 1 IS DISTINCT FROM 2 as "yes";
 419  yes
 420 -----
 421  t
 422 (1 row)
 423
 424 SELECT 2 IS DISTINCT FROM 2 as "no";
 425  no
 426 ----
 427  f
 428 (1 row)
 429
 430 SELECT 2 IS DISTINCT FROM null as "yes";
 431  yes
 432 -----
 433  t
 434 (1 row)
 435
 436 SELECT null IS DISTINCT FROM null as "no";
 437  no
 438 ----
 439  f
 440 (1 row)
 441
 442 -- negated form
 443 SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 444  no
 445 ----
 446  f
 447 (1 row)
 448
 449 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 450  yes
 451 -----
 452  t
 453 (1 row)
 454
 455 SELECT 2 IS NOT DISTINCT FROM null as "no";
 456  no
 457 ----
 458  f
 459 (1 row)
 460
 461 SELECT null IS NOT DISTINCT FROM null as "yes";
 462  yes
 463 -----
 464  t
 465 (1 row)
 466
 467 --
 468 -- Test the planner's ability to reorder the distinctClause Pathkeys to match
 469 -- the input path's ordering
 470 --
 471 CREATE TABLE distinct_tbl (x int, y int);
 472 INSERT INTO distinct_tbl SELECT i%10, i%10 FROM generate_series(1, 1000) AS i;
 473 CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
 474 ANALYZE distinct_tbl;
 475 -- Produce results with sorting.
 476 SET enable_hashagg TO OFF;
 477 -- Ensure we avoid the need to re-sort by reordering the distinctClause
 478 -- Pathkeys to match the ordering of the input path
 479 EXPLAIN (COSTS OFF)
 480 SELECT DISTINCT y, x FROM distinct_tbl;
 481                             QUERY PLAN
 482 ------------------------------------------------------------------
 483  Unique
 484    ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
 485 (2 rows)
 486
 487 SELECT DISTINCT y, x FROM distinct_tbl;
 488  y | x
 489 ---+---
 490  0 | 0
 491  1 | 1
 492  2 | 2
 493  3 | 3
 494  4 | 4
 495  5 | 5
 496  6 | 6
 497  7 | 7
 498  8 | 8
 499  9 | 9
 500 (10 rows)
 501
 502 -- Ensure we leverage incremental-sort by reordering the distinctClause
 503 -- Pathkeys to partially match the ordering of the input path
 504 EXPLAIN (COSTS OFF)
 505 SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
 506                                   QUERY PLAN
 507 ------------------------------------------------------------------------------
 508  Unique
 509    ->  Incremental Sort
 510          Sort Key: s.x, s.y
 511          Presorted Key: s.x
 512          ->  Subquery Scan on s
 513                ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
 514 (6 rows)
 515
 516 SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
 517  y | x
 518 ---+---
 519  0 | 0
 520  1 | 1
 521  2 | 2
 522  3 | 3
 523  4 | 4
 524  5 | 5
 525  6 | 6
 526  7 | 7
 527  8 | 8
 528  9 | 9
 529 (10 rows)
 530
 531 -- Ensure we avoid the need to re-sort in partial distinct by reordering the
 532 -- distinctClause Pathkeys to match the ordering of the input path
 533 SET parallel_tuple_cost=0;
 534 SET parallel_setup_cost=0;
 535 SET min_parallel_table_scan_size=0;
 536 SET min_parallel_index_scan_size=0;
 537 SET max_parallel_workers_per_gather=2;
 538 EXPLAIN (COSTS OFF)
 539 SELECT DISTINCT y, x FROM distinct_tbl limit 10;
 540                                          QUERY PLAN
 541 ---------------------------------------------------------------------------------------------
 542  Limit
 543    ->  Unique
 544          ->  Gather Merge
 545                Workers Planned: 1
 546                ->  Unique
 547                      ->  Parallel Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
 548 (6 rows)
 549
 550 SELECT DISTINCT y, x FROM distinct_tbl limit 10;
 551  y | x
 552 ---+---
 553  0 | 0
 554  1 | 1
 555  2 | 2
 556  3 | 3
 557  4 | 4
 558  5 | 5
 559  6 | 6
 560  7 | 7
 561  8 | 8
 562  9 | 9
 563 (10 rows)
 564
 565 RESET max_parallel_workers_per_gather;
 566 RESET min_parallel_index_scan_size;
 567 RESET min_parallel_table_scan_size;
 568 RESET parallel_setup_cost;
 569 RESET parallel_tuple_cost;
 570 -- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
 571 -- input path even if there is an ORDER BY clause
 572 EXPLAIN (COSTS OFF)
 573 SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
 574                                QUERY PLAN
 575 ------------------------------------------------------------------------
 576  Sort
 577    Sort Key: y
 578    ->  Unique
 579          ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
 580 (4 rows)
 581
 582 SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
 583  y | x
 584 ---+---
 585  0 | 0
 586  1 | 1
 587  2 | 2
 588  3 | 3
 589  4 | 4
 590  5 | 5
 591  6 | 6
 592  7 | 7
 593  8 | 8
 594  9 | 9
 595 (10 rows)
 596
 597 RESET enable_hashagg;
 598 DROP TABLE distinct_tbl;