1 -- When we have to sort the entire table, incremental sort will
2 -- be slower than plain sort, so it should not be used.
4 select * from (select * from tenk1 order by four) t order by four, ten;
6 -----------------------------------
8 Sort Key: tenk1.four, tenk1.ten
14 -- When there is a LIMIT clause, incremental sort is beneficial because
15 -- it only has to sort some of the groups, and not the entire table.
17 select * from (select * from tenk1 order by four) t order by four, ten
20 -----------------------------------------
23 Sort Key: tenk1.four, tenk1.ten
24 Presorted Key: tenk1.four
30 -- When work_mem is not enough to sort the entire table, incremental sort
31 -- may be faster if individual groups still fit into work_mem.
32 set work_mem to '2MB';
34 select * from (select * from tenk1 order by four) t order by four, ten;
36 -----------------------------------
38 Sort Key: tenk1.four, tenk1.ten
39 Presorted Key: tenk1.four
46 create table t(a integer, b integer);
47 create or replace function explain_analyze_without_memory(query text)
48 returns table (out_line text) language plpgsql
55 execute 'explain (analyze, costs off, summary off, timing off) ' || query
57 out_line := regexp_replace(line, '\d+kB', 'NNkB', 'g');
62 create or replace function explain_analyze_inc_sort_nodes(query text)
63 returns jsonb language plpgsql
69 matching_nodes jsonb := '[]'::jsonb;
71 execute 'explain (analyze, costs off, summary off, timing off, format ''json'') ' || query into strict elements;
72 while jsonb_array_length(elements) > 0 loop
73 element := elements->0;
74 elements := elements - 0;
75 case jsonb_typeof(element)
77 if jsonb_array_length(element) > 0 then
78 elements := elements || element;
81 if element ? 'Plan' then
82 elements := elements || jsonb_build_array(element->'Plan');
83 element := element - 'Plan';
85 if element ? 'Plans' then
86 elements := elements || jsonb_build_array(element->'Plans');
87 element := element - 'Plans';
89 if (element->>'Node Type')::text = 'Incremental Sort' then
90 matching_nodes := matching_nodes || element;
95 return matching_nodes;
98 create or replace function explain_analyze_inc_sort_nodes_without_memory(query text)
99 returns jsonb language plpgsql
103 nodes jsonb := '[]'::jsonb;
108 for node in select * from jsonb_array_elements(explain_analyze_inc_sort_nodes(query)) t loop
109 for group_key in select unnest(array['Full-sort Groups', 'Pre-sorted Groups']::text[]) t loop
110 for space_key in select unnest(array['Sort Space Memory', 'Sort Space Disk']::text[]) t loop
111 node := jsonb_set(node, array[group_key, space_key, 'Average Sort Space Used'], '"NN"', false);
112 node := jsonb_set(node, array[group_key, space_key, 'Peak Sort Space Used'], '"NN"', false);
115 nodes := nodes || node;
120 create or replace function explain_analyze_inc_sort_nodes_verify_invariants(query text)
121 returns bool language plpgsql
130 for node in select * from jsonb_array_elements(explain_analyze_inc_sort_nodes(query)) t loop
131 for group_key in select unnest(array['Full-sort Groups', 'Pre-sorted Groups']::text[]) t loop
132 group_stats := node->group_key;
133 for space_key in select unnest(array['Sort Space Memory', 'Sort Space Disk']::text[]) t loop
134 if (group_stats->space_key->'Peak Sort Space Used')::bigint < (group_stats->space_key->'Average Sort Space Used')::bigint then
135 raise exception '% has invalid max space < average space', group_key;
143 -- A single large group tested around each mode transition point.
144 insert into t(a, b) select i/100 + 1, i + 1 from generate_series(0, 999) n(i);
146 explain (costs off) select * from (select * from t order by a) s order by a, b limit 31;
148 ---------------------------------
158 select * from (select * from t order by a) s order by a, b limit 31;
194 explain (costs off) select * from (select * from t order by a) s order by a, b limit 32;
196 ---------------------------------
206 select * from (select * from t order by a) s order by a, b limit 32;
243 explain (costs off) select * from (select * from t order by a) s order by a, b limit 33;
245 ---------------------------------
255 select * from (select * from t order by a) s order by a, b limit 33;
293 explain (costs off) select * from (select * from t order by a) s order by a, b limit 65;
295 ---------------------------------
305 select * from (select * from t order by a) s order by a, b limit 65;
375 explain (costs off) select * from (select * from t order by a) s order by a, b limit 66;
377 ---------------------------------
387 select * from (select * from t order by a) s order by a, b limit 66;
459 -- An initial large group followed by a small group.
460 insert into t(a, b) select i/50 + 1, i + 1 from generate_series(0, 999) n(i);
462 explain (costs off) select * from (select * from t order by a) s order by a, b limit 55;
464 ---------------------------------
474 select * from (select * from t order by a) s order by a, b limit 55;
534 -- Test EXPLAIN ANALYZE with only a fullsort group.
535 select explain_analyze_without_memory('select * from (select * from t order by a) s order by a, b limit 55');
536 explain_analyze_without_memory
537 ---------------------------------------------------------------------------------------------------------------
538 Limit (actual rows=55 loops=1)
539 -> Incremental Sort (actual rows=55 loops=1)
542 Full-sort Groups: 2 Sort Methods: top-N heapsort, quicksort Average Memory: NNkB Peak Memory: NNkB
543 -> Sort (actual rows=101 loops=1)
545 Sort Method: quicksort Memory: NNkB
546 -> Seq Scan on t (actual rows=1000 loops=1)
549 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 55'));
551 -------------------------------------------------
558 "Node Type": "Incremental Sort", +
561 "Async Capable": false, +
565 "Parallel Aware": false, +
566 "Full-sort Groups": { +
568 "Sort Methods Used": [ +
572 "Sort Space Memory": { +
573 "Peak Sort Space Used": "NN", +
574 "Average Sort Space Used": "NN"+
577 "Parent Relationship": "Outer" +
582 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 55');
583 explain_analyze_inc_sort_nodes_verify_invariants
584 --------------------------------------------------
589 -- An initial small group followed by a large group.
590 insert into t(a, b) select (case when i < 5 then i else 9 end), i from generate_series(1, 1000) n(i);
592 explain (costs off) select * from (select * from t order by a) s order by a, b limit 70;
594 ---------------------------------
604 select * from (select * from t order by a) s order by a, b limit 70;
679 -- Checks case where we hit a group boundary at the last tuple of a batch.
680 -- Because the full sort state is bounded, we scan 64 tuples (the mode
681 -- transition point) but only retain 5. Thus when we transition modes, all
682 -- tuples in the full sort state have different prefix keys.
683 explain (costs off) select * from (select * from t order by a) s order by a, b limit 5;
685 ---------------------------------
695 select * from (select * from t order by a) s order by a, b limit 5;
707 -- We force the planner to choose a plan with incremental sort on the right side
708 -- of a nested loop join node. That way we trigger the rescan code path.
709 set local enable_hashjoin = off;
710 set local enable_mergejoin = off;
711 set local enable_material = off;
712 set local enable_sort = off;
713 explain (costs off) select * from t left join (select * from (select * from t order by a) v order by a, b) s on s.a = t.a where t.a in (1, 2);
715 ------------------------------------------------
716 Nested Loop Left Join
717 Join Filter: (t_1.a = t.a)
719 Filter: (a = ANY ('{1,2}'::integer[]))
721 Sort Key: t_1.a, t_1.b
728 select * from t left join (select * from (select * from t order by a) v order by a, b) s on s.a = t.a where t.a in (1, 2);
736 -- Test EXPLAIN ANALYZE with both fullsort and presorted groups.
737 select explain_analyze_without_memory('select * from (select * from t order by a) s order by a, b limit 70');
738 explain_analyze_without_memory
739 ----------------------------------------------------------------------------------------------------------------
740 Limit (actual rows=70 loops=1)
741 -> Incremental Sort (actual rows=70 loops=1)
744 Full-sort Groups: 1 Sort Method: quicksort Average Memory: NNkB Peak Memory: NNkB
745 Pre-sorted Groups: 5 Sort Methods: top-N heapsort, quicksort Average Memory: NNkB Peak Memory: NNkB
746 -> Sort (actual rows=1000 loops=1)
748 Sort Method: quicksort Memory: NNkB
749 -> Seq Scan on t (actual rows=1000 loops=1)
752 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 70'));
754 -------------------------------------------------
761 "Node Type": "Incremental Sort", +
764 "Async Capable": false, +
768 "Parallel Aware": false, +
769 "Full-sort Groups": { +
771 "Sort Methods Used": [ +
774 "Sort Space Memory": { +
775 "Peak Sort Space Used": "NN", +
776 "Average Sort Space Used": "NN"+
779 "Pre-sorted Groups": { +
781 "Sort Methods Used": [ +
785 "Sort Space Memory": { +
786 "Peak Sort Space Used": "NN", +
787 "Average Sort Space Used": "NN"+
790 "Parent Relationship": "Outer" +
795 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 70');
796 explain_analyze_inc_sort_nodes_verify_invariants
797 --------------------------------------------------
802 -- Small groups of 10 tuples each tested around each mode transition point.
803 insert into t(a, b) select i / 10, i from generate_series(1, 1000) n(i);
805 explain (costs off) select * from (select * from t order by a) s order by a, b limit 31;
807 ---------------------------------
817 select * from (select * from t order by a) s order by a, b limit 31;
853 explain (costs off) select * from (select * from t order by a) s order by a, b limit 32;
855 ---------------------------------
865 select * from (select * from t order by a) s order by a, b limit 32;
902 explain (costs off) select * from (select * from t order by a) s order by a, b limit 33;
904 ---------------------------------
914 select * from (select * from t order by a) s order by a, b limit 33;
952 explain (costs off) select * from (select * from t order by a) s order by a, b limit 65;
954 ---------------------------------
964 select * from (select * from t order by a) s order by a, b limit 65;
1034 explain (costs off) select * from (select * from t order by a) s order by a, b limit 66;
1036 ---------------------------------
1046 select * from (select * from t order by a) s order by a, b limit 66;
1118 -- Small groups of only 1 tuple each tested around each mode transition point.
1119 insert into t(a, b) select i, i from generate_series(1, 1000) n(i);
1121 explain (costs off) select * from (select * from t order by a) s order by a, b limit 31;
1123 ---------------------------------
1133 select * from (select * from t order by a) s order by a, b limit 31;
1169 explain (costs off) select * from (select * from t order by a) s order by a, b limit 32;
1171 ---------------------------------
1181 select * from (select * from t order by a) s order by a, b limit 32;
1218 explain (costs off) select * from (select * from t order by a) s order by a, b limit 33;
1220 ---------------------------------
1230 select * from (select * from t order by a) s order by a, b limit 33;
1268 explain (costs off) select * from (select * from t order by a) s order by a, b limit 65;
1270 ---------------------------------
1280 select * from (select * from t order by a) s order by a, b limit 65;
1350 explain (costs off) select * from (select * from t order by a) s order by a, b limit 66;
1352 ---------------------------------
1362 select * from (select * from t order by a) s order by a, b limit 66;
1435 -- Incremental sort vs. parallel queries
1436 set min_parallel_table_scan_size = '1kB';
1437 set min_parallel_index_scan_size = '1kB';
1438 set parallel_setup_cost = 0;
1439 set parallel_tuple_cost = 0;
1440 set max_parallel_workers_per_gather = 2;
1441 create table t (a int, b int, c int);
1442 insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
1443 create index on t (a);
1445 set enable_incremental_sort = off;
1446 explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
1448 ------------------------------------------------------
1451 Sort Key: a, b, (sum(c))
1452 -> Finalize HashAggregate
1456 -> Partial HashAggregate
1458 -> Parallel Seq Scan on t
1461 set enable_incremental_sort = on;
1462 explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
1464 ----------------------------------------------------------------------
1467 Sort Key: a, b, (sum(c))
1476 -> Parallel Index Scan using t_a_idx on t
1479 -- Incremental sort vs. set operations with varno 0
1480 set enable_hashagg to off;
1481 explain (costs off) select * from t union select * from t order by 1,3;
1483 ----------------------------------------------------------
1489 Sort Key: t.a, t.b, t.c
1493 -> Parallel Seq Scan on t
1496 -> Parallel Seq Scan on t t_1
1499 -- Full sort, not just incremental sort can be pushed below a gather merge path
1500 -- by generate_useful_gather_paths.
1501 explain (costs off) select distinct a,b from t;
1503 ------------------------------------------
1509 -> Parallel Seq Scan on t
1513 -- Sort pushdown can't go below where expressions are part of the rel target.
1514 -- In particular this is interesting for volatile expressions which have to
1515 -- go above joins since otherwise we'll incorrectly use expression evaluations
1516 -- across multiple rows.
1517 set enable_hashagg=off;
1518 set enable_seqscan=off;
1519 set enable_incremental_sort = off;
1520 set parallel_tuple_cost=0;
1521 set parallel_setup_cost=0;
1522 set min_parallel_table_scan_size = 0;
1523 set min_parallel_index_scan_size = 0;
1524 -- Parallel sort below join.
1525 explain (costs off) select distinct sub.unique1, stringu1
1526 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1528 --------------------------------------------------------------------------
1534 Sort Key: tenk1.unique1, tenk1.stringu1
1535 -> Parallel Index Scan using tenk1_unique1 on tenk1
1536 -> Function Scan on generate_series
1539 explain (costs off) select sub.unique1, stringu1
1540 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1543 --------------------------------------------------------------------
1548 Sort Key: tenk1.unique1, tenk1.stringu1
1549 -> Parallel Index Scan using tenk1_unique1 on tenk1
1550 -> Function Scan on generate_series
1553 -- Parallel sort but with expression that can be safely generated at the base rel.
1554 explain (costs off) select distinct sub.unique1, md5(stringu1)
1555 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1557 ----------------------------------------------------------------------------------------
1563 Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1564 -> Parallel Index Scan using tenk1_unique1 on tenk1
1565 -> Function Scan on generate_series
1568 explain (costs off) select sub.unique1, md5(stringu1)
1569 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1572 ----------------------------------------------------------------------------------
1577 Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1578 -> Parallel Index Scan using tenk1_unique1 on tenk1
1579 -> Function Scan on generate_series
1582 -- Parallel sort with an aggregate that can be safely generated in parallel,
1583 -- but we can't sort by partial aggregate values.
1584 explain (costs off) select count(*)
1586 join tenk1 t2 on t1.unique1 = t2.unique2
1587 join tenk1 t3 on t2.unique1 = t3.unique1
1590 -----------------------------------------------------------------------------------------------
1592 Sort Key: (count(*))
1593 -> Finalize Aggregate
1596 -> Partial Aggregate
1597 -> Parallel Hash Join
1598 Hash Cond: (t2.unique1 = t3.unique1)
1599 -> Parallel Hash Join
1600 Hash Cond: (t1.unique1 = t2.unique2)
1601 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
1603 -> Parallel Index Scan using tenk1_unique2 on tenk1 t2
1605 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t3
1608 -- Parallel sort but with expression (correlated subquery) that
1609 -- is prohibited in parallel plans.
1610 explain (costs off) select distinct
1612 (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1)
1613 from tenk1 t, generate_series(1, 1000);
1615 ---------------------------------------------------------------------------------
1618 Sort Key: t.unique1, ((SubPlan 1))
1622 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t
1623 -> Function Scan on generate_series
1625 -> Index Only Scan using tenk1_unique1 on tenk1
1626 Index Cond: (unique1 = t.unique1)
1629 explain (costs off) select
1631 (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1)
1632 from tenk1 t, generate_series(1, 1000)
1635 ---------------------------------------------------------------------------
1637 Sort Key: t.unique1, ((SubPlan 1))
1641 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t
1642 -> Function Scan on generate_series
1644 -> Index Only Scan using tenk1_unique1 on tenk1
1645 Index Cond: (unique1 = t.unique1)
1648 -- Parallel sort but with expression not available until the upper rel.
1649 explain (costs off) select distinct sub.unique1, stringu1 || random()::text
1650 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1652 ---------------------------------------------------------------------------------------------
1655 Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1659 -> Parallel Index Scan using tenk1_unique1 on tenk1
1660 -> Function Scan on generate_series
1663 explain (costs off) select sub.unique1, stringu1 || random()::text
1664 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1667 ---------------------------------------------------------------------------------------
1669 Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1673 -> Parallel Index Scan using tenk1_unique1 on tenk1
1674 -> Function Scan on generate_series
1677 -- Disallow pushing down sort when pathkey is an SRF.
1678 explain (costs off) select unique1 from tenk1 order by unnest('{1,2}'::int[]);
1680 -------------------------------------------------------------------------
1682 Sort Key: (unnest('{1,2}'::integer[]))
1686 -> Parallel Index Only Scan using tenk1_unique1 on tenk1