1 -- When there is a LIMIT clause, incremental sort is beneficial because
2 -- it only has to sort some of the groups, and not the entire table.
4 select * from (select * from tenk1 order by four) t order by four, ten
7 -----------------------------------------
10 Sort Key: tenk1.four, tenk1.ten
11 Presorted Key: tenk1.four
17 -- When work_mem is not enough to sort the entire table, incremental sort
18 -- may be faster if individual groups still fit into work_mem.
19 set work_mem to '2MB';
21 select * from (select * from tenk1 order by four) t order by four, ten;
23 -----------------------------------
25 Sort Key: tenk1.four, tenk1.ten
26 Presorted Key: tenk1.four
33 create table t(a integer, b integer);
create or replace function explain_analyze_without_memory(query text)
returns table (out_line text) language plpgsql
as
$$
declare
  line text;
begin
  -- Run EXPLAIN ANALYZE with all the unstable-output options disabled and
  -- return the plan one line at a time.
  for line in
    execute 'explain (analyze, costs off, summary off, timing off, buffers off) ' || query
  loop
    -- Memory usage figures vary between runs/platforms; normalize every
    -- "<digits>kB" to the placeholder "NNkB" so the output is stable.
    out_line := regexp_replace(line, '\d+kB', 'NNkB', 'g');
    return next;
  end loop;
end;
$$;
create or replace function explain_analyze_inc_sort_nodes(query text)
returns jsonb language plpgsql
as
$$
declare
  elements jsonb;
  element jsonb;
  matching_nodes jsonb := '[]'::jsonb;
begin
  -- Get the plan as JSON so individual nodes can be inspected structurally.
  execute 'explain (analyze, costs off, summary off, timing off, buffers off, format ''json'') ' || query into strict elements;
  -- Iterative breadth-style walk over the plan tree: pop the head element,
  -- push any children, and collect every Incremental Sort node we encounter.
  while jsonb_array_length(elements) > 0 loop
    element := elements->0;
    elements := elements - 0;
    case jsonb_typeof(element)
    when 'array' then
      -- Flatten arrays (e.g. a "Plans" list) back into the work queue.
      if jsonb_array_length(element) > 0 then
        elements := elements || element;
      end if;
    when 'object' then
      if element ? 'Plan' then
        -- Top-level wrapper object: queue the root plan node.
        elements := elements || jsonb_build_array(element->'Plan');
        element := element - 'Plan';
      else
        -- Regular plan node: queue its children, then test the node itself.
        if element ? 'Plans' then
          elements := elements || jsonb_build_array(element->'Plans');
          element := element - 'Plans';
        end if;
        if (element->>'Node Type')::text = 'Incremental Sort' then
          matching_nodes := matching_nodes || element;
        end if;
      end if;
    end case;
  end loop;
  return matching_nodes;
end;
$$;
create or replace function explain_analyze_inc_sort_nodes_without_memory(query text)
returns jsonb language plpgsql
as
$$
declare
  nodes jsonb := '[]'::jsonb;
  node jsonb;
  group_key text;
  space_key text;
begin
  -- For each Incremental Sort node in the plan, mask the run-to-run unstable
  -- sort-space statistics with the placeholder "NN" so output is comparable.
  for node in select * from jsonb_array_elements(explain_analyze_inc_sort_nodes(query)) t loop
    for group_key in select unnest(array['Full-sort Groups', 'Pre-sorted Groups']::text[]) t loop
      for space_key in select unnest(array['Sort Space Memory', 'Sort Space Disk']::text[]) t loop
        -- create_if_missing = false: only mask stats that are present.
        node := jsonb_set(node, array[group_key, space_key, 'Average Sort Space Used'], '"NN"', false);
        node := jsonb_set(node, array[group_key, space_key, 'Peak Sort Space Used'], '"NN"', false);
      end loop;
    end loop;
    nodes := nodes || node;
  end loop;
  return nodes;
end;
$$;
create or replace function explain_analyze_inc_sort_nodes_verify_invariants(query text)
returns bool language plpgsql
as
$$
declare
  node jsonb;
  group_stats jsonb;
  group_key text;
  space_key text;
begin
  -- Verify, for every Incremental Sort node in the plan, that the reported
  -- peak sort space is never smaller than the average sort space.
  for node in select * from jsonb_array_elements(explain_analyze_inc_sort_nodes(query)) t loop
    for group_key in select unnest(array['Full-sort Groups', 'Pre-sorted Groups']::text[]) t loop
      group_stats := node->group_key;
      for space_key in select unnest(array['Sort Space Memory', 'Sort Space Disk']::text[]) t loop
        -- BUG FIX: the original compared 'Peak Sort Space Used' against
        -- itself, so the check could never fire. The invariant (per the
        -- exception text) is peak >= average, so compare peak to average.
        if (group_stats->space_key->'Peak Sort Space Used')::bigint < (group_stats->space_key->'Average Sort Space Used')::bigint then
          raise exception '% has invalid max space < average space', group_key;
        end if;
      end loop;
    end loop;
  end loop;
  return true;
end;
$$;
130 -- A single large group tested around each mode transition point.
131 insert into t(a, b) select i/100 + 1, i + 1 from generate_series(0, 999) n(i);
133 explain (costs off) select * from (select * from t order by a) s order by a, b limit 31;
135 ---------------------------------
145 select * from (select * from t order by a) s order by a, b limit 31;
181 explain (costs off) select * from (select * from t order by a) s order by a, b limit 32;
183 ---------------------------------
193 select * from (select * from t order by a) s order by a, b limit 32;
230 explain (costs off) select * from (select * from t order by a) s order by a, b limit 33;
232 ---------------------------------
242 select * from (select * from t order by a) s order by a, b limit 33;
280 explain (costs off) select * from (select * from t order by a) s order by a, b limit 65;
282 ---------------------------------
292 select * from (select * from t order by a) s order by a, b limit 65;
362 explain (costs off) select * from (select * from t order by a) s order by a, b limit 66;
364 ---------------------------------
374 select * from (select * from t order by a) s order by a, b limit 66;
446 -- An initial large group followed by a small group.
447 insert into t(a, b) select i/50 + 1, i + 1 from generate_series(0, 999) n(i);
449 explain (costs off) select * from (select * from t order by a) s order by a, b limit 55;
451 ---------------------------------
461 select * from (select * from t order by a) s order by a, b limit 55;
521 -- Test EXPLAIN ANALYZE with only a fullsort group.
522 select explain_analyze_without_memory('select * from (select * from t order by a) s order by a, b limit 55');
523 explain_analyze_without_memory
524 ---------------------------------------------------------------------------------------------------------------
525 Limit (actual rows=55 loops=1)
526 -> Incremental Sort (actual rows=55 loops=1)
529 Full-sort Groups: 2 Sort Methods: top-N heapsort, quicksort Average Memory: NNkB Peak Memory: NNkB
530 -> Sort (actual rows=101 loops=1)
532 Sort Method: quicksort Memory: NNkB
533 -> Seq Scan on t (actual rows=1000 loops=1)
536 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 55'));
538 -------------------------------------------------
546 "Node Type": "Incremental Sort", +
549 "Async Capable": false, +
553 "Parallel Aware": false, +
554 "Full-sort Groups": { +
556 "Sort Methods Used": [ +
560 "Sort Space Memory": { +
561 "Peak Sort Space Used": "NN", +
562 "Average Sort Space Used": "NN"+
565 "Parent Relationship": "Outer" +
570 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 55');
571 explain_analyze_inc_sort_nodes_verify_invariants
572 --------------------------------------------------
577 -- An initial small group followed by a large group.
578 insert into t(a, b) select (case when i < 5 then i else 9 end), i from generate_series(1, 1000) n(i);
580 explain (costs off) select * from (select * from t order by a) s order by a, b limit 70;
582 ---------------------------------
592 select * from (select * from t order by a) s order by a, b limit 70;
667 -- Checks case where we hit a group boundary at the last tuple of a batch.
668 -- Because the full sort state is bounded, we scan 64 tuples (the mode
669 -- transition point) but only retain 5. Thus when we transition modes, all
670 -- tuples in the full sort state have different prefix keys.
671 explain (costs off) select * from (select * from t order by a) s order by a, b limit 5;
673 ---------------------------------
683 select * from (select * from t order by a) s order by a, b limit 5;
695 -- We force the planner to choose a plan with incremental sort on the right side
696 -- of a nested loop join node. That way we trigger the rescan code path.
697 set local enable_hashjoin = off;
698 set local enable_mergejoin = off;
699 set local enable_material = off;
700 set local enable_sort = off;
701 explain (costs off) select * from t left join (select * from (select * from t order by a) v order by a, b) s on s.a = t.a where t.a in (1, 2);
703 ------------------------------------------------
704 Nested Loop Left Join
705 Join Filter: (t_1.a = t.a)
707 Filter: (a = ANY ('{1,2}'::integer[]))
709 Sort Key: t_1.a, t_1.b
717 select * from t left join (select * from (select * from t order by a) v order by a, b) s on s.a = t.a where t.a in (1, 2);
725 -- Test EXPLAIN ANALYZE with both fullsort and presorted groups.
726 select explain_analyze_without_memory('select * from (select * from t order by a) s order by a, b limit 70');
727 explain_analyze_without_memory
728 ----------------------------------------------------------------------------------------------------------------
729 Limit (actual rows=70 loops=1)
730 -> Incremental Sort (actual rows=70 loops=1)
733 Full-sort Groups: 1 Sort Method: quicksort Average Memory: NNkB Peak Memory: NNkB
734 Pre-sorted Groups: 5 Sort Methods: top-N heapsort, quicksort Average Memory: NNkB Peak Memory: NNkB
735 -> Sort (actual rows=1000 loops=1)
737 Sort Method: quicksort Memory: NNkB
738 -> Seq Scan on t (actual rows=1000 loops=1)
741 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 70'));
743 -------------------------------------------------
751 "Node Type": "Incremental Sort", +
754 "Async Capable": false, +
758 "Parallel Aware": false, +
759 "Full-sort Groups": { +
761 "Sort Methods Used": [ +
764 "Sort Space Memory": { +
765 "Peak Sort Space Used": "NN", +
766 "Average Sort Space Used": "NN"+
769 "Pre-sorted Groups": { +
771 "Sort Methods Used": [ +
775 "Sort Space Memory": { +
776 "Peak Sort Space Used": "NN", +
777 "Average Sort Space Used": "NN"+
780 "Parent Relationship": "Outer" +
785 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 70');
786 explain_analyze_inc_sort_nodes_verify_invariants
787 --------------------------------------------------
792 -- Small groups of 10 tuples each tested around each mode transition point.
793 insert into t(a, b) select i / 10, i from generate_series(1, 1000) n(i);
795 explain (costs off) select * from (select * from t order by a) s order by a, b limit 31;
797 ---------------------------------
807 select * from (select * from t order by a) s order by a, b limit 31;
843 explain (costs off) select * from (select * from t order by a) s order by a, b limit 32;
845 ---------------------------------
855 select * from (select * from t order by a) s order by a, b limit 32;
892 explain (costs off) select * from (select * from t order by a) s order by a, b limit 33;
894 ---------------------------------
904 select * from (select * from t order by a) s order by a, b limit 33;
942 explain (costs off) select * from (select * from t order by a) s order by a, b limit 65;
944 ---------------------------------
954 select * from (select * from t order by a) s order by a, b limit 65;
1024 explain (costs off) select * from (select * from t order by a) s order by a, b limit 66;
1026 ---------------------------------
1036 select * from (select * from t order by a) s order by a, b limit 66;
1108 -- Small groups of only 1 tuple each tested around each mode transition point.
1109 insert into t(a, b) select i, i from generate_series(1, 1000) n(i);
1111 explain (costs off) select * from (select * from t order by a) s order by a, b limit 31;
1113 ---------------------------------
1123 select * from (select * from t order by a) s order by a, b limit 31;
1159 explain (costs off) select * from (select * from t order by a) s order by a, b limit 32;
1161 ---------------------------------
1171 select * from (select * from t order by a) s order by a, b limit 32;
1208 explain (costs off) select * from (select * from t order by a) s order by a, b limit 33;
1210 ---------------------------------
1220 select * from (select * from t order by a) s order by a, b limit 33;
1258 explain (costs off) select * from (select * from t order by a) s order by a, b limit 65;
1260 ---------------------------------
1270 select * from (select * from t order by a) s order by a, b limit 65;
1340 explain (costs off) select * from (select * from t order by a) s order by a, b limit 66;
1342 ---------------------------------
1352 select * from (select * from t order by a) s order by a, b limit 66;
1425 -- Incremental sort vs. parallel queries
1426 set min_parallel_table_scan_size = '1kB';
1427 set min_parallel_index_scan_size = '1kB';
1428 set parallel_setup_cost = 0;
1429 set parallel_tuple_cost = 0;
1430 set max_parallel_workers_per_gather = 2;
1431 create table t (a int, b int, c int);
1432 insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
1433 create index on t (a);
1435 set enable_incremental_sort = off;
1436 explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
1438 ------------------------------------------------------
1441 Sort Key: a, b, (sum(c))
1442 -> Finalize HashAggregate
1446 -> Partial HashAggregate
1448 -> Parallel Seq Scan on t
1451 set enable_incremental_sort = on;
1452 explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
1454 ----------------------------------------------------------------------
1457 Sort Key: a, b, (sum(c))
1466 -> Parallel Index Scan using t_a_idx on t
1469 -- Incremental sort vs. set operations with varno 0
1470 set enable_hashagg to off;
1471 explain (costs off) select * from t union select * from t order by 1,3;
1473 ----------------------------------------------------------
1479 Sort Key: t.a, t.b, t.c
1483 Sort Key: t.a, t.b, t.c
1484 -> Parallel Seq Scan on t
1488 Sort Key: t_1.a, t_1.b, t_1.c
1489 -> Parallel Seq Scan on t t_1
1492 -- Full sort, not just incremental sort can be pushed below a gather merge path
1493 -- by generate_useful_gather_paths.
1494 explain (costs off) select distinct a,b from t;
1496 ------------------------------------------------
1503 -> Parallel Seq Scan on t
1507 -- Sort pushdown can't go below where expressions are part of the rel target.
1508 -- In particular this is interesting for volatile expressions which have to
1509 -- go above joins since otherwise we'll incorrectly use expression evaluations
1510 -- across multiple rows.
1511 set enable_hashagg=off;
1512 set enable_seqscan=off;
1513 set enable_incremental_sort = off;
1514 set parallel_tuple_cost=0;
1515 set parallel_setup_cost=0;
1516 set min_parallel_table_scan_size = 0;
1517 set min_parallel_index_scan_size = 0;
1518 -- Parallel sort below join.
1519 explain (costs off) select distinct sub.unique1, stringu1
1520 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1522 --------------------------------------------------------------------------
1528 Sort Key: tenk1.unique1, tenk1.stringu1
1529 -> Parallel Index Scan using tenk1_unique1 on tenk1
1530 -> Function Scan on generate_series
1533 explain (costs off) select sub.unique1, stringu1
1534 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1537 --------------------------------------------------------------------
1542 Sort Key: tenk1.unique1, tenk1.stringu1
1543 -> Parallel Index Scan using tenk1_unique1 on tenk1
1544 -> Function Scan on generate_series
1547 -- Parallel sort but with expression that can be safely generated at the base rel.
1548 explain (costs off) select distinct sub.unique1, md5(stringu1)
1549 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1551 ----------------------------------------------------------------------------------------
1557 Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1558 -> Parallel Index Scan using tenk1_unique1 on tenk1
1559 -> Function Scan on generate_series
1562 explain (costs off) select sub.unique1, md5(stringu1)
1563 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1566 ----------------------------------------------------------------------------------
1571 Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1572 -> Parallel Index Scan using tenk1_unique1 on tenk1
1573 -> Function Scan on generate_series
1576 -- Parallel sort with an aggregate that can be safely generated in parallel,
1577 -- but we can't sort by partial aggregate values.
1578 explain (costs off) select count(*)
1580 join tenk1 t2 on t1.unique1 = t2.unique2
1581 join tenk1 t3 on t2.unique1 = t3.unique1
1584 -----------------------------------------------------------------------------------------------
1586 Sort Key: (count(*))
1587 -> Finalize Aggregate
1590 -> Partial Aggregate
1591 -> Parallel Hash Join
1592 Hash Cond: (t2.unique1 = t3.unique1)
1593 -> Parallel Hash Join
1594 Hash Cond: (t1.unique1 = t2.unique2)
1595 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
1597 -> Parallel Index Scan using tenk1_unique2 on tenk1 t2
1599 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t3
1602 -- Parallel sort but with expression (correlated subquery) that
1603 -- is prohibited in parallel plans.
1604 explain (costs off) select distinct
1606 (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1)
1607 from tenk1 t, generate_series(1, 1000);
1609 ---------------------------------------------------------------------------------
1612 Sort Key: t.unique1, ((SubPlan 1))
1616 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t
1617 -> Function Scan on generate_series
1619 -> Index Only Scan using tenk1_unique1 on tenk1
1620 Index Cond: (unique1 = t.unique1)
1623 explain (costs off) select
1625 (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1)
1626 from tenk1 t, generate_series(1, 1000)
1629 ---------------------------------------------------------------------------
1631 Sort Key: t.unique1, ((SubPlan 1))
1635 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t
1636 -> Function Scan on generate_series
1638 -> Index Only Scan using tenk1_unique1 on tenk1
1639 Index Cond: (unique1 = t.unique1)
1642 -- Parallel sort but with expression not available until the upper rel.
1643 explain (costs off) select distinct sub.unique1, stringu1 || random()::text
1644 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1646 ---------------------------------------------------------------------------------------------
1649 Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1653 -> Parallel Index Scan using tenk1_unique1 on tenk1
1654 -> Function Scan on generate_series
1657 explain (costs off) select sub.unique1, stringu1 || random()::text
1658 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1661 ---------------------------------------------------------------------------------------
1663 Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1667 -> Parallel Index Scan using tenk1_unique1 on tenk1
1668 -> Function Scan on generate_series
1671 reset enable_hashagg;
1672 reset enable_seqscan;
1673 reset enable_incremental_sort;
1674 reset parallel_tuple_cost;
1675 reset parallel_setup_cost;
1676 reset min_parallel_table_scan_size;
1677 reset min_parallel_index_scan_size;
1678 -- Ensure incremental sorts work for amcanorderbyop type indexes
1679 create table point_table (a point, b int);
1680 create index point_table_a_idx on point_table using gist(a);
1681 -- Ensure we get an incremental sort plan for both of the following queries
1682 explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b limit 1;
1684 ---------------------------------------------------------------
1687 Sort Key: ((a <-> '(5,5)'::point)), b
1688 Presorted Key: ((a <-> '(5,5)'::point))
1689 -> Index Scan using point_table_a_idx on point_table
1690 Order By: (a <-> '(5,5)'::point)
1693 explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b desc limit 1;
1695 ---------------------------------------------------------------
1698 Sort Key: ((a <-> '(5,5)'::point)), b DESC
1699 Presorted Key: ((a <-> '(5,5)'::point))
1700 -> Index Scan using point_table_a_idx on point_table
1701 Order By: (a <-> '(5,5)'::point)
1704 -- Ensure we get an incremental sort on the outer side of the mergejoin
1707 (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
1708 order by t1.four, t1.two limit 1;
1710 -----------------------------------------------------------------------
1713 Merge Cond: ((tenk1.four = t2.four) AND (tenk1.two = t2.two))
1715 Sort Key: tenk1.four, tenk1.two
1716 Presorted Key: tenk1.four
1718 Sort Key: tenk1.four
1719 -> Seq Scan on tenk1
1721 Sort Key: t2.four, t2.two
1722 -> Seq Scan on tenk1 t2