workflows/premerge: Fix condition for macos job (#125237)
[llvm-project.git] / libc / src / stdlib / quick_sort.h
blob9ab28302500186b18c67284229de8b0980fe0208
1 //===-- Implementation header for qsort utilities ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
10 #define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
12 #include "src/__support/CPP/bit.h"
13 #include "src/__support/CPP/cstddef.h"
14 #include "src/__support/macros/config.h"
15 #include "src/stdlib/qsort_pivot.h"
17 #include <stdint.h>
19 namespace LIBC_NAMESPACE_DECL {
20 namespace internal {
22 // Branchless Lomuto partition based on the implementation by Lukas
23 // Bergdoll and Orson Peters
24 // https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md.
25 // Simplified to avoid having to stack allocate.
26 template <typename A, typename F>
27 LIBC_INLINE size_t partition_lomuto_branchless(const A &array,
28 const void *pivot,
29 const F &is_less) {
30 const size_t array_len = array.len();
32 size_t left = 0;
33 size_t right = 0;
35 while (right < array_len) {
36 const bool right_is_lt = is_less(array.get(right), pivot);
37 array.swap(left, right);
38 left += static_cast<size_t>(right_is_lt);
39 right += 1;
42 return left;
45 // Optimized for large types that are expensive to move. Not optimized
46 // for integers. It's possible to use a cyclic permutation here for
47 // large types as done in ipnsort but the advantages of this are limited
48 // as `is_less` is a small wrapper around a call to a function pointer
49 // and won't incur much binary-size overhead. The other reason to use
50 // cyclic permutation is to have more efficient swapping, but we don't
51 // know the element size so this isn't applicable here either.
52 template <typename A, typename F>
53 LIBC_INLINE size_t partition_hoare_branchy(const A &array, const void *pivot,
54 const F &is_less) {
55 const size_t array_len = array.len();
57 size_t left = 0;
58 size_t right = array_len;
60 while (true) {
61 while (left < right && is_less(array.get(left), pivot))
62 ++left;
64 while (true) {
65 --right;
66 if (left >= right || is_less(array.get(right), pivot)) {
67 break;
71 if (left >= right)
72 break;
74 array.swap(left, right);
75 ++left;
78 return left;
81 template <typename A, typename F>
82 LIBC_INLINE size_t partition(const A &array, size_t pivot_index,
83 const F &is_less) {
84 // Place the pivot at the beginning of the array.
85 if (pivot_index != 0) {
86 array.swap(0, pivot_index);
89 const A array_without_pivot = array.make_array(1, array.len() - 1);
90 const void *pivot = array.get(0);
92 size_t num_lt;
93 if constexpr (A::has_fixed_size()) {
94 // Branchless Lomuto avoid branch misprediction penalties, but
95 // it also swaps more often which is only faster if the swap is a fast
96 // constant operation.
97 num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less);
98 } else {
99 num_lt = partition_hoare_branchy(array_without_pivot, pivot, is_less);
102 // Place the pivot between the two partitions.
103 array.swap(0, num_lt);
105 return num_lt;
108 template <typename A, typename F>
109 LIBC_INLINE void quick_sort_impl(A &array, const void *ancestor_pivot,
110 size_t limit, const F &is_less) {
111 while (true) {
112 const size_t array_len = array.len();
113 if (array_len <= 1)
114 return;
116 // If too many bad pivot choices were made, simply fall back to
117 // heapsort in order to guarantee `O(N x log(N))` worst-case.
118 if (limit == 0) {
119 heap_sort(array, is_less);
120 return;
123 limit -= 1;
125 const size_t pivot_index = choose_pivot(array, is_less);
127 // If the chosen pivot is equal to the predecessor, then it's the smallest
128 // element in the slice. Partition the slice into elements equal to and
129 // elements greater than the pivot. This case is usually hit when the slice
130 // contains many duplicate elements.
131 if (ancestor_pivot) {
132 if (!is_less(ancestor_pivot, array.get(pivot_index))) {
133 const size_t num_lt =
134 partition(array, pivot_index,
135 [is_less](const void *a, const void *b) -> bool {
136 return !is_less(b, a);
139 // Continue sorting elements greater than the pivot. We know that
140 // `num_lt` cont
141 array.reset_bounds(num_lt + 1, array.len() - (num_lt + 1));
142 ancestor_pivot = nullptr;
143 continue;
147 size_t split_index = partition(array, pivot_index, is_less);
149 if (array_len == 2)
150 // The partition operation sorts the two element array.
151 return;
153 // Split the array into `left`, `pivot`, and `right`.
154 A left = array.make_array(0, split_index);
155 const void *pivot = array.get(split_index);
156 const size_t right_start = split_index + 1;
157 A right = array.make_array(right_start, array.len() - right_start);
159 // Recurse into the left side. We have a fixed recursion limit,
160 // testing shows no real benefit for recursing into the shorter
161 // side.
162 quick_sort_impl(left, ancestor_pivot, limit, is_less);
164 // Continue with the right side.
165 array = right;
166 ancestor_pivot = pivot;
170 constexpr size_t ilog2(size_t n) { return cpp::bit_width(n) - 1; }
172 template <typename A, typename F>
173 LIBC_INLINE void quick_sort(A &array, const F &is_less) {
174 const void *ancestor_pivot = nullptr;
175 // Limit the number of imbalanced partitions to `2 * floor(log2(len))`.
176 // The binary OR by one is used to eliminate the zero-check in the logarithm.
177 const size_t limit = 2 * ilog2((array.len() | 1));
178 quick_sort_impl(array, ancestor_pivot, limit, is_less);
181 } // namespace internal
182 } // namespace LIBC_NAMESPACE_DECL
184 #endif // LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H