[AMDGPU][True16][CodeGen] true16 codegen pattern for v_med3_u/i16 (#121850)
[llvm-project.git] / libc / src / stdlib / qsort_data.h
blobaa6d9bbc123de853e7c0e911255e55718e719839
1 //===-- Data structures for sorting routines --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H
10 #define LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H
12 #include "src/__support/CPP/cstddef.h"
13 #include "src/__support/macros/config.h"
15 #include <stdint.h>
17 namespace LIBC_NAMESPACE_DECL {
18 namespace internal {
20 class ArrayGenericSize {
21 cpp::byte *array_base;
22 size_t array_len;
23 size_t elem_size;
25 LIBC_INLINE cpp::byte *get_internal(size_t i) const {
26 return array_base + (i * elem_size);
29 public:
30 LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
31 : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
32 elem_size(e) {}
34 static constexpr bool has_fixed_size() { return false; }
36 LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
38 LIBC_INLINE void swap(size_t i, size_t j) const {
39 // It's possible to use 8 byte blocks with `uint64_t`, but that
40 // generates more machine code as the remainder loop gets
41 // unrolled, plus 4 byte operations are more likely to be
42 // efficient on a wider variety of hardware. On x86 LLVM tends
43 // to unroll the block loop again into 2 16 byte swaps per
44 // iteration which is another reason that 4 byte blocks yields
45 // good performance even for big types.
46 using block_t = uint32_t;
47 constexpr size_t BLOCK_SIZE = sizeof(block_t);
49 alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
51 cpp::byte *elem_i = get_internal(i);
52 cpp::byte *elem_j = get_internal(j);
54 const size_t elem_size_rem = elem_size % BLOCK_SIZE;
55 const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
57 while (elem_i != elem_i_block_end) {
58 __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE);
59 __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE);
60 __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE);
62 elem_i += BLOCK_SIZE;
63 elem_j += BLOCK_SIZE;
66 for (size_t n = 0; n < elem_size_rem; ++n) {
67 cpp::byte tmp = elem_i[n];
68 elem_i[n] = elem_j[n];
69 elem_j[n] = tmp;
73 LIBC_INLINE size_t len() const { return array_len; }
75 // Make an Array starting at index |i| and length |s|.
76 LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const {
77 return ArrayGenericSize(get_internal(i), s, elem_size);
80 // Reset this Array to point at a different interval of the same
81 // items starting at index |i|.
82 LIBC_INLINE void reset_bounds(size_t i, size_t s) {
83 array_base = get_internal(i);
84 array_len = s;
88 // Having a specialized Array type for sorting that knows at
89 // compile-time what the size of the element is, allows for much more
90 // efficient swapping and for cheaper offset calculations.
91 template <size_t ELEM_SIZE> class ArrayFixedSize {
92 cpp::byte *array_base;
93 size_t array_len;
95 LIBC_INLINE cpp::byte *get_internal(size_t i) const {
96 return array_base + (i * ELEM_SIZE);
99 public:
100 LIBC_INLINE ArrayFixedSize(void *a, size_t s)
101 : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
103 // Beware this function is used a heuristic for cheap to swap types, so
104 // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
105 // idea perf wise.
106 static constexpr bool has_fixed_size() { return true; }
108 LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
110 LIBC_INLINE void swap(size_t i, size_t j) const {
111 alignas(32) cpp::byte tmp[ELEM_SIZE];
113 cpp::byte *elem_i = get_internal(i);
114 cpp::byte *elem_j = get_internal(j);
116 __builtin_memcpy(tmp, elem_i, ELEM_SIZE);
117 __builtin_memmove(elem_i, elem_j, ELEM_SIZE);
118 __builtin_memcpy(elem_j, tmp, ELEM_SIZE);
121 LIBC_INLINE size_t len() const { return array_len; }
123 // Make an Array starting at index |i| and length |s|.
124 LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const {
125 return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s);
128 // Reset this Array to point at a different interval of the same
129 // items starting at index |i|.
130 LIBC_INLINE void reset_bounds(size_t i, size_t s) {
131 array_base = get_internal(i);
132 array_len = s;
136 } // namespace internal
137 } // namespace LIBC_NAMESPACE_DECL
139 #endif // LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H