//===-- Memory utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/cstddef.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/endian.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"     // LIBC_HAS_BUILTIN
#include "src/__support/macros/properties/architectures.h"

#include <stddef.h> // size_t
#include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN

namespace LIBC_NAMESPACE {

// Allows compile time error reporting in `if constexpr` branches.
template <bool flag = false>
LIBC_INLINE void deferred_static_assert(const char *msg) {
  static_assert(flag, "compilation error");
  (void)msg;
}

// Return whether `value` is zero or a power of two.
LIBC_INLINE constexpr bool is_power2_or_zero(size_t value) {
  return (value & (value - 1U)) == 0;
}

// Return whether `value` is a power of two.
LIBC_INLINE constexpr bool is_power2(size_t value) {
  return value && is_power2_or_zero(value);
}

// Compile time version of log2 that handles 0.
LIBC_INLINE constexpr size_t log2s(size_t value) {
  return (value == 0 || value == 1) ? 0 : 1 + log2s(value / 2);
}

// Returns the first power of two preceding value or value if it is already a
// power of two (or 0 when value is 0).
LIBC_INLINE constexpr size_t le_power2(size_t value) {
  return value == 0 ? value : 1ULL << log2s(value);
}

// Returns the first power of two following value or value if it is already a
// power of two (or 0 when value is 0).
LIBC_INLINE constexpr size_t ge_power2(size_t value) {
  return is_power2_or_zero(value) ? value : 1ULL << (log2s(value) + 1);
}

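// Illustrative compile-time checks (a sketch, not part of the original
// header); the helpers above are constexpr, so their behavior can be
// verified with static_assert:
static_assert(is_power2_or_zero(0) && is_power2_or_zero(16));
static_assert(!is_power2(0) && is_power2(16) && !is_power2(20));
static_assert(log2s(1) == 0 && log2s(16) == 4 && log2s(20) == 4);
static_assert(le_power2(20) == 16 && ge_power2(20) == 32);
static_assert(le_power2(16) == 16 && ge_power2(16) == 16);
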
// Returns the number of bytes to subtract from ptr to get to the previous
// multiple of alignment. If ptr is already aligned returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
  static_assert(is_power2(alignment), "alignment must be a power of 2");
  return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
  static_assert(is_power2(alignment), "alignment must be a power of 2");
  // The logic is not straightforward and involves unsigned modulo arithmetic
  // but the generated code is as fast as it can be.
  return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned returns alignment.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
  return alignment - distance_to_align_down<alignment>(ptr);
}

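// Worked example (a sketch, not part of the original header): for a pointer
// at address 0x1003 and alignment 16,
//   distance_to_align_down<16>   -> 0x1003 & 15 == 3   (previous boundary is
//                                   0x1000)
//   distance_to_align_up<16>     -> -0x1003 & 15 == 13 (next boundary is
//                                   0x1010)
//   distance_to_next_aligned<16> -> 16 - 3 == 13, and returns 16 rather than
//                                   0 when the pointer is already aligned.
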
// Returns the same pointer but notifies the compiler that it is aligned.
template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
  return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}

// Returns true iff memory regions [p1, p1 + size] and [p2, p2 + size] are
// disjoint.
LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
  const ptrdiff_t sdiff =
      static_cast<const char *>(p1) - static_cast<const char *>(p2);
  // We use bit_cast to make sure that we don't run into accidental integer
  // promotion. Notably the unary minus operator goes through integer promotion
  // at the expression level. We assume arithmetic to be two's complement
  // (i.e., bit_cast has the same behavior as a regular signed to unsigned
  // cast).
  static_assert(-1 == ~0, "not 2's complement");
  const size_t udiff = cpp::bit_cast<size_t>(sdiff);
  // Integer promotion would be caught here.
  const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
  // This is expected to compile to a conditional move.
  return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
}

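// Illustrative sketch (not part of the original header): given 'char buf[16]',
// is_disjoint(buf, buf + 8, 8) is true (the regions only touch), while
// is_disjoint(buf, buf + 4, 8) is false (the regions overlap by 4 bytes).
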
#if LIBC_HAS_BUILTIN(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif

#if LIBC_HAS_BUILTIN(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif

// Performs a constant count copy.
template <size_t Size>
LIBC_INLINE void memcpy_inline(void *__restrict dst,
                               const void *__restrict src) {
#ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
  __builtin_memcpy_inline(dst, src, Size);
#else
  // In memory functions `memcpy_inline` is instantiated several times with
  // different values of the Size parameter. This doesn't play well with GCC's
  // Value Range Analysis that wrongly detects out of bounds accesses. We
  // disable the 'array-bounds' warning for the purpose of this function.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
  for (size_t i = 0; i < Size; ++i)
    static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
#pragma GCC diagnostic pop
#endif
}

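// Illustrative sketch (not part of the original header): the copy size must
// be a compile-time constant, e.g. copying exactly 8 bytes:
//   memcpy_inline<8>(dst, src);
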
using Ptr = cpp::byte *;        // Pointer to raw data.
using CPtr = const cpp::byte *; // Const pointer to raw data.

// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type.
template <typename T> struct StrictIntegralType {
  static_assert(cpp::is_integral_v<T>);

  // Can only be constructed from a T.
  template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
  LIBC_INLINE StrictIntegralType(U value) : value(value) {}

  // Allows using the type in an if statement.
  LIBC_INLINE explicit operator bool() const { return value; }

  // If type is unsigned (bcmp) we allow bitwise OR operations.
  LIBC_INLINE StrictIntegralType
  operator|(const StrictIntegralType &Rhs) const {
    static_assert(!cpp::is_signed_v<T>);
    return value | Rhs.value;
  }

  // For interaction with the C API we allow explicit conversion back to the
  // `int` type.
  LIBC_INLINE explicit operator int() const {
    // bit_cast makes sure that T and int have the same size.
    return cpp::bit_cast<int>(value);
  }

  // Helpers to get the zero and a canonical non-zero value.
  LIBC_INLINE static constexpr StrictIntegralType ZERO() { return {T(0)}; }
  LIBC_INLINE static constexpr StrictIntegralType NONZERO() { return {T(1)}; }

private:
  T value;
};

using MemcmpReturnType = StrictIntegralType<int32_t>;
using BcmpReturnType = StrictIntegralType<uint32_t>;

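// Illustrative sketch (not part of the original header): MemcmpReturnType
// only accepts an exact int32_t, so accidental integral promotions become
// compile errors.
//   MemcmpReturnType ok = int32_t(-1); // OK: U is exactly int32_t.
//   MemcmpReturnType ko = int16_t(-1); // Error: promotion is rejected.
//   if (ok)                            // OK: explicit operator bool.
//     (void)static_cast<int>(ok);      // OK: explicit conversion for C APIs.
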
// This implements the semantic of 'memcmp' returning a negative value when 'a'
// is less than 'b', '0' when 'a' equals 'b' and a positive number otherwise.
LIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
  // We perform the difference as an int64_t.
  const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
  // For the int64_t to int32_t conversion we want the following properties:
  // - int32_t[31:31] == 1 iff diff < 0
  // - int32_t[31:0] == 0 iff diff == 0

  // We also observe that:
  // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
  // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
  // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
  // - https://godbolt.org/z/8W7qWP6e5
  // - This implies that we need only look at diff[32:32] to determine the
  //   sign bit for the returned int32_t.

  // So, we do the following:
  // - int32_t[31:31] = diff[32:32]
  // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.

  // And, we can achieve the above by the expression below. We could have also
  // used (diff >> 1) | (diff & 0x1) but (diff & 0xFFFF) is faster than
  // (diff & 0x1). https://godbolt.org/z/j3b569rW1
  return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
}

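// Worked example (a sketch, not part of the original header):
//   cmp_uint32_t(1, 2): diff == -1, (-1 >> 1) | (-1 & 0xFFFF) == -1 -> negative
//   cmp_uint32_t(2, 1): diff == 1,  (1 >> 1) | (1 & 0xFFFF) == 1    -> positive
//   cmp_uint32_t(7, 7): diff == 0,  result == 0                     -> equal
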
// Returns a negative value if 'a' is less than 'b' and a positive value
// otherwise. This implements the semantic of 'memcmp' when we know that 'a'
// and 'b' differ.
LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
#if defined(LIBC_TARGET_ARCH_IS_X86_64)
  // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
  // positive and negative values respectively as they are one value apart:
  //   xor eax, eax         <- free
  //   cmp rdi, rsi         <- serializing
  //   adc eax, 2147483647  <- serializing
  //
  // Unfortunately we found instances of client code that negate the result of
  // 'memcmp' to reverse ordering. Because signed integers are not symmetric
  // (e.g., int8_t ∈ [-128, 127]) returning 'INT_MIN' would break such code as
  // `-INT_MIN` is not representable as an int32_t.
  //
  // As a consequence, we use 5 and -5 which is still nice in terms of latency:
  //   cmp rdi, rsi         <- serializing
  //   mov ecx, -5          <- can be done in parallel
  //   mov eax, 5           <- can be done in parallel
  //   cmovb eax, ecx       <- serializing
  static constexpr int32_t POSITIVE = 5;
  static constexpr int32_t NEGATIVE = -5;
#else
  // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
  // On ARMv8, both strategies lead to the same performance.
  static constexpr int32_t POSITIVE = 1;
  static constexpr int32_t NEGATIVE = -1;
#endif
  static_assert(POSITIVE > 0);
  static_assert(NEGATIVE < 0);
  return a < b ? NEGATIVE : POSITIVE;
}

// Loads bytes from memory (possibly unaligned) and materializes them as
// type T.
template <typename T> LIBC_INLINE T load(CPtr ptr) {
  T Out;
  memcpy_inline<sizeof(T)>(&Out, ptr);
  return Out;
}

// Stores a value of type T in memory (possibly unaligned).
template <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
  memcpy_inline<sizeof(T)>(ptr, &value);
}

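// Illustrative sketch (not part of the original header): read a possibly
// unaligned 32-bit value and write it back at an unaligned destination:
//   const uint32_t v = load<uint32_t>(src);
//   store<uint32_t>(dst + 1, v);
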
// On architectures that do not allow for unaligned access we perform several
// aligned accesses and recombine them through shifts and logical operations.
// For instance, if we know that the pointer is 2-byte aligned we can decompose
// a 64-bit operation into four 16-bit operations.

// Loads a 'ValueType' by decomposing it into several loads that are assumed to
// be aligned.
// e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE ValueType load_aligned(CPtr src) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
  if constexpr (sizeof...(TS) > 0) {
    constexpr size_t shift = sizeof(T) * 8;
    const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
    if constexpr (Endian::IS_LITTLE)
      return value | (next << shift);
    else if constexpr (Endian::IS_BIG)
      return (value << shift) | next;
    else
      deferred_static_assert("Invalid endianness");
  } else {
    return value;
  }
}

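// Worked example (a sketch, not part of the original header): on a
// little-endian machine, load_aligned<uint32_t, uint16_t, uint16_t>(src)
// performs two aligned 16-bit loads 'lo' and 'hi' and recombines them as
// lo | (hi << 16).
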
// Alias for loading a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  return load_aligned<uint32_t, T, TS...>(src + offset);
}

// Alias for loading a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  return load_aligned<uint64_t, T, TS...>(src + offset);
}

// Stores a 'ValueType' by decomposing it into several stores that are assumed
// to be aligned.
// e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  constexpr size_t shift = sizeof(T) * 8;
  if constexpr (Endian::IS_LITTLE) {
    store<T>(assume_aligned<sizeof(T)>(dst), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> shift, dst + sizeof(T));
  } else if constexpr (Endian::IS_BIG) {
    constexpr size_t OFFSET = (0 + ... + sizeof(TS));
    store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> shift, dst);
  } else {
    deferred_static_assert("Invalid endianness");
  }
}

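// Worked example (a sketch, not part of the original header): on a
// little-endian machine, store_aligned<uint32_t, uint16_t, uint16_t>(v, dst)
// stores the low 16 bits of 'v' at 'dst' and the high 16 bits at 'dst + 2',
// both with aligned 16-bit stores.
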
// Alias for storing a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  store_aligned<uint32_t, T, TS...>(value, dst + offset);
}

// Alias for storing a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  store_aligned<uint64_t, T, TS...>(value, dst + offset);
}

// Advances the pointers p1 and p2 by offset bytes and decreases count by the
// same amount.
template <typename T1, typename T2>
LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
                        T2 *__restrict &p2, size_t &count) {
  p1 += offset;
  p2 += offset;
  count -= offset;
}

// Advances p1 and p2 so p1 gets aligned to the next SIZE bytes boundary
// and decreases count by the same amount.
// We make sure the compiler knows about the adjusted pointer alignment.
template <size_t SIZE, typename T1, typename T2>
void align_p1_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                               size_t &count) {
  adjust(distance_to_next_aligned<SIZE>(p1), p1, p2, count);
  p1 = assume_aligned<SIZE>(p1);
}

// Same as align_p1_to_next_boundary above but with a single pointer instead.
template <size_t SIZE, typename T1>
LIBC_INLINE void align_to_next_boundary(T1 *&p1, size_t &count) {
  CPtr dummy;
  align_p1_to_next_boundary<SIZE>(p1, dummy, count);
}

// An enum class that discriminates between the first and second pointer.
enum class Arg { P1, P2, Dst = P1, Src = P2 };

// Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
// Precondition: &p1 != &p2
template <size_t SIZE, Arg AlignOn, typename T1, typename T2>
LIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                                        size_t &count) {
  if constexpr (AlignOn == Arg::P1)
    align_p1_to_next_boundary<SIZE>(p1, p2, count);
  else if constexpr (AlignOn == Arg::P2)
    align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
  else
    deferred_static_assert("AlignOn must be either Arg::P1 or Arg::P2");
}

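// Illustrative sketch (not part of the original header): a copy routine might
// align the destination to a 32-byte boundary before entering its bulk loop:
//   align_to_next_boundary<32, Arg::Dst>(dst, src, count);
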
template <size_t SIZE> struct AlignHelper {
  LIBC_INLINE AlignHelper(CPtr ptr)
      : offset_(distance_to_next_aligned<SIZE>(ptr)) {}

  LIBC_INLINE bool not_aligned() const { return offset_ != SIZE; }
  LIBC_INLINE uintptr_t offset() const { return offset_; }

private:
  uintptr_t offset_;
};

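// Illustrative sketch (not part of the original header):
//   AlignHelper<16> helper(ptr);
//   if (helper.not_aligned()) {
//     // Handle the first helper.offset() bytes, then proceed aligned.
//   }
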
} // namespace LIBC_NAMESPACE

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H