1 //===-- Generic implementation of memory function building blocks ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file provides generic C++ building blocks.
10 // Depending on the requested size, the block operation uses unsigned integral
11 // types, vector types or an array of the type with the maximum size.
13 // The maximum size is passed as a template argument. For instance, on x86
14 // platforms that only supports integral types the maximum size would be 8
15 // (corresponding to uint64_t). On this platform if we request the size 32, this
16 // would be treated as a cpp::array<uint64_t, 4>.
18 // On the other hand, if the platform is x86 with support for AVX the maximum
19 // size is 32 and the operation can be handled with a single native operation.
21 //===----------------------------------------------------------------------===//
23 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
24 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
26 #include "src/__support/CPP/array.h"
27 #include "src/__support/CPP/type_traits.h"
28 #include "src/__support/common.h"
29 #include "src/__support/endian.h"
30 #include "src/__support/macros/optimization.h"
31 #include "src/string/memory_utils/op_builtin.h"
32 #include "src/string/memory_utils/utils.h"
36 namespace __llvm_libc
{
37 // Compiler types using the vector attributes.
38 using uint8x1_t
= uint8_t __attribute__((__vector_size__(1)));
39 using uint8x2_t
= uint8_t __attribute__((__vector_size__(2)));
40 using uint8x4_t
= uint8_t __attribute__((__vector_size__(4)));
41 using uint8x8_t
= uint8_t __attribute__((__vector_size__(8)));
42 using uint8x16_t
= uint8_t __attribute__((__vector_size__(16)));
43 using uint8x32_t
= uint8_t __attribute__((__vector_size__(32)));
44 using uint8x64_t
= uint8_t __attribute__((__vector_size__(64)));
45 } // namespace __llvm_libc
47 namespace __llvm_libc::generic
{
48 // We accept three types of values as elements for generic operations:
49 // - scalar : unsigned integral types
50 // - vector : compiler types using the vector attributes
51 // - array : a cpp::array<T, N> where T is itself either a scalar or a vector.
52 // The following traits help discriminate between these cases.
54 constexpr bool is_scalar_v
= cpp::is_integral_v
<T
> && cpp::is_unsigned_v
<T
>;
57 constexpr bool is_vector_v
=
58 cpp::details::is_unqualified_any_of
<T
, uint8x1_t
, uint8x2_t
, uint8x4_t
,
59 uint8x8_t
, uint8x16_t
, uint8x32_t
,
62 template <class T
> struct is_array
: cpp::false_type
{};
63 template <class T
, size_t N
> struct is_array
<cpp::array
<T
, N
>> {
64 static constexpr bool value
= is_scalar_v
<T
> || is_vector_v
<T
>;
66 template <typename T
> constexpr bool is_array_v
= is_array
<T
>::value
;
69 constexpr bool is_element_type_v
=
70 is_scalar_v
<T
> || is_vector_v
<T
> || is_array_v
<T
>;
73 template <class T
> struct array_size
{};
74 template <class T
, size_t N
>
75 struct array_size
<cpp::array
<T
, N
>> : cpp::integral_constant
<size_t, N
> {};
76 template <typename T
> constexpr size_t array_size_v
= array_size
<T
>::value
;
78 // Generic operations for the above type categories.
80 template <typename T
> T
load(CPtr src
) {
81 static_assert(is_element_type_v
<T
>);
82 if constexpr (is_scalar_v
<T
> || is_vector_v
<T
>) {
83 return ::__llvm_libc::load
<T
>(src
);
84 } else if constexpr (is_array_v
<T
>) {
85 using value_type
= typename
T::value_type
;
87 for (size_t I
= 0; I
< array_size_v
<T
>; ++I
)
88 Value
[I
] = load
<value_type
>(src
+ (I
* sizeof(value_type
)));
93 template <typename T
> void store(Ptr dst
, T value
) {
94 static_assert(is_element_type_v
<T
>);
95 if constexpr (is_scalar_v
<T
> || is_vector_v
<T
>) {
96 ::__llvm_libc::store
<T
>(dst
, value
);
97 } else if constexpr (is_array_v
<T
>) {
98 using value_type
= typename
T::value_type
;
99 for (size_t I
= 0; I
< array_size_v
<T
>; ++I
)
100 store
<value_type
>(dst
+ (I
* sizeof(value_type
)), value
[I
]);
104 template <typename T
> T
splat(uint8_t value
) {
105 static_assert(is_scalar_v
<T
> || is_vector_v
<T
>);
106 if constexpr (is_scalar_v
<T
>)
107 return T(~0) / T(0xFF) * T(value
);
108 else if constexpr (is_vector_v
<T
>) {
110 // This for loop is optimized out for vector types.
111 for (size_t i
= 0; i
< sizeof(T
); ++i
)
117 static_assert((UINTPTR_MAX
== 4294967295U) ||
118 (UINTPTR_MAX
== 18446744073709551615UL),
119 "We currently only support 32- or 64-bit platforms");
121 #if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
122 #define LLVM_LIBC_HAS_UINT64
126 // Checks that each type is sorted in strictly decreasing order of size.
127 // i.e. sizeof(First) > sizeof(Second) > ... > sizeof(Last)
128 template <typename First
> constexpr bool is_decreasing_size() {
129 return sizeof(First
) == 1;
131 template <typename First
, typename Second
, typename
... Next
>
132 constexpr bool is_decreasing_size() {
133 if constexpr (sizeof...(Next
) > 0)
134 return sizeof(First
) > sizeof(Second
) && is_decreasing_size
<Next
...>();
136 return sizeof(First
) > sizeof(Second
) && is_decreasing_size
<Second
>();
139 template <size_t Size
, typename
... Ts
> struct Largest
;
140 template <size_t Size
> struct Largest
<Size
> : cpp::type_identity
<uint8_t> {};
141 template <size_t Size
, typename T
, typename
... Ts
>
142 struct Largest
<Size
, T
, Ts
...> {
143 using next
= Largest
<Size
, Ts
...>;
144 using type
= cpp::conditional_t
<(Size
>= sizeof(T
)), T
, typename
next::type
>;
147 } // namespace details
149 // 'SupportedTypes' holds a list of natively supported types.
150 // The types are instanciations of ScalarType or VectorType.
151 // They should be ordered in strictly decreasing order.
152 // The 'TypeFor<Size>' type retrieves is the largest supported type that can
153 // handle 'Size' bytes. e.g.
155 // using ST = SupportedTypes<ScalarType<uint16_t>, ScalarType<uint8_t>>;
156 // using Type = ST::TypeFor<10>;
157 // static_assert(cpp:is_same_v<Type, ScalarType<uint16_t>>);
159 template <typename First
, typename
... Ts
> struct SupportedTypes
{
160 static_assert(details::is_decreasing_size
<First
, Ts
...>());
162 using MaxType
= First
;
164 template <size_t Size
>
165 using TypeFor
= typename
details::Largest
<Size
, First
, Ts
...>::type
;
168 // Map from sizes to structures offering static load, store and splat methods.
169 // Note: On platforms lacking vector support, we use the ArrayType below and
170 // decompose the operation in smaller pieces.
172 // Lists a generic native types to use for Memset and Memmove operations.
173 // TODO: Inject the native types within Memset and Memmove depending on the
174 // target architectures and derive MaxSize from it.
175 using NativeTypeMap
= SupportedTypes
<uint8x64_t
, //
178 #if defined(LLVM_LIBC_HAS_UINT64)
179 uint64_t, // Not available on 32bit
187 // Helper to test if a type is void.
188 template <typename T
> inline constexpr bool is_void_v
= cpp::is_same_v
<T
, void>;
190 // In case the 'Size' is not supported we can fall back to a sequence of smaller
191 // operations using the largest natively supported type.
192 template <size_t Size
, size_t MaxSize
> static constexpr bool useArrayType() {
193 return (Size
> MaxSize
) && ((Size
% MaxSize
) == 0) &&
194 !details::is_void_v
<NativeTypeMap::TypeFor
<MaxSize
>>;
197 // Compute the type to handle an operation of 'Size' bytes knowing that the
198 // underlying platform only support native types up to MaxSize bytes.
199 template <size_t Size
, size_t MaxSize
>
200 using getTypeFor
= cpp::conditional_t
<
201 useArrayType
<Size
, MaxSize
>(),
202 cpp::array
<NativeTypeMap::TypeFor
<MaxSize
>, Size
/ MaxSize
>,
203 NativeTypeMap::TypeFor
<Size
>>;
205 } // namespace details
207 ///////////////////////////////////////////////////////////////////////////////
209 ///////////////////////////////////////////////////////////////////////////////
211 template <typename T
> struct Memset
{
212 static constexpr size_t SIZE
= sizeof(T
);
214 LIBC_INLINE
static void block(Ptr dst
, uint8_t value
) {
215 static_assert(is_element_type_v
<T
>);
216 if constexpr (is_scalar_v
<T
> || is_vector_v
<T
>) {
217 store
<T
>(dst
, splat
<T
>(value
));
218 } else if constexpr (is_array_v
<T
>) {
219 using value_type
= typename
T::value_type
;
220 const auto Splat
= splat
<value_type
>(value
);
221 for (size_t I
= 0; I
< array_size_v
<T
>; ++I
)
222 store
<value_type
>(dst
+ (I
* sizeof(value_type
)), Splat
);
226 LIBC_INLINE
static void tail(Ptr dst
, uint8_t value
, size_t count
) {
227 block(dst
+ count
- SIZE
, value
);
230 LIBC_INLINE
static void head_tail(Ptr dst
, uint8_t value
, size_t count
) {
232 tail(dst
, value
, count
);
235 LIBC_INLINE
static void loop_and_tail(Ptr dst
, uint8_t value
, size_t count
) {
236 static_assert(SIZE
> 1, "a loop of size 1 does not need tail");
239 block(dst
+ offset
, value
);
241 } while (offset
< count
- SIZE
);
242 tail(dst
, value
, count
);
246 template <typename T
, typename
... TS
> struct MemsetSequence
{
247 static constexpr size_t SIZE
= (sizeof(T
) + ... + sizeof(TS
));
248 LIBC_INLINE
static void block(Ptr dst
, uint8_t value
) {
249 Memset
<T
>::block(dst
, value
);
250 if constexpr (sizeof...(TS
) > 0) {
251 return MemsetSequence
<TS
...>::block(dst
+ sizeof(T
), value
);
256 ///////////////////////////////////////////////////////////////////////////////
258 ///////////////////////////////////////////////////////////////////////////////
260 template <typename T
> struct Memmove
{
261 static constexpr size_t SIZE
= sizeof(T
);
263 LIBC_INLINE
static void block(Ptr dst
, CPtr src
) {
264 store
<T
>(dst
, load
<T
>(src
));
267 LIBC_INLINE
static void head_tail(Ptr dst
, CPtr src
, size_t count
) {
268 const size_t offset
= count
- SIZE
;
269 // The load and store operations can be performed in any order as long as
270 // they are not interleaved. More investigations are needed to determine
272 const auto head
= load
<T
>(src
);
273 const auto tail
= load
<T
>(src
+ offset
);
275 store
<T
>(dst
+ offset
, tail
);
278 // Align forward suitable when dst < src. The alignment is performed with
279 // an HeadTail operation of count ∈ [Alignment, 2 x Alignment].
281 // e.g. Moving two bytes forward, we make sure src is aligned.
283 // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
284 // [____LLLLLLLL_____________________]
285 // [___________LLLLLLLA______________]
286 // [_SSSSSSSS________________________]
287 // [________SSSSSSSS_________________]
289 // e.g. Moving two bytes forward, we make sure dst is aligned.
291 // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
292 // [____LLLLLLLL_____________________]
293 // [______LLLLLLLL___________________]
294 // [_SSSSSSSS________________________]
295 // [___SSSSSSSA______________________]
296 template <Arg AlignOn
>
297 LIBC_INLINE
static void align_forward(Ptr
&dst
, CPtr
&src
, size_t &count
) {
300 size_t prev_count
= count
;
301 align_to_next_boundary
<SIZE
, AlignOn
>(dst
, src
, count
);
302 adjust(SIZE
, dst
, src
, count
);
303 head_tail(prev_dst
, prev_src
, prev_count
- count
);
306 // Align backward suitable when dst > src. The alignment is performed with
307 // an HeadTail operation of count ∈ [Alignment, 2 x Alignment].
309 // e.g. Moving two bytes backward, we make sure src is aligned.
311 // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
312 // [ _________________ALLLLLLL_______]
313 // [ ___________________LLLLLLLL_____]
314 // [____________________SSSSSSSS_____]
315 // [______________________SSSSSSSS___]
317 // e.g. Moving two bytes backward, we make sure dst is aligned.
319 // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
320 // [ _______________LLLLLLLL_________]
321 // [ ___________________LLLLLLLL_____]
322 // [__________________ASSSSSSS_______]
323 // [______________________SSSSSSSS___]
324 template <Arg AlignOn
>
325 LIBC_INLINE
static void align_backward(Ptr
&dst
, CPtr
&src
, size_t &count
) {
326 Ptr headtail_dst
= dst
+ count
;
327 CPtr headtail_src
= src
+ count
;
328 size_t headtail_size
= 0;
329 align_to_next_boundary
<SIZE
, AlignOn
>(headtail_dst
, headtail_src
,
331 adjust(-2 * SIZE
, headtail_dst
, headtail_src
, headtail_size
);
332 head_tail(headtail_dst
, headtail_src
, headtail_size
);
333 count
-= headtail_size
;
336 // Move forward suitable when dst < src. We load the tail bytes before
337 // handling the loop.
339 // e.g. Moving two bytes
341 // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
342 // [_________________________LLLLLLLL___]
343 // [___LLLLLLLL_________________________]
344 // [_SSSSSSSS___________________________]
345 // [___________LLLLLLLL_________________]
346 // [_________SSSSSSSS___________________]
347 // [___________________LLLLLLLL_________]
348 // [_________________SSSSSSSS___________]
349 // [_______________________SSSSSSSS_____]
350 LIBC_INLINE
static void loop_and_tail_forward(Ptr dst
, CPtr src
,
352 static_assert(SIZE
> 1, "a loop of size 1 does not need tail");
353 const size_t tail_offset
= count
- SIZE
;
354 const auto tail_value
= load
<T
>(src
+ tail_offset
);
358 block(dst
+ offset
, src
+ offset
);
360 } while (offset
< count
- SIZE
);
361 store
<T
>(dst
+ tail_offset
, tail_value
);
364 // Move backward suitable when dst > src. We load the head bytes before
365 // handling the loop.
367 // e.g. Moving two bytes
369 // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
370 // [___LLLLLLLL_________________________]
371 // [_________________________LLLLLLLL___]
372 // [___________________________SSSSSSSS_]
373 // [_________________LLLLLLLL___________]
374 // [___________________SSSSSSSS_________]
375 // [_________LLLLLLLL___________________]
376 // [___________SSSSSSSS_________________]
377 // [_____SSSSSSSS_______________________]
378 LIBC_INLINE
static void loop_and_tail_backward(Ptr dst
, CPtr src
,
380 static_assert(SIZE
> 1, "a loop of size 1 does not need tail");
381 const auto head_value
= load
<T
>(src
);
382 ptrdiff_t offset
= count
- SIZE
;
385 block(dst
+ offset
, src
+ offset
);
387 } while (offset
>= 0);
388 store
<T
>(dst
, head_value
);
392 ///////////////////////////////////////////////////////////////////////////////
394 ///////////////////////////////////////////////////////////////////////////////
395 template <size_t Size
> struct Bcmp
{
396 static constexpr size_t SIZE
= Size
;
397 static constexpr size_t MaxSize
= LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64
)
401 template <typename T
> LIBC_INLINE
static uint32_t load_xor(CPtr p1
, CPtr p2
) {
402 static_assert(sizeof(T
) <= sizeof(uint32_t));
403 return load
<T
>(p1
) ^ load
<T
>(p2
);
406 template <typename T
>
407 LIBC_INLINE
static uint32_t load_not_equal(CPtr p1
, CPtr p2
) {
408 return load
<T
>(p1
) != load
<T
>(p2
);
411 LIBC_INLINE
static BcmpReturnType
block(CPtr p1
, CPtr p2
) {
412 if constexpr (Size
== 1) {
413 return load_xor
<uint8_t>(p1
, p2
);
414 } else if constexpr (Size
== 2) {
415 return load_xor
<uint16_t>(p1
, p2
);
416 } else if constexpr (Size
== 4) {
417 return load_xor
<uint32_t>(p1
, p2
);
418 } else if constexpr (Size
== 8) {
419 return load_not_equal
<uint64_t>(p1
, p2
);
420 } else if constexpr (details::useArrayType
<Size
, MaxSize
>()) {
421 for (size_t offset
= 0; offset
< Size
; offset
+= MaxSize
)
422 if (auto value
= Bcmp
<MaxSize
>::block(p1
+ offset
, p2
+ offset
))
425 deferred_static_assert("Unimplemented Size");
427 return BcmpReturnType::ZERO();
430 LIBC_INLINE
static BcmpReturnType
tail(CPtr p1
, CPtr p2
, size_t count
) {
431 return block(p1
+ count
- SIZE
, p2
+ count
- SIZE
);
434 LIBC_INLINE
static BcmpReturnType
head_tail(CPtr p1
, CPtr p2
, size_t count
) {
435 return block(p1
, p2
) | tail(p1
, p2
, count
);
438 LIBC_INLINE
static BcmpReturnType
loop_and_tail(CPtr p1
, CPtr p2
,
440 static_assert(Size
> 1, "a loop of size 1 does not need tail");
443 if (auto value
= block(p1
+ offset
, p2
+ offset
))
446 } while (offset
< count
- SIZE
);
447 return tail(p1
, p2
, count
);
451 ///////////////////////////////////////////////////////////////////////////////
453 ///////////////////////////////////////////////////////////////////////////////
454 template <size_t Size
> struct Memcmp
{
455 static constexpr size_t SIZE
= Size
;
456 static constexpr size_t MaxSize
= LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64
)
460 template <typename T
> LIBC_INLINE
static T
load_be(CPtr ptr
) {
461 return Endian::to_big_endian(load
<T
>(ptr
));
464 template <typename T
>
465 LIBC_INLINE
static MemcmpReturnType
load_be_diff(CPtr p1
, CPtr p2
) {
466 return load_be
<T
>(p1
) - load_be
<T
>(p2
);
469 template <typename T
>
470 LIBC_INLINE
static MemcmpReturnType
load_be_cmp(CPtr p1
, CPtr p2
) {
471 const auto la
= load_be
<T
>(p1
);
472 const auto lb
= load_be
<T
>(p2
);
473 return la
> lb
? 1 : la
< lb
? -1 : 0;
476 LIBC_INLINE
static MemcmpReturnType
block(CPtr p1
, CPtr p2
) {
477 if constexpr (Size
== 1) {
478 return load_be_diff
<uint8_t>(p1
, p2
);
479 } else if constexpr (Size
== 2) {
480 return load_be_diff
<uint16_t>(p1
, p2
);
481 } else if constexpr (Size
== 4) {
482 return load_be_cmp
<uint32_t>(p1
, p2
);
483 } else if constexpr (Size
== 8) {
484 return load_be_cmp
<uint64_t>(p1
, p2
);
485 } else if constexpr (details::useArrayType
<Size
, MaxSize
>()) {
486 for (size_t offset
= 0; offset
< Size
; offset
+= MaxSize
)
487 if (Bcmp
<MaxSize
>::block(p1
+ offset
, p2
+ offset
))
488 return Memcmp
<MaxSize
>::block(p1
+ offset
, p2
+ offset
);
489 return MemcmpReturnType::ZERO();
490 } else if constexpr (Size
== 3) {
491 if (auto value
= Memcmp
<2>::block(p1
, p2
))
493 return Memcmp
<1>::block(p1
+ 2, p2
+ 2);
495 deferred_static_assert("Unimplemented Size");
499 LIBC_INLINE
static MemcmpReturnType
tail(CPtr p1
, CPtr p2
, size_t count
) {
500 return block(p1
+ count
- SIZE
, p2
+ count
- SIZE
);
503 LIBC_INLINE
static MemcmpReturnType
head_tail(CPtr p1
, CPtr p2
,
505 if (auto value
= block(p1
, p2
))
507 return tail(p1
, p2
, count
);
510 LIBC_INLINE
static MemcmpReturnType
loop_and_tail(CPtr p1
, CPtr p2
,
512 static_assert(Size
> 1, "a loop of size 1 does not need tail");
515 if (auto value
= block(p1
+ offset
, p2
+ offset
))
518 } while (offset
< count
- SIZE
);
519 return tail(p1
, p2
, count
);
523 } // namespace __llvm_libc::generic
525 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H