1 //===-- Memset implementation for x86_64 ------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
9 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
11 #include "src/__support/macros/attributes.h" // LIBC_INLINE
12 #include "src/__support/macros/config.h"
13 #include "src/string/memory_utils/op_generic.h"
14 #include "src/string/memory_utils/op_x86.h"
15 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
17 #include <stddef.h> // size_t
19 namespace LIBC_NAMESPACE_DECL
{
21 // Size of one cache line for software prefetching
22 LIBC_INLINE_VAR
constexpr size_t K_ONE_CACHELINE_SIZE
= 64;
23 LIBC_INLINE_VAR
constexpr size_t K_TWO_CACHELINES_SIZE
=
24 K_ONE_CACHELINE_SIZE
* 2;
25 LIBC_INLINE_VAR
constexpr size_t K_FIVE_CACHELINES_SIZE
=
26 K_ONE_CACHELINE_SIZE
* 5;
28 LIBC_INLINE_VAR
constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET
=
29 LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING
);
33 #if defined(__AVX512F__)
34 using uint128_t
= generic_v128
;
35 using uint256_t
= generic_v256
;
36 using uint512_t
= generic_v512
;
37 #elif defined(__AVX__)
38 using uint128_t
= generic_v128
;
39 using uint256_t
= generic_v256
;
40 using uint512_t
= cpp::array
<generic_v256
, 2>;
41 #elif defined(__SSE2__)
42 using uint128_t
= generic_v128
;
43 using uint256_t
= cpp::array
<generic_v128
, 2>;
44 using uint512_t
= cpp::array
<generic_v128
, 4>;
46 using uint128_t
= cpp::array
<uint64_t, 2>;
47 using uint256_t
= cpp::array
<uint64_t, 4>;
48 using uint512_t
= cpp::array
<uint64_t, 8>;
51 [[maybe_unused
]] LIBC_INLINE
static void
52 inline_memset_x86_gt64_sw_prefetching(Ptr dst
, uint8_t value
, size_t count
) {
53 constexpr size_t PREFETCH_DISTANCE
= x86::K_FIVE_CACHELINES_SIZE
;
54 constexpr size_t PREFETCH_DEGREE
= x86::K_TWO_CACHELINES_SIZE
;
55 constexpr size_t SIZE
= sizeof(uint256_t
);
56 // Prefetch one cache line
57 prefetch_for_write(dst
+ x86::K_ONE_CACHELINE_SIZE
);
59 return generic::Memset
<uint512_t
>::head_tail(dst
, value
, count
);
60 // Prefetch the second cache line
61 prefetch_for_write(dst
+ x86::K_TWO_CACHELINES_SIZE
);
63 generic::Memset
<uint256_t
>::block(dst
, value
);
64 align_to_next_boundary
<32>(dst
, count
);
66 return generic::Memset
<uint256_t
>::loop_and_tail(dst
, value
, count
);
68 generic::MemsetSequence
<uint512_t
, uint256_t
>::block(dst
, value
);
70 while (offset
+ PREFETCH_DEGREE
+ SIZE
<= count
) {
71 prefetch_for_write(dst
+ offset
+ PREFETCH_DISTANCE
);
72 prefetch_for_write(dst
+ offset
+ PREFETCH_DISTANCE
+
73 x86::K_ONE_CACHELINE_SIZE
);
74 for (size_t i
= 0; i
< PREFETCH_DEGREE
; i
+= SIZE
, offset
+= SIZE
)
75 generic::Memset
<uint256_t
>::block(dst
+ offset
, value
);
77 generic::Memset
<uint256_t
>::loop_and_tail_offset(dst
, value
, count
, offset
);
81 [[maybe_unused
]] LIBC_INLINE
static void
82 inline_memset_x86(Ptr dst
, uint8_t value
, size_t count
) {
86 return generic::Memset
<uint8_t>::block(dst
, value
);
88 return generic::Memset
<uint16_t>::block(dst
, value
);
90 return generic::MemsetSequence
<uint16_t, uint8_t>::block(dst
, value
);
92 return generic::Memset
<uint32_t>::head_tail(dst
, value
, count
);
94 return generic::Memset
<uint64_t>::head_tail(dst
, value
, count
);
96 return generic::Memset
<uint128_t
>::head_tail(dst
, value
, count
);
98 return generic::Memset
<uint256_t
>::head_tail(dst
, value
, count
);
99 if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET
)
100 return inline_memset_x86_gt64_sw_prefetching(dst
, value
, count
);
102 return generic::Memset
<uint512_t
>::head_tail(dst
, value
, count
);
104 generic::Memset
<uint256_t
>::block(dst
, value
);
105 align_to_next_boundary
<32>(dst
, count
);
106 return generic::Memset
<uint256_t
>::loop_and_tail(dst
, value
, count
);
108 } // namespace LIBC_NAMESPACE_DECL
110 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H