[TySan] Don't report globals with incomplete types. (#121922)
[llvm-project.git] / libc / src / string / memory_utils / x86_64 / inline_memset.h
blob9f8e584d2bbb454ad2a0358c2275c3250e55564b
1 //===-- Memset implementation for x86_64 ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
9 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
11 #include "src/__support/macros/attributes.h" // LIBC_INLINE
12 #include "src/__support/macros/config.h"
13 #include "src/string/memory_utils/op_generic.h"
14 #include "src/string/memory_utils/op_x86.h"
15 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
17 #include <stddef.h> // size_t
19 namespace LIBC_NAMESPACE_DECL {
20 namespace x86 {
21 // Size of one cache line for software prefetching
22 LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE_SIZE = 64;
23 LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES_SIZE =
24 K_ONE_CACHELINE_SIZE * 2;
25 LIBC_INLINE_VAR constexpr size_t K_FIVE_CACHELINES_SIZE =
26 K_ONE_CACHELINE_SIZE * 5;
28 LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET =
29 LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING);
31 } // namespace x86
33 #if defined(__AVX512F__)
34 using uint128_t = generic_v128;
35 using uint256_t = generic_v256;
36 using uint512_t = generic_v512;
37 #elif defined(__AVX__)
38 using uint128_t = generic_v128;
39 using uint256_t = generic_v256;
40 using uint512_t = cpp::array<generic_v256, 2>;
41 #elif defined(__SSE2__)
42 using uint128_t = generic_v128;
43 using uint256_t = cpp::array<generic_v128, 2>;
44 using uint512_t = cpp::array<generic_v128, 4>;
45 #else
46 using uint128_t = cpp::array<uint64_t, 2>;
47 using uint256_t = cpp::array<uint64_t, 4>;
48 using uint512_t = cpp::array<uint64_t, 8>;
49 #endif
51 [[maybe_unused]] LIBC_INLINE static void
52 inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) {
53 constexpr size_t PREFETCH_DISTANCE = x86::K_FIVE_CACHELINES_SIZE;
54 constexpr size_t PREFETCH_DEGREE = x86::K_TWO_CACHELINES_SIZE;
55 constexpr size_t SIZE = sizeof(uint256_t);
56 // Prefetch one cache line
57 prefetch_for_write(dst + x86::K_ONE_CACHELINE_SIZE);
58 if (count <= 128)
59 return generic::Memset<uint512_t>::head_tail(dst, value, count);
60 // Prefetch the second cache line
61 prefetch_for_write(dst + x86::K_TWO_CACHELINES_SIZE);
62 // Aligned loop
63 generic::Memset<uint256_t>::block(dst, value);
64 align_to_next_boundary<32>(dst, count);
65 if (count <= 192) {
66 return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
67 } else {
68 generic::MemsetSequence<uint512_t, uint256_t>::block(dst, value);
69 size_t offset = 96;
70 while (offset + PREFETCH_DEGREE + SIZE <= count) {
71 prefetch_for_write(dst + offset + PREFETCH_DISTANCE);
72 prefetch_for_write(dst + offset + PREFETCH_DISTANCE +
73 x86::K_ONE_CACHELINE_SIZE);
74 for (size_t i = 0; i < PREFETCH_DEGREE; i += SIZE, offset += SIZE)
75 generic::Memset<uint256_t>::block(dst + offset, value);
77 generic::Memset<uint256_t>::loop_and_tail_offset(dst, value, count, offset);
81 [[maybe_unused]] LIBC_INLINE static void
82 inline_memset_x86(Ptr dst, uint8_t value, size_t count) {
83 if (count == 0)
84 return;
85 if (count == 1)
86 return generic::Memset<uint8_t>::block(dst, value);
87 if (count == 2)
88 return generic::Memset<uint16_t>::block(dst, value);
89 if (count == 3)
90 return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value);
91 if (count <= 8)
92 return generic::Memset<uint32_t>::head_tail(dst, value, count);
93 if (count <= 16)
94 return generic::Memset<uint64_t>::head_tail(dst, value, count);
95 if (count <= 32)
96 return generic::Memset<uint128_t>::head_tail(dst, value, count);
97 if (count <= 64)
98 return generic::Memset<uint256_t>::head_tail(dst, value, count);
99 if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET)
100 return inline_memset_x86_gt64_sw_prefetching(dst, value, count);
101 if (count <= 128)
102 return generic::Memset<uint512_t>::head_tail(dst, value, count);
103 // Aligned loop
104 generic::Memset<uint256_t>::block(dst, value);
105 align_to_next_boundary<32>(dst, count);
106 return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
108 } // namespace LIBC_NAMESPACE_DECL
110 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H