[libc][NFC] Move aligned access implementations to separate header
[llvm-project.git] / libc / src / string / memory_utils / bcmp_implementations.h
blob7bfc1737f6126a12fc222a8760c3ee74686ecf9b
1 //===-- Implementation of bcmp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
12 #include "src/__support/common.h"
13 #include "src/__support/macros/optimization.h" // LIBC_LIKELY LIBC_UNLIKELY
14 #include "src/__support/macros/properties/architectures.h"
15 #include "src/string/memory_utils/generic/aligned_access.h"
16 #include "src/string/memory_utils/generic/byte_per_byte.h"
17 #include "src/string/memory_utils/op_aarch64.h"
18 #include "src/string/memory_utils/op_builtin.h"
19 #include "src/string/memory_utils/op_generic.h"
20 #include "src/string/memory_utils/op_riscv.h"
21 #include "src/string/memory_utils/op_x86.h"
23 #include <stddef.h> // size_t
25 namespace __llvm_libc {
27 #if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
28 [[maybe_unused]] LIBC_INLINE BcmpReturnType
29 inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
30 return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
32 #endif // defined(LIBC_TARGET_ARCH_IS_X86) ||
33 // defined(LIBC_TARGET_ARCH_IS_AARCH64)
35 #if defined(LIBC_TARGET_ARCH_IS_X86)
36 #if defined(__SSE4_1__)
37 [[maybe_unused]] LIBC_INLINE BcmpReturnType
38 inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
39 if (count <= 32)
40 return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
41 return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
43 #endif // __SSE4_1__
45 #if defined(__AVX__)
46 [[maybe_unused]] LIBC_INLINE BcmpReturnType
47 inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
48 if (count <= 32)
49 return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
50 if (count <= 64)
51 return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
52 return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
54 #endif // __AVX__
56 #if defined(__AVX512BW__)
57 [[maybe_unused]] LIBC_INLINE BcmpReturnType
58 inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
59 if (count <= 32)
60 return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
61 if (count <= 64)
62 return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
63 if (count <= 128)
64 return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
65 return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
67 #endif // __AVX512BW__
69 [[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
70 size_t count) {
71 if (count == 0)
72 return BcmpReturnType::ZERO();
73 if (count == 1)
74 return generic::Bcmp<uint8_t>::block(p1, p2);
75 if (count == 2)
76 return generic::Bcmp<uint16_t>::block(p1, p2);
77 if (count == 3)
78 return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
79 if (count == 4)
80 return generic::Bcmp<uint32_t>::block(p1, p2);
81 if (count == 5)
82 return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
83 if (count == 6)
84 return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
85 if (count == 7)
86 return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
87 if (count == 8)
88 return generic::Bcmp<uint64_t>::block(p1, p2);
89 if (count <= 16)
90 return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
91 #if defined(__AVX512BW__)
92 return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
93 #elif defined(__AVX__)
94 return inline_bcmp_x86_avx_gt16(p1, p2, count);
95 #elif defined(__SSE4_1__)
96 return inline_bcmp_x86_sse41_gt16(p1, p2, count);
97 #else
98 return inline_bcmp_generic_gt16(p1, p2, count);
99 #endif
101 #endif // defined(LIBC_TARGET_ARCH_IS_X86)
103 #if defined(LIBC_TARGET_ARCH_IS_AARCH64)
104 [[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
105 CPtr p2,
106 size_t count) {
107 if (LIBC_LIKELY(count <= 32)) {
108 if (LIBC_UNLIKELY(count >= 16)) {
109 return aarch64::Bcmp<16>::head_tail(p1, p2, count);
111 switch (count) {
112 case 0:
113 return BcmpReturnType::ZERO();
114 case 1:
115 return generic::Bcmp<uint8_t>::block(p1, p2);
116 case 2:
117 return generic::Bcmp<uint16_t>::block(p1, p2);
118 case 3:
119 return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
120 case 4:
121 return generic::Bcmp<uint32_t>::block(p1, p2);
122 case 5:
123 case 6:
124 case 7:
125 return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
126 case 8:
127 return generic::Bcmp<uint64_t>::block(p1, p2);
128 case 9:
129 case 10:
130 case 11:
131 case 12:
132 case 13:
133 case 14:
134 case 15:
135 return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
139 if (count <= 64)
140 return aarch64::Bcmp<32>::head_tail(p1, p2, count);
142 // Aligned loop if > 256, otherwise normal loop
143 if (LIBC_UNLIKELY(count > 256)) {
144 if (auto value = aarch64::Bcmp<32>::block(p1, p2))
145 return value;
146 align_to_next_boundary<16, Arg::P1>(p1, p2, count);
148 return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
150 #endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
152 LIBC_INLINE BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
153 #if defined(LIBC_TARGET_ARCH_IS_X86)
154 return inline_bcmp_x86(p1, p2, count);
155 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
156 return inline_bcmp_aarch64(p1, p2, count);
157 #elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
158 return inline_bcmp_aligned_access_64bit(p1, p2, count);
159 #elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
160 return inline_bcmp_aligned_access_32bit(p1, p2, count);
161 #else
162 return inline_bcmp_byte_per_byte(p1, p2, count);
163 #endif
166 LIBC_INLINE int inline_bcmp(const void *p1, const void *p2, size_t count) {
167 return static_cast<int>(inline_bcmp(reinterpret_cast<CPtr>(p1),
168 reinterpret_cast<CPtr>(p2), count));
171 } // namespace __llvm_libc
173 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H