1 //===-- Implementation of bcmp --------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
12 #include "src/__support/common.h"
13 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
14 #include "src/__support/macros/properties/architectures.h"
15 #include "src/string/memory_utils/generic/aligned_access.h"
16 #include "src/string/memory_utils/generic/byte_per_byte.h"
17 #include "src/string/memory_utils/op_aarch64.h"
18 #include "src/string/memory_utils/op_builtin.h"
19 #include "src/string/memory_utils/op_generic.h"
20 #include "src/string/memory_utils/op_riscv.h"
21 #include "src/string/memory_utils/op_x86.h"
23 #include <stddef.h> // size_t
25 namespace __llvm_libc
{
#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
// Generic (non-vector) comparison for buffers larger than 16 bytes, as the
// `_gt16` suffix indicates. Compares uint64_t-sized blocks through
// `loop_and_tail_align_above`, passing 256 as its first argument.
// NOTE(review): 256 is presumably the byte count above which the loop first
// aligns a pointer -- confirm against op_generic.h.
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
  return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
}
#endif // defined(LIBC_TARGET_ARCH_IS_X86) ||
       // defined(LIBC_TARGET_ARCH_IS_AARCH64)
35 #if defined(LIBC_TARGET_ARCH_IS_X86)
#if defined(__SSE4_1__)
// SSE4.1 comparison for buffers larger than 16 bytes (see `_gt16` suffix).
//   count in (16, 32] : one head/tail comparison with 16-byte __m128i blocks.
//   count  > 32       : __m128i block loop, called with 256 as first argument.
// NOTE(review): the `count <= 32` bound assumes `head_tail` handles sizes from
// one to two blocks of the given type -- confirm against op_generic.h.
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
  if (count <= 32)
    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
  return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
}
#endif // __SSE4_1__

#if defined(__AVX__)
// AVX comparison for buffers larger than 16 bytes (see `_gt16` suffix).
//   count in (16, 32] : head/tail comparison with 16-byte __m128i blocks.
//   count in (32, 64] : head/tail comparison with 32-byte __m256i blocks.
//   count  > 64       : __m256i block loop, called with 256 as first argument.
// NOTE(review): the bounds assume `head_tail` handles sizes from one to two
// blocks of the given type -- confirm against op_generic.h.
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
  if (count <= 32)
    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
  if (count <= 64)
    return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
  return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
}
#endif // __AVX__

#if defined(__AVX512BW__)
// AVX512BW comparison for buffers larger than 16 bytes (see `_gt16` suffix).
//   count in (16, 32]  : head/tail comparison with 16-byte __m128i blocks.
//   count in (32, 64]  : head/tail comparison with 32-byte __m256i blocks.
//   count in (64, 128] : head/tail comparison with 64-byte __m512i blocks.
//   count  > 128       : __m512i block loop, called with 256 as first argument.
// NOTE(review): the bounds assume `head_tail` handles sizes from one to two
// blocks of the given type -- confirm against op_generic.h.
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
  if (count <= 32)
    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
  if (count <= 64)
    return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
  if (count <= 128)
    return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
  return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
}
#endif // __AVX512BW__
69 [[maybe_unused
]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86(CPtr p1
, CPtr p2
,
72 return BcmpReturnType::ZERO();
74 return generic::Bcmp
<uint8_t>::block(p1
, p2
);
76 return generic::Bcmp
<uint16_t>::block(p1
, p2
);
78 return generic::BcmpSequence
<uint16_t, uint8_t>::block(p1
, p2
);
80 return generic::Bcmp
<uint32_t>::block(p1
, p2
);
82 return generic::BcmpSequence
<uint32_t, uint8_t>::block(p1
, p2
);
84 return generic::BcmpSequence
<uint32_t, uint16_t>::block(p1
, p2
);
86 return generic::BcmpSequence
<uint32_t, uint16_t, uint8_t>::block(p1
, p2
);
88 return generic::Bcmp
<uint64_t>::block(p1
, p2
);
90 return generic::Bcmp
<uint64_t>::head_tail(p1
, p2
, count
);
91 #if defined(__AVX512BW__)
92 return inline_bcmp_x86_avx512bw_gt16(p1
, p2
, count
);
93 #elif defined(__AVX__)
94 return inline_bcmp_x86_avx_gt16(p1
, p2
, count
);
95 #elif defined(__SSE4_1__)
96 return inline_bcmp_x86_sse41_gt16(p1
, p2
, count
);
98 return inline_bcmp_generic_gt16(p1
, p2
, count
);
101 #endif // defined(LIBC_TARGET_ARCH_IS_X86)
103 #if defined(LIBC_TARGET_ARCH_IS_AARCH64)
104 [[maybe_unused
]] LIBC_INLINE BcmpReturnType
inline_bcmp_aarch64(CPtr p1
,
107 if (LIBC_LIKELY(count
<= 32)) {
108 if (LIBC_UNLIKELY(count
>= 16)) {
109 return aarch64::Bcmp
<16>::head_tail(p1
, p2
, count
);
113 return BcmpReturnType::ZERO();
115 return generic::Bcmp
<uint8_t>::block(p1
, p2
);
117 return generic::Bcmp
<uint16_t>::block(p1
, p2
);
119 return generic::Bcmp
<uint16_t>::head_tail(p1
, p2
, count
);
121 return generic::Bcmp
<uint32_t>::block(p1
, p2
);
125 return generic::Bcmp
<uint32_t>::head_tail(p1
, p2
, count
);
127 return generic::Bcmp
<uint64_t>::block(p1
, p2
);
135 return generic::Bcmp
<uint64_t>::head_tail(p1
, p2
, count
);
140 return aarch64::Bcmp
<32>::head_tail(p1
, p2
, count
);
142 // Aligned loop if > 256, otherwise normal loop
143 if (LIBC_UNLIKELY(count
> 256)) {
144 if (auto value
= aarch64::Bcmp
<32>::block(p1
, p2
))
146 align_to_next_boundary
<16, Arg::P1
>(p1
, p2
, count
);
148 return aarch64::Bcmp
<32>::loop_and_tail(p1
, p2
, count
);
150 #endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
152 LIBC_INLINE BcmpReturnType
inline_bcmp(CPtr p1
, CPtr p2
, size_t count
) {
153 #if defined(LIBC_TARGET_ARCH_IS_X86)
154 return inline_bcmp_x86(p1
, p2
, count
);
155 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
156 return inline_bcmp_aarch64(p1
, p2
, count
);
157 #elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
158 return inline_bcmp_aligned_access_64bit(p1
, p2
, count
);
159 #elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
160 return inline_bcmp_aligned_access_32bit(p1
, p2
, count
);
162 return inline_bcmp_byte_per_byte(p1
, p2
, count
);
166 LIBC_INLINE
int inline_bcmp(const void *p1
, const void *p2
, size_t count
) {
167 return static_cast<int>(inline_bcmp(reinterpret_cast<CPtr
>(p1
),
168 reinterpret_cast<CPtr
>(p2
), count
));
171 } // namespace __llvm_libc
173 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H