[Flang] remove whole-archive option for AIX linker (#76039)
[llvm-project.git] / clang / lib / Headers / pmmintrin.h
blob91cee1edda30678e4e6cd4f99543f0b87225c7b9
1 /*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
8 */
10 #ifndef __PMMINTRIN_H
11 #define __PMMINTRIN_H
13 #if !defined(__i386__) && !defined(__x86_64__)
14 #error "This header is only meant to be used on x86 and x64 architecture"
15 #endif
17 #include <emmintrin.h>
/* Define the default attributes for the functions in this file: every
 * intrinsic is force-inlined, carries no debug info, is compiled for the
 * "sse3,no-evex512" target, and declares a minimum vector width of 128 bits.
 * The macro is private to this header and is #undef'd before the include
 * guard closes. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("sse3,no-evex512"), __min_vector_width__(128)))
24 /// Loads data from an unaligned memory location to elements in a 128-bit
25 /// vector.
26 ///
27 /// If the address of the data is not 16-byte aligned, the instruction may
28 /// read two adjacent aligned blocks of memory to retrieve the requested
29 /// data.
30 ///
31 /// \headerfile <x86intrin.h>
32 ///
33 /// This intrinsic corresponds to the <c> VLDDQU </c> instruction.
34 ///
35 /// \param __p
36 /// A pointer to a 128-bit integer vector containing integer values.
37 /// \returns A 128-bit vector containing the moved values.
38 static __inline__ __m128i __DEFAULT_FN_ATTRS
39 _mm_lddqu_si128(__m128i_u const *__p)
41 return (__m128i)__builtin_ia32_lddqu((char const *)__p);
44 /// Adds the even-indexed values and subtracts the odd-indexed values of
45 /// two 128-bit vectors of [4 x float].
46 ///
47 /// \headerfile <x86intrin.h>
48 ///
49 /// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.
50 ///
51 /// \param __a
52 /// A 128-bit vector of [4 x float] containing the left source operand.
53 /// \param __b
54 /// A 128-bit vector of [4 x float] containing the right source operand.
55 /// \returns A 128-bit vector of [4 x float] containing the alternating sums and
56 /// differences of both operands.
57 static __inline__ __m128 __DEFAULT_FN_ATTRS
58 _mm_addsub_ps(__m128 __a, __m128 __b)
60 return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
63 /// Horizontally adds the adjacent pairs of values contained in two
64 /// 128-bit vectors of [4 x float].
65 ///
66 /// \headerfile <x86intrin.h>
67 ///
68 /// This intrinsic corresponds to the <c> VHADDPS </c> instruction.
69 ///
70 /// \param __a
71 /// A 128-bit vector of [4 x float] containing one of the source operands.
72 /// The horizontal sums of the values are stored in the lower bits of the
73 /// destination.
74 /// \param __b
75 /// A 128-bit vector of [4 x float] containing one of the source operands.
76 /// The horizontal sums of the values are stored in the upper bits of the
77 /// destination.
78 /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
79 /// both operands.
80 static __inline__ __m128 __DEFAULT_FN_ATTRS
81 _mm_hadd_ps(__m128 __a, __m128 __b)
83 return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
86 /// Horizontally subtracts the adjacent pairs of values contained in two
87 /// 128-bit vectors of [4 x float].
88 ///
89 /// \headerfile <x86intrin.h>
90 ///
91 /// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.
92 ///
93 /// \param __a
94 /// A 128-bit vector of [4 x float] containing one of the source operands.
95 /// The horizontal differences between the values are stored in the lower
96 /// bits of the destination.
97 /// \param __b
98 /// A 128-bit vector of [4 x float] containing one of the source operands.
99 /// The horizontal differences between the values are stored in the upper
100 /// bits of the destination.
101 /// \returns A 128-bit vector of [4 x float] containing the horizontal
102 /// differences of both operands.
103 static __inline__ __m128 __DEFAULT_FN_ATTRS
104 _mm_hsub_ps(__m128 __a, __m128 __b)
106 return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
109 /// Moves and duplicates odd-indexed values from a 128-bit vector
110 /// of [4 x float] to float values stored in a 128-bit vector of
111 /// [4 x float].
113 /// \headerfile <x86intrin.h>
115 /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
117 /// \param __a
118 /// A 128-bit vector of [4 x float]. \n
119 /// Bits [127:96] of the source are written to bits [127:96] and [95:64] of
120 /// the destination. \n
121 /// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
122 /// destination.
123 /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
124 /// values.
125 static __inline__ __m128 __DEFAULT_FN_ATTRS
126 _mm_movehdup_ps(__m128 __a)
128 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
131 /// Duplicates even-indexed values from a 128-bit vector of
132 /// [4 x float] to float values stored in a 128-bit vector of [4 x float].
134 /// \headerfile <x86intrin.h>
136 /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
138 /// \param __a
139 /// A 128-bit vector of [4 x float] \n
140 /// Bits [95:64] of the source are written to bits [127:96] and [95:64] of
141 /// the destination. \n
142 /// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
143 /// destination.
144 /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
145 /// values.
146 static __inline__ __m128 __DEFAULT_FN_ATTRS
147 _mm_moveldup_ps(__m128 __a)
149 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
152 /// Adds the even-indexed values and subtracts the odd-indexed values of
153 /// two 128-bit vectors of [2 x double].
155 /// \headerfile <x86intrin.h>
157 /// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.
159 /// \param __a
160 /// A 128-bit vector of [2 x double] containing the left source operand.
161 /// \param __b
162 /// A 128-bit vector of [2 x double] containing the right source operand.
163 /// \returns A 128-bit vector of [2 x double] containing the alternating sums
164 /// and differences of both operands.
165 static __inline__ __m128d __DEFAULT_FN_ATTRS
166 _mm_addsub_pd(__m128d __a, __m128d __b)
168 return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
171 /// Horizontally adds the pairs of values contained in two 128-bit
172 /// vectors of [2 x double].
174 /// \headerfile <x86intrin.h>
176 /// This intrinsic corresponds to the <c> VHADDPD </c> instruction.
178 /// \param __a
179 /// A 128-bit vector of [2 x double] containing one of the source operands.
180 /// The horizontal sum of the values is stored in the lower bits of the
181 /// destination.
182 /// \param __b
183 /// A 128-bit vector of [2 x double] containing one of the source operands.
184 /// The horizontal sum of the values is stored in the upper bits of the
185 /// destination.
186 /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
187 /// both operands.
188 static __inline__ __m128d __DEFAULT_FN_ATTRS
189 _mm_hadd_pd(__m128d __a, __m128d __b)
191 return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
194 /// Horizontally subtracts the pairs of values contained in two 128-bit
195 /// vectors of [2 x double].
197 /// \headerfile <x86intrin.h>
199 /// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.
201 /// \param __a
202 /// A 128-bit vector of [2 x double] containing one of the source operands.
203 /// The horizontal difference of the values is stored in the lower bits of
204 /// the destination.
205 /// \param __b
206 /// A 128-bit vector of [2 x double] containing one of the source operands.
207 /// The horizontal difference of the values is stored in the upper bits of
208 /// the destination.
209 /// \returns A 128-bit vector of [2 x double] containing the horizontal
210 /// differences of both operands.
211 static __inline__ __m128d __DEFAULT_FN_ATTRS
212 _mm_hsub_pd(__m128d __a, __m128d __b)
214 return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
/// Moves and duplicates one double-precision value to double-precision
///    values stored in a 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_loaddup_pd(double const *dp);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
///
/// \param dp
///    A pointer to a double-precision value to be moved and duplicated.
/// \returns A 128-bit vector of [2 x double] containing the moved and
///    duplicated values.
#define _mm_loaddup_pd(dp)        _mm_load1_pd(dp)
234 /// Moves and duplicates the double-precision value in the lower bits of
235 /// a 128-bit vector of [2 x double] to double-precision values stored in a
236 /// 128-bit vector of [2 x double].
238 /// \headerfile <x86intrin.h>
240 /// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
242 /// \param __a
243 /// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits
244 /// [127:64] and [63:0] of the destination.
245 /// \returns A 128-bit vector of [2 x double] containing the moved and
246 /// duplicated values.
247 static __inline__ __m128d __DEFAULT_FN_ATTRS
248 _mm_movedup_pd(__m128d __a)
250 return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
253 /// Establishes a linear address memory range to be monitored and puts
254 /// the processor in the monitor event pending state. Data stored in the
255 /// monitored address range causes the processor to exit the pending state.
257 /// The \c MONITOR instruction can be used in kernel mode, and in other modes
258 /// if MSR <c> C001_0015h[MonMwaitUserEn] </c> is set.
260 /// \headerfile <x86intrin.h>
262 /// This intrinsic corresponds to the \c MONITOR instruction.
264 /// \param __p
265 /// The memory range to be monitored. The size of the range is determined by
266 /// CPUID function 0000_0005h.
267 /// \param __extensions
268 /// Optional extensions for the monitoring state.
269 /// \param __hints
270 /// Optional hints for the monitoring state.
271 static __inline__ void __DEFAULT_FN_ATTRS
272 _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
274 __builtin_ia32_monitor(__p, __extensions, __hints);
277 /// Used with the \c MONITOR instruction to wait while the processor is in
278 /// the monitor event pending state. Data stored in the monitored address
279 /// range, or an interrupt, causes the processor to exit the pending state.
281 /// The \c MWAIT instruction can be used in kernel mode, and in other modes if
282 /// MSR <c> C001_0015h[MonMwaitUserEn] </c> is set.
284 /// \headerfile <x86intrin.h>
286 /// This intrinsic corresponds to the \c MWAIT instruction.
288 /// \param __extensions
289 /// Optional extensions for the monitoring state, which can vary by
290 /// processor.
291 /// \param __hints
292 /// Optional hints for the monitoring state, which can vary by processor.
293 static __inline__ void __DEFAULT_FN_ATTRS
294 _mm_mwait(unsigned __extensions, unsigned __hints)
296 __builtin_ia32_mwait(__extensions, __hints);
299 #undef __DEFAULT_FN_ATTRS
301 #endif /* __PMMINTRIN_H */