/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H

/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R)))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R)))

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_round_ps(A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R)))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R)))

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
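
/* Usage sketch (illustrative, not part of the upstream header): approximate
 * 2^x with a maximum relative error of 2^-23. Requires AVX512ER hardware
 * (e.g. Xeon Phi Knights Landing); variable names below are hypothetical.
 *
 *   __m512d x = _mm512_set1_pd(3.0);
 *   __m512d r = _mm512_exp2a23_pd(x);   // each lane is approximately 8.0
 *   // The _round_ forms accept _MM_FROUND_NO_EXC to suppress exceptions:
 *   __m512 rs = _mm512_exp2a23_round_ps(_mm512_set1_ps(-1.0f),
 *                                       _MM_FROUND_NO_EXC); // lanes ~ 0.5f
 */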

/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(S), (__mmask8)(M), \
                                          (int)(R)))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(M), (int)(R)))

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_round_ps(A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(S), (__mmask16)(M), \
                                         (int)(R)))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(M), (int)(R)))

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
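
/* Usage sketch (illustrative): approximate 1/sqrt(x) with a maximum relative
 * error of 2^-28; the mask forms merge with a source vector. Names below are
 * hypothetical.
 *
 *   __m512 x = _mm512_set1_ps(4.0f);
 *   __m512 r = _mm512_rsqrt28_ps(x);                // each lane ~ 0.5f
 *   // Lanes whose mask bit is 0 keep the value from src:
 *   __m512 m = _mm512_mask_rsqrt28_ps(src, 0x00FF, x);
 */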

#define _mm_rsqrt28_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(S), \
                                               (__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(S), \
                                                (__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
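
/* Usage sketch (illustrative): the scalar forms compute the approximation
 * only for the low element of B and copy the remaining upper elements from
 * A. Names below are hypothetical.
 *
 *   __m128d a = _mm_set1_pd(7.0);       // supplies the upper lane
 *   __m128d b = _mm_set_sd(16.0);       // low lane input
 *   __m128d r = _mm_rsqrt28_sd(a, b);   // r = { ~0.25, 7.0 }
 */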

/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(S), (__mmask8)(M), \
                                        (int)(R)))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(M), (int)(R)))

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_round_ps(A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(S), (__mmask16)(M), \
                                       (int)(R)))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(M), (int)(R)))

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
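
/* Usage sketch (illustrative): approximate 1/x with a maximum relative error
 * of 2^-28; the maskz forms zero lanes whose mask bit is 0. Names below are
 * hypothetical.
 *
 *   __m512d x = _mm512_set1_pd(8.0);
 *   __m512d r = _mm512_rcp28_pd(x);               // each lane ~ 0.125
 *   __m512d z = _mm512_maskz_rcp28_pd(0x0F, x);   // lanes 4..7 are 0.0
 */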

#define _mm_rcp28_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(S), \
                                             (__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(M), (int)(R)))

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(S), \
                                              (__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(M), (int)(R)))

#define _mm_rcp28_sd(A, B) \
  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
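
/* Usage sketch (illustrative): scalar reciprocal of the low element of B,
 * with the upper elements copied from A; the _round_ forms accept
 * _MM_FROUND_NO_EXC to suppress exceptions. Names below are hypothetical.
 *
 *   __m128 a = _mm_set1_ps(1.0f);
 *   __m128 b = _mm_set_ss(4.0f);
 *   __m128 r = _mm_rcp28_round_ss(a, b, _MM_FROUND_NO_EXC); // low lane ~ 0.25f
 */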

#endif /* __AVX512ERINTRIN_H */