1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s
4 declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
5 declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
7 define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
8 ; CHECK-LABEL: test_x86_vfnmadd_ps_z:
10 ; CHECK-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
12 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
15 declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
17 define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
18 ; CHECK-LABEL: test_mask_vfnmadd_ps:
20 ; CHECK-NEXT: kmovw %edi, %k1
21 ; CHECK-NEXT: vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
23 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
27 define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
28 ; CHECK-LABEL: test_x86_vfnmadd_pd_z:
30 ; CHECK-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
32 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
35 declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
37 define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
38 ; CHECK-LABEL: test_mask_vfnmadd_pd:
40 ; CHECK-NEXT: kmovw %edi, %k1
41 ; CHECK-NEXT: vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
43 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
47 define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
48 ; CHECK-LABEL: test_x86_vfnmsubps_z:
50 ; CHECK-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
52 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
55 declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
57 define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
58 ; CHECK-LABEL: test_mask_vfnmsub_ps:
60 ; CHECK-NEXT: kmovw %edi, %k1
61 ; CHECK-NEXT: vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
63 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
67 define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
68 ; CHECK-LABEL: test_x86_vfnmsubpd_z:
70 ; CHECK-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
72 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
75 declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
77 define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
78 ; CHECK-LABEL: test_mask_vfnmsub_pd:
80 ; CHECK-NEXT: kmovw %edi, %k1
81 ; CHECK-NEXT: vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
83 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
87 define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
88 ; CHECK-LABEL: test_x86_vfmaddsubps_z:
90 ; CHECK-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
92 %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
96 define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
97 ; CHECK-LABEL: test_mask_fmaddsub_ps:
99 ; CHECK-NEXT: kmovw %edi, %k1
100 ; CHECK-NEXT: vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
102 %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
103 ret <16 x float> %res
106 declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
108 define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
109 ; CHECK-LABEL: test_x86_vfmaddsubpd_z:
111 ; CHECK-NEXT: vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
113 %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
114 ret <8 x double> %res
116 declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
118 define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
119 ; CHECK-LABEL: test_mask_vfmaddsub_pd:
121 ; CHECK-NEXT: kmovw %edi, %k1
122 ; CHECK-NEXT: vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
124 %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
125 ret <8 x double> %res
128 define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
129 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
131 ; CHECK-NEXT: kmovw %edi, %k1
132 ; CHECK-NEXT: vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
134 %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
135 ret <8 x double> %res
138 declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
140 define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
141 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
143 ; CHECK-NEXT: kmovw %edi, %k1
144 ; CHECK-NEXT: vfmaddsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
145 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
147 %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
148 ret <8 x double> %res
151 declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
153 define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
154 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
156 ; CHECK-NEXT: kmovw %edi, %k1
157 ; CHECK-NEXT: vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
159 %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
160 ret <8 x double> %res
163 define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
164 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
166 ; CHECK-NEXT: kmovw %edi, %k1
167 ; CHECK-NEXT: vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
169 %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
170 ret <16 x float> %res
173 declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
175 define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
176 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
178 ; CHECK-NEXT: kmovw %edi, %k1
179 ; CHECK-NEXT: vfmaddsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
180 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
182 %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
183 ret <16 x float> %res
186 declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
188 define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
189 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
191 ; CHECK-NEXT: kmovw %edi, %k1
192 ; CHECK-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
194 %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
195 ret <16 x float> %res
198 declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
200 define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
201 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
203 ; CHECK-NEXT: kmovw %edi, %k1
204 ; CHECK-NEXT: vfmsubadd231pd {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
205 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
207 %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
208 ret <8 x double> %res
211 declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
213 define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
214 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
216 ; CHECK-NEXT: kmovw %edi, %k1
217 ; CHECK-NEXT: vfmsubadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
218 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
220 %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
221 ret <16 x float> %res
224 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
225 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
227 ; CHECK-NEXT: kmovw %edi, %k1
228 ; CHECK-NEXT: vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
230 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
231 ret <16 x float> %res
234 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
235 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
237 ; CHECK-NEXT: kmovw %edi, %k1
238 ; CHECK-NEXT: vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
240 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
241 ret <16 x float> %res
244 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
245 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
247 ; CHECK-NEXT: kmovw %edi, %k1
248 ; CHECK-NEXT: vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
250 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
251 ret <16 x float> %res
254 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
255 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
257 ; CHECK-NEXT: kmovw %edi, %k1
258 ; CHECK-NEXT: vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
260 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
261 ret <16 x float> %res
264 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
265 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
267 ; CHECK-NEXT: kmovw %edi, %k1
268 ; CHECK-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
270 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
271 ret <16 x float> %res
274 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
275 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
277 ; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
279 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
280 ret <16 x float> %res
283 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
284 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
286 ; CHECK-NEXT: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
288 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
289 ret <16 x float> %res
292 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
293 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
295 ; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
297 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
298 ret <16 x float> %res
301 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
302 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
304 ; CHECK-NEXT: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
306 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
307 ret <16 x float> %res
310 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
311 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
313 ; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
315 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
316 ret <16 x float> %res
319 declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
321 define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
322 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
324 ; CHECK-NEXT: kmovw %edi, %k1
325 ; CHECK-NEXT: vfmsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
326 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
328 %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
329 ret <8 x double> %res
332 declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
334 define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
335 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
337 ; CHECK-NEXT: kmovw %edi, %k1
338 ; CHECK-NEXT: vfmsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
339 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
341 %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
342 ret <16 x float> %res
345 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
346 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
348 ; CHECK-NEXT: kmovw %edi, %k1
349 ; CHECK-NEXT: vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
351 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
352 ret <8 x double> %res
355 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
356 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
358 ; CHECK-NEXT: kmovw %edi, %k1
359 ; CHECK-NEXT: vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
361 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
362 ret <8 x double> %res
365 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
366 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
368 ; CHECK-NEXT: kmovw %edi, %k1
369 ; CHECK-NEXT: vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
371 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
372 ret <8 x double> %res
375 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
376 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
378 ; CHECK-NEXT: kmovw %edi, %k1
379 ; CHECK-NEXT: vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
381 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
382 ret <8 x double> %res
385 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
386 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
388 ; CHECK-NEXT: kmovw %edi, %k1
389 ; CHECK-NEXT: vfmadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
391 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
392 ret <8 x double> %res
395 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
396 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
398 ; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
400 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
401 ret <8 x double> %res
404 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
405 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
407 ; CHECK-NEXT: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
409 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
410 ret <8 x double> %res
413 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
414 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
416 ; CHECK-NEXT: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
418 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
419 ret <8 x double> %res
422 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
423 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
425 ; CHECK-NEXT: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
427 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
428 ret <8 x double> %res
431 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
432 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
434 ; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
436 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
437 ret <8 x double> %res
440 define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
441 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
443 ; CHECK-NEXT: kmovw %edi, %k1
444 ; CHECK-NEXT: vfmadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
446 %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
447 ret <8 x double> %res
450 declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
452 define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
453 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
455 ; CHECK-NEXT: kmovw %edi, %k1
456 ; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
457 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
459 %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
460 ret <8 x double> %res
463 declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
465 define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
466 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
468 ; CHECK-NEXT: kmovw %edi, %k1
469 ; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
471 %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
472 ret <8 x double> %res
475 define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
476 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
478 ; CHECK-NEXT: kmovw %edi, %k1
479 ; CHECK-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
481 %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
482 ret <16 x float> %res
485 declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
487 define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
488 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
490 ; CHECK-NEXT: kmovw %edi, %k1
491 ; CHECK-NEXT: vfmadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
492 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
494 %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
495 ret <16 x float> %res
498 declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
500 define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
501 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
503 ; CHECK-NEXT: kmovw %edi, %k1
504 ; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
506 %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
507 ret <16 x float> %res
511 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
512 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
514 ; CHECK-NEXT: kmovw %edi, %k1
515 ; CHECK-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
517 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
518 ret <8 x double> %res
521 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
522 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
524 ; CHECK-NEXT: kmovw %edi, %k1
525 ; CHECK-NEXT: vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
527 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
528 ret <8 x double> %res
531 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
532 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
534 ; CHECK-NEXT: kmovw %edi, %k1
535 ; CHECK-NEXT: vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
537 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
538 ret <8 x double> %res
541 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
542 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
544 ; CHECK-NEXT: kmovw %edi, %k1
545 ; CHECK-NEXT: vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
547 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
548 ret <8 x double> %res
551 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
552 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
554 ; CHECK-NEXT: kmovw %edi, %k1
555 ; CHECK-NEXT: vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
557 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
558 ret <8 x double> %res
561 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
562 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
564 ; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
566 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
567 ret <8 x double> %res
570 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
571 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
573 ; CHECK-NEXT: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
575 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
576 ret <8 x double> %res
579 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
580 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
582 ; CHECK-NEXT: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
584 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
585 ret <8 x double> %res
588 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
589 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
591 ; CHECK-NEXT: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
593 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
594 ret <8 x double> %res
597 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
598 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
600 ; CHECK-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
602 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
603 ret <8 x double> %res
606 define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
607 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
609 ; CHECK-NEXT: kmovw %edi, %k1
610 ; CHECK-NEXT: vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
612 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
613 ret <8 x double> %res
616 declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
618 define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
619 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
621 ; CHECK-NEXT: kmovw %edi, %k1
622 ; CHECK-NEXT: vfnmsub231pd {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
623 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
625 %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
626 ret <8 x double> %res
629 define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
630 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
632 ; CHECK-NEXT: kmovw %edi, %k1
633 ; CHECK-NEXT: vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
635 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
636 ret <16 x float> %res
639 declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
641 define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
642 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
644 ; CHECK-NEXT: kmovw %edi, %k1
645 ; CHECK-NEXT: vfnmsub231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
646 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
648 %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
649 ret <16 x float> %res
652 define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
653 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
655 ; CHECK-NEXT: kmovw %edi, %k1
656 ; CHECK-NEXT: vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
658 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
659 ret <8 x double> %res
662 define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
663 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
665 ; CHECK-NEXT: kmovw %edi, %k1
666 ; CHECK-NEXT: vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
668 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
669 ret <16 x float> %res