1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
; 512-bit packed-half add intrinsic: two <32 x half> operands and a trailing
; i32 immediate.
; NOTE(review): the immediate looks like a rounding-control selector (tests in
; this file pass 4 and expect no rounding annotation, and pass 10 and expect
; {ru-sae}) -- confirm against the intrinsic definition.
4 declare <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half>, <32 x half>, i32)
; Unmasked add with immediate 4: the expected assembly is one
; register-register vaddph of zmm1 and zmm0 writing zmm0.
6 define <32 x half> @test_int_x86_avx512fp16_add_ph_512(<32 x half> %x1, <32 x half> %x2) {
7 ; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_512:
9 ; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0
11 %res = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
; Merge-masked add: the i32 %msk is bitcast to <32 x i1> and selects between
; the add result and %src. The expected assembly moves the mask into %k1 and
; emits a single vaddph that writes zmm0 under {%k1}.
; %ptr is not referenced by the instructions visible in this test.
15 define <32 x half> @test_int_x86_avx512fp16_mask_add_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, <32 x half>* %ptr) {
16 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_512:
18 ; CHECK-NEXT: kmovd %edi, %k1
19 ; CHECK-NEXT: vaddph %zmm2, %zmm1, %zmm0 {%k1}
21 %mask = bitcast i32 %msk to <32 x i1>
22 %res0 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
23 %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
; Zero-masked add, exercised twice with the same mask: once with two register
; operands and once with the second operand loaded from %ptr. Both results are
; selected against zeroinitializer and then combined with a plain fadd.
; The expected assembly uses {%k1} {z} on both masked adds, folds the load
; into the second one as a (%rsi) memory operand, and finishes with an
; unmasked vaddph.
27 define <32 x half> @test_int_x86_avx512fp16_maskz_add_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, <32 x half>* %ptr) {
28 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_512:
30 ; CHECK-NEXT: kmovd %edi, %k1
31 ; CHECK-NEXT: vaddph %zmm2, %zmm1, %zmm0 {%k1} {z}
32 ; CHECK-NEXT: vaddph (%rsi), %zmm1, %zmm1 {%k1} {z}
33 ; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0
35 %mask = bitcast i32 %msk to <32 x i1>
36 %val = load <32 x half>, <32 x half>* %ptr
37 %res0 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
38 %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
39 %t2 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
40 %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
41 %res3 = fadd <32 x half> %res1, %res2
; Merge-masked add with immediate 10: the expected assembly carries the
; {ru-sae} annotation, writes the third vector argument's register (zmm2)
; under {%k1}, and then copies it to zmm0.
45 define <32 x half> @test_int_x86_avx512fp16_add_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, <32 x half>* %ptr) {
46 ; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_512_round:
48 ; CHECK-NEXT: kmovd %edi, %k1
49 ; CHECK-NEXT: vaddph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
50 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
52 %mask = bitcast i32 %msk to <32 x i1>
53 %t1 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
54 %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
; 512-bit packed-half subtract intrinsic: two <32 x half> operands and a
; trailing i32 immediate.
; NOTE(review): the immediate looks like a rounding-control selector (4 yields
; no annotation and 10 yields {ru-sae} in the tests below) -- confirm.
58 declare <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half>, <32 x half>, i32)
; Unmasked subtract with immediate 4: the expected assembly is one
; register-register vsubph writing zmm0.
60 define <32 x half> @test_int_x86_avx512fp16_sub_ph_512(<32 x half> %x1, <32 x half> %x2) {
61 ; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_512:
63 ; CHECK-NEXT: vsubph %zmm1, %zmm0, %zmm0
65 %res = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
; Merge-masked subtract: %msk is bitcast to <32 x i1> and selects between the
; subtract result and %src. The expected assembly loads %k1 from %edi and
; emits one vsubph writing zmm0 under {%k1}.
69 define <32 x half> @test_int_x86_avx512fp16_mask_sub_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, <32 x half>* %ptr) {
70 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_512:
72 ; CHECK-NEXT: kmovd %edi, %k1
73 ; CHECK-NEXT: vsubph %zmm2, %zmm1, %zmm0 {%k1}
75 %mask = bitcast i32 %msk to <32 x i1>
76 %res0 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
77 %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
; Zero-masked subtract, exercised with a register operand and with an operand
; loaded from %ptr; both results are selected against zeroinitializer and
; combined with a plain fsub.
; The expected assembly uses {%k1} {z} on both masked subtracts, folds the
; load as a (%rsi) memory operand, and ends with an unmasked vsubph.
81 define <32 x half> @test_int_x86_avx512fp16_maskz_sub_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, <32 x half>* %ptr) {
82 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_512:
84 ; CHECK-NEXT: kmovd %edi, %k1
85 ; CHECK-NEXT: vsubph %zmm2, %zmm1, %zmm0 {%k1} {z}
86 ; CHECK-NEXT: vsubph (%rsi), %zmm1, %zmm1 {%k1} {z}
87 ; CHECK-NEXT: vsubph %zmm1, %zmm0, %zmm0
89 %mask = bitcast i32 %msk to <32 x i1>
90 %val = load <32 x half>, <32 x half>* %ptr
91 %res0 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
92 %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
93 %t2 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
94 %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
95 %res3 = fsub <32 x half> %res1, %res2
; Merge-masked subtract with immediate 10: the expected assembly carries
; {ru-sae}, writes zmm2 under {%k1}, and then copies zmm2 to zmm0.
99 define <32 x half> @test_int_x86_avx512fp16_sub_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, <32 x half>* %ptr) {
100 ; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_512_round:
102 ; CHECK-NEXT: kmovd %edi, %k1
103 ; CHECK-NEXT: vsubph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
104 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
106 %mask = bitcast i32 %msk to <32 x i1>
107 %t1 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
108 %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
; 512-bit packed-half multiply intrinsic: two <32 x half> operands and a
; trailing i32 immediate.
; NOTE(review): the immediate looks like a rounding-control selector (4 yields
; no annotation and 10 yields {ru-sae} in the tests below) -- confirm.
112 declare <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half>, <32 x half>, i32)
; Unmasked multiply with immediate 4: the expected assembly is one
; register-register vmulph writing zmm0.
114 define <32 x half> @test_int_x86_avx512fp16_mul_ph_512(<32 x half> %x1, <32 x half> %x2) {
115 ; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_512:
117 ; CHECK-NEXT: vmulph %zmm1, %zmm0, %zmm0
119 %res = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
; Merge-masked multiply: %msk is bitcast to <32 x i1> and selects between the
; multiply result and %src. The expected assembly loads %k1 from %edi and
; emits one vmulph writing zmm0 under {%k1}.
123 define <32 x half> @test_int_x86_avx512fp16_mask_mul_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, <32 x half>* %ptr) {
124 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_512:
126 ; CHECK-NEXT: kmovd %edi, %k1
127 ; CHECK-NEXT: vmulph %zmm2, %zmm1, %zmm0 {%k1}
129 %mask = bitcast i32 %msk to <32 x i1>
130 %res0 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
131 %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
; Zero-masked multiply, exercised with a register operand and with an operand
; loaded from %ptr; both results are selected against zeroinitializer and
; combined with a plain fmul, which is returned.
; The expected assembly uses {%k1} {z} on both masked multiplies, folds the
; load as a (%rsi) memory operand, and ends with an unmasked vmulph.
135 define <32 x half> @test_int_x86_avx512fp16_maskz_mul_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, <32 x half>* %ptr) {
136 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_512:
138 ; CHECK-NEXT: kmovd %edi, %k1
139 ; CHECK-NEXT: vmulph %zmm2, %zmm1, %zmm0 {%k1} {z}
140 ; CHECK-NEXT: vmulph (%rsi), %zmm1, %zmm1 {%k1} {z}
141 ; CHECK-NEXT: vmulph %zmm1, %zmm0, %zmm0
143 %mask = bitcast i32 %msk to <32 x i1>
144 %val = load <32 x half>, <32 x half>* %ptr
145 %res0 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
146 %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
147 %t2 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
148 %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
149 %res3 = fmul <32 x half> %res1, %res2
150 ret <32 x half> %res3
; Merge-masked multiply with immediate 10: the expected assembly carries
; {ru-sae}, writes zmm2 under {%k1}, and then copies zmm2 to zmm0.
153 define <32 x half> @test_int_x86_avx512fp16_mul_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, <32 x half>* %ptr) {
154 ; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_512_round:
156 ; CHECK-NEXT: kmovd %edi, %k1
157 ; CHECK-NEXT: vmulph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
158 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
160 %mask = bitcast i32 %msk to <32 x i1>
161 %t1 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
162 %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
; 512-bit packed-half divide intrinsic: two <32 x half> operands and a
; trailing i32 immediate.
; NOTE(review): the immediate looks like a rounding-control selector (4 yields
; no annotation and 10 yields {ru-sae} in the tests below) -- confirm.
166 declare <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half>, <32 x half>, i32)
; Unmasked divide with immediate 4: the expected assembly is one
; register-register vdivph writing zmm0.
168 define <32 x half> @test_int_x86_avx512fp16_div_ph_512(<32 x half> %x1, <32 x half> %x2) {
169 ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_512:
171 ; CHECK-NEXT: vdivph %zmm1, %zmm0, %zmm0
173 %res = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
; Merge-masked divide: %msk is bitcast to <32 x i1> and selects between the
; divide result and %src. The expected assembly loads %k1 from %edi and emits
; one vdivph writing zmm0 under {%k1}.
177 define <32 x half> @test_int_x86_avx512fp16_mask_div_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, <32 x half>* %ptr) {
178 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_512:
180 ; CHECK-NEXT: kmovd %edi, %k1
181 ; CHECK-NEXT: vdivph %zmm2, %zmm1, %zmm0 {%k1}
183 %mask = bitcast i32 %msk to <32 x i1>
184 %res0 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
185 %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
; Zero-masked divide, exercised with a register operand and with an operand
; loaded from %ptr; both results are selected against zeroinitializer and
; combined with a plain fdiv, which is returned.
; The expected assembly uses {%k1} {z} on both masked divides, folds the load
; as a (%rsi) memory operand, and ends with an unmasked vdivph.
189 define <32 x half> @test_int_x86_avx512fp16_maskz_div_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, <32 x half>* %ptr) {
190 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_512:
192 ; CHECK-NEXT: kmovd %edi, %k1
193 ; CHECK-NEXT: vdivph %zmm2, %zmm1, %zmm0 {%k1} {z}
194 ; CHECK-NEXT: vdivph (%rsi), %zmm1, %zmm1 {%k1} {z}
195 ; CHECK-NEXT: vdivph %zmm1, %zmm0, %zmm0
197 %mask = bitcast i32 %msk to <32 x i1>
198 %val = load <32 x half>, <32 x half>* %ptr
199 %res0 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
200 %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
201 %t2 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
202 %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
203 %res3 = fdiv <32 x half> %res1, %res2
204 ret <32 x half> %res3
; Merge-masked divide with immediate 10: the expected assembly carries
; {ru-sae}, writes zmm2 under {%k1}, and then copies zmm2 to zmm0.
207 define <32 x half> @test_int_x86_avx512fp16_div_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, <32 x half>* %ptr) {
208 ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_512_round:
210 ; CHECK-NEXT: kmovd %edi, %k1
211 ; CHECK-NEXT: vdivph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
212 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
214 %mask = bitcast i32 %msk to <32 x i1>
215 %t1 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
216 %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
; 512-bit packed-half min intrinsic: two <32 x half> operands and a trailing
; i32 immediate (the tests below pass 8 and expect the {sae} annotation).
220 declare <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half>, <32 x half>, i32)
; Generic IR min pattern (fcmp olt followed by select of the same operands),
; written without the intrinsic; the expected assembly is a single vminph.
222 define <32 x half> @test_min_ph(<32 x half> %x1, <32 x half> %x2) {
223 ; CHECK-LABEL: test_min_ph:
225 ; CHECK-NEXT: vminph %zmm1, %zmm0, %zmm0
227 %res0 = fcmp olt <32 x half> %x1, %x2
228 %res1 = select <32 x i1> %res0, <32 x half> %x1, <32 x half> %x2
229 ret <32 x half> %res1
; Unmasked min intrinsic call with immediate 8: the expected assembly is a
; vminph carrying the {sae} annotation.
232 define <32 x half> @test_int_x86_avx512fp16_min_ph_512_sae(<32 x half> %x1, <32 x half> %x2) {
233 ; CHECK-LABEL: test_int_x86_avx512fp16_min_ph_512_sae:
235 ; CHECK-NEXT: vminph {sae}, %zmm1, %zmm0, %zmm0
237 %res0 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
238 ret <32 x half> %res0
; Zero-masked min with immediate 8: the intrinsic result is selected against
; zeroinitializer using %msk bitcast to <32 x i1>. The expected assembly is a
; vminph with {sae} and {%k1} {z}.
241 define <32 x half> @test_int_x86_avx512fp16_maskz_min_ph_512_sae(<32 x half> %x1, <32 x half> %x2, i32 %msk) {
242 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_min_ph_512_sae:
244 ; CHECK-NEXT: kmovd %edi, %k1
245 ; CHECK-NEXT: vminph {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
247 %mask = bitcast i32 %msk to <32 x i1>
248 %res0 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
249 %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
250 ret <32 x half> %res1
; 512-bit packed-half max intrinsic: two <32 x half> operands and a trailing
; i32 immediate (the tests below pass 8 and expect the {sae} annotation).
253 declare <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half>, <32 x half>, i32)
; Generic IR max pattern (fcmp ogt followed by select of the same operands),
; written without the intrinsic; the expected assembly is a single vmaxph.
255 define <32 x half> @test_max_ph(<32 x half> %x1, <32 x half> %x2) {
256 ; CHECK-LABEL: test_max_ph:
258 ; CHECK-NEXT: vmaxph %zmm1, %zmm0, %zmm0
260 %res0 = fcmp ogt <32 x half> %x1, %x2
261 %res1 = select <32 x i1> %res0, <32 x half> %x1, <32 x half> %x2
262 ret <32 x half> %res1
; Unmasked max intrinsic call with immediate 8: the expected assembly is a
; vmaxph carrying the {sae} annotation.
265 define <32 x half> @test_int_x86_avx512fp16_max_ph_512_sae(<32 x half> %x1, <32 x half> %x2) {
266 ; CHECK-LABEL: test_int_x86_avx512fp16_max_ph_512_sae:
268 ; CHECK-NEXT: vmaxph {sae}, %zmm1, %zmm0, %zmm0
270 %res0 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
271 ret <32 x half> %res0
; Zero-masked max with immediate 8: the intrinsic result is selected against
; zeroinitializer using %msk bitcast to <32 x i1>. The expected assembly is a
; vmaxph with {sae} and {%k1} {z}.
274 define <32 x half> @test_int_x86_avx512fp16_maskz_max_ph_512_sae(<32 x half> %x1, <32 x half> %x2, i32 %msk) {
275 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_max_ph_512_sae:
277 ; CHECK-NEXT: kmovd %edi, %k1
278 ; CHECK-NEXT: vmaxph {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
280 %mask = bitcast i32 %msk to <32 x i1>
281 %res0 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
282 %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
283 ret <32 x half> %res1
; Masked half-to-double conversion intrinsic. Operands: <8 x half> source,
; <8 x double> second vector, i8 mask, i32 immediate.
; NOTE(review): the second operand appears to be the merge/pass-through value
; (the masked tests write its register under {%k1}) -- confirm.
286 declare <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half>, <8 x double>, i8, i32)
; Masked conversion with immediate 4: the expected assembly converts xmm0
; into zmm1 under {%k1} and then copies zmm1 to zmm0.
288 define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
289 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd:
291 ; CHECK-NEXT: kmovd %edi, %k1
292 ; CHECK-NEXT: vcvtph2pd %xmm0, %zmm1 {%k1}
293 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
295 %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
296 ret <8 x double> %res
; Masked half-to-double conversion with immediate 8: the expected assembly is
; the same sequence as the immediate-4 variant but with the {sae} annotation.
299 define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_sae(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
300 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_sae:
302 ; CHECK-NEXT: kmovd %edi, %k1
303 ; CHECK-NEXT: vcvtph2pd {sae}, %xmm0, %zmm1 {%k1}
304 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
306 %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 8)
307 ret <8 x double> %res
; Half-to-double conversion with a constant all-ones mask (i8 -1): the
; expected assembly is an unmasked vcvtph2pd writing zmm0 directly, with no
; mask-register setup.
310 define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_nomask(<8 x half> %x0, <8 x double> %x1) {
311 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_nomask:
313 ; CHECK-NEXT: vcvtph2pd %xmm0, %zmm0
315 %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 -1, i32 4)
316 ret <8 x double> %res
; Masked half-to-double conversion whose source is loaded from %px0: the
; expected assembly folds the load into the conversion as a (%rdi) memory
; operand, takes the mask from %esi, and writes zmm0 under {%k1}.
319 define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_load(<8 x half>* %px0, <8 x double> %x1, i8 %x2) {
320 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_load:
322 ; CHECK-NEXT: kmovd %esi, %k1
323 ; CHECK-NEXT: vcvtph2pd (%rdi), %zmm0 {%k1}
325 %x0 = load <8 x half>, <8 x half>* %px0, align 16
326 %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
327 ret <8 x double> %res
; Masked double-to-half conversion intrinsic. Operands: <8 x double> source,
; <8 x half> second vector, i8 mask, i32 immediate.
; NOTE(review): the second operand appears to be the merge/pass-through value
; (the masked tests write its register under {%k1}) -- confirm.
330 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double>, <8 x half>, i8, i32)
; Masked conversion with immediate 4: the expected assembly converts zmm0
; into xmm1 under {%k1}, copies xmm1 to xmm0, and issues vzeroupper.
332 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
333 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph:
335 ; CHECK-NEXT: kmovd %edi, %k1
336 ; CHECK-NEXT: vcvtpd2ph %zmm0, %xmm1 {%k1}
337 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
338 ; CHECK-NEXT: vzeroupper
340 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
; Masked double-to-half conversion with immediate 11: the expected assembly
; is the same sequence as the immediate-4 variant but with the {rz-sae}
; annotation on the conversion.
344 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_r(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
345 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_r:
347 ; CHECK-NEXT: kmovd %edi, %k1
348 ; CHECK-NEXT: vcvtpd2ph {rz-sae}, %zmm0, %xmm1 {%k1}
349 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
350 ; CHECK-NEXT: vzeroupper
352 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 11)
; Masked double-to-half conversion whose source is loaded from %px0: the
; expected assembly folds the load as a (%rdi) memory operand, using the
; "z"-suffixed mnemonic vcvtpd2phz, and writes xmm0 under {%k1}.
356 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_load(<8 x double>* %px0, <8 x half> %x1, i8 %x2) {
357 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_load:
359 ; CHECK-NEXT: kmovd %esi, %k1
360 ; CHECK-NEXT: vcvtpd2phz (%rdi), %xmm0 {%k1}
362 %x0 = load <8 x double>, <8 x double>* %px0, align 64
363 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
; Masked scalar float-to-half conversion intrinsic. Operands: <8 x half>,
; <4 x float>, <8 x half>, i8 mask, i32 immediate.
; NOTE(review): the third operand appears to be the merge/pass-through value
; (the masked tests write its register under {%k1}) -- confirm.
367 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half>, <4 x float>, <8 x half>, i8, i32)
; Masked conversion with immediate 4: the expected assembly writes xmm2 under
; {%k1} and then copies xmm2 to xmm0.
369 define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
370 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round:
372 ; CHECK-NEXT: kmovd %edi, %k1
373 ; CHECK-NEXT: vcvtss2sh %xmm1, %xmm0, %xmm2 {%k1}
374 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
376 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 4)
; Masked scalar float-to-half conversion with immediate 11: the expected
; assembly carries the {rz-sae} annotation, writes xmm2 under {%k1}, and then
; copies xmm2 to xmm0.
380 define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
381 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r:
383 ; CHECK-NEXT: kmovd %edi, %k1
384 ; CHECK-NEXT: vcvtss2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
385 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
387 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 11)
; Scalar float-to-half conversion with a constant all-ones mask (i8 -1): the
; expected assembly is an unmasked vcvtss2sh writing xmm0 directly.
391 define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2) {
392 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask:
394 ; CHECK-NEXT: vcvtss2sh %xmm1, %xmm0, %xmm0
396 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 -1, i32 4)
; Scalar float-to-half conversion with zeroinitializer as the third operand:
; the expected assembly uses the zero-masking form {%k1} {z} and writes xmm0.
400 define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
401 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z:
403 ; CHECK-NEXT: kmovd %edi, %k1
404 ; CHECK-NEXT: vcvtss2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
406 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
; Masked scalar double-to-half conversion intrinsic. Operands: <8 x half>,
; <2 x double>, <8 x half>, i8 mask, i32 immediate.
; NOTE(review): the third operand appears to be the merge/pass-through value
; (the masked tests write its register under {%k1}) -- confirm.
410 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half>, <2 x double>, <8 x half>, i8, i32)
; Masked conversion with immediate 4: the expected assembly writes xmm2 under
; {%k1} and then copies xmm2 to xmm0.
412 define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
413 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round:
415 ; CHECK-NEXT: kmovd %edi, %k1
416 ; CHECK-NEXT: vcvtsd2sh %xmm1, %xmm0, %xmm2 {%k1}
417 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
419 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 4)
; Masked scalar double-to-half conversion with immediate 11: the expected
; assembly carries the {rz-sae} annotation, writes xmm2 under {%k1}, and then
; copies xmm2 to xmm0.
423 define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
424 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r:
426 ; CHECK-NEXT: kmovd %edi, %k1
427 ; CHECK-NEXT: vcvtsd2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
428 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
430 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 11)
; Scalar double-to-half conversion with a constant all-ones mask (i8 -1): the
; expected assembly is an unmasked vcvtsd2sh writing xmm0 directly.
434 define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2) {
435 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask:
437 ; CHECK-NEXT: vcvtsd2sh %xmm1, %xmm0, %xmm0
439 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 -1, i32 4)
; Scalar double-to-half conversion with zeroinitializer as the third operand:
; the expected assembly uses the zero-masking form {%k1} {z} and writes xmm0.
443 define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
444 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z:
446 ; CHECK-NEXT: kmovd %edi, %k1
447 ; CHECK-NEXT: vcvtsd2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
449 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
; Masked scalar half-to-float conversion intrinsic. Operands: <4 x float>,
; <8 x half>, <4 x float>, i8 mask, i32 immediate.
; NOTE(review): the third operand appears to be the merge/pass-through value
; (the masked tests write its register under {%k1}) -- confirm.
453 declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float>, <8 x half>, <4 x float>, i8, i32)
; Masked conversion with immediate 4: the expected assembly writes xmm2 under
; {%k1} and then copies xmm2 to xmm0.
455 define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
456 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round:
458 ; CHECK-NEXT: kmovd %edi, %k1
459 ; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm0, %xmm2 {%k1}
460 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
462 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 4)
; Masked scalar half-to-float conversion with immediate 8: the expected
; assembly carries the {sae} annotation, writes xmm2 under {%k1}, and then
; copies xmm2 to xmm0.
466 define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
467 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r:
469 ; CHECK-NEXT: kmovd %edi, %k1
470 ; CHECK-NEXT: vcvtsh2ss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
471 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
473 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 8)
; Scalar half-to-float conversion with a constant all-ones mask (i8 -1): the
; expected assembly is an unmasked vcvtsh2ss writing xmm0 directly.
477 define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2) {
478 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask:
480 ; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm0, %xmm0
482 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 -1, i32 4)
; Scalar half-to-float conversion with zeroinitializer as the third operand:
; the expected assembly uses the zero-masking form {%k1} {z} and writes xmm0.
486 define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
487 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z:
489 ; CHECK-NEXT: kmovd %edi, %k1
490 ; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm0, %xmm0 {%k1} {z}
492 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> zeroinitializer, i8 %x2, i32 4)
; Masked scalar half-to-double conversion intrinsic. Operands: <2 x double>,
; <8 x half>, <2 x double>, i8 mask, i32 immediate.
; NOTE(review): the third operand appears to be the merge/pass-through value
; (the masked tests write its register under {%k1}) -- confirm.
496 declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double>, <8 x half>, <2 x double>, i8, i32)
; Masked conversion with immediate 4: the expected assembly writes xmm2 under
; {%k1} and then copies xmm2 to xmm0.
498 define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
499 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round:
501 ; CHECK-NEXT: kmovd %edi, %k1
502 ; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm0, %xmm2 {%k1}
503 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
505 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 4)
506 ret <2 x double> %res
; Masked scalar half-to-double conversion with immediate 8: the expected
; assembly carries the {sae} annotation, writes xmm2 under {%k1}, and then
; copies xmm2 to xmm0.
509 define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
510 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r:
512 ; CHECK-NEXT: kmovd %edi, %k1
513 ; CHECK-NEXT: vcvtsh2sd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
514 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
516 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 8)
517 ret <2 x double> %res
; Scalar half-to-double conversion with a constant all-ones mask (i8 -1): the
; expected assembly is an unmasked vcvtsh2sd writing xmm0 directly.
520 define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2) {
521 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask:
523 ; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm0, %xmm0
525 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 -1, i32 4)
526 ret <2 x double> %res
; Scalar half-to-double conversion with zeroinitializer as the third operand:
; the expected assembly uses the zero-masking form {%k1} {z} and writes xmm0.
529 define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
530 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z:
532 ; CHECK-NEXT: kmovd %edi, %k1
533 ; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm0, %xmm0 {%k1} {z}
535 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> zeroinitializer, i8 %x2, i32 4)
536 ret <2 x double> %res
; Masked packed half-to-float conversion intrinsic. Operands: <16 x half>
; source, <16 x float> second vector, i16 mask, i32 immediate.
; NOTE(review): the second operand appears to be the merge/pass-through value
; (the masked tests write its register under {%k1}) -- confirm.
539 declare <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half>, <16 x float>, i16, i32)
; Unmasked form: undef second operand, all-ones mask (i16 -1), immediate 4.
; The expected assembly is a plain vcvtph2psx from ymm0 to zmm0.
541 define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512(<16 x half> %x0) {
542 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512:
544 ; CHECK-NEXT: vcvtph2psx %ymm0, %zmm0
546 %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 4)
547 ret <16 x float> %res
; Merge-masked packed half-to-float conversion with immediate 4: the expected
; assembly converts ymm0 into zmm1 under {%k1} and then copies zmm1 to zmm0.
550 define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
551 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512:
553 ; CHECK-NEXT: kmovd %edi, %k1
554 ; CHECK-NEXT: vcvtph2psx %ymm0, %zmm1 {%k1}
555 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
557 %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 4)
558 ret <16 x float> %res
; Zero-masked packed half-to-float conversion (zeroinitializer second
; operand, immediate 4): the expected assembly uses {%k1} {z} and writes zmm0.
561 define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512(<16 x half> %x0, i16 %x2) {
562 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512:
564 ; CHECK-NEXT: kmovd %edi, %k1
565 ; CHECK-NEXT: vcvtph2psx %ymm0, %zmm0 {%k1} {z}
567 %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 4)
568 ret <16 x float> %res
; Unmasked packed half-to-float conversion with immediate 8: the expected
; assembly is a vcvtph2psx carrying the {sae} annotation.
571 define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r(<16 x half> %x0) {
572 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512r:
574 ; CHECK-NEXT: vcvtph2psx {sae}, %ymm0, %zmm0
576 %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 8)
577 ret <16 x float> %res
; Merge-masked packed half-to-float conversion with immediate 8: the expected
; assembly carries {sae}, writes zmm1 under {%k1}, and then copies zmm1 to
; zmm0.
580 define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512r(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
581 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512r:
583 ; CHECK-NEXT: kmovd %edi, %k1
584 ; CHECK-NEXT: vcvtph2psx {sae}, %ymm0, %zmm1 {%k1}
585 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
587 %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 8)
588 ret <16 x float> %res
; Zero-masked packed half-to-float conversion with immediate 8: the expected
; assembly carries {sae} together with {%k1} {z} and writes zmm0.
591 define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512r(<16 x half> %x0, i16 %x2) {
592 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512r:
594 ; CHECK-NEXT: kmovd %edi, %k1
595 ; CHECK-NEXT: vcvtph2psx {sae}, %ymm0, %zmm0 {%k1} {z}
597 %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 8)
598 ret <16 x float> %res
; Masked packed float-to-half conversion intrinsic. Operands: <16 x float>
; source, <16 x half> second vector, i16 mask, i32 immediate.
; NOTE(review): the second operand appears to be the merge/pass-through value
; (the masked tests write its register under {%k1}) -- confirm.
601 declare <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float>, <16 x half>, i16, i32)
; Unmasked form: undef second operand, all-ones mask (i16 -1), immediate 4.
; The expected assembly is a plain vcvtps2phx from zmm0 to ymm0.
603 define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512(<16 x float> %x0) {
604 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_512:
606 ; CHECK-NEXT: vcvtps2phx %zmm0, %ymm0
608 %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> undef, i16 -1, i32 4)
; Merge-masked packed float-to-half conversion with immediate 4: the expected
; assembly converts zmm0 into ymm1 under {%k1} and then copies ymm1 to ymm0.
612 define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
613 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512:
615 ; CHECK-NEXT: kmovd %edi, %k1
616 ; CHECK-NEXT: vcvtps2phx %zmm0, %ymm1 {%k1}
617 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
619 %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 4)
; Zero-masked packed float-to-half conversion (zeroinitializer second
; operand, immediate 4): the expected assembly uses {%k1} {z} and writes ymm0.
623 define <16 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_512(<16 x float> %x0, i16 %x2) {
624 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_512:
626 ; CHECK-NEXT: kmovd %edi, %k1
627 ; CHECK-NEXT: vcvtps2phx %zmm0, %ymm0 {%k1} {z}
629 %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> zeroinitializer, i16 %x2, i32 4)
; Two conversions of the same source with different immediates, summed with
; fadd:
;   - immediate 9 with the runtime mask %x2: expected as {rd-sae}, written
;     into ymm1 under {%k1};
;   - immediate 10 with an all-ones mask (i16 -1): expected as {ru-sae},
;     unmasked, written into ymm0.
; The expected assembly then adds the two halves with a ymm-width vaddph.
633 define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512r(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
634 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512r:
636 ; CHECK-NEXT: kmovd %edi, %k1
637 ; CHECK-NEXT: vcvtps2phx {rd-sae}, %zmm0, %ymm1 {%k1}
638 ; CHECK-NEXT: vcvtps2phx {ru-sae}, %zmm0, %ymm0
639 ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
641 %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 9)
642 %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 -1, i32 10)
643 %res2 = fadd <16 x half> %res, %res1
644 ret <16 x half> %res2