1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
4 define <16 x half> @test_int_x86_avx512fp16_add_ph_256(<16 x half> %x1, <16 x half> %x2) {
5 ; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_256:
7 ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0
9 %res = fadd <16 x half> %x1, %x2
13 define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
14 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_256:
16 ; CHECK-NEXT: kmovd %edi, %k1
17 ; CHECK-NEXT: vmovaps %ymm2, %ymm3
18 ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm3 {%k1}
19 ; CHECK-NEXT: vaddph (%rsi), %ymm0, %ymm2 {%k1}
20 ; CHECK-NEXT: vaddph %ymm2, %ymm3, %ymm0
22 %msk = bitcast i16 %mask to <16 x i1>
23 %val = load <16 x half>, ptr %ptr
24 %res0 = fadd <16 x half> %x1, %x2
25 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
26 %t3 = fadd <16 x half> %x1, %val
27 %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
28 %res = fadd <16 x half> %res1 , %res2
32 define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
33 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_256:
35 ; CHECK-NEXT: kmovd %edi, %k1
36 ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0 {%k1} {z}
38 %msk = bitcast i16 %mask to <16 x i1>
39 %res0 = fadd <16 x half> %x1, %x2
40 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
44 define <8 x half> @test_int_x86_avx512fp16_add_ph_128(<8 x half> %x1, <8 x half> %x2) {
45 ; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_128:
47 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
49 %res = fadd <8 x half> %x1, %x2
53 define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
54 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_128:
56 ; CHECK-NEXT: kmovd %edi, %k1
57 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
58 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm3 {%k1}
59 ; CHECK-NEXT: vaddph (%rsi), %xmm0, %xmm2 {%k1}
60 ; CHECK-NEXT: vaddph %xmm2, %xmm3, %xmm0
62 %msk = bitcast i8 %mask to <8 x i1>
63 %val = load <8 x half>, ptr %ptr
64 %res0 = fadd <8 x half> %x1, %x2
65 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
66 %t3 = fadd <8 x half> %x1, %val
67 %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
68 %res = fadd <8 x half> %res1 , %res2
72 define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
73 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_128:
75 ; CHECK-NEXT: kmovd %edi, %k1
76 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0 {%k1} {z}
78 %msk = bitcast i8 %mask to <8 x i1>
79 %res0 = fadd <8 x half> %x1, %x2
80 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
84 define <16 x half> @test_int_x86_avx512fp16_sub_ph_256(<16 x half> %x1, <16 x half> %x2) {
85 ; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_256:
87 ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm0
89 %res = fsub <16 x half> %x1, %x2
93 define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
94 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_256:
96 ; CHECK-NEXT: kmovd %edi, %k1
97 ; CHECK-NEXT: vmovaps %ymm2, %ymm3
98 ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm3 {%k1}
99 ; CHECK-NEXT: vsubph (%rsi), %ymm0, %ymm2 {%k1}
100 ; CHECK-NEXT: vsubph %ymm2, %ymm3, %ymm0
102 %msk = bitcast i16 %mask to <16 x i1>
103 %val = load <16 x half>, ptr %ptr
104 %res0 = fsub <16 x half> %x1, %x2
105 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
106 %t3 = fsub <16 x half> %x1, %val
107 %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
108 %res = fsub <16 x half> %res1 , %res2
112 define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
113 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_256:
115 ; CHECK-NEXT: kmovd %edi, %k1
116 ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm0 {%k1} {z}
118 %msk = bitcast i16 %mask to <16 x i1>
119 %res0 = fsub <16 x half> %x1, %x2
120 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
121 ret <16 x half> %res1
124 define <8 x half> @test_int_x86_avx512fp16_sub_ph_128(<8 x half> %x1, <8 x half> %x2) {
125 ; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_128:
127 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm0
129 %res = fsub <8 x half> %x1, %x2
133 define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
134 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_128:
136 ; CHECK-NEXT: kmovd %edi, %k1
137 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
138 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm3 {%k1}
139 ; CHECK-NEXT: vsubph (%rsi), %xmm0, %xmm2 {%k1}
140 ; CHECK-NEXT: vsubph %xmm2, %xmm3, %xmm0
142 %msk = bitcast i8 %mask to <8 x i1>
143 %val = load <8 x half>, ptr %ptr
144 %res0 = fsub <8 x half> %x1, %x2
145 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
146 %t3 = fsub <8 x half> %x1, %val
147 %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
148 %res = fsub <8 x half> %res1 , %res2
152 define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
153 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_128:
155 ; CHECK-NEXT: kmovd %edi, %k1
156 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm0 {%k1} {z}
158 %msk = bitcast i8 %mask to <8 x i1>
159 %res0 = fsub <8 x half> %x1, %x2
160 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
164 define <16 x half> @test_int_x86_avx512fp16_mul_ph_256(<16 x half> %x1, <16 x half> %x2) {
165 ; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_256:
167 ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm0
169 %res = fmul <16 x half> %x1, %x2
173 define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
174 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_256:
176 ; CHECK-NEXT: kmovd %edi, %k1
177 ; CHECK-NEXT: vmovaps %ymm2, %ymm3
178 ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm3 {%k1}
179 ; CHECK-NEXT: vmulph (%rsi), %ymm0, %ymm2 {%k1}
180 ; CHECK-NEXT: vmulph %ymm2, %ymm3, %ymm0
182 %msk = bitcast i16 %mask to <16 x i1>
183 %val = load <16 x half>, ptr %ptr
184 %res0 = fmul <16 x half> %x1, %x2
185 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
186 %t3 = fmul <16 x half> %x1, %val
187 %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
188 %res = fmul <16 x half> %res1 , %res2
192 define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
193 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_256:
195 ; CHECK-NEXT: kmovd %edi, %k1
196 ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm0 {%k1} {z}
198 %msk = bitcast i16 %mask to <16 x i1>
199 %res0 = fmul <16 x half> %x1, %x2
200 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
201 ret <16 x half> %res1
204 define <8 x half> @test_int_x86_avx512fp16_mul_ph_128(<8 x half> %x1, <8 x half> %x2) {
205 ; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_128:
207 ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm0
209 %res = fmul <8 x half> %x1, %x2
213 define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
214 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_128:
216 ; CHECK-NEXT: kmovd %edi, %k1
217 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
218 ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm3 {%k1}
219 ; CHECK-NEXT: vmulph (%rsi), %xmm0, %xmm2 {%k1}
220 ; CHECK-NEXT: vmulph %xmm2, %xmm3, %xmm0
222 %msk = bitcast i8 %mask to <8 x i1>
223 %val = load <8 x half>, ptr %ptr
224 %res0 = fmul <8 x half> %x1, %x2
225 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
226 %t3 = fmul <8 x half> %x1, %val
227 %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
228 %res = fmul <8 x half> %res1 , %res2
232 define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
233 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_128:
235 ; CHECK-NEXT: kmovd %edi, %k1
236 ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm0 {%k1} {z}
238 %msk = bitcast i8 %mask to <8 x i1>
239 %res0 = fmul <8 x half> %x1, %x2
240 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
244 define <16 x half> @test_int_x86_avx512fp16_div_ph_256(<16 x half> %x1, <16 x half> %x2) {
245 ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256:
247 ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm0
249 %res = fdiv <16 x half> %x1, %x2
253 define <16 x half> @test_int_x86_avx512fp16_div_ph_256_fast(<16 x half> %x1, <16 x half> %x2) {
254 ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256_fast:
256 ; CHECK-NEXT: vrcpph %ymm1, %ymm1
257 ; CHECK-NEXT: vmulph %ymm0, %ymm1, %ymm0
259 %res = fdiv fast <16 x half> %x1, %x2
263 define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
264 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_256:
266 ; CHECK-NEXT: kmovd %edi, %k1
267 ; CHECK-NEXT: vmovaps %ymm2, %ymm3
268 ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm3 {%k1}
269 ; CHECK-NEXT: vdivph (%rsi), %ymm0, %ymm2 {%k1}
270 ; CHECK-NEXT: vdivph %ymm2, %ymm3, %ymm0
272 %msk = bitcast i16 %mask to <16 x i1>
273 %val = load <16 x half>, ptr %ptr
274 %res0 = fdiv <16 x half> %x1, %x2
275 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
276 %t3 = fdiv <16 x half> %x1, %val
277 %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
278 %res = fdiv <16 x half> %res1 , %res2
282 define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
283 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_256:
285 ; CHECK-NEXT: kmovd %edi, %k1
286 ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm0 {%k1} {z}
288 %msk = bitcast i16 %mask to <16 x i1>
289 %res0 = fdiv <16 x half> %x1, %x2
290 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
291 ret <16 x half> %res1
294 define <8 x half> @test_int_x86_avx512fp16_div_ph_128(<8 x half> %x1, <8 x half> %x2) {
295 ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128:
297 ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm0
299 %res = fdiv <8 x half> %x1, %x2
303 define <8 x half> @test_int_x86_avx512fp16_div_ph_128_fast(<8 x half> %x1, <8 x half> %x2) {
304 ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128_fast:
306 ; CHECK-NEXT: vrcpph %xmm1, %xmm1
307 ; CHECK-NEXT: vmulph %xmm0, %xmm1, %xmm0
309 %res = fdiv fast <8 x half> %x1, %x2
313 define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
314 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_128:
316 ; CHECK-NEXT: kmovd %edi, %k1
317 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
318 ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm3 {%k1}
319 ; CHECK-NEXT: vdivph (%rsi), %xmm0, %xmm2 {%k1}
320 ; CHECK-NEXT: vdivph %xmm2, %xmm3, %xmm0
322 %msk = bitcast i8 %mask to <8 x i1>
323 %val = load <8 x half>, ptr %ptr
324 %res0 = fdiv <8 x half> %x1, %x2
325 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
326 %t3 = fdiv <8 x half> %x1, %val
327 %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
328 %res = fdiv <8 x half> %res1 , %res2
332 define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
333 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_128:
335 ; CHECK-NEXT: kmovd %edi, %k1
336 ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm0 {%k1} {z}
338 %msk = bitcast i8 %mask to <8 x i1>
339 %res0 = fdiv <8 x half> %x1, %x2
340 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
344 define <16 x half> @test_min_ph_256(<16 x half> %x1, <16 x half> %x2) {
345 ; CHECK-LABEL: test_min_ph_256:
347 ; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
349 %res0 = fcmp olt <16 x half> %x1, %x2
350 %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
351 ret <16 x half> %res1
354 define <16 x half> @test_max_ph_256(<16 x half> %x1, <16 x half> %x2) {
355 ; CHECK-LABEL: test_max_ph_256:
357 ; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm0
359 %res0 = fcmp ogt <16 x half> %x1, %x2
360 %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
361 ret <16 x half> %res1
364 define <8 x half> @test_min_ph_128(<8 x half> %x1, <8 x half> %x2) {
365 ; CHECK-LABEL: test_min_ph_128:
367 ; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
369 %res0 = fcmp olt <8 x half> %x1, %x2
370 %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
374 define <8 x half> @test_max_ph_128(<8 x half> %x1, <8 x half> %x2) {
375 ; CHECK-LABEL: test_max_ph_128:
377 ; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
379 %res0 = fcmp ogt <8 x half> %x1, %x2
380 %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
384 declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>)
385 declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>)
387 define <8 x half> @test_max_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
388 ; CHECK-LABEL: test_max_ph_128_2:
390 ; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
392 %res0 = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> %x1, <8 x half> %x2)
396 define <16 x half> @test_max_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
397 ; CHECK-LABEL: test_max_ph_256_2:
399 ; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm0
401 %res0 = call <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half> %x1, <16 x half> %x2)
402 ret <16 x half> %res0
405 declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>)
406 declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>)
408 define <8 x half> @test_min_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
409 ; CHECK-LABEL: test_min_ph_128_2:
411 ; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
413 %res0 = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> %x1, <8 x half> %x2)
417 define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
418 ; CHECK-LABEL: test_min_ph_256_2:
420 ; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
422 %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
423 ret <16 x half> %res0
426 declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)
428 define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
429 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
431 ; CHECK-NEXT: kmovd %edi, %k1
432 ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1}
433 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
435 %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
436 ret <4 x double> %res
439 define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
440 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
442 ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
444 %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
445 ret <4 x double> %res
448 declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)
450 define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
451 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
453 ; CHECK-NEXT: kmovd %edi, %k1
454 ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1}
455 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
457 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
458 ret <2 x double> %res
461 define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
462 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
464 ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
466 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
467 ret <2 x double> %res
470 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)
472 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
473 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
475 ; CHECK-NEXT: kmovd %edi, %k1
476 ; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1}
477 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
478 ; CHECK-NEXT: vzeroupper
480 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
484 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) {
485 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
487 ; CHECK-NEXT: kmovd %esi, %k1
488 ; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1}
490 %x0 = load <4 x double>, ptr %px0, align 32
491 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
495 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)
497 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
498 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
500 ; CHECK-NEXT: kmovd %edi, %k1
501 ; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1}
502 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
504 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
508 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) {
509 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
511 ; CHECK-NEXT: kmovd %esi, %k1
512 ; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1}
514 %x0 = load <2 x double>, ptr %px0, align 16
515 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
519 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)
521 define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
522 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
524 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0
526 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
530 define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
531 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
533 ; CHECK-NEXT: kmovd %edi, %k1
534 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1}
535 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
537 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
541 define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
542 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
544 ; CHECK-NEXT: kmovd %edi, %k1
545 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z}
547 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
551 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)
553 define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
554 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
556 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0
558 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
562 define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
563 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
565 ; CHECK-NEXT: kmovd %edi, %k1
566 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1}
567 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
569 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
573 define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
574 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
576 ; CHECK-NEXT: kmovd %edi, %k1
577 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z}
579 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
583 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)
585 define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
586 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
588 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
590 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
594 define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
595 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
597 ; CHECK-NEXT: kmovd %edi, %k1
598 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1}
599 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
601 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
605 define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
606 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
608 ; CHECK-NEXT: kmovd %edi, %k1
609 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z}
611 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
615 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)
617 define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
618 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
620 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
622 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
626 define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
627 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
629 ; CHECK-NEXT: kmovd %edi, %k1
630 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1}
631 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
633 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
637 define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
638 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
640 ; CHECK-NEXT: kmovd %edi, %k1
641 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z}
643 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
647 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)
649 define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
650 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
652 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
654 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
658 define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
659 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
661 ; CHECK-NEXT: kmovd %edi, %k1
662 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1}
663 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
665 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
669 define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
670 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
672 ; CHECK-NEXT: kmovd %edi, %k1
673 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z}
675 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
679 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)
681 define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
682 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
684 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
686 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
690 define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
691 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
693 ; CHECK-NEXT: kmovd %edi, %k1
694 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1}
695 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
697 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
701 define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
702 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
704 ; CHECK-NEXT: kmovd %edi, %k1
705 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z}
707 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
711 declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)
713 define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
714 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
716 ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
718 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
722 define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
723 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
725 ; CHECK-NEXT: kmovd %edi, %k1
726 ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm1 {%k1}
727 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
729 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
733 define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
734 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
736 ; CHECK-NEXT: kmovd %edi, %k1
737 ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0 {%k1} {z}
739 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
743 declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)
745 define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
746 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
748 ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0
750 %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
754 define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
755 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
757 ; CHECK-NEXT: kmovd %edi, %k1
758 ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm1 {%k1}
759 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
761 %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
765 define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
766 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
768 ; CHECK-NEXT: kmovd %edi, %k1
769 ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0 {%k1} {z}
771 %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
775 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)
777 define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
778 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
780 ; CHECK-NEXT: kmovd %edi, %k1
781 ; CHECK-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1}
782 ; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
783 ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
785 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
786 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
787 %res2 = fadd <8 x half> %res, %res1
791 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)
793 define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
794 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
796 ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
797 ; CHECK-NEXT: vzeroupper
799 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
803 define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
804 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
806 ; CHECK-NEXT: kmovd %edi, %k1
807 ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1}
808 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
809 ; CHECK-NEXT: vzeroupper
811 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
815 define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
816 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
818 ; CHECK-NEXT: kmovd %edi, %k1
819 ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0 {%k1} {z}
820 ; CHECK-NEXT: vzeroupper
822 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)