1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
4 define signext i16 @test_mm_cvtsi128_si16(<2 x i64> %A) local_unnamed_addr #0 {
5 ; CHECK-LABEL: test_mm_cvtsi128_si16:
6 ; CHECK: # %bb.0: # %entry
7 ; CHECK-NEXT: vmovw %xmm0, %eax
8 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
11 %0 = bitcast <2 x i64> %A to <8 x i16>
12 %vecext.i = extractelement <8 x i16> %0, i32 0
16 define <2 x i64> @test_mm_cvtsi16_si128(i16 signext %A) local_unnamed_addr #0 {
17 ; CHECK-LABEL: test_mm_cvtsi16_si128:
18 ; CHECK: # %bb.0: # %entry
19 ; CHECK-NEXT: vmovw %edi, %xmm0
22 %vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %A, i32 0
23 %0 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
27 define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
28 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256:
30 ; CHECK-NEXT: kmovd %edi, %k1
31 ; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm1 {%k1}
32 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
33 ; CHECK-NEXT: vzeroupper
35 %mask = bitcast i8 %x2 to <8 x i1>
36 %res0 = sitofp <8 x i32> %x0 to <8 x half>
37 %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
41 define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256_z(<8 x i32> %x0, i8 %x2) {
42 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256_z:
44 ; CHECK-NEXT: kmovd %edi, %k1
45 ; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0 {%k1} {z}
46 ; CHECK-NEXT: vzeroupper
48 %mask = bitcast i8 %x2 to <8 x i1>
49 %res0 = sitofp <8 x i32> %x0 to <8 x half>
50 %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
54 define <8 x half> @sint_to_fp_8i32_to_8f16(<8 x i32> %x) {
55 ; CHECK-LABEL: sint_to_fp_8i32_to_8f16:
57 ; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0
58 ; CHECK-NEXT: vzeroupper
60 %res = sitofp <8 x i32> %x to <8 x half>
64 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32>, <8 x half>, i8)
66 define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
67 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128:
69 ; CHECK-NEXT: kmovd %edi, %k1
70 ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm1 {%k1}
71 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
73 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
77 define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
78 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_nomask:
80 ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
82 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
86 define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_z(<4 x i32> %x0, i8 %x2) {
87 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_z:
89 ; CHECK-NEXT: kmovd %edi, %k1
90 ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 {%k1} {z}
92 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
96 define <4 x half> @sint_to_fp_4i32_to_4f16(<4 x i32> %x) {
97 ; CHECK-LABEL: sint_to_fp_4i32_to_4f16:
99 ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
101 %res = sitofp <4 x i32> %x to <4 x half>
105 define <2 x half> @sint_to_fp_2i32_to_2f16(<2 x i32> %x) {
106 ; CHECK-LABEL: sint_to_fp_2i32_to_2f16:
108 ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
110 %res = sitofp <2 x i32> %x to <2 x half>
114 define <4 x i32> @fp_to_sint_4f16_to_4i32(<4 x half> %x) {
115 ; CHECK-LABEL: fp_to_sint_4f16_to_4i32:
117 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
119 %res = fptosi <4 x half> %x to <4 x i32>
123 define <2 x i32> @fp_to_sint_2f16_to_2i32(<2 x half> %x) {
124 ; CHECK-LABEL: fp_to_sint_2f16_to_2i32:
126 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
128 %res = fptosi <2 x half> %x to <2 x i32>
132 define <2 x i16> @fp_to_sint_2f16_to_2i16(<2 x half> %x) {
133 ; CHECK-LABEL: fp_to_sint_2f16_to_2i16:
135 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
137 %res = fptosi <2 x half> %x to <2 x i16>
141 define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
142 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256:
144 ; CHECK-NEXT: kmovd %edi, %k1
145 ; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm1 {%k1}
146 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
147 ; CHECK-NEXT: vzeroupper
149 %mask = bitcast i8 %x2 to <8 x i1>
150 %res0 = uitofp <8 x i32> %x0 to <8 x half>
151 %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
155 define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256_z(<8 x i32> %x0, i8 %x2) {
156 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256_z:
158 ; CHECK-NEXT: kmovd %edi, %k1
159 ; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0 {%k1} {z}
160 ; CHECK-NEXT: vzeroupper
162 %mask = bitcast i8 %x2 to <8 x i1>
163 %res0 = uitofp <8 x i32> %x0 to <8 x half>
164 %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
168 define <8 x half> @uint_to_fp_8i32_to_8f16(<8 x i32> %x) {
169 ; CHECK-LABEL: uint_to_fp_8i32_to_8f16:
171 ; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0
172 ; CHECK-NEXT: vzeroupper
174 %res = uitofp <8 x i32> %x to <8 x half>
178 define <8 x i32> @fp_to_uint_8f16_to_8i32(<8 x half> %x) {
179 ; CHECK-LABEL: fp_to_uint_8f16_to_8i32:
181 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
183 %res = fptoui <8 x half> %x to <8 x i32>
187 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32>, <8 x half>, i8)
189 define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
190 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128:
192 ; CHECK-NEXT: kmovd %edi, %k1
193 ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm1 {%k1}
194 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
196 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
200 define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
201 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_nomask:
203 ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
205 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
209 define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_z(<4 x i32> %x0, i8 %x2) {
210 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_z:
212 ; CHECK-NEXT: kmovd %edi, %k1
213 ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 {%k1} {z}
215 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
219 define <4 x half> @uint_to_fp_4i32_to_4f16(<4 x i32> %x) {
220 ; CHECK-LABEL: uint_to_fp_4i32_to_4f16:
222 ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
224 %res = uitofp <4 x i32> %x to <4 x half>
228 define <2 x half> @uint_to_fp_2i32_to_2f16(<2 x i32> %x) {
229 ; CHECK-LABEL: uint_to_fp_2i32_to_2f16:
231 ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
233 %res = uitofp <2 x i32> %x to <2 x half>
237 define <4 x i32> @fp_to_uint_4f16_to_4i32(<4 x half> %x) {
238 ; CHECK-LABEL: fp_to_uint_4f16_to_4i32:
240 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
242 %res = fptoui <4 x half> %x to <4 x i32>
246 define <2 x i32> @fp_to_uint_2f16_to_2i32(<2 x half> %x) {
247 ; CHECK-LABEL: fp_to_uint_2f16_to_2i32:
249 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
251 %res = fptoui <2 x half> %x to <2 x i32>
255 define <2 x i16> @fp_to_uint_2f16_to_2i16(<2 x half> %x) {
256 ; CHECK-LABEL: fp_to_uint_2f16_to_2i16:
258 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
260 %res = fptoui <2 x half> %x to <2 x i16>
264 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half>, <4 x i32>, i8)
266 define <4 x i32> @test_int_x86_avx512_cvt_ph2dq_128(<8 x half> %x0) {
267 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_128:
269 ; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0
271 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
275 define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
276 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_128:
278 ; CHECK-NEXT: kmovd %edi, %k1
279 ; CHECK-NEXT: vcvtph2dq %xmm0, %xmm1 {%k1}
280 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
282 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
286 define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_128(<8 x half> %x0, i8 %x2) {
287 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_128:
289 ; CHECK-NEXT: kmovd %edi, %k1
290 ; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0 {%k1} {z}
292 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
296 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half>, <8 x i32>, i8)
298 define <8 x i32> @test_int_x86_avx512_cvt_ph2dq_256(<8 x half> %x0) {
299 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_256:
301 ; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0
303 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
307 define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
308 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_256:
310 ; CHECK-NEXT: kmovd %edi, %k1
311 ; CHECK-NEXT: vcvtph2dq %xmm0, %ymm1 {%k1}
312 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
314 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
318 define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_256(<8 x half> %x0, i8 %x2) {
319 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_256:
321 ; CHECK-NEXT: kmovd %edi, %k1
322 ; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0 {%k1} {z}
324 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
328 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)
330 define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
331 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
333 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0
335 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
339 define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
340 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
342 ; CHECK-NEXT: kmovd %edi, %k1
343 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1}
344 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
346 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
350 define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
351 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
353 ; CHECK-NEXT: kmovd %edi, %k1
354 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z}
356 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
360 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)
362 define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
363 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
365 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0
367 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
371 define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
372 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
374 ; CHECK-NEXT: kmovd %edi, %k1
375 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1}
376 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
378 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
382 define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
383 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
385 ; CHECK-NEXT: kmovd %edi, %k1
386 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z}
388 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
392 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)
394 define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
395 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
397 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
399 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
403 define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
404 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
406 ; CHECK-NEXT: kmovd %edi, %k1
407 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1}
408 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
410 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
414 define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
415 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
417 ; CHECK-NEXT: kmovd %edi, %k1
418 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z}
420 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
424 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)
426 define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
427 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
429 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
431 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
435 define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
436 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
438 ; CHECK-NEXT: kmovd %edi, %k1
439 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1}
440 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
442 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
446 define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
447 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
449 ; CHECK-NEXT: kmovd %edi, %k1
450 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z}
452 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
456 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)
458 define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
459 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
461 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
463 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
467 define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
468 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
470 ; CHECK-NEXT: kmovd %edi, %k1
471 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1}
472 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
474 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
478 define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
479 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
481 ; CHECK-NEXT: kmovd %edi, %k1
482 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z}
484 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
488 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)
490 define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
491 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
493 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
495 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
499 define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
500 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
502 ; CHECK-NEXT: kmovd %edi, %k1
503 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1}
504 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
506 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
510 define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
511 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
513 ; CHECK-NEXT: kmovd %edi, %k1
514 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z}
516 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
520 declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)
522 define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
523 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256:
525 ; CHECK-NEXT: kmovd %edi, %k1
526 ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1}
527 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
529 %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
530 ret <4 x double> %res
533 define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
534 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256_nomask:
536 ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
538 %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
539 ret <4 x double> %res
542 declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)
544 define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
545 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128:
547 ; CHECK-NEXT: kmovd %edi, %k1
548 ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1}
549 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
551 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
552 ret <2 x double> %res
555 define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
556 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128_nomask:
558 ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
560 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
561 ret <2 x double> %res
564 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)
566 define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
567 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256:
569 ; CHECK-NEXT: kmovd %edi, %k1
570 ; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1}
571 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
572 ; CHECK-NEXT: vzeroupper
574 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
578 define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) {
579 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256_load:
581 ; CHECK-NEXT: kmovd %esi, %k1
582 ; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1}
584 %x0 = load <4 x double>, ptr %px0, align 32
585 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
589 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)
591 define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
592 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128:
594 ; CHECK-NEXT: kmovd %edi, %k1
595 ; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1}
596 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
598 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
602 define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) {
603 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128_load:
605 ; CHECK-NEXT: kmovd %esi, %k1
606 ; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1}
608 %x0 = load <2 x double>, ptr %px0, align 16
609 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
613 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64>, <8 x half>, i8)
615 define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
616 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256:
618 ; CHECK-NEXT: kmovd %edi, %k1
619 ; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm1 {%k1}
620 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
621 ; CHECK-NEXT: vzeroupper
623 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
627 define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
628 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_nomask:
630 ; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
631 ; CHECK-NEXT: vzeroupper
633 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
637 define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_z(<4 x i64> %x0, i8 %x2) {
638 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_z:
640 ; CHECK-NEXT: kmovd %edi, %k1
641 ; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0 {%k1} {z}
642 ; CHECK-NEXT: vzeroupper
644 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
648 define <4 x half> @sint_to_fp_4i64_to_4f16(<4 x i64> %x) {
649 ; CHECK-LABEL: sint_to_fp_4i64_to_4f16:
651 ; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
652 ; CHECK-NEXT: vzeroupper
654 %res = sitofp <4 x i64> %x to <4 x half>
658 define <4 x i64> @fp_to_sint_4f16_to_4i64(<4 x half> %x) {
659 ; CHECK-LABEL: fp_to_sint_4f16_to_4i64:
661 ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
663 %res = fptosi <4 x half> %x to <4 x i64>
667 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64>, <8 x half>, i8)
669 define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
670 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128:
672 ; CHECK-NEXT: kmovd %edi, %k1
673 ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm1 {%k1}
674 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
676 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
680 define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
681 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_nomask:
683 ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
685 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
689 define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_z(<2 x i64> %x0, i8 %x2) {
690 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_z:
692 ; CHECK-NEXT: kmovd %edi, %k1
693 ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 {%k1} {z}
695 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
699 define <2 x half> @sint_to_fp_2i64_to_2f16(<2 x i64> %x) {
700 ; CHECK-LABEL: sint_to_fp_2i64_to_2f16:
702 ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
704 %res = sitofp <2 x i64> %x to <2 x half>
708 define <2 x i64> @fp_to_sint_2f16_to_2i64(<2 x half> %x) {
709 ; CHECK-LABEL: fp_to_sint_2f16_to_2i64:
711 ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
713 %res = fptosi <2 x half> %x to <2 x i64>
717 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64>, <8 x half>, i8)
719 define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
720 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256:
722 ; CHECK-NEXT: kmovd %edi, %k1
723 ; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm1 {%k1}
724 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
725 ; CHECK-NEXT: vzeroupper
727 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
731 define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
732 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask:
734 ; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
735 ; CHECK-NEXT: vzeroupper
737 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
741 define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_z(<4 x i64> %x0, i8 %x2) {
742 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_z:
744 ; CHECK-NEXT: kmovd %edi, %k1
745 ; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0 {%k1} {z}
746 ; CHECK-NEXT: vzeroupper
748 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
752 define <4 x half> @uint_to_fp_4i64_to_4f16(<4 x i64> %x) {
753 ; CHECK-LABEL: uint_to_fp_4i64_to_4f16:
755 ; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
756 ; CHECK-NEXT: vzeroupper
758 %res = uitofp <4 x i64> %x to <4 x half>
762 define <4 x i64> @fp_to_uint_4f16_to_4i64(<4 x half> %x) {
763 ; CHECK-LABEL: fp_to_uint_4f16_to_4i64:
765 ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
767 %res = fptoui <4 x half> %x to <4 x i64>
771 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64>, <8 x half>, i8)
773 define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
774 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128:
776 ; CHECK-NEXT: kmovd %edi, %k1
777 ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm1 {%k1}
778 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
780 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
784 define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
785 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask:
787 ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
789 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
793 define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_z(<2 x i64> %x0, i8 %x2) {
794 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_z:
796 ; CHECK-NEXT: kmovd %edi, %k1
797 ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 {%k1} {z}
799 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
803 define <2 x half> @uint_to_fp_2i64_to_2f16(<2 x i64> %x) {
804 ; CHECK-LABEL: uint_to_fp_2i64_to_2f16:
806 ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
808 %res = uitofp <2 x i64> %x to <2 x half>
812 define <2 x i64> @fp_to_uint_2f16_to_2i64(<2 x half> %x) {
813 ; CHECK-LABEL: fp_to_uint_2f16_to_2i64:
815 ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
817 %res = fptoui <2 x half> %x to <2 x i64>
821 declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half>, <2 x i64>, i8)
823 define <2 x i64> @test_int_x86_avx512_cvtt_ph2qq_128(<8 x half> %x0) {
824 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_128:
826 ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
828 %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
832 define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
833 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_128:
835 ; CHECK-NEXT: kmovd %edi, %k1
836 ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm1 {%k1}
837 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
839 %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
843 define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_128(<8 x half> %x0, i8 %x2) {
844 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_128:
846 ; CHECK-NEXT: kmovd %edi, %k1
847 ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 {%k1} {z}
849 %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
853 declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half>, <4 x i64>, i8)
855 define <4 x i64> @test_int_x86_avx512_cvtt_ph2qq_256(<8 x half> %x0) {
856 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_256:
858 ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
860 %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
864 define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
865 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_256:
867 ; CHECK-NEXT: kmovd %edi, %k1
868 ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm1 {%k1}
869 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
871 %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
875 define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_256(<8 x half> %x0, i8 %x2) {
876 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_256:
878 ; CHECK-NEXT: kmovd %edi, %k1
879 ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 {%k1} {z}
881 %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
885 declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half>, <2 x i64>, i8)
887 define <2 x i64> @test_int_x86_avx512_cvtt_ph2uqq_128(<8 x half> %x0) {
888 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_128:
890 ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
892 %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
896 define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
897 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_128:
899 ; CHECK-NEXT: kmovd %edi, %k1
900 ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm1 {%k1}
901 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
903 %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
907 define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_128(<8 x half> %x0, i8 %x2) {
908 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_128:
910 ; CHECK-NEXT: kmovd %edi, %k1
911 ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 {%k1} {z}
913 %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
917 declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half>, <4 x i64>, i8)
919 define <4 x i64> @test_int_x86_avx512_cvtt_ph2uqq_256(<8 x half> %x0) {
920 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_256:
922 ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
924 %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
928 define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
929 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_256:
931 ; CHECK-NEXT: kmovd %edi, %k1
932 ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm1 {%k1}
933 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
935 %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
939 define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_256(<8 x half> %x0, i8 %x2) {
940 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_256:
942 ; CHECK-NEXT: kmovd %edi, %k1
943 ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 {%k1} {z}
945 %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
949 declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
950 declare <16 x half> @llvm.sqrt.v16f16(<16 x half>)
952 define <8 x half> @test_sqrt_ph_128(<8 x half> %a0) {
953 ; CHECK-LABEL: test_sqrt_ph_128:
955 ; CHECK-NEXT: vsqrtph %xmm0, %xmm0
957 %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
961 define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) {
962 ; CHECK-LABEL: test_sqrt_ph_128_fast:
964 ; CHECK-NEXT: vrsqrtph %xmm0, %xmm0
965 ; CHECK-NEXT: vmulph %xmm0, %xmm1, %xmm0
967 %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
968 %2 = fdiv fast <8 x half> %a1, %1
972 define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
973 ; CHECK-LABEL: test_sqrt_ph_128_fast2:
975 ; CHECK-NEXT: vsqrtph %xmm0, %xmm0
977 %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
; select(mask, sqrt(x), passthru) folds into a merge-masked vsqrtph {%k1}.
981 define <8 x half> @test_mask_sqrt_ph_128(<8 x half> %a0, <8 x half> %passthru, i8 %mask) {
982 ; CHECK-LABEL: test_mask_sqrt_ph_128:
984 ; CHECK-NEXT: kmovd %edi, %k1
985 ; CHECK-NEXT: vsqrtph %xmm0, %xmm1 {%k1}
986 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
988 %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
989 %2 = bitcast i8 %mask to <8 x i1>
990 %3 = select <8 x i1> %2, <8 x half> %1, <8 x half> %passthru
; select(mask, sqrt(x), 0) folds into the zero-masked {z} form of vsqrtph.
994 define <8 x half> @test_maskz_sqrt_ph_128(<8 x half> %a0, i8 %mask) {
995 ; CHECK-LABEL: test_maskz_sqrt_ph_128:
997 ; CHECK-NEXT: kmovd %edi, %k1
998 ; CHECK-NEXT: vsqrtph %xmm0, %xmm0 {%k1} {z}
1000 %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
1001 %2 = bitcast i8 %mask to <8 x i1>
1002 %3 = select <8 x i1> %2, <8 x half> %1, <8 x half> zeroinitializer
; 256-bit counterpart of test_sqrt_ph_128: single unmasked vsqrtph on ymm.
1006 define <16 x half> @test_sqrt_ph_256(<16 x half> %a0) {
1007 ; CHECK-LABEL: test_sqrt_ph_256:
1009 ; CHECK-NEXT: vsqrtph %ymm0, %ymm0
1011 %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
; 256-bit fast-math x / sqrt(y): rewritten to vrsqrtph estimate + vmulph.
1015 define <16 x half> @test_sqrt_ph_256_fast(<16 x half> %a0, <16 x half> %a1) {
1016 ; CHECK-LABEL: test_sqrt_ph_256_fast:
1018 ; CHECK-NEXT: vrsqrtph %ymm0, %ymm0
1019 ; CHECK-NEXT: vmulph %ymm0, %ymm1, %ymm0
1021 %1 = call fast <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
1022 %2 = fdiv fast <16 x half> %a1, %1
; 256-bit merge-masked sqrt: select with %passthru becomes vsqrtph {%k1}.
1026 define <16 x half> @test_mask_sqrt_ph_256(<16 x half> %a0, <16 x half> %passthru, i16 %mask) {
1027 ; CHECK-LABEL: test_mask_sqrt_ph_256:
1029 ; CHECK-NEXT: kmovd %edi, %k1
1030 ; CHECK-NEXT: vsqrtph %ymm0, %ymm1 {%k1}
1031 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1033 %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
1034 %2 = bitcast i16 %mask to <16 x i1>
1035 %3 = select <16 x i1> %2, <16 x half> %1, <16 x half> %passthru
; 256-bit zero-masked sqrt: select with zeroinitializer becomes {%k1} {z}.
1039 define <16 x half> @test_maskz_sqrt_ph_256(<16 x half> %a0, i16 %mask) {
1040 ; CHECK-LABEL: test_maskz_sqrt_ph_256:
1042 ; CHECK-NEXT: kmovd %edi, %k1
1043 ; CHECK-NEXT: vsqrtph %ymm0, %ymm0 {%k1} {z}
1045 %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0)
1046 %2 = bitcast i16 %mask to <16 x i1>
1047 %3 = select <16 x i1> %2, <16 x half> %1, <16 x half> zeroinitializer
1051 declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half>, <8 x half>, i8)
1052 declare <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half>, <16 x half>, i16)
; rsqrt intrinsic with an all-ones mask (-1) lowers to unmasked vrsqrtph.
1054 define <8 x half> @test_rsqrt_ph_128(<8 x half> %a0) {
1055 ; CHECK-LABEL: test_rsqrt_ph_128:
1057 ; CHECK-NEXT: vrsqrtph %xmm0, %xmm0
1059 %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half> %a0, <8 x half> zeroinitializer, i8 -1)
; 256-bit rsqrt intrinsic, all-ones mask: unmasked vrsqrtph on ymm.
1063 define <16 x half> @test_rsqrt_ph_256(<16 x half> %a0) {
1064 ; CHECK-LABEL: test_rsqrt_ph_256:
1066 ; CHECK-NEXT: vrsqrtph %ymm0, %ymm0
1068 %res = call <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half> %a0, <16 x half> zeroinitializer, i16 -1)
1069 ret <16 x half> %res
1072 declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half>, <8 x half>, i8)
1073 declare <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half>, <16 x half>, i16)
; Merge-masked reciprocal estimate: vrcpph into xmm1 under %k1, then moved
; to the return register.
1075 define <8 x half> @test_rcp_ph_128(<8 x half> %a0, <8 x half> %a1, i8 %mask) {
1076 ; CHECK-LABEL: test_rcp_ph_128:
1078 ; CHECK-NEXT: kmovd %edi, %k1
1079 ; CHECK-NEXT: vrcpph %xmm0, %xmm1 {%k1}
1080 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
1082 %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half> %a0, <8 x half> %a1, i8 %mask)
; 256-bit merge-masked reciprocal estimate (vrcpph {%k1}).
1086 define <16 x half> @test_rcp_ph_256(<16 x half> %a0, <16 x half> %a1, i16 %mask) {
1087 ; CHECK-LABEL: test_rcp_ph_256:
1089 ; CHECK-NEXT: kmovd %edi, %k1
1090 ; CHECK-NEXT: vrcpph %ymm0, %ymm1 {%k1}
1091 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1093 %res = call <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half> %a0, <16 x half> %a1, i16 %mask)
1094 ret <16 x half> %res
1097 declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half>, i32, <8 x half>, i8)
1098 declare <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half>, i32, <16 x half>, i16)
; Exercises both masked (imm 8, mask %x3) and unmasked (imm 4, mask -1) forms
; of vreduceph in one function; results are added so both calls stay live.
1100 define <8 x half>@test_int_x86_avx512_mask_reduce_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
1101 ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_128:
1103 ; CHECK-NEXT: kmovd %edi, %k1
1104 ; CHECK-NEXT: vreduceph $8, %xmm0, %xmm1 {%k1}
1105 ; CHECK-NEXT: vreduceph $4, %xmm0, %xmm0
1106 ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
1108 %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
1109 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
1110 %res2 = fadd <8 x half> %res, %res1
1111 ret <8 x half> %res2
; 256-bit version: masked (imm 8) + unmasked (imm 4) vreduceph, summed.
1114 define <16 x half>@test_int_x86_avx512_mask_reduce_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
1115 ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_256:
1117 ; CHECK-NEXT: kmovd %edi, %k1
1118 ; CHECK-NEXT: vreduceph $8, %ymm0, %ymm1 {%k1}
1119 ; CHECK-NEXT: vreduceph $4, %ymm0, %ymm0
1120 ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
1122 %res = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
1123 %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
1124 %res2 = fadd <16 x half> %res, %res1
1125 ret <16 x half> %res2
1128 declare <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half>, i32)
1129 declare <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half>, i32)
; AND of two fpclass results lowers as a mask-register chain: the second
; vfpclassph executes under {%k1} produced by the first, then kmovd extracts
; the i8 result (note the order of imms is swapped vs. the IR calls).
1131 define i8 @test_int_x86_avx512_fpclass_ph_128(<8 x half> %x0) {
1132 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_128:
1134 ; CHECK-NEXT: vfpclassph $2, %xmm0, %k1
1135 ; CHECK-NEXT: vfpclassph $4, %xmm0, %k0 {%k1}
1136 ; CHECK-NEXT: kmovd %k0, %eax
1137 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1139 %res = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 4)
1140 %res1 = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 2)
1141 %1 = and <8 x i1> %res1, %res
1142 %2 = bitcast <8 x i1> %1 to i8
; 256-bit fpclass AND-chain; vzeroupper is emitted because ymm state was used
; before returning an integer.
1146 define i16 @test_int_x86_avx512_fpclass_ph_256(<16 x half> %x0) {
1147 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_256:
1149 ; CHECK-NEXT: vfpclassph $2, %ymm0, %k1
1150 ; CHECK-NEXT: vfpclassph $4, %ymm0, %k0 {%k1}
1151 ; CHECK-NEXT: kmovd %k0, %eax
1152 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
1153 ; CHECK-NEXT: vzeroupper
1155 %res = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 4)
1156 %res1 = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 2)
1157 %1 = and <16 x i1> %res1, %res
1158 %2 = bitcast <16 x i1> %1 to i16
1162 declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half>, <8 x half>, i8)
1163 declare <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half>, <16 x half>, i16)
; getexp intrinsic with an all-ones mask lowers to unmasked vgetexpph.
1165 define <8 x half>@test_int_x86_avx512_getexp_ph_128(<8 x half> %x0) {
1166 ; CHECK-LABEL: test_int_x86_avx512_getexp_ph_128:
1168 ; CHECK-NEXT: vgetexpph %xmm0, %xmm0
1170 %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 -1)
; Merge-masked getexp: vgetexpph into xmm1 under %k1 with %x1 as passthrough.
1174 define <8 x half>@test_int_x86_avx512_mask_getexp_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x2) {
1175 ; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_128:
1177 ; CHECK-NEXT: kmovd %edi, %k1
1178 ; CHECK-NEXT: vgetexpph %xmm0, %xmm1 {%k1}
1179 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
1181 %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> %x1, i8 %x2)
; Zero-masked getexp: zeroinitializer passthrough selects the {z} form.
1185 define <8 x half>@test_int_x86_avx512_maskz_getexp_ph_128(<8 x half> %x0, i8 %x2) {
1186 ; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_128:
1188 ; CHECK-NEXT: kmovd %edi, %k1
1189 ; CHECK-NEXT: vgetexpph %xmm0, %xmm0 {%k1} {z}
1191 %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 %x2)
; 256-bit getexp, all-ones mask: unmasked vgetexpph on ymm.
1195 define <16 x half>@test_int_x86_avx512_getexp_ph_256(<16 x half> %x0) {
1196 ; CHECK-LABEL: test_int_x86_avx512_getexp_ph_256:
1198 ; CHECK-NEXT: vgetexpph %ymm0, %ymm0
1200 %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 -1)
1201 ret <16 x half> %res
; 256-bit merge-masked getexp (vgetexpph {%k1}).
1204 define <16 x half>@test_int_x86_avx512_mask_getexp_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x2) {
1205 ; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_256:
1207 ; CHECK-NEXT: kmovd %edi, %k1
1208 ; CHECK-NEXT: vgetexpph %ymm0, %ymm1 {%k1}
1209 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1211 %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> %x1, i16 %x2)
1212 ret <16 x half> %res
; 256-bit zero-masked getexp ({%k1} {z}).
1215 define <16 x half>@test_int_x86_avx512_maskz_getexp_ph_256(<16 x half> %x0, i16 %x2) {
1216 ; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_256:
1218 ; CHECK-NEXT: kmovd %edi, %k1
1219 ; CHECK-NEXT: vgetexpph %ymm0, %ymm0 {%k1} {z}
1221 %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 %x2)
1222 ret <16 x half> %res
1225 declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half>, i32, <8 x half>, i8)
1226 declare <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half>, i32, <16 x half>, i16)
; Masked (imm 8) + unmasked (imm 4) vgetmantph in one function, summed so
; both intrinsic calls survive into the final DAG.
1228 define <8 x half>@test_int_x86_avx512_mask_getmant_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
1229 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_128:
1231 ; CHECK-NEXT: kmovd %edi, %k1
1232 ; CHECK-NEXT: vgetmantph $8, %xmm0, %xmm1 {%k1}
1233 ; CHECK-NEXT: vgetmantph $4, %xmm0, %xmm0
1234 ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
1236 %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
1237 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
1238 %res2 = fadd <8 x half> %res, %res1
1239 ret <8 x half> %res2
; 256-bit version: masked + unmasked vgetmantph, summed.
1242 define <16 x half>@test_int_x86_avx512_mask_getmant_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
1243 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_256:
1245 ; CHECK-NEXT: kmovd %edi, %k1
1246 ; CHECK-NEXT: vgetmantph $8, %ymm0, %ymm1 {%k1}
1247 ; CHECK-NEXT: vgetmantph $4, %ymm0, %ymm0
1248 ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
1250 %res = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
1251 %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
1252 %res2 = fadd <16 x half> %res, %res1
1253 ret <16 x half> %res2
1256 declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half>, i32, <8 x half>, i8)
1257 declare <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half>, i32, <16 x half>, i16)
; Masked (imm 8) + unmasked (imm 4) vrndscaleph, summed.
1259 define <8 x half>@test_int_x86_avx512_mask_rndscale_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) {
1260 ; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_128:
1262 ; CHECK-NEXT: kmovd %edi, %k1
1263 ; CHECK-NEXT: vrndscaleph $8, %xmm0, %xmm1 {%k1}
1264 ; CHECK-NEXT: vrndscaleph $4, %xmm0, %xmm0
1265 ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
1267 %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3)
1268 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1)
1269 %res2 = fadd <8 x half> %res, %res1
1270 ret <8 x half> %res2
; 256-bit version: masked + unmasked vrndscaleph, summed.
1273 define <16 x half>@test_int_x86_avx512_mask_rndscale_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) {
1274 ; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_256:
1276 ; CHECK-NEXT: kmovd %edi, %k1
1277 ; CHECK-NEXT: vrndscaleph $8, %ymm0, %ymm1 {%k1}
1278 ; CHECK-NEXT: vrndscaleph $4, %ymm0, %ymm0
1279 ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
1281 %res = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3)
1282 %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1)
1283 %res2 = fadd <16 x half> %res, %res1
1284 ret <16 x half> %res2
1287 declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half>, <8 x half>, <8 x half>, i8)
1288 declare <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half>, <16 x half>, <16 x half>, i16)
; scalef with an all-ones mask lowers to unmasked vscalefph (AT&T operand
; order: scale operand %x1 comes first).
1290 define <8 x half>@test_int_x86_avx512_scalef_ph_128(<8 x half> %x0, <8 x half> %x1) {
1291 ; CHECK-LABEL: test_int_x86_avx512_scalef_ph_128:
1293 ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm0
1295 %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 -1)
; Merge-masked scalef: vscalefph into xmm2 (passthrough %x2) under %k1.
; NOTE(review): %mask is computed but unused; the intrinsic takes %x3 directly.
1299 define <8 x half>@test_int_x86_avx512_mask_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3) {
1300 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_128:
1302 ; CHECK-NEXT: kmovd %edi, %k1
1303 ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm2 {%k1}
1304 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
1306 %mask = bitcast i8 %x3 to <8 x i1>
1307 %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3)
; Zero-masked scalef ({%k1} {z}).
; NOTE(review): %mask is computed but unused; the intrinsic takes %x3 directly.
1311 define <8 x half>@test_int_x86_avx512_maskz_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x3) {
1312 ; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_128:
1314 ; CHECK-NEXT: kmovd %edi, %k1
1315 ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm0 {%k1} {z}
1317 %mask = bitcast i8 %x3 to <8 x i1>
1318 %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 %x3)
; 256-bit scalef, all-ones mask: unmasked vscalefph on ymm.
1322 define <16 x half>@test_int_x86_avx512_scalef_ph_256(<16 x half> %x0, <16 x half> %x1) {
1323 ; CHECK-LABEL: test_int_x86_avx512_scalef_ph_256:
1325 ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm0
1327 %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 -1)
1328 ret <16 x half> %res
; 256-bit merge-masked scalef into ymm2 (passthrough %x2) under %k1.
; NOTE(review): %mask is computed but unused; the intrinsic takes %x3 directly.
1331 define <16 x half>@test_int_x86_avx512_mask_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3) {
1332 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_256:
1334 ; CHECK-NEXT: kmovd %edi, %k1
1335 ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm2 {%k1}
1336 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
1338 %mask = bitcast i16 %x3 to <16 x i1>
1339 %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3)
1340 ret <16 x half> %res
1343 define <16 x half>@test_int_x86_avx512_maskz_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x3) {
1344 ; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_256:
1346 ; CHECK-NEXT: kmovd %edi, %k1
1347 ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm0 {%k1} {z}
1349 %mask = bitcast i16 %x3 to <16 x i1>
1350 %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 %x3)
1351 ret <16 x half> %res