; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s

declare i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half>, <8 x half>, i32, i32)

define i32 @test_x86_avx512fp16_ucomi_sh_lt(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_x86_avx512fp16_ucomi_sh_lt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpngesh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %a0, <8 x half> %a1, i32 9, i32 4)
  ret i32 %res
}

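; Square-root tests: plain vsqrtph, and fast-math sqrt+fdiv combinations that
; can use the vrsqrtph estimate (including "reciprocal-estimates" attribute variants).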
declare <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half>, i32) nounwind readnone

define <32 x half> @test_sqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  ret <32 x half> %1
}

define <32 x half> @test_sqrt_ph_512_fast(<32 x half> %a0, <32 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_512_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}

define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrt" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}

define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute_2(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrth:1" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm2
; CHECK-NEXT:    vmulph %zmm2, %zmm0, %zmm0
; CHECK-NEXT:    vfmadd213ph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm0
; CHECK-NEXT:    vmulph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm2
; CHECK-NEXT:    vmulph %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}

define <32 x half> @test_mask_sqrt_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
  ret <32 x half> %3
}

define <32 x half> @test_maskz_sqrt_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
  ret <32 x half> %3
}

declare <32 x half> @llvm.sqrt.v32f16(<32 x half>)

define <32 x half> @test_sqrt_round_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  ret <32 x half> %1
}

define <32 x half> @test_mask_sqrt_round_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
  ret <32 x half> %3
}

define <32 x half> @test_maskz_sqrt_round_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
  ret <32 x half> %3
}

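; Scalar square root via the masked vsqrtsh intrinsic and via llvm.sqrt.f16.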
declare <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32) nounwind readnone

define <8 x half> @test_sqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 4)
  ret <8 x half> %res
}

define half @test_sqrt_sh2(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmulsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = call fast half @llvm.sqrt.f16(half %a0)
  %2 = fdiv fast half %a1, %1
  ret half %2
}

define half @test_sqrt_sh3(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call fast half @llvm.sqrt.f16(half %a0)
  ret half %1
}

declare half @llvm.sqrt.f16(half)

define <8 x half> @test_sqrt_sh_r(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 10)
  ret <8 x half> %res
}

define <8 x half> @test_sqrt_sh_nomask(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_sqrt_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_sqrt_sh_z(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 %mask, i32 10)
  ret <8 x half> %res
}

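; Reciprocal square-root estimate intrinsics (vrsqrtph/vrsqrtsh), with masking and load folding.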
declare <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half>, <32 x half>, i32)
declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8)

define <32 x half> @test_rsqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half> %a0, <32 x half> zeroinitializer, i32 -1)
  ret <32 x half> %res
}

define <8 x half> @test_rsqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_rsqrt_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> %a2, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rsqrt_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <8 x half>, ptr %a1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> undef, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_maskz(<8 x half> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_maskz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 %mask)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_mask(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask)
  ret <8 x half> %res
}

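; vfpclassph/vfpclasssh: test fp16 values against an immediate class mask.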
declare <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half>, i32)

define i32 @test_int_x86_avx512_fpclass_ph_512(<32 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassph $2, %zmm0, %k1
; CHECK-NEXT:    vfpclassph $4, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 4)
  %res1 = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 2)
  %1 = and <32 x i1> %res1, %res
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

declare i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sh(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasssh $4, %xmm0, %k1
; CHECK-NEXT:    vfpclasssh $2, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 %res)
  ret i8 %res1
}

define i8 @test_int_x86_avx512_mask_fpclass_sh_load(ptr %x0ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasssh $4, (%rdi), %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %x0 = load <8 x half>, ptr %x0ptr
  %res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 -1)
  ret i8 %res
}

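; Reciprocal estimate intrinsics (vrcpph/vrcpsh).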
declare <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_rcp_ph_512(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
; CHECK-LABEL: test_rcp_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrcpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half> %a0, <32 x half> %a1, i32 %mask)
  ret <32 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half>, <8 x half>, <8 x half>, i8)

define <8 x half> @test_rcp_sh(<8 x half> %a0) {
; CHECK-LABEL: test_rcp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rcp_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rcp_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <8 x half>, ptr %a1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}

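; vreduceph/vreducesh with an immediate operand, with and without SAE.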
declare <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_reduce_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vreduceph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreduceph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)

define <8 x half> @test_int_x86_avx512_mask_reduce_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vreducesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_reduce_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vreducesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
  ret <8 x half> %res
}

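; vrndscaleph/vrndscalesh with an immediate operand, with and without SAE.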
declare <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_rndscale_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrndscaleph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vrndscaleph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)

define <8 x half> @test_int_x86_avx512_mask_rndscale_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrndscalesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_rndscale_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrndscalesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
  ret <8 x half> %res
}

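; vgetexpph/vgetexpsh: extract the unbiased exponent.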
declare <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half>, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_getexp_ph_512(<32 x half> %x0, <32 x half> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vgetexpph {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> %x1, i32 %x2, i32 4)
  %res2 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> zeroinitializer, i32 -1, i32 8)
  %res3 = fadd <32 x half> %res1, %res2
  ret <32 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_getexp_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_getexp_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetexpsh {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_getexp_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetexpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <8 x half>, ptr %x1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
  ret <8 x half> %res
}

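; vgetmantph/vgetmantsh: extract the normalized mantissa; the immediate selects the interval and sign handling.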
declare <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_getmant_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vgetmantph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half>, <8 x half>, i32, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_getmant_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_getmant_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_getmant_sh_z(<8 x half> %x0, <8 x half> %x1, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> zeroinitializer, i8 %x4, i32 4)
  ret <8 x half> %res
}

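; vscalefph/vscalefsh: scale the first operand by 2^floor(second operand), with embedded rounding.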
declare <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half>, <32 x half>, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_scalef_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefph {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vscalefph {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %x3 to <32 x i1>
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3, i32 11)
  %res2 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> zeroinitializer, i32 -1, i32 8)
  %res3 = fadd <32 x half> %res1, %res2
  ret <32 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_scalef_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_scalef_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vscalefsh {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_scalef_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vscalefsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <8 x half>, ptr %x1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
  ret <8 x half> %res
}

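; Masked scalar add/sub/mul/div/min/max with a rounding operand, including a folded scalar load.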
declare <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vaddsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vaddsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vaddsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsubsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vsubsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmulsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmulsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vmulsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vdivsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vdivsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vdivsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_min_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vminsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vminsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vminsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vminsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_max_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmaxsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmaxsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmaxsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vmaxsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

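; Scalar fp16 compares (vcmpsh) producing a k-mask, with and without SAE.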
declare i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half>, <8 x half>, i32, i8, i32)

define i8 @test_int_x86_avx512_mask_cmp_sh(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcmpunordsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 %x3, i32 4)
  ret i8 %res2
}

define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh_all:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcmplesh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %ecx
; CHECK-NEXT:    vcmpunordsh {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %edx
; CHECK-NEXT:    vcmpneqsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %esi
; CHECK-NEXT:    vcmpnltsh {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    andb %cl, %dl
; CHECK-NEXT:    andb %sil, %al
; CHECK-NEXT:    andb %dl, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res1 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 2, i8 -1, i32 4)
  %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 -1, i32 8)
  %res3 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 4, i8 %x3, i32 4)
  %res4 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 5, i8 %x3, i32 8)

  %res11 = and i8 %res1, %res2
  %res12 = and i8 %res3, %res4
  %res13 = and i8 %res11, %res12
  ret i8 %res13
}

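; vcvtdq2ph/vcvtudq2ph: i32 vectors to fp16, masked and with rounding, plus plain sitofp/uitofp.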
declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32)

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: sint_to_fp_16i32_to_16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = sitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}

declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32)

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: uint_to_fp_16i32_to_16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = uitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}

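; vcvtph2dq/vcvtph2udq and truncating vcvttph2dq/vcvttph2udq: fp16 to i32 vectors.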
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2dq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2dq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2udq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2dq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2udq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

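; 64-bit integer conversions: vcvtqq2ph/vcvtuqq2ph, then vcvtph2qq/vcvtph2uqq (including truncating forms).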
declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2qq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2qq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2qq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2uqq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uqq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2uqq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uqq {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2uqq %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

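; Scalar conversions between fp16 and 32/64-bit integers, with explicit rounding modes.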
declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2si %xmm0, %ecx
; CHECK-NEXT:    vcvtsh2si {rz-sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2si %xmm0, %rcx
; CHECK-NEXT:    vcvtsh2si {ru-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2si %xmm0, %ecx
; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2si %xmm0, %rcx
; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2usi %xmm0, %ecx
; CHECK-NEXT:    vcvtsh2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2usi %xmm0, %rcx
; CHECK-NEXT:    vcvtsh2usi {ru-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2usi %xmm0, %ecx
; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)

define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi2sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtsi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)

define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi642sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtsi2sh %rdi, {rn-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)

define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi2sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtusi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)

define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi642sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtusi2sh %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

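; Vector casts whose upper elements come from freeze(poison): the widening should be free (at most a register copy).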
define <16 x half> @test_mm256_castph128_ph256_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm256_castph128_ph256_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    retq
  %a1 = freeze <8 x half> poison
  %res = shufflevector <8 x half> %a0, <8 x half> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x half> %res
}

define <32 x half> @test_mm512_castph128_ph512_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph128_ph512_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = freeze <8 x half> poison
  %res = shufflevector <8 x half> %a0, <8 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <32 x half> %res
}

define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph256_ph512_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    retq
  %a1 = freeze <16 x half> poison
  %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x half> %res
}