1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512fp16 | FileCheck %s
4 declare <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16>, i32)
; Masked <32 x i16> -> <32 x half> conversion through the
; llvm.x86.avx512.sitofp.round intrinsic (rounding operand 4): %mask is bitcast
; to <32 x i1> and selects, per lane, between the converted value and %arg1.
; Expected assembly: kmovd of the mask into %k1, one vcvtw2ph under {%k1}
; writing %zmm1, then a vmovaps of %zmm1 into %zmm0.
6 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
7 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512:
9 ; CHECK-NEXT: kmovd %edi, %k1
10 ; CHECK-NEXT: vcvtw2ph %zmm0, %zmm1 {%k1}
11 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
13 %msk = bitcast i32 %mask to <32 x i1>
14 %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
15 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Same masked conversion as the intrinsic-based variant, but written with the
; plain `sitofp` instruction followed by a select against %arg1.
; The expected assembly is the same masked vcvtw2ph sequence.
19 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
20 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_2:
22 ; CHECK-NEXT: kmovd %edi, %k1
23 ; CHECK-NEXT: vcvtw2ph %zmm0, %zmm1 {%k1}
24 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
26 %msk = bitcast i32 %mask to <32 x i1>
27 %res0 = sitofp <32 x i16> %arg0 to <32 x half>
28 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Broadcast variant: loads one i16 from %arg0, replicates it into all 32 lanes
; (insertelement at index 0 + shufflevector with an all-zero mask), converts it
; with the sitofp.round intrinsic (rounding operand 4) and selects against %arg1.
; Expected assembly: the load and splat are folded into a (%rdi){1to32} memory
; operand of a vcvtw2ph masked by %k1 and writing %zmm0.
32 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b(ptr %arg0, <32 x half> %arg1, i32 %mask) {
33 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b:
35 ; CHECK-NEXT: kmovd %esi, %k1
36 ; CHECK-NEXT: vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
38 %msk = bitcast i32 %mask to <32 x i1>
39 %scalar = load i16, ptr %arg0
40 %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
41 %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
42 %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
43 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Broadcast variant using the plain `sitofp` instruction: a scalar i16 loaded
; from %arg0 is splatted to 32 lanes, converted, and selected against %arg1.
; Expected assembly: a single masked vcvtw2ph with a (%rdi){1to32} operand.
47 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
48 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2:
50 ; CHECK-NEXT: kmovd %esi, %k1
51 ; CHECK-NEXT: vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
53 %msk = bitcast i32 %mask to <32 x i1>
54 %scalar = load i16, ptr %arg0
55 %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
56 %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
57 %res0 = sitofp <32 x i16> %val to <32 x half>
58 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Explicit-rounding variant: calls the sitofp.round intrinsic with rounding
; operand 10 and selects the result against %arg1 under %mask.
; Expected assembly: the masked vcvtw2ph carries the {ru-sae} modifier.
62 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
63 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_r:
65 ; CHECK-NEXT: kmovd %edi, %k1
66 ; CHECK-NEXT: vcvtw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
67 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
69 %msk = bitcast i32 %mask to <32 x i1>
70 %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
71 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Unmasked variant: calls the sitofp.round intrinsic (rounding operand 4) on
; %arg0 with no select. Expected assembly: a plain vcvtw2ph %zmm0, %zmm0.
75 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask(<32 x i16> %arg0) {
76 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask:
78 ; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
80 %res = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
; Unmasked variant using the plain `sitofp` instruction on %arg0.
; Expected assembly: a plain vcvtw2ph %zmm0, %zmm0.
84 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2(<32 x i16> %arg0) {
85 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2:
87 ; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
89 %res = sitofp <32 x i16> %arg0 to <32 x half>
; Zeroing variant: the sitofp.round intrinsic result (rounding operand 4) is
; selected against zeroinitializer instead of a pass-through vector.
; Expected assembly: vcvtw2ph with both {%k1} and {z}, writing %zmm0 in place.
93 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
94 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z:
96 ; CHECK-NEXT: kmovd %edi, %k1
97 ; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0 {%k1} {z}
99 %msk = bitcast i32 %mask to <32 x i1>
100 %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
101 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
; Zeroing variant using the plain `sitofp` instruction: the converted vector is
; selected against zeroinitializer under %mask.
; Expected assembly: vcvtw2ph with {%k1} {z}.
105 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
106 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2:
108 ; CHECK-NEXT: kmovd %edi, %k1
109 ; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0 {%k1} {z}
111 %msk = bitcast i32 %mask to <32 x i1>
112 %res0 = sitofp <32 x i16> %arg0 to <32 x half>
113 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
; Load-folding variant: a full <32 x i16> vector is loaded from %arg0, converted
; with the sitofp.round intrinsic (rounding operand 4) and selected against %arg1.
; Expected assembly: the load appears as the (%rdi) memory operand of the
; masked vcvtw2ph, which writes %zmm0.
117 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load(ptr %arg0, <32 x half> %arg1, i32 %mask) {
118 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load:
120 ; CHECK-NEXT: kmovd %esi, %k1
121 ; CHECK-NEXT: vcvtw2ph (%rdi), %zmm0 {%k1}
123 %msk = bitcast i32 %mask to <32 x i1>
124 %val = load <32 x i16>, ptr %arg0
125 %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
126 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Load-folding variant using the plain `sitofp` instruction on a <32 x i16>
; vector loaded from %arg0, selected against %arg1 under %mask.
; Expected assembly: masked vcvtw2ph with a (%rdi) memory operand.
130 define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
131 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2:
133 ; CHECK-NEXT: kmovd %esi, %k1
134 ; CHECK-NEXT: vcvtw2ph (%rdi), %zmm0 {%k1}
136 %msk = bitcast i32 %mask to <32 x i1>
137 %val = load <32 x i16>, ptr %arg0
138 %res0 = sitofp <32 x i16> %val to <32 x half>
139 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
143 declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half>, <32 x i16>, i32, i32)
; Calls the mask.vcvtph2w.512 intrinsic with %arg0 as the source, %arg1 as the
; second operand, %mask as the third operand and 4 as the last operand.
; Expected assembly: kmovd of the mask into %k1, vcvtph2w under {%k1} writing
; %zmm1, then a vmovaps of %zmm1 into %zmm0.
145 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
146 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512:
148 ; CHECK-NEXT: kmovd %edi, %k1
149 ; CHECK-NEXT: vcvtph2w %zmm0, %zmm1 {%k1}
150 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
152 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
; Broadcast variant: a scalar half loaded from %arg0 is replicated into all 32
; lanes (insertelement at index 0 + all-zero shuffle mask) and passed to the
; mask.vcvtph2w.512 intrinsic together with %arg1, %mask and last operand 4.
; Expected assembly: masked vcvtph2w with a (%rdi){1to32} memory operand.
156 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
157 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_b:
159 ; CHECK-NEXT: kmovd %esi, %k1
160 ; CHECK-NEXT: vcvtph2w (%rdi){1to32}, %zmm0 {%k1}
162 %scalar = load half, ptr %arg0
163 %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
164 %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
165 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
; Explicit-rounding variant: calls mask.vcvtph2w.512 with 9 as the last operand.
; Expected assembly: the masked vcvtph2w carries the {rd-sae} modifier.
169 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
170 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_r:
172 ; CHECK-NEXT: kmovd %edi, %k1
173 ; CHECK-NEXT: vcvtph2w {rd-sae}, %zmm0, %zmm1 {%k1}
174 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
176 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
; Constant all-ones mask variant: calls mask.vcvtph2w.512 with -1 as the mask
; operand. Expected assembly: an unmasked vcvtph2w %zmm0, %zmm0.
180 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
181 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_nomask:
183 ; CHECK-NEXT: vcvtph2w %zmm0, %zmm0
185 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
; Zeroing variant: calls mask.vcvtph2w.512 with zeroinitializer as the second
; operand. Expected assembly: vcvtph2w with {%k1} {z} writing %zmm0 in place.
189 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_z(<32 x half> %arg0, i32 %mask) {
190 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_z:
192 ; CHECK-NEXT: kmovd %edi, %k1
193 ; CHECK-NEXT: vcvtph2w %zmm0, %zmm0 {%k1} {z}
195 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
; Load-folding variant: a full <32 x half> vector loaded from %arg0 is passed to
; mask.vcvtph2w.512 together with %arg1, %mask and last operand 4.
; Expected assembly: masked vcvtph2w with a (%rdi) memory operand.
199 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
200 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_load:
202 ; CHECK-NEXT: kmovd %esi, %k1
203 ; CHECK-NEXT: vcvtph2w (%rdi), %zmm0 {%k1}
205 %val = load <32 x half>, ptr %arg0
206 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
211 declare <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16>, i32)
; Masked <32 x i16> -> <32 x half> conversion through the
; llvm.x86.avx512.uitofp.round intrinsic (rounding operand 4): %mask is bitcast
; to <32 x i1> and selects, per lane, between the converted value and %arg1.
; Expected assembly: kmovd of the mask into %k1, one vcvtuw2ph under {%k1}
; writing %zmm1, then a vmovaps of %zmm1 into %zmm0.
213 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
214 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512:
216 ; CHECK-NEXT: kmovd %edi, %k1
217 ; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm1 {%k1}
218 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
220 %msk = bitcast i32 %mask to <32 x i1>
221 %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
222 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Same masked conversion as the intrinsic-based variant, but written with the
; plain `uitofp` instruction followed by a select against %arg1.
; The expected assembly is the same masked vcvtuw2ph sequence.
226 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
227 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_2:
229 ; CHECK-NEXT: kmovd %edi, %k1
230 ; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm1 {%k1}
231 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
233 %msk = bitcast i32 %mask to <32 x i1>
234 %res0 = uitofp <32 x i16> %arg0 to <32 x half>
235 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Broadcast variant: loads one i16 from %arg0, replicates it into all 32 lanes
; (insertelement at index 0 + shufflevector with an all-zero mask), converts it
; with the uitofp.round intrinsic (rounding operand 4) and selects against %arg1.
; Expected assembly: the load and splat are folded into a (%rdi){1to32} memory
; operand of a vcvtuw2ph masked by %k1 and writing %zmm0.
239 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b(ptr %arg0, <32 x half> %arg1, i32 %mask) {
240 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b:
242 ; CHECK-NEXT: kmovd %esi, %k1
243 ; CHECK-NEXT: vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
245 %msk = bitcast i32 %mask to <32 x i1>
246 %scalar = load i16, ptr %arg0
247 %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
248 %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
249 %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
250 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Broadcast variant using the plain `uitofp` instruction: a scalar i16 loaded
; from %arg0 is splatted to 32 lanes, converted, and selected against %arg1.
; Expected assembly: a single masked vcvtuw2ph with a (%rdi){1to32} operand.
254 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
255 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2:
257 ; CHECK-NEXT: kmovd %esi, %k1
258 ; CHECK-NEXT: vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
260 %msk = bitcast i32 %mask to <32 x i1>
261 %scalar = load i16, ptr %arg0
262 %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
263 %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
264 %res0 = uitofp <32 x i16> %val to <32 x half>
265 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Explicit-rounding variant: calls the uitofp.round intrinsic with rounding
; operand 10 and selects the result against %arg1 under %mask.
; Expected assembly: the masked vcvtuw2ph carries the {ru-sae} modifier.
269 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
270 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_r:
272 ; CHECK-NEXT: kmovd %edi, %k1
273 ; CHECK-NEXT: vcvtuw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
274 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
276 %msk = bitcast i32 %mask to <32 x i1>
277 %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
278 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Unmasked variant: calls the uitofp.round intrinsic (rounding operand 4) on
; %arg0 with no select. Expected assembly: a plain vcvtuw2ph %zmm0, %zmm0.
282 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask(<32 x i16> %arg0) {
283 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask:
285 ; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
287 %res = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
; Unmasked variant using the plain `uitofp` instruction on %arg0.
; Expected assembly: a plain vcvtuw2ph %zmm0, %zmm0.
291 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2(<32 x i16> %arg0) {
292 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2:
294 ; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
296 %res = uitofp <32 x i16> %arg0 to <32 x half>
; Zeroing variant: the uitofp.round intrinsic result (rounding operand 4) is
; selected against zeroinitializer instead of a pass-through vector.
; Expected assembly: vcvtuw2ph with both {%k1} and {z}, writing %zmm0 in place.
300 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
301 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z:
303 ; CHECK-NEXT: kmovd %edi, %k1
304 ; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
306 %msk = bitcast i32 %mask to <32 x i1>
307 %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
308 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
; Zeroing variant using the plain `uitofp` instruction: the converted vector is
; selected against zeroinitializer under %mask.
; Expected assembly: vcvtuw2ph with {%k1} {z}.
312 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
313 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2:
315 ; CHECK-NEXT: kmovd %edi, %k1
316 ; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
318 %msk = bitcast i32 %mask to <32 x i1>
319 %res0 = uitofp <32 x i16> %arg0 to <32 x half>
320 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
; Load-folding variant: a full <32 x i16> vector is loaded from %arg0, converted
; with the uitofp.round intrinsic (rounding operand 4) and selected against %arg1.
; Expected assembly: the load appears as the (%rdi) memory operand of the
; masked vcvtuw2ph, which writes %zmm0.
324 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load(ptr %arg0, <32 x half> %arg1, i32 %mask) {
325 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load:
327 ; CHECK-NEXT: kmovd %esi, %k1
328 ; CHECK-NEXT: vcvtuw2ph (%rdi), %zmm0 {%k1}
330 %msk = bitcast i32 %mask to <32 x i1>
331 %val = load <32 x i16>, ptr %arg0
332 %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
333 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
; Load-folding variant using the plain `uitofp` instruction on a <32 x i16>
; vector loaded from %arg0, selected against %arg1 under %mask.
; Expected assembly: masked vcvtuw2ph with a (%rdi) memory operand.
337 define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2(ptr %arg0, <32 x half> %arg1, i32 %mask) {
338 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2:
340 ; CHECK-NEXT: kmovd %esi, %k1
341 ; CHECK-NEXT: vcvtuw2ph (%rdi), %zmm0 {%k1}
343 %msk = bitcast i32 %mask to <32 x i1>
344 %val = load <32 x i16>, ptr %arg0
345 %res0 = uitofp <32 x i16> %val to <32 x half>
346 %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
350 declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half>, <32 x i16>, i32, i32)
; Calls the mask.vcvtph2uw.512 intrinsic with %arg0 as the source, %arg1 as the
; second operand, %mask as the third operand and 4 as the last operand.
; Expected assembly: kmovd of the mask into %k1, vcvtph2uw under {%k1} writing
; %zmm1, then a vmovaps of %zmm1 into %zmm0.
352 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
353 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512:
355 ; CHECK-NEXT: kmovd %edi, %k1
356 ; CHECK-NEXT: vcvtph2uw %zmm0, %zmm1 {%k1}
357 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
359 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
; Broadcast variant: a scalar half loaded from %arg0 is replicated into all 32
; lanes (insertelement at index 0 + all-zero shuffle mask) and passed to the
; mask.vcvtph2uw.512 intrinsic together with %arg1, %mask and last operand 4.
; Expected assembly: masked vcvtph2uw with a (%rdi){1to32} memory operand.
363 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
364 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_b:
366 ; CHECK-NEXT: kmovd %esi, %k1
367 ; CHECK-NEXT: vcvtph2uw (%rdi){1to32}, %zmm0 {%k1}
369 %scalar = load half, ptr %arg0
370 %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
371 %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
372 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
; Explicit-rounding variant: calls mask.vcvtph2uw.512 with 9 as the last operand.
; Expected assembly: the masked vcvtph2uw carries the {rd-sae} modifier.
376 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
377 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_r:
379 ; CHECK-NEXT: kmovd %edi, %k1
380 ; CHECK-NEXT: vcvtph2uw {rd-sae}, %zmm0, %zmm1 {%k1}
381 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
383 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
; Constant all-ones mask variant: calls mask.vcvtph2uw.512 with -1 as the mask
; operand. Expected assembly: an unmasked vcvtph2uw %zmm0, %zmm0.
387 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
388 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask:
390 ; CHECK-NEXT: vcvtph2uw %zmm0, %zmm0
392 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
; Zeroing variant: calls mask.vcvtph2uw.512 with zeroinitializer as the second
; operand. Expected assembly: vcvtph2uw with {%k1} {z} writing %zmm0 in place.
396 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_z(<32 x half> %arg0, i32 %mask) {
397 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_z:
399 ; CHECK-NEXT: kmovd %edi, %k1
400 ; CHECK-NEXT: vcvtph2uw %zmm0, %zmm0 {%k1} {z}
402 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
; Load-folding variant: a full <32 x half> vector loaded from %arg0 is passed to
; mask.vcvtph2uw.512 together with %arg1, %mask and last operand 4.
; Expected assembly: masked vcvtph2uw with a (%rdi) memory operand.
406 define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
407 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_load:
409 ; CHECK-NEXT: kmovd %esi, %k1
410 ; CHECK-NEXT: vcvtph2uw (%rdi), %zmm0 {%k1}
412 %val = load <32 x half>, ptr %arg0
413 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
417 declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half>, <32 x i16>, i32, i32)
; Calls the mask.vcvttph2w.512 intrinsic with %arg0 as the source, %arg1 as the
; second operand, %mask as the third operand and 4 as the last operand.
; Expected assembly: kmovd of the mask into %k1, vcvttph2w under {%k1} writing
; %zmm1, then a vmovaps of %zmm1 into %zmm0.
419 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
420 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512:
422 ; CHECK-NEXT: kmovd %edi, %k1
423 ; CHECK-NEXT: vcvttph2w %zmm0, %zmm1 {%k1}
424 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
426 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
; Broadcast variant: a scalar half loaded from %arg0 is replicated into all 32
; lanes (insertelement at index 0 + all-zero shuffle mask) and passed to the
; mask.vcvttph2w.512 intrinsic together with %arg1, %mask and last operand 4.
; Expected assembly: masked vcvttph2w with a (%rdi){1to32} memory operand.
430 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
431 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_b:
433 ; CHECK-NEXT: kmovd %esi, %k1
434 ; CHECK-NEXT: vcvttph2w (%rdi){1to32}, %zmm0 {%k1}
436 %scalar = load half, ptr %arg0
437 %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
438 %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
439 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
; SAE variant: calls mask.vcvttph2w.512 with 8 as the last operand.
; Expected assembly: the masked vcvttph2w carries the {sae} modifier.
443 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
444 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_sae:
446 ; CHECK-NEXT: kmovd %edi, %k1
447 ; CHECK-NEXT: vcvttph2w {sae}, %zmm0, %zmm1 {%k1}
448 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
450 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
; Constant all-ones mask variant: calls mask.vcvttph2w.512 with -1 as the mask
; operand. Expected assembly: an unmasked vcvttph2w %zmm0, %zmm0.
454 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
455 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_nomask:
457 ; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
459 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
; Zeroing variant: calls mask.vcvttph2w.512 with zeroinitializer as the second
; operand. Expected assembly: vcvttph2w with {%k1} {z} writing %zmm0 in place.
463 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_z(<32 x half> %arg0, i32 %mask) {
464 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_z:
466 ; CHECK-NEXT: kmovd %edi, %k1
467 ; CHECK-NEXT: vcvttph2w %zmm0, %zmm0 {%k1} {z}
469 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
; Load-folding variant: a full <32 x half> vector loaded from %arg0 is passed to
; mask.vcvttph2w.512 together with %arg1, %mask and last operand 4.
; Expected assembly: masked vcvttph2w with a (%rdi) memory operand.
473 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
474 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_load:
476 ; CHECK-NEXT: kmovd %esi, %k1
477 ; CHECK-NEXT: vcvttph2w (%rdi), %zmm0 {%k1}
479 %val = load <32 x half>, ptr %arg0
480 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
484 declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half>, <32 x i16>, i32, i32)
; Calls the mask.vcvttph2uw.512 intrinsic with %arg0 as the source, %arg1 as the
; second operand, %mask as the third operand and 4 as the last operand.
; Expected assembly: kmovd of the mask into %k1, vcvttph2uw under {%k1} writing
; %zmm1, then a vmovaps of %zmm1 into %zmm0.
486 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
487 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512:
489 ; CHECK-NEXT: kmovd %edi, %k1
490 ; CHECK-NEXT: vcvttph2uw %zmm0, %zmm1 {%k1}
491 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
493 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
; Broadcast variant: a scalar half loaded from %arg0 is replicated into all 32
; lanes (insertelement at index 0 + all-zero shuffle mask) and passed to the
; mask.vcvttph2uw.512 intrinsic together with %arg1, %mask and last operand 4.
; Expected assembly: masked vcvttph2uw with a (%rdi){1to32} memory operand.
497 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_b(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
498 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_b:
500 ; CHECK-NEXT: kmovd %esi, %k1
501 ; CHECK-NEXT: vcvttph2uw (%rdi){1to32}, %zmm0 {%k1}
503 %scalar = load half, ptr %arg0
504 %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
505 %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
506 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
; SAE variant: calls mask.vcvttph2uw.512 with 8 as the last operand.
; Expected assembly: the masked vcvttph2uw carries the {sae} modifier.
510 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
511 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_sae:
513 ; CHECK-NEXT: kmovd %edi, %k1
514 ; CHECK-NEXT: vcvttph2uw {sae}, %zmm0, %zmm1 {%k1}
515 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
517 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
; Constant all-ones mask variant: calls mask.vcvttph2uw.512 with -1 as the mask
; operand. Expected assembly: an unmasked vcvttph2uw %zmm0, %zmm0.
521 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
522 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask:
524 ; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0
526 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
; Zeroing variant: calls mask.vcvttph2uw.512 with zeroinitializer as the second
; operand. Expected assembly: vcvttph2uw with {%k1} {z} writing %zmm0 in place.
530 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_z(<32 x half> %arg0, i32 %mask) {
531 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_z:
533 ; CHECK-NEXT: kmovd %edi, %k1
534 ; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0 {%k1} {z}
536 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
; Load-folding variant: a full <32 x half> vector loaded from %arg0 is passed to
; mask.vcvttph2uw.512 together with %arg1, %mask and last operand 4.
; Expected assembly: masked vcvttph2uw with a (%rdi) memory operand.
540 define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_load(ptr %arg0, <32 x i16> %arg1, i32 %mask) {
541 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_load:
543 ; CHECK-NEXT: kmovd %esi, %k1
544 ; CHECK-NEXT: vcvttph2uw (%rdi), %zmm0 {%k1}
546 %val = load <32 x half>, ptr %arg0
547 %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)