1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
; Merge-masked signed i16 -> half conversion on a 16-element vector: sitofp
; followed by a select keyed on %mask (bitcast to <16 x i1>) with %arg1 as the
; false operand. The CHECK lines expect a {%k1}-masked vcvtw2ph writing %ymm1,
; then a vmovaps of %ymm1 into %ymm0.
4 define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
5 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256:
7 ; CHECK-NEXT: kmovd %edi, %k1
8 ; CHECK-NEXT: vcvtw2ph %ymm0, %ymm1 {%k1}
9 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
11 %msk = bitcast i16 %mask to <16 x i1>
12 %res0 = sitofp <16 x i16> %arg0 to <16 x half>
13 %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
; Broadcast variant: a scalar i16 is loaded from %arg0 and splatted to all 16
; lanes (insertelement at index 0 + shufflevector with an all-zero shuffle
; mask) before the sitofp/select. The CHECK lines expect the load and splat to
; be folded into a (%rdi){1to16} operand of the masked vcvtw2ph.
17 define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_b(ptr %arg0, <16 x half> %arg1, i16 %mask) {
18 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_b:
20 ; CHECK-NEXT: kmovd %esi, %k1
21 ; CHECK-NEXT: vcvtw2ph (%rdi){1to16}, %ymm0 {%k1}
23 %msk = bitcast i16 %mask to <16 x i1>
24 %scalar = load i16, ptr %arg0
25 %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
26 %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
27 %res0 = sitofp <16 x i16> %val to <16 x half>
28 %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
; Unmasked variant: a plain sitofp with no select; %arg1 is not referenced by
; the visible instruction. The CHECK line expects an unmasked vcvtw2ph in place
; on %ymm0.
32 define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
33 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask:
35 ; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0
37 %res = sitofp <16 x i16> %arg0 to <16 x half>
; Zero-masked variant: the select's false operand is zeroinitializer, so the
; CHECK lines expect vcvtw2ph with both {%k1} and {z}.
41 define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
42 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_z:
44 ; CHECK-NEXT: kmovd %edi, %k1
45 ; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0 {%k1} {z}
47 %msk = bitcast i16 %mask to <16 x i1>
48 %res0 = sitofp <16 x i16> %arg0 to <16 x half>
49 %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
; Load-folding variant: the full <16 x i16> source vector is loaded from %arg0
; before the sitofp/select. The CHECK lines expect the load to be folded into
; the masked vcvtw2ph as a (%rdi) memory operand.
53 define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_load(ptr %arg0, <16 x half> %arg1, i16 %mask) {
54 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_load:
56 ; CHECK-NEXT: kmovd %esi, %k1
57 ; CHECK-NEXT: vcvtw2ph (%rdi), %ymm0 {%k1}
59 %msk = bitcast i16 %mask to <16 x i1>
60 %val = load <16 x i16>, ptr %arg0
61 %res0 = sitofp <16 x i16> %val to <16 x half>
62 %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
66 declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half>, <16 x i16>, i16)
; Calls the mask.vcvtph2w.256 intrinsic with %arg0 as the half source, %arg1 as
; the second operand and %mask as the third. The CHECK lines expect a
; {%k1}-masked vcvtph2w writing %ymm1, then a vmovaps of %ymm1 into %ymm0.
68 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
69 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256:
71 ; CHECK-NEXT: kmovd %edi, %k1
72 ; CHECK-NEXT: vcvtph2w %ymm0, %ymm1 {%k1}
73 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
75 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
; Broadcast variant: a scalar half is loaded from %arg0 and splatted to all 16
; lanes before being passed to the mask.vcvtph2w.256 intrinsic. The CHECK lines
; expect the splat to be folded into a (%rdi){1to16} operand of the masked
; vcvtph2w.
79 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
80 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_b:
82 ; CHECK-NEXT: kmovd %esi, %k1
83 ; CHECK-NEXT: vcvtph2w (%rdi){1to16}, %ymm0 {%k1}
85 %scalar = load half, ptr %arg0
86 %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
87 %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
88 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
; Constant-mask variant: the intrinsic is called with mask operand i16 -1 (all
; bits set). The CHECK line expects an unmasked vcvtph2w in place on %ymm0,
; with no k-register setup.
92 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
93 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_nomask:
95 ; CHECK-NEXT: vcvtph2w %ymm0, %ymm0
97 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
; Zero-masked variant: the intrinsic's second operand is zeroinitializer, and
; the CHECK lines expect vcvtph2w with both {%k1} and {z}.
101 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_z(<16 x half> %arg0, i16 %mask) {
102 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_z:
104 ; CHECK-NEXT: kmovd %edi, %k1
105 ; CHECK-NEXT: vcvtph2w %ymm0, %ymm0 {%k1} {z}
107 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
; Load-folding variant: the <16 x half> source is loaded from %arg0 and passed
; to the mask.vcvtph2w.256 intrinsic. The CHECK lines expect the load to be
; folded into the masked vcvtph2w as a (%rdi) memory operand.
111 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
112 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_load:
114 ; CHECK-NEXT: kmovd %esi, %k1
115 ; CHECK-NEXT: vcvtph2w (%rdi), %ymm0 {%k1}
117 %val = load <16 x half>, ptr %arg0
118 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
; Merge-masked unsigned i16 -> half conversion on a 16-element vector: uitofp
; followed by a select keyed on %mask (bitcast to <16 x i1>) with %arg1 as the
; false operand. The CHECK lines expect a {%k1}-masked vcvtuw2ph writing %ymm1,
; then a vmovaps of %ymm1 into %ymm0.
122 define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
123 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256:
125 ; CHECK-NEXT: kmovd %edi, %k1
126 ; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm1 {%k1}
127 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
129 %msk = bitcast i16 %mask to <16 x i1>
130 %res0 = uitofp <16 x i16> %arg0 to <16 x half>
131 %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
; Broadcast variant: a scalar i16 is loaded from %arg0 and splatted to all 16
; lanes before the uitofp/select. The CHECK lines expect the splat to be folded
; into a (%rdi){1to16} operand of the masked vcvtuw2ph.
135 define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_b(ptr %arg0, <16 x half> %arg1, i16 %mask) {
136 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_b:
138 ; CHECK-NEXT: kmovd %esi, %k1
139 ; CHECK-NEXT: vcvtuw2ph (%rdi){1to16}, %ymm0 {%k1}
141 %msk = bitcast i16 %mask to <16 x i1>
142 %scalar = load i16, ptr %arg0
143 %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
144 %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
145 %res0 = uitofp <16 x i16> %val to <16 x half>
146 %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
; Unmasked variant: a plain uitofp with no select; %arg1 is not referenced by
; the visible instruction. The CHECK line expects an unmasked vcvtuw2ph in
; place on %ymm0.
150 define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
151 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask:
153 ; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm0
155 %res = uitofp <16 x i16> %arg0 to <16 x half>
; Zero-masked variant: the select's false operand is zeroinitializer, so the
; CHECK lines expect vcvtuw2ph with both {%k1} and {z}.
159 define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
160 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_z:
162 ; CHECK-NEXT: kmovd %edi, %k1
163 ; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm0 {%k1} {z}
165 %msk = bitcast i16 %mask to <16 x i1>
166 %res0 = uitofp <16 x i16> %arg0 to <16 x half>
167 %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
; Load-folding variant: the full <16 x i16> source vector is loaded from %arg0
; before the uitofp/select. The CHECK lines expect the load to be folded into
; the masked vcvtuw2ph as a (%rdi) memory operand.
171 define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_load(ptr %arg0, <16 x half> %arg1, i16 %mask) {
172 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_load:
174 ; CHECK-NEXT: kmovd %esi, %k1
175 ; CHECK-NEXT: vcvtuw2ph (%rdi), %ymm0 {%k1}
177 %msk = bitcast i16 %mask to <16 x i1>
178 %val = load <16 x i16>, ptr %arg0
179 %res0 = uitofp <16 x i16> %val to <16 x half>
180 %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
184 declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half>, <16 x i16>, i16)
; Calls the mask.vcvtph2uw.256 intrinsic with %arg0 as the half source, %arg1
; as the second operand and %mask as the third. The CHECK lines expect a
; {%k1}-masked vcvtph2uw writing %ymm1, then a vmovaps of %ymm1 into %ymm0.
186 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
187 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256:
189 ; CHECK-NEXT: kmovd %edi, %k1
190 ; CHECK-NEXT: vcvtph2uw %ymm0, %ymm1 {%k1}
191 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
193 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
; Broadcast variant: a scalar half is loaded from %arg0 and splatted to all 16
; lanes before being passed to the mask.vcvtph2uw.256 intrinsic. The CHECK
; lines expect the splat to be folded into a (%rdi){1to16} operand of the
; masked vcvtph2uw.
197 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
198 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_b:
200 ; CHECK-NEXT: kmovd %esi, %k1
201 ; CHECK-NEXT: vcvtph2uw (%rdi){1to16}, %ymm0 {%k1}
203 %scalar = load half, ptr %arg0
204 %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
205 %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
206 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
; Constant-mask variant: the intrinsic is called with mask operand i16 -1 (all
; bits set). The CHECK line expects an unmasked vcvtph2uw in place on %ymm0,
; with no k-register setup.
210 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
211 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask:
213 ; CHECK-NEXT: vcvtph2uw %ymm0, %ymm0
215 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
; Zero-masked variant: the intrinsic's second operand is zeroinitializer, and
; the CHECK lines expect vcvtph2uw with both {%k1} and {z}.
219 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_z(<16 x half> %arg0, i16 %mask) {
220 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_z:
222 ; CHECK-NEXT: kmovd %edi, %k1
223 ; CHECK-NEXT: vcvtph2uw %ymm0, %ymm0 {%k1} {z}
225 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
; Load-folding variant: the <16 x half> source is loaded from %arg0 and passed
; to the mask.vcvtph2uw.256 intrinsic. The CHECK lines expect the load to be
; folded into the masked vcvtph2uw as a (%rdi) memory operand.
229 define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
230 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_load:
232 ; CHECK-NEXT: kmovd %esi, %k1
233 ; CHECK-NEXT: vcvtph2uw (%rdi), %ymm0 {%k1}
235 %val = load <16 x half>, ptr %arg0
236 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
240 declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half>, <16 x i16>, i16)
; Calls the mask.vcvttph2w.256 intrinsic with %arg0 as the half source, %arg1
; as the second operand and %mask as the third. The CHECK lines expect a
; {%k1}-masked vcvttph2w writing %ymm1, then a vmovaps of %ymm1 into %ymm0.
242 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
243 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256:
245 ; CHECK-NEXT: kmovd %edi, %k1
246 ; CHECK-NEXT: vcvttph2w %ymm0, %ymm1 {%k1}
247 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
249 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
; Broadcast variant: a scalar half is loaded from %arg0 and splatted to all 16
; lanes before being passed to the mask.vcvttph2w.256 intrinsic. The CHECK
; lines expect the splat to be folded into a (%rdi){1to16} operand of the
; masked vcvttph2w.
253 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
254 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_b:
256 ; CHECK-NEXT: kmovd %esi, %k1
257 ; CHECK-NEXT: vcvttph2w (%rdi){1to16}, %ymm0 {%k1}
259 %scalar = load half, ptr %arg0
260 %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
261 %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
262 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
; Constant-mask variant: the intrinsic is called with mask operand i16 -1 (all
; bits set). The CHECK line expects an unmasked vcvttph2w in place on %ymm0,
; with no k-register setup.
266 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
267 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_nomask:
269 ; CHECK-NEXT: vcvttph2w %ymm0, %ymm0
271 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
; Zero-masked variant: the intrinsic's second operand is zeroinitializer, and
; the CHECK lines expect vcvttph2w with both {%k1} and {z}.
275 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_z(<16 x half> %arg0, i16 %mask) {
276 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_z:
278 ; CHECK-NEXT: kmovd %edi, %k1
279 ; CHECK-NEXT: vcvttph2w %ymm0, %ymm0 {%k1} {z}
281 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
; Load-folding variant: the <16 x half> source is loaded from %arg0 and passed
; to the mask.vcvttph2w.256 intrinsic. The CHECK lines expect the load to be
; folded into the masked vcvttph2w as a (%rdi) memory operand.
285 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
286 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_load:
288 ; CHECK-NEXT: kmovd %esi, %k1
289 ; CHECK-NEXT: vcvttph2w (%rdi), %ymm0 {%k1}
291 %val = load <16 x half>, ptr %arg0
292 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
296 declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half>, <16 x i16>, i16)
; Calls the mask.vcvttph2uw.256 intrinsic with %arg0 as the half source, %arg1
; as the second operand and %mask as the third. The CHECK lines expect a
; {%k1}-masked vcvttph2uw writing %ymm1, then a vmovaps of %ymm1 into %ymm0.
298 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
299 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256:
301 ; CHECK-NEXT: kmovd %edi, %k1
302 ; CHECK-NEXT: vcvttph2uw %ymm0, %ymm1 {%k1}
303 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
305 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
; Broadcast variant: a scalar half is loaded from %arg0 and splatted to all 16
; lanes before being passed to the mask.vcvttph2uw.256 intrinsic. The CHECK
; lines expect the splat to be folded into a (%rdi){1to16} operand of the
; masked vcvttph2uw.
309 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
310 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_b:
312 ; CHECK-NEXT: kmovd %esi, %k1
313 ; CHECK-NEXT: vcvttph2uw (%rdi){1to16}, %ymm0 {%k1}
315 %scalar = load half, ptr %arg0
316 %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
317 %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
318 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
; Constant-mask variant: the intrinsic is called with mask operand i16 -1 (all
; bits set). The CHECK line expects an unmasked vcvttph2uw in place on %ymm0,
; with no k-register setup.
322 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
323 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask:
325 ; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0
327 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
; Zero-masked variant: the intrinsic's second operand is zeroinitializer, and
; the CHECK lines expect vcvttph2uw with both {%k1} and {z}.
331 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_z(<16 x half> %arg0, i16 %mask) {
332 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_z:
334 ; CHECK-NEXT: kmovd %edi, %k1
335 ; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0 {%k1} {z}
337 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
; Load-folding variant: the <16 x half> source is loaded from %arg0 and passed
; to the mask.vcvttph2uw.256 intrinsic. The CHECK lines expect the load to be
; folded into the masked vcvttph2uw as a (%rdi) memory operand.
341 define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
342 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_load:
344 ; CHECK-NEXT: kmovd %esi, %k1
345 ; CHECK-NEXT: vcvttph2uw (%rdi), %ymm0 {%k1}
347 %val = load <16 x half>, ptr %arg0
348 %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
; 128-bit merge-masked signed i16 -> half conversion: sitofp on <8 x i16>
; followed by a select keyed on the i8 %mask (bitcast to <8 x i1>) with %arg1
; as the false operand. The CHECK lines expect a {%k1}-masked vcvtw2ph writing
; %xmm1, then a vmovaps of %xmm1 into %xmm0.
352 define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
353 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128:
355 ; CHECK-NEXT: kmovd %edi, %k1
356 ; CHECK-NEXT: vcvtw2ph %xmm0, %xmm1 {%k1}
357 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
359 %msk = bitcast i8 %mask to <8 x i1>
360 %res0 = sitofp <8 x i16> %arg0 to <8 x half>
361 %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
; Broadcast variant: a scalar i16 is loaded from %arg0 and splatted to all 8
; lanes (insertelement at index 0 + shufflevector with an all-zero shuffle
; mask) before the sitofp/select. The CHECK lines expect the splat to be folded
; into a (%rdi){1to8} operand of the masked vcvtw2ph.
365 define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_b(ptr %arg0, <8 x half> %arg1, i8 %mask) {
366 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_b:
368 ; CHECK-NEXT: kmovd %esi, %k1
369 ; CHECK-NEXT: vcvtw2ph (%rdi){1to8}, %xmm0 {%k1}
371 %msk = bitcast i8 %mask to <8 x i1>
372 %scalar = load i16, ptr %arg0
373 %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
374 %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
375 %res0 = sitofp <8 x i16> %val to <8 x half>
376 %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
; Unmasked variant: a plain sitofp with no select; %arg1 is not referenced by
; the visible instruction. The CHECK line expects an unmasked vcvtw2ph in place
; on %xmm0.
380 define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
381 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask:
383 ; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
385 %res = sitofp <8 x i16> %arg0 to <8 x half>
; Zero-masked variant: the select's false operand is zeroinitializer, so the
; CHECK lines expect vcvtw2ph with both {%k1} and {z}.
389 define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
390 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_z:
392 ; CHECK-NEXT: kmovd %edi, %k1
393 ; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0 {%k1} {z}
395 %msk = bitcast i8 %mask to <8 x i1>
396 %res0 = sitofp <8 x i16> %arg0 to <8 x half>
397 %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
; Load-folding variant: the full <8 x i16> source vector is loaded from %arg0
; before the sitofp/select. The CHECK lines expect the load to be folded into
; the masked vcvtw2ph as a (%rdi) memory operand.
401 define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_load(ptr %arg0, <8 x half> %arg1, i8 %mask) {
402 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_load:
404 ; CHECK-NEXT: kmovd %esi, %k1
405 ; CHECK-NEXT: vcvtw2ph (%rdi), %xmm0 {%k1}
407 %msk = bitcast i8 %mask to <8 x i1>
408 %val = load <8 x i16>, ptr %arg0
409 %res0 = sitofp <8 x i16> %val to <8 x half>
410 %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
414 declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half>, <8 x i16>, i8)
; Calls the mask.vcvtph2w.128 intrinsic with %arg0 as the half source, %arg1 as
; the second operand and the i8 %mask as the third. The CHECK lines expect a
; {%k1}-masked vcvtph2w writing %xmm1, then a vmovaps of %xmm1 into %xmm0.
416 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
417 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128:
419 ; CHECK-NEXT: kmovd %edi, %k1
420 ; CHECK-NEXT: vcvtph2w %xmm0, %xmm1 {%k1}
421 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
423 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
; Broadcast variant: a scalar half is loaded from %arg0 and splatted to all 8
; lanes before being passed to the mask.vcvtph2w.128 intrinsic. The CHECK lines
; expect the splat to be folded into a (%rdi){1to8} operand of the masked
; vcvtph2w.
427 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
428 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_b:
430 ; CHECK-NEXT: kmovd %esi, %k1
431 ; CHECK-NEXT: vcvtph2w (%rdi){1to8}, %xmm0 {%k1}
433 %scalar = load half, ptr %arg0
434 %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
435 %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
436 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
; Constant-mask variant: the intrinsic is called with mask operand i8 -1 (all
; bits set). The CHECK line expects an unmasked vcvtph2w in place on %xmm0,
; with no k-register setup.
440 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
441 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_nomask:
443 ; CHECK-NEXT: vcvtph2w %xmm0, %xmm0
445 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
; Zero-masked variant: the intrinsic's second operand is zeroinitializer, and
; the CHECK lines expect vcvtph2w with both {%k1} and {z}.
449 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_z(<8 x half> %arg0, i8 %mask) {
450 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_z:
452 ; CHECK-NEXT: kmovd %edi, %k1
453 ; CHECK-NEXT: vcvtph2w %xmm0, %xmm0 {%k1} {z}
455 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
; Load-folding variant: the <8 x half> source is loaded from %arg0 and passed
; to the mask.vcvtph2w.128 intrinsic. The CHECK lines expect the load to be
; folded into the masked vcvtph2w as a (%rdi) memory operand.
459 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
460 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_load:
462 ; CHECK-NEXT: kmovd %esi, %k1
463 ; CHECK-NEXT: vcvtph2w (%rdi), %xmm0 {%k1}
465 %val = load <8 x half>, ptr %arg0
466 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
; 128-bit merge-masked unsigned i16 -> half conversion: uitofp on <8 x i16>
; followed by a select keyed on the i8 %mask (bitcast to <8 x i1>) with %arg1
; as the false operand. The CHECK lines expect a {%k1}-masked vcvtuw2ph writing
; %xmm1, then a vmovaps of %xmm1 into %xmm0.
471 define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
472 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128:
474 ; CHECK-NEXT: kmovd %edi, %k1
475 ; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm1 {%k1}
476 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
478 %msk = bitcast i8 %mask to <8 x i1>
479 %res0 = uitofp <8 x i16> %arg0 to <8 x half>
480 %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
; Broadcast variant: a scalar i16 is loaded from %arg0 and splatted to all 8
; lanes before the uitofp/select. The CHECK lines expect the splat to be folded
; into a (%rdi){1to8} operand of the masked vcvtuw2ph.
484 define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_b(ptr %arg0, <8 x half> %arg1, i8 %mask) {
485 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_b:
487 ; CHECK-NEXT: kmovd %esi, %k1
488 ; CHECK-NEXT: vcvtuw2ph (%rdi){1to8}, %xmm0 {%k1}
490 %msk = bitcast i8 %mask to <8 x i1>
491 %scalar = load i16, ptr %arg0
492 %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
493 %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
494 %res0 = uitofp <8 x i16> %val to <8 x half>
495 %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
; Unmasked variant: a plain uitofp with no select; %arg1 is not referenced by
; the visible instruction. The CHECK line expects an unmasked vcvtuw2ph in
; place on %xmm0.
499 define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
500 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask:
502 ; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
504 %res = uitofp <8 x i16> %arg0 to <8 x half>
; Zero-masked variant: the select's false operand is zeroinitializer, so the
; CHECK lines expect vcvtuw2ph with both {%k1} and {z}.
508 define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
509 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_z:
511 ; CHECK-NEXT: kmovd %edi, %k1
512 ; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0 {%k1} {z}
514 %msk = bitcast i8 %mask to <8 x i1>
515 %res0 = uitofp <8 x i16> %arg0 to <8 x half>
516 %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
; Load-folding variant: the full <8 x i16> source vector is loaded from %arg0
; before the uitofp/select. The CHECK lines expect the load to be folded into
; the masked vcvtuw2ph as a (%rdi) memory operand.
520 define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_load(ptr %arg0, <8 x half> %arg1, i8 %mask) {
521 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_load:
523 ; CHECK-NEXT: kmovd %esi, %k1
524 ; CHECK-NEXT: vcvtuw2ph (%rdi), %xmm0 {%k1}
526 %msk = bitcast i8 %mask to <8 x i1>
527 %val = load <8 x i16>, ptr %arg0
528 %res0 = uitofp <8 x i16> %val to <8 x half>
529 %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
533 declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half>, <8 x i16>, i8)
; Calls the mask.vcvtph2uw.128 intrinsic with %arg0 as the half source, %arg1
; as the second operand and the i8 %mask as the third. The CHECK lines expect a
; {%k1}-masked vcvtph2uw writing %xmm1, then a vmovaps of %xmm1 into %xmm0.
535 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
536 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128:
538 ; CHECK-NEXT: kmovd %edi, %k1
539 ; CHECK-NEXT: vcvtph2uw %xmm0, %xmm1 {%k1}
540 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
542 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
; Broadcast variant: a scalar half is loaded from %arg0 and splatted to all 8
; lanes before being passed to the mask.vcvtph2uw.128 intrinsic. The CHECK
; lines expect the splat to be folded into a (%rdi){1to8} operand of the masked
; vcvtph2uw.
546 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
547 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_b:
549 ; CHECK-NEXT: kmovd %esi, %k1
550 ; CHECK-NEXT: vcvtph2uw (%rdi){1to8}, %xmm0 {%k1}
552 %scalar = load half, ptr %arg0
553 %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
554 %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
555 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
; Constant-mask variant: the intrinsic is called with mask operand i8 -1 (all
; bits set). The CHECK line expects an unmasked vcvtph2uw in place on %xmm0,
; with no k-register setup.
559 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
560 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask:
562 ; CHECK-NEXT: vcvtph2uw %xmm0, %xmm0
564 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
; Zero-masked variant: the intrinsic's second operand is zeroinitializer, and
; the CHECK lines expect vcvtph2uw with both {%k1} and {z}.
568 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_z(<8 x half> %arg0, i8 %mask) {
569 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_z:
571 ; CHECK-NEXT: kmovd %edi, %k1
572 ; CHECK-NEXT: vcvtph2uw %xmm0, %xmm0 {%k1} {z}
574 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
; Load-folding variant: the <8 x half> source is loaded from %arg0 and passed
; to the mask.vcvtph2uw.128 intrinsic. The CHECK lines expect the load to be
; folded into the masked vcvtph2uw as a (%rdi) memory operand.
578 define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
579 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_load:
581 ; CHECK-NEXT: kmovd %esi, %k1
582 ; CHECK-NEXT: vcvtph2uw (%rdi), %xmm0 {%k1}
584 %val = load <8 x half>, ptr %arg0
585 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
589 declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half>, <8 x i16>, i8)
; Calls the mask.vcvttph2w.128 intrinsic with %arg0 as the half source, %arg1
; as the second operand and the i8 %mask as the third. The CHECK lines expect a
; {%k1}-masked vcvttph2w writing %xmm1, then a vmovaps of %xmm1 into %xmm0.
591 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
592 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128:
594 ; CHECK-NEXT: kmovd %edi, %k1
595 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm1 {%k1}
596 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
598 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
; Broadcast variant: a scalar half is loaded from %arg0 and splatted to all 8
; lanes before being passed to the mask.vcvttph2w.128 intrinsic. The CHECK
; lines expect the splat to be folded into a (%rdi){1to8} operand of the masked
; vcvttph2w.
602 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
603 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_b:
605 ; CHECK-NEXT: kmovd %esi, %k1
606 ; CHECK-NEXT: vcvttph2w (%rdi){1to8}, %xmm0 {%k1}
608 %scalar = load half, ptr %arg0
609 %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
610 %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
611 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
; Constant-mask variant: the intrinsic is called with mask operand i8 -1 (all
; bits set). The CHECK line expects an unmasked vcvttph2w in place on %xmm0,
; with no k-register setup.
615 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
616 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_nomask:
618 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
620 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
; Zero-masked variant: the intrinsic's second operand is zeroinitializer, and
; the CHECK lines expect vcvttph2w with both {%k1} and {z}.
624 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_z(<8 x half> %arg0, i8 %mask) {
625 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_z:
627 ; CHECK-NEXT: kmovd %edi, %k1
628 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0 {%k1} {z}
630 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
; Load-folding variant: the <8 x half> source is loaded from %arg0 and passed
; to the mask.vcvttph2w.128 intrinsic. The CHECK lines expect the load to be
; folded into the masked vcvttph2w as a (%rdi) memory operand.
634 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
635 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_load:
637 ; CHECK-NEXT: kmovd %esi, %k1
638 ; CHECK-NEXT: vcvttph2w (%rdi), %xmm0 {%k1}
640 %val = load <8 x half>, ptr %arg0
641 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
645 declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half>, <8 x i16>, i8)
; Calls the mask.vcvttph2uw.128 intrinsic with %arg0 as the half source, %arg1
; as the second operand and the i8 %mask as the third. The CHECK lines expect a
; {%k1}-masked vcvttph2uw writing %xmm1, then a vmovaps of %xmm1 into %xmm0.
647 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
648 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128:
650 ; CHECK-NEXT: kmovd %edi, %k1
651 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm1 {%k1}
652 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
654 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
; Broadcast variant: a scalar half is loaded from %arg0 and splatted to all 8
; lanes before being passed to the mask.vcvttph2uw.128 intrinsic. The CHECK
; lines expect the splat to be folded into a (%rdi){1to8} operand of the masked
; vcvttph2uw.
658 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
659 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_b:
661 ; CHECK-NEXT: kmovd %esi, %k1
662 ; CHECK-NEXT: vcvttph2uw (%rdi){1to8}, %xmm0 {%k1}
664 %scalar = load half, ptr %arg0
665 %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
666 %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
667 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
; Constant-mask variant: the intrinsic is called with mask operand i8 -1 (all
; bits set). The CHECK line expects an unmasked vcvttph2uw in place on %xmm0,
; with no k-register setup.
671 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
672 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask:
674 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
676 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
; Zero-masked variant: the intrinsic's second operand is zeroinitializer, and
; the CHECK lines expect vcvttph2uw with both {%k1} and {z}.
680 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_z(<8 x half> %arg0, i8 %mask) {
681 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_z:
683 ; CHECK-NEXT: kmovd %edi, %k1
684 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0 {%k1} {z}
686 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
; Load-folding variant: the <8 x half> source is loaded from %arg0 and passed
; to the mask.vcvttph2uw.128 intrinsic. The CHECK lines expect the load to be
; folded into the masked vcvttph2uw as a (%rdi) memory operand.
690 define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
691 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_load:
693 ; CHECK-NEXT: kmovd %esi, %k1
694 ; CHECK-NEXT: vcvttph2uw (%rdi), %xmm0 {%k1}
696 %val = load <8 x half>, ptr %arg0
697 %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
; Narrower-than-register case: uitofp on a 4-element i16 vector. The CHECK line
; expects a single vcvtuw2ph on %xmm0 with no extra widening instructions.
701 define <4 x half> @test_u16tofp4(<4 x i16> %arg0) {
702 ; CHECK-LABEL: test_u16tofp4:
704 ; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
706 %res = uitofp <4 x i16> %arg0 to <4 x half>
; Narrower-than-register case: sitofp on a 2-element i16 vector. The CHECK line
; expects a single vcvtw2ph on %xmm0.
710 define <2 x half> @test_s16tofp2(<2 x i16> %arg0) {
711 ; CHECK-LABEL: test_s16tofp2:
713 ; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
715 %res = sitofp <2 x i16> %arg0 to <2 x half>
; uitofp from <4 x i8>: the CHECK lines expect the bytes to be zero-extended to
; words with vpmovzxbw first, then converted with vcvtuw2ph.
719 define <4 x half> @test_u8tofp4(<4 x i8> %arg0) {
720 ; CHECK-LABEL: test_u8tofp4:
722 ; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
723 ; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
725 %res = uitofp <4 x i8> %arg0 to <4 x half>
; sitofp from <2 x i8>: the CHECK lines expect the bytes to be sign-extended to
; words with vpmovsxbw first, then converted with vcvtw2ph.
729 define <2 x half> @test_s8tofp2(<2 x i8> %arg0) {
730 ; CHECK-LABEL: test_s8tofp2:
732 ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
733 ; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
735 %res = sitofp <2 x i8> %arg0 to <2 x half>
; uitofp from <2 x i1>: the CHECK lines expect a vpmovqw narrowing, a vpandd
; against a broadcast ({1to4}) constant-pool operand, and then vcvtuw2ph.
739 define <2 x half> @test_u1tofp2(<2 x i1> %arg0) {
740 ; CHECK-LABEL: test_u1tofp2:
742 ; CHECK-NEXT: vpmovqw %xmm0, %xmm0
743 ; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
744 ; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
746 %res = uitofp <2 x i1> %arg0 to <2 x half>
; sitofp from the non-power-of-two type <4 x i17>: the CHECK lines expect a
; shift left by 15 followed by an arithmetic shift right by 15 on dword lanes
; (sign-extending the low 17 bits to 32), then the dword conversion vcvtdq2ph.
750 define <4 x half> @test_s17tofp4(<4 x i17> %arg0) {
751 ; CHECK-LABEL: test_s17tofp4:
753 ; CHECK-NEXT: vpslld $15, %xmm0, %xmm0
754 ; CHECK-NEXT: vpsrad $15, %xmm0, %xmm0
755 ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
757 %res = sitofp <4 x i17> %arg0 to <4 x half>
; uitofp from the non-power-of-two type <2 x i33>: the CHECK lines expect a
; vandpd against a broadcast ({1to2}) constant-pool operand, then the unsigned
; qword conversion vcvtuqq2ph.
761 define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
762 ; CHECK-LABEL: test_u33tofp2:
764 ; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
765 ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
767 %res = uitofp <2 x i33> %arg0 to <2 x half>
; NOTE(review): despite the "s16tof16" name, the visible instruction converts
; half -> signed i16 (fptosi on <16 x half>). The CHECK line expects the
; truncating conversion vcvttph2w on %ymm0.
771 define <16 x i16> @test_s16tof16(<16 x half> %a) {
772 ; CHECK-LABEL: test_s16tof16:
774 ; CHECK-NEXT: vcvttph2w %ymm0, %ymm0
776 %res = fptosi <16 x half> %a to <16 x i16>
; NOTE(review): despite the "u16tof16" name, the visible instruction converts
; half -> unsigned i16 (fptoui on <16 x half>). The CHECK line expects the
; truncating conversion vcvttph2uw on %ymm0.
780 define <16 x i16> @test_u16tof16(<16 x half> %a) {
781 ; CHECK-LABEL: test_u16tof16:
783 ; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0
785 %res = fptoui <16 x half> %a to <16 x i16>