1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
4 define <16 x half> @test_int_x86_avx512fp16_add_ph_256(<16 x half> %x1, <16 x half> %x2) {
5 ; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_256:
7 ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0
9 %res = fadd <16 x half> %x1, %x2
13 define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, <16 x half>* %ptr) {
14 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_256:
16 ; CHECK-NEXT: kmovd %edi, %k1
17 ; CHECK-NEXT: vmovaps %ymm2, %ymm3
18 ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm3 {%k1}
19 ; CHECK-NEXT: vaddph (%rsi), %ymm0, %ymm2 {%k1}
20 ; CHECK-NEXT: vaddph %ymm2, %ymm3, %ymm0
22 %msk = bitcast i16 %mask to <16 x i1>
23 %val = load <16 x half>, <16 x half>* %ptr
24 %res0 = fadd <16 x half> %x1, %x2
25 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
26 %t3 = fadd <16 x half> %x1, %val
27 %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
28 %res = fadd <16 x half> %res1 , %res2
32 define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, <16 x half>* %ptr) {
33 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_256:
35 ; CHECK-NEXT: kmovd %edi, %k1
36 ; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0 {%k1} {z}
38 %msk = bitcast i16 %mask to <16 x i1>
39 %res0 = fadd <16 x half> %x1, %x2
40 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
44 define <8 x half> @test_int_x86_avx512fp16_add_ph_128(<8 x half> %x1, <8 x half> %x2) {
45 ; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_128:
47 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
49 %res = fadd <8 x half> %x1, %x2
53 define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, <8 x half>* %ptr) {
54 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_128:
56 ; CHECK-NEXT: kmovd %edi, %k1
57 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
58 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm3 {%k1}
59 ; CHECK-NEXT: vaddph (%rsi), %xmm0, %xmm2 {%k1}
60 ; CHECK-NEXT: vaddph %xmm2, %xmm3, %xmm0
62 %msk = bitcast i8 %mask to <8 x i1>
63 %val = load <8 x half>, <8 x half>* %ptr
64 %res0 = fadd <8 x half> %x1, %x2
65 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
66 %t3 = fadd <8 x half> %x1, %val
67 %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
68 %res = fadd <8 x half> %res1 , %res2
72 define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, <8 x half>* %ptr) {
73 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_128:
75 ; CHECK-NEXT: kmovd %edi, %k1
76 ; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0 {%k1} {z}
78 %msk = bitcast i8 %mask to <8 x i1>
79 %res0 = fadd <8 x half> %x1, %x2
80 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
84 define <16 x half> @test_int_x86_avx512fp16_sub_ph_256(<16 x half> %x1, <16 x half> %x2) {
85 ; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_256:
87 ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm0
89 %res = fsub <16 x half> %x1, %x2
93 define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, <16 x half>* %ptr) {
94 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_256:
96 ; CHECK-NEXT: kmovd %edi, %k1
97 ; CHECK-NEXT: vmovaps %ymm2, %ymm3
98 ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm3 {%k1}
99 ; CHECK-NEXT: vsubph (%rsi), %ymm0, %ymm2 {%k1}
100 ; CHECK-NEXT: vsubph %ymm2, %ymm3, %ymm0
102 %msk = bitcast i16 %mask to <16 x i1>
103 %val = load <16 x half>, <16 x half>* %ptr
104 %res0 = fsub <16 x half> %x1, %x2
105 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
106 %t3 = fsub <16 x half> %x1, %val
107 %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
108 %res = fsub <16 x half> %res1 , %res2
112 define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, <16 x half>* %ptr) {
113 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_256:
115 ; CHECK-NEXT: kmovd %edi, %k1
116 ; CHECK-NEXT: vsubph %ymm1, %ymm0, %ymm0 {%k1} {z}
118 %msk = bitcast i16 %mask to <16 x i1>
119 %res0 = fsub <16 x half> %x1, %x2
120 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
121 ret <16 x half> %res1
124 define <8 x half> @test_int_x86_avx512fp16_sub_ph_128(<8 x half> %x1, <8 x half> %x2) {
125 ; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_128:
127 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm0
129 %res = fsub <8 x half> %x1, %x2
133 define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, <8 x half>* %ptr) {
134 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_128:
136 ; CHECK-NEXT: kmovd %edi, %k1
137 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
138 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm3 {%k1}
139 ; CHECK-NEXT: vsubph (%rsi), %xmm0, %xmm2 {%k1}
140 ; CHECK-NEXT: vsubph %xmm2, %xmm3, %xmm0
142 %msk = bitcast i8 %mask to <8 x i1>
143 %val = load <8 x half>, <8 x half>* %ptr
144 %res0 = fsub <8 x half> %x1, %x2
145 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
146 %t3 = fsub <8 x half> %x1, %val
147 %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
148 %res = fsub <8 x half> %res1 , %res2
152 define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, <8 x half>* %ptr) {
153 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_128:
155 ; CHECK-NEXT: kmovd %edi, %k1
156 ; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm0 {%k1} {z}
158 %msk = bitcast i8 %mask to <8 x i1>
159 %res0 = fsub <8 x half> %x1, %x2
160 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
164 define <16 x half> @test_int_x86_avx512fp16_mul_ph_256(<16 x half> %x1, <16 x half> %x2) {
165 ; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_256:
167 ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm0
169 %res = fmul <16 x half> %x1, %x2
173 define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, <16 x half>* %ptr) {
174 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_256:
176 ; CHECK-NEXT: kmovd %edi, %k1
177 ; CHECK-NEXT: vmovaps %ymm2, %ymm3
178 ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm3 {%k1}
179 ; CHECK-NEXT: vmulph (%rsi), %ymm0, %ymm2 {%k1}
180 ; CHECK-NEXT: vmulph %ymm2, %ymm3, %ymm0
182 %msk = bitcast i16 %mask to <16 x i1>
183 %val = load <16 x half>, <16 x half>* %ptr
184 %res0 = fmul <16 x half> %x1, %x2
185 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
186 %t3 = fmul <16 x half> %x1, %val
187 %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
188 %res = fmul <16 x half> %res1 , %res2
192 define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, <16 x half>* %ptr) {
193 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_256:
195 ; CHECK-NEXT: kmovd %edi, %k1
196 ; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm0 {%k1} {z}
198 %msk = bitcast i16 %mask to <16 x i1>
199 %res0 = fmul <16 x half> %x1, %x2
200 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
201 ret <16 x half> %res1
204 define <8 x half> @test_int_x86_avx512fp16_mul_ph_128(<8 x half> %x1, <8 x half> %x2) {
205 ; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_128:
207 ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm0
209 %res = fmul <8 x half> %x1, %x2
213 define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, <8 x half>* %ptr) {
214 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_128:
216 ; CHECK-NEXT: kmovd %edi, %k1
217 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
218 ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm3 {%k1}
219 ; CHECK-NEXT: vmulph (%rsi), %xmm0, %xmm2 {%k1}
220 ; CHECK-NEXT: vmulph %xmm2, %xmm3, %xmm0
222 %msk = bitcast i8 %mask to <8 x i1>
223 %val = load <8 x half>, <8 x half>* %ptr
224 %res0 = fmul <8 x half> %x1, %x2
225 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
226 %t3 = fmul <8 x half> %x1, %val
227 %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
228 %res = fmul <8 x half> %res1 , %res2
232 define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, <8 x half>* %ptr) {
233 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_128:
235 ; CHECK-NEXT: kmovd %edi, %k1
236 ; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm0 {%k1} {z}
238 %msk = bitcast i8 %mask to <8 x i1>
239 %res0 = fmul <8 x half> %x1, %x2
240 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
244 define <16 x half> @test_int_x86_avx512fp16_div_ph_256(<16 x half> %x1, <16 x half> %x2) {
245 ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256:
247 ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm0
249 %res = fdiv <16 x half> %x1, %x2
253 define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, <16 x half>* %ptr) {
254 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_256:
256 ; CHECK-NEXT: kmovd %edi, %k1
257 ; CHECK-NEXT: vmovaps %ymm2, %ymm3
258 ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm3 {%k1}
259 ; CHECK-NEXT: vdivph (%rsi), %ymm0, %ymm2 {%k1}
260 ; CHECK-NEXT: vdivph %ymm2, %ymm3, %ymm0
262 %msk = bitcast i16 %mask to <16 x i1>
263 %val = load <16 x half>, <16 x half>* %ptr
264 %res0 = fdiv <16 x half> %x1, %x2
265 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
266 %t3 = fdiv <16 x half> %x1, %val
267 %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
268 %res = fdiv <16 x half> %res1 , %res2
272 define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, <16 x half>* %ptr) {
273 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_256:
275 ; CHECK-NEXT: kmovd %edi, %k1
276 ; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm0 {%k1} {z}
278 %msk = bitcast i16 %mask to <16 x i1>
279 %res0 = fdiv <16 x half> %x1, %x2
280 %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
281 ret <16 x half> %res1
284 define <8 x half> @test_int_x86_avx512fp16_div_ph_128(<8 x half> %x1, <8 x half> %x2) {
285 ; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128:
287 ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm0
289 %res = fdiv <8 x half> %x1, %x2
293 define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, <8 x half>* %ptr) {
294 ; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_128:
296 ; CHECK-NEXT: kmovd %edi, %k1
297 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
298 ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm3 {%k1}
299 ; CHECK-NEXT: vdivph (%rsi), %xmm0, %xmm2 {%k1}
300 ; CHECK-NEXT: vdivph %xmm2, %xmm3, %xmm0
302 %msk = bitcast i8 %mask to <8 x i1>
303 %val = load <8 x half>, <8 x half>* %ptr
304 %res0 = fdiv <8 x half> %x1, %x2
305 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
306 %t3 = fdiv <8 x half> %x1, %val
307 %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
308 %res = fdiv <8 x half> %res1 , %res2
312 define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, <8 x half>* %ptr) {
313 ; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_128:
315 ; CHECK-NEXT: kmovd %edi, %k1
316 ; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm0 {%k1} {z}
318 %msk = bitcast i8 %mask to <8 x i1>
319 %res0 = fdiv <8 x half> %x1, %x2
320 %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
324 define <16 x half> @test_min_ph_256(<16 x half> %x1, <16 x half> %x2) {
325 ; CHECK-LABEL: test_min_ph_256:
327 ; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
329 %res0 = fcmp olt <16 x half> %x1, %x2
330 %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
331 ret <16 x half> %res1
334 define <16 x half> @test_max_ph_256(<16 x half> %x1, <16 x half> %x2) {
335 ; CHECK-LABEL: test_max_ph_256:
337 ; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm0
339 %res0 = fcmp ogt <16 x half> %x1, %x2
340 %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
341 ret <16 x half> %res1
344 define <8 x half> @test_min_ph_128(<8 x half> %x1, <8 x half> %x2) {
345 ; CHECK-LABEL: test_min_ph_128:
347 ; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
349 %res0 = fcmp olt <8 x half> %x1, %x2
350 %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
354 define <8 x half> @test_max_ph_128(<8 x half> %x1, <8 x half> %x2) {
355 ; CHECK-LABEL: test_max_ph_128:
357 ; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
359 %res0 = fcmp ogt <8 x half> %x1, %x2
360 %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
364 declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>)
365 declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>)
367 define <8 x half> @test_max_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
368 ; CHECK-LABEL: test_max_ph_128_2:
370 ; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
372 %res0 = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> %x1, <8 x half> %x2)
376 define <16 x half> @test_max_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
377 ; CHECK-LABEL: test_max_ph_256_2:
379 ; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm0
381 %res0 = call <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half> %x1, <16 x half> %x2)
382 ret <16 x half> %res0
385 declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>)
386 declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>)
388 define <8 x half> @test_min_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
389 ; CHECK-LABEL: test_min_ph_128_2:
391 ; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
393 %res0 = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> %x1, <8 x half> %x2)
397 define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
398 ; CHECK-LABEL: test_min_ph_256_2:
400 ; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
402 %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
403 ret <16 x half> %res0
406 declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)
408 define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
409 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
411 ; CHECK-NEXT: kmovd %edi, %k1
412 ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1}
413 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
415 %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
416 ret <4 x double> %res
419 define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
420 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
422 ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
424 %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
425 ret <4 x double> %res
428 declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)
430 define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
431 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
433 ; CHECK-NEXT: kmovd %edi, %k1
434 ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1}
435 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
437 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
438 ret <2 x double> %res
441 define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
442 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
444 ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
446 %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
447 ret <2 x double> %res
450 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)
452 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
453 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
455 ; CHECK-NEXT: kmovd %edi, %k1
456 ; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1}
457 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
458 ; CHECK-NEXT: vzeroupper
460 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
464 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) {
465 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
467 ; CHECK-NEXT: kmovd %esi, %k1
468 ; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1}
470 %x0 = load <4 x double>, <4 x double>* %px0, align 32
471 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
475 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)
477 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
478 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
480 ; CHECK-NEXT: kmovd %edi, %k1
481 ; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1}
482 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
484 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
488 define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) {
489 ; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
491 ; CHECK-NEXT: kmovd %esi, %k1
492 ; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1}
494 %x0 = load <2 x double>, <2 x double>* %px0, align 16
495 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
499 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)
501 define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
502 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
504 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0
506 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
510 define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
511 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
513 ; CHECK-NEXT: kmovd %edi, %k1
514 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1}
515 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
517 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
521 define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
522 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
524 ; CHECK-NEXT: kmovd %edi, %k1
525 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z}
527 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
531 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)
533 define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
534 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
536 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0
538 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
542 define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
543 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
545 ; CHECK-NEXT: kmovd %edi, %k1
546 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1}
547 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
549 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
553 define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
554 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
556 ; CHECK-NEXT: kmovd %edi, %k1
557 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z}
559 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
563 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)
565 define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
566 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
568 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
570 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
574 define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
575 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
577 ; CHECK-NEXT: kmovd %edi, %k1
578 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1}
579 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
581 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
585 define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
586 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
588 ; CHECK-NEXT: kmovd %edi, %k1
589 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z}
591 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
595 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)
597 define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
598 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
600 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
602 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
606 define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
607 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
609 ; CHECK-NEXT: kmovd %edi, %k1
610 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1}
611 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
613 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
617 define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
618 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
620 ; CHECK-NEXT: kmovd %edi, %k1
621 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z}
623 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
627 declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)
629 define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
630 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
632 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
634 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
638 define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
639 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
641 ; CHECK-NEXT: kmovd %edi, %k1
642 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1}
643 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
645 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
649 define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
650 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
652 ; CHECK-NEXT: kmovd %edi, %k1
653 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z}
655 %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
659 declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)
661 define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
662 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
664 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
666 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
670 define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
671 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
673 ; CHECK-NEXT: kmovd %edi, %k1
674 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1}
675 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
677 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
681 define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
682 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
684 ; CHECK-NEXT: kmovd %edi, %k1
685 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z}
687 %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
691 declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)
693 define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
694 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
696 ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
698 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
702 define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
703 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
705 ; CHECK-NEXT: kmovd %edi, %k1
706 ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm1 {%k1}
707 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
709 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
713 define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
714 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
716 ; CHECK-NEXT: kmovd %edi, %k1
717 ; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0 {%k1} {z}
719 %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
723 declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)
725 define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
726 ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
728 ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0
730 %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
734 define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
735 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
737 ; CHECK-NEXT: kmovd %edi, %k1
738 ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm1 {%k1}
739 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
741 %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
745 define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
746 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
748 ; CHECK-NEXT: kmovd %edi, %k1
749 ; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0 {%k1} {z}
751 %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
755 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)
757 define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
758 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
760 ; CHECK-NEXT: kmovd %edi, %k1
761 ; CHECK-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1}
762 ; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
763 ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
765 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
766 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
767 %res2 = fadd <8 x half> %res, %res1
771 declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)
773 define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
774 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
776 ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
777 ; CHECK-NEXT: vzeroupper
779 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
783 define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
784 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
786 ; CHECK-NEXT: kmovd %edi, %k1
787 ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1}
788 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
789 ; CHECK-NEXT: vzeroupper
791 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
795 define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
796 ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
798 ; CHECK-NEXT: kmovd %edi, %k1
799 ; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0 {%k1} {z}
800 ; CHECK-NEXT: vzeroupper
802 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)