1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
; Declarations of the constrained (strict floating-point) fptosi/fptoui
; intrinsics used by the tests below. Sources are <2 x half>, <4 x half> and
; <8 x half>; results range from i1 up to i64 elements.
5 declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half>, metadata)
6 declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half>, metadata)
7 declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half>, metadata)
8 declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half>, metadata)
9 declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half>, metadata)
10 declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half>, metadata)
11 declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half>, metadata)
12 declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half>, metadata)
13 declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half>, metadata)
14 declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half>, metadata)
15 declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half>, metadata)
16 declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half>, metadata)
17 declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half>, metadata)
18 declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half>, metadata)
19 declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half>, metadata)
20 declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half>, metadata)
21 declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half>, metadata)
22 declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half>, metadata)
23 declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half>, metadata)
24 declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half>, metadata)
25 declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half>, metadata)
26 declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half>, metadata)
27 declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half>, metadata)
28 declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half>, metadata)
; Strict signed conversion <2 x half> -> <2 x i64>.
; Expected code keeps only the low 32 bits of the source (the two half
; elements), zeroing the rest with vxorps/vblendps, then uses vcvttph2qq.
; NOTE(review): the zeroing is presumably there so that unused lanes cannot
; raise floating-point exceptions under fpexcept.strict - confirm.
30 define <2 x i64> @strict_vector_fptosi_v2f16_to_v2i64(<2 x half> %a) #0 {
31 ; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i64:
33 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
34 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
35 ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
36 ; CHECK-NEXT: ret{{[l|q]}}
37 %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half> %a,
38 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <2 x half> -> <2 x i64>.
; Expected code zeroes everything above the low 32 bits of the source
; (vxorps/vblendps) and then uses the unsigned form vcvttph2uqq.
42 define <2 x i64> @strict_vector_fptoui_v2f16_to_v2i64(<2 x half> %a) #0 {
43 ; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i64:
45 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
46 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
47 ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
48 ; CHECK-NEXT: ret{{[l|q]}}
49 %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half> %a,
50 metadata !"fpexcept.strict") #0
; Strict signed conversion <2 x half> -> <2 x i32>.
; Expected code zeroes everything above the low 32 bits of the source
; (vxorps/vblendps) and then uses vcvttph2dq.
54 define <2 x i32> @strict_vector_fptosi_v2f16_to_v2i32(<2 x half> %a) #0 {
55 ; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i32:
57 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
58 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
59 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
60 ; CHECK-NEXT: ret{{[l|q]}}
61 %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half> %a,
62 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <2 x half> -> <2 x i32>.
; Expected code zeroes everything above the low 32 bits of the source
; (vxorps/vblendps) and then uses the unsigned form vcvttph2udq.
66 define <2 x i32> @strict_vector_fptoui_v2f16_to_v2i32(<2 x half> %a) #0 {
67 ; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i32:
69 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
70 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
71 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
72 ; CHECK-NEXT: ret{{[l|q]}}
73 %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half> %a,
74 metadata !"fpexcept.strict") #0
; Strict signed conversion <2 x half> -> <2 x i16>.
; Expected code zeroes everything above the low 32 bits of the source
; (vxorps/vblendps) and then uses vcvttph2w.
78 define <2 x i16> @strict_vector_fptosi_v2f16_to_v2i16(<2 x half> %a) #0 {
79 ; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i16:
81 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
82 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
83 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
84 ; CHECK-NEXT: ret{{[l|q]}}
85 %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half> %a,
86 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <2 x half> -> <2 x i16>.
; Expected code zeroes everything above the low 32 bits of the source
; (vxorps/vblendps) and then uses the unsigned form vcvttph2uw.
90 define <2 x i16> @strict_vector_fptoui_v2f16_to_v2i16(<2 x half> %a) #0 {
91 ; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i16:
93 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
94 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
95 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
96 ; CHECK-NEXT: ret{{[l|q]}}
97 %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half> %a,
98 metadata !"fpexcept.strict") #0
; Strict signed conversion <2 x half> -> <2 x i8>.
; Expected code zeroes everything above the low 32 bits of the source,
; converts to 16-bit words with vcvttph2w, then narrows words to bytes with
; vpmovwb.
102 define <2 x i8> @strict_vector_fptosi_v2f16_to_v2i8(<2 x half> %a) #0 {
103 ; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i8:
105 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
106 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
107 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
108 ; CHECK-NEXT: vpmovwb %xmm0, %xmm0
109 ; CHECK-NEXT: ret{{[l|q]}}
110 %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half> %a,
111 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <2 x half> -> <2 x i8>.
; Expected code zeroes everything above the low 32 bits of the source,
; converts to 16-bit words with vcvttph2uw, then narrows words to bytes with
; vpmovwb.
115 define <2 x i8> @strict_vector_fptoui_v2f16_to_v2i8(<2 x half> %a) #0 {
116 ; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i8:
118 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
119 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
120 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
121 ; CHECK-NEXT: vpmovwb %xmm0, %xmm0
122 ; CHECK-NEXT: ret{{[l|q]}}
123 %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half> %a,
124 metadata !"fpexcept.strict") #0
; Strict signed conversion <2 x half> -> <2 x i1>.
; Expected code zeroes the unused source lanes, converts to 16-bit words
; (vcvttph2w), shifts bit 0 of each word into the top bit (vpsllw $15),
; moves the words to mask register k0 (vpmovw2m) and expands the mask into
; 64-bit lanes (vpmovm2q) for the return value.
128 define <2 x i1> @strict_vector_fptosi_v2f16_to_v2i1(<2 x half> %a) #0 {
129 ; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i1:
131 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
132 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
133 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
134 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
135 ; CHECK-NEXT: vpmovw2m %xmm0, %k0
136 ; CHECK-NEXT: vpmovm2q %k0, %xmm0
137 ; CHECK-NEXT: ret{{[l|q]}}
138 %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half> %a,
139 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <2 x half> -> <2 x i1>.
; Expected code matches the signed v2i1 test except that the conversion
; instruction is vcvttph2uw: zero unused lanes, convert to words, shift
; bit 0 into the top bit, move to mask k0, expand to 64-bit lanes.
143 define <2 x i1> @strict_vector_fptoui_v2f16_to_v2i1(<2 x half> %a) #0 {
144 ; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i1:
146 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
147 ; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
148 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
149 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
150 ; CHECK-NEXT: vpmovw2m %xmm0, %k0
151 ; CHECK-NEXT: vpmovm2q %k0, %xmm0
152 ; CHECK-NEXT: ret{{[l|q]}}
153 %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half> %a,
154 metadata !"fpexcept.strict") #0
; Strict signed conversion <4 x half> -> <4 x i32>.
; Expected code keeps the low 64 bits of the source (the four half elements)
; and zeroes the upper half with vmovq, then uses vcvttph2dq.
; NOTE(review): the zeroing is presumably there so that unused lanes cannot
; raise floating-point exceptions under fpexcept.strict - confirm.
158 define <4 x i32> @strict_vector_fptosi_v4f16_to_v4i32(<4 x half> %a) #0 {
159 ; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i32:
161 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
162 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
163 ; CHECK-NEXT: ret{{[l|q]}}
164 %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half> %a,
165 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <4 x half> -> <4 x i32>.
; Expected code zeroes the upper 64 bits of the source with vmovq and then
; uses the unsigned form vcvttph2udq.
169 define <4 x i32> @strict_vector_fptoui_v4f16_to_v4i32(<4 x half> %a) #0 {
170 ; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i32:
172 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
173 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
174 ; CHECK-NEXT: ret{{[l|q]}}
175 %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half> %a,
176 metadata !"fpexcept.strict") #0
; Strict signed conversion <4 x half> -> <4 x i16>.
; Expected code zeroes the upper 64 bits of the source with vmovq and then
; uses vcvttph2w.
180 define <4 x i16> @strict_vector_fptosi_v4f16_to_v4i16(<4 x half> %a) #0 {
181 ; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i16:
183 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
184 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
185 ; CHECK-NEXT: ret{{[l|q]}}
186 %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half> %a,
187 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <4 x half> -> <4 x i16>.
; Expected code zeroes the upper 64 bits of the source with vmovq and then
; uses the unsigned form vcvttph2uw.
191 define <4 x i16> @strict_vector_fptoui_v4f16_to_v4i16(<4 x half> %a) #0 {
192 ; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i16:
194 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
195 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
196 ; CHECK-NEXT: ret{{[l|q]}}
197 %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half> %a,
198 metadata !"fpexcept.strict") #0
; Strict signed conversion <4 x half> -> <4 x i8>.
; Expected code zeroes the upper 64 bits of the source with vmovq, converts
; to 16-bit words with vcvttph2w, then narrows words to bytes with vpmovwb.
202 define <4 x i8> @strict_vector_fptosi_v4f16_to_v4i8(<4 x half> %a) #0 {
203 ; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i8:
205 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
206 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
207 ; CHECK-NEXT: vpmovwb %xmm0, %xmm0
208 ; CHECK-NEXT: ret{{[l|q]}}
209 %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half> %a,
210 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <4 x half> -> <4 x i8>.
; Expected code zeroes the upper 64 bits of the source with vmovq, converts
; to 16-bit words with vcvttph2uw, then narrows words to bytes with vpmovwb.
214 define <4 x i8> @strict_vector_fptoui_v4f16_to_v4i8(<4 x half> %a) #0 {
215 ; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i8:
217 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
218 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
219 ; CHECK-NEXT: vpmovwb %xmm0, %xmm0
220 ; CHECK-NEXT: ret{{[l|q]}}
221 %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half> %a,
222 metadata !"fpexcept.strict") #0
; Strict signed conversion <4 x half> -> <4 x i1>.
; Expected code zeroes the upper 64 bits of the source (vmovq), converts to
; 16-bit words (vcvttph2w), shifts bit 0 of each word into the top bit
; (vpsllw $15), moves the words to mask register k0 (vpmovw2m) and expands
; the mask into 32-bit lanes (vpmovm2d) for the return value.
226 define <4 x i1> @strict_vector_fptosi_v4f16_to_v4i1(<4 x half> %a) #0 {
227 ; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i1:
229 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
230 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
231 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
232 ; CHECK-NEXT: vpmovw2m %xmm0, %k0
233 ; CHECK-NEXT: vpmovm2d %k0, %xmm0
234 ; CHECK-NEXT: ret{{[l|q]}}
235 %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half> %a,
236 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <4 x half> -> <4 x i1>.
; Expected code matches the signed v4i1 test except that the conversion
; instruction is vcvttph2uw: zero the upper 64 bits, convert to words, shift
; bit 0 into the top bit, move to mask k0, expand to 32-bit lanes.
240 define <4 x i1> @strict_vector_fptoui_v4f16_to_v4i1(<4 x half> %a) #0 {
241 ; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i1:
243 ; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
244 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
245 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
246 ; CHECK-NEXT: vpmovw2m %xmm0, %k0
247 ; CHECK-NEXT: vpmovm2d %k0, %xmm0
248 ; CHECK-NEXT: ret{{[l|q]}}
249 %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half> %a,
250 metadata !"fpexcept.strict") #0
; Strict signed conversion <8 x half> -> <8 x i16>.
; The source fills the whole xmm register, so the expected code is a single
; vcvttph2w with no lane zeroing beforehand.
254 define <8 x i16> @strict_vector_fptosi_v8f16_to_v8i16(<8 x half> %a) #0 {
255 ; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i16:
257 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
258 ; CHECK-NEXT: ret{{[l|q]}}
259 %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half> %a,
260 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <8 x half> -> <8 x i16>.
; The source fills the whole xmm register, so the expected code is a single
; vcvttph2uw with no lane zeroing beforehand.
264 define <8 x i16> @strict_vector_fptoui_v8f16_to_v8i16(<8 x half> %a) #0 {
265 ; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i16:
267 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
268 ; CHECK-NEXT: ret{{[l|q]}}
269 %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half> %a,
270 metadata !"fpexcept.strict") #0
; Strict signed conversion <8 x half> -> <8 x i8>.
; Expected code converts to 16-bit words with vcvttph2w and then narrows the
; words to bytes with vpmovwb.
274 define <8 x i8> @strict_vector_fptosi_v8f16_to_v8i8(<8 x half> %a) #0 {
275 ; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i8:
277 ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
278 ; CHECK-NEXT: vpmovwb %xmm0, %xmm0
279 ; CHECK-NEXT: ret{{[l|q]}}
280 %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half> %a,
281 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <8 x half> -> <8 x i8>.
; Expected code converts to 16-bit words with vcvttph2uw and then narrows
; the words to bytes with vpmovwb.
285 define <8 x i8> @strict_vector_fptoui_v8f16_to_v8i8(<8 x half> %a) #0 {
286 ; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i8:
288 ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
289 ; CHECK-NEXT: vpmovwb %xmm0, %xmm0
290 ; CHECK-NEXT: ret{{[l|q]}}
291 %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half> %a,
292 metadata !"fpexcept.strict") #0
; Strict signed conversion <8 x half> -> <8 x i1>.
; Expected code widens to eight 32-bit integers in ymm0 (vcvttph2dq), moves
; them to mask register k0 (vpmovd2m), expands the mask into 16-bit lanes
; (vpmovm2w) and issues vzeroupper before returning. Unlike the unsigned
; v8i1 test, no shift is expected between the conversion and vpmovd2m.
296 define <8 x i1> @strict_vector_fptosi_v8f16_to_v8i1(<8 x half> %a) #0 {
297 ; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i1:
299 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
300 ; CHECK-NEXT: vpmovd2m %ymm0, %k0
301 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
302 ; CHECK-NEXT: vzeroupper
303 ; CHECK-NEXT: ret{{[l|q]}}
304 %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half> %a,
305 metadata !"fpexcept.strict") #0
; Strict unsigned conversion <8 x half> -> <8 x i1>.
; Expected code widens to eight 32-bit integers in ymm0 with the signed
; instruction vcvttph2dq, shifts bit 0 of each dword into the top bit
; (vpslld $31), moves the dwords to mask register k0 (vpmovd2m), expands the
; mask into 16-bit lanes (vpmovm2w) and issues vzeroupper before returning.
309 define <8 x i1> @strict_vector_fptoui_v8f16_to_v8i1(<8 x half> %a) #0 {
310 ; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i1:
312 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
313 ; CHECK-NEXT: vpslld $31, %ymm0, %ymm0
314 ; CHECK-NEXT: vpmovd2m %ymm0, %k0
315 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
316 ; CHECK-NEXT: vzeroupper
317 ; CHECK-NEXT: ret{{[l|q]}}
318 %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half> %a,
319 metadata !"fpexcept.strict") #0
; Attribute group #0, referenced by the test functions and their intrinsic
; call sites, sets strictfp.
323 attributes #0 = { strictfp }