; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL

declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata)
declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f64(<4 x double>, metadata)
declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f64(<4 x double>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f64(<4 x double>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f64(<4 x double>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float>, metadata)
declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double>, metadata)
declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f32(<8 x float>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float>, metadata)
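
; The functions below exercise strict (constrained) vector fp-to-int
; conversions for the 256-bit cases: v4f64 and v4f32 sources into i64, i32,
; i16, i8 and i1 elements, plus v8f32 sources into i32 and narrower. Every
; call uses !"fpexcept.strict", and the RUN lines above cover AVX, AVX512F,
; AVX512VL, AVX512DQ and AVX512DQ+VL on both 32-bit and 64-bit triples.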
define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vmovhps %xmm0, (%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX-64-NEXT: vmovq %rax, %xmm1
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX-64-NEXT: vmovq %rax, %xmm0
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $32, %esp
; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovhps %xmm0, (%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
; AVX512VL-32-NEXT: movl %esp, %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT: andl $-8, %esp
; AVX512VL-32-NEXT: subl $32, %esp
; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vmovhps %xmm0, (%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-32-NEXT: movl %ebp, %esp
; AVX512VL-32-NEXT: popl %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2qq %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i64> %ret
}

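; Note on the fptoui checks below: without AVX512DQ there is no unsigned
; 64-bit vector conversion instruction, so each element is compared against
; 2^63 (the 9.2233720368547758E+18 constant), the constant is conditionally
; subtracted, the value is truncated with vcvttsd2si / fisttpll, and the sign
; bit is xor-ed back in based on the comparison result.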
define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; AVX-32-NEXT: vcomisd %xmm1, %xmm2
; AVX-32-NEXT: vmovapd %xmm1, %xmm3
; AVX-32-NEXT: jae .LBB1_2
; AVX-32-NEXT: # %bb.1:
; AVX-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; AVX-32-NEXT: .LBB1_2:
; AVX-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %al
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vmovapd %xmm1, %xmm4
; AVX-32-NEXT: jae .LBB1_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: .LBB1_4:
; AVX-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovsd %xmm3, (%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vcomisd %xmm1, %xmm2
; AVX-32-NEXT: vmovapd %xmm1, %xmm3
; AVX-32-NEXT: jae .LBB1_6
; AVX-32-NEXT: # %bb.5:
; AVX-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; AVX-32-NEXT: .LBB1_6:
; AVX-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %dl
; AVX-32-NEXT: movzbl %dl, %edx
; AVX-32-NEXT: shll $31, %edx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX-32-NEXT: vcomisd %xmm1, %xmm0
; AVX-32-NEXT: jae .LBB1_8
; AVX-32-NEXT: # %bb.7:
; AVX-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-32-NEXT: .LBB1_8:
; AVX-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
; AVX-32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; AVX-64-NEXT: vcomisd %xmm1, %xmm3
; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-64-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-64-NEXT: jb .LBB1_2
; AVX-64-NEXT: # %bb.1:
; AVX-64-NEXT: vmovapd %xmm1, %xmm4
; AVX-64-NEXT: .LBB1_2:
; AVX-64-NEXT: vsubsd %xmm4, %xmm3, %xmm4
; AVX-64-NEXT: vcvttsd2si %xmm4, %rcx
; AVX-64-NEXT: setae %al
; AVX-64-NEXT: movzbl %al, %eax
; AVX-64-NEXT: shlq $63, %rax
; AVX-64-NEXT: xorq %rcx, %rax
; AVX-64-NEXT: vshufpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX-64-NEXT: vcomisd %xmm1, %xmm4
; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5
; AVX-64-NEXT: jb .LBB1_4
; AVX-64-NEXT: # %bb.3:
; AVX-64-NEXT: vmovapd %xmm1, %xmm5
; AVX-64-NEXT: .LBB1_4:
; AVX-64-NEXT: vmovq %rax, %xmm3
; AVX-64-NEXT: vsubsd %xmm5, %xmm4, %xmm4
; AVX-64-NEXT: vcvttsd2si %xmm4, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: movzbl %cl, %ecx
; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5
; AVX-64-NEXT: jb .LBB1_6
; AVX-64-NEXT: # %bb.5:
; AVX-64-NEXT: vmovapd %xmm1, %xmm5
; AVX-64-NEXT: .LBB1_6:
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX-64-NEXT: vsubsd %xmm5, %xmm0, %xmm4
; AVX-64-NEXT: vcvttsd2si %xmm4, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: movzbl %cl, %ecx
; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
; AVX-64-NEXT: jb .LBB1_8
; AVX-64-NEXT: # %bb.7:
; AVX-64-NEXT: vmovapd %xmm1, %xmm2
; AVX-64-NEXT: .LBB1_8:
; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: movzbl %cl, %ecx
; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: pushl %ebx
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $40, %esp
; AVX512F-32-NEXT: .cfi_offset %ebx, -12
; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; AVX512F-32-NEXT: xorl %eax, %eax
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: kmovw %eax, %k1
; AVX512F-32-NEXT: vmovsd %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512F-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
; AVX512F-32-NEXT: vmovsd %xmm3, (%esp)
; AVX512F-32-NEXT: xorl %edx, %edx
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512F-32-NEXT: setae %dl
; AVX512F-32-NEXT: kmovw %edx, %k1
; AVX512F-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512F-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-32-NEXT: xorl %ecx, %ecx
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: kmovw %ecx, %k1
; AVX512F-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512F-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: xorl %ebx, %ebx
; AVX512F-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512F-32-NEXT: setae %bl
; AVX512F-32-NEXT: kmovw %ebx, %k1
; AVX512F-32-NEXT: vmovsd %xmm1, %xmm1, %xmm1 {%k1} {z}
; AVX512F-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: shll $31, %eax
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: shll $31, %edx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: shll $31, %ebx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-32-NEXT: leal -4(%ebp), %esp
; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
; AVX512VL-32-NEXT: movl %esp, %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT: pushl %ebx
; AVX512VL-32-NEXT: andl $-8, %esp
; AVX512VL-32-NEXT: subl $40, %esp
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm4 {%k1} {z}
; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
; AVX512VL-32-NEXT: vmovsd %xmm3, (%esp)
; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: kmovw %edx, %k1
; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: xorl %ebx, %ebx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512VL-32-NEXT: setae %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm1 {%k1} {z}
; AVX512VL-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: shll $31, %ebx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-32-NEXT: leal -4(%ebp), %esp
; AVX512VL-32-NEXT: popl %ebx
; AVX512VL-32-NEXT: popl %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2uqq %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i64> %ret
}

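; The f32-source variants below follow the same pattern as the f64 ones
; above: 32-bit targets spill the scalars and use flds + fisttpll, while
; 64-bit targets without AVX512DQ convert element by element with
; vcvttss2si / vcvttss2usi.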
define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $3, %xmm0, (%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds (%esp)
; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-64-NEXT: vcvttss2si %xmm1, %rax
; AVX-64-NEXT: vmovq %rax, %xmm1
; AVX-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-64-NEXT: vcvttss2si %xmm2, %rax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
; AVX-64-NEXT: vmovq %rax, %xmm0
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $32, %esp
; AVX512F-32-NEXT: vmovd %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vextractps $3, %xmm0, (%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-64-NEXT: vcvttss2si %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
; AVX512VL-32-NEXT: movl %esp, %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT: andl $-8, %esp
; AVX512VL-32-NEXT: subl $32, %esp
; AVX512VL-32-NEXT: vmovd %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vextractps $3, %xmm0, (%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-32-NEXT: movl %ebp, %esp
; AVX512VL-32-NEXT: popl %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2si %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2qq %xmm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i64> %ret
}

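; As with the f64 case, fptoui of f32 elements to i64 without AVX512DQ is
; lowered with a vcomiss against 9.22337203E+18 (2^63), a conditional
; subtract, and a sign-bit fixup after the truncating conversion.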
define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX-32-NEXT: vcomiss %xmm1, %xmm2
; AVX-32-NEXT: vmovaps %xmm1, %xmm3
; AVX-32-NEXT: jae .LBB3_2
; AVX-32-NEXT: # %bb.1:
; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX-32-NEXT: .LBB3_2:
; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %al
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX-32-NEXT: vcomiss %xmm1, %xmm2
; AVX-32-NEXT: vmovaps %xmm1, %xmm3
; AVX-32-NEXT: jae .LBB3_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX-32-NEXT: .LBB3_4:
; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX-32-NEXT: vmovss %xmm2, (%esp)
; AVX-32-NEXT: flds (%esp)
; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-32-NEXT: vcomiss %xmm1, %xmm2
; AVX-32-NEXT: vmovaps %xmm1, %xmm3
; AVX-32-NEXT: jae .LBB3_6
; AVX-32-NEXT: # %bb.5:
; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX-32-NEXT: .LBB3_6:
; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %dl
; AVX-32-NEXT: movzbl %dl, %edx
; AVX-32-NEXT: shll $31, %edx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX-32-NEXT: vcomiss %xmm1, %xmm0
; AVX-32-NEXT: jae .LBB3_8
; AVX-32-NEXT: # %bb.7:
; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-32-NEXT: .LBB3_8:
; AVX-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
; AVX-32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vshufps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX-64-NEXT: vcomiss %xmm1, %xmm3
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-64-NEXT: jb .LBB3_2
; AVX-64-NEXT: # %bb.1:
; AVX-64-NEXT: vmovaps %xmm1, %xmm4
; AVX-64-NEXT: .LBB3_2:
; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-64-NEXT: vcvttss2si %xmm3, %rcx
; AVX-64-NEXT: setae %al
; AVX-64-NEXT: movzbl %al, %eax
; AVX-64-NEXT: shlq $63, %rax
; AVX-64-NEXT: xorq %rcx, %rax
; AVX-64-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX-64-NEXT: vcomiss %xmm1, %xmm4
; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5
; AVX-64-NEXT: jb .LBB3_4
; AVX-64-NEXT: # %bb.3:
; AVX-64-NEXT: vmovaps %xmm1, %xmm5
; AVX-64-NEXT: .LBB3_4:
; AVX-64-NEXT: vmovq %rax, %xmm3
; AVX-64-NEXT: vsubss %xmm5, %xmm4, %xmm4
; AVX-64-NEXT: vcvttss2si %xmm4, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: movzbl %cl, %ecx
; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5
; AVX-64-NEXT: jb .LBB3_6
; AVX-64-NEXT: # %bb.5:
; AVX-64-NEXT: vmovaps %xmm1, %xmm5
; AVX-64-NEXT: .LBB3_6:
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX-64-NEXT: vsubss %xmm5, %xmm0, %xmm4
; AVX-64-NEXT: vcvttss2si %xmm4, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: movzbl %cl, %ecx
; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
; AVX-64-NEXT: jb .LBB3_8
; AVX-64-NEXT: # %bb.7:
; AVX-64-NEXT: vmovaps %xmm1, %xmm2
; AVX-64-NEXT: .LBB3_8:
; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: movzbl %cl, %ecx
; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: pushl %ebx
; AVX512F-32-NEXT: andl $-8, %esp
; AVX512F-32-NEXT: subl $40, %esp
; AVX512F-32-NEXT: .cfi_offset %ebx, -12
; AVX512F-32-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX512F-32-NEXT: xorl %eax, %eax
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: kmovw %eax, %k1
; AVX512F-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512F-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovss %xmm2, (%esp)
; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-32-NEXT: xorl %edx, %edx
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512F-32-NEXT: setae %dl
; AVX512F-32-NEXT: kmovw %edx, %k1
; AVX512F-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512F-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512F-32-NEXT: xorl %ecx, %ecx
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: kmovw %ecx, %k1
; AVX512F-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512F-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: xorl %ebx, %ebx
; AVX512F-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512F-32-NEXT: setae %bl
; AVX512F-32-NEXT: kmovw %ebx, %k1
; AVX512F-32-NEXT: vmovss %xmm1, %xmm1, %xmm1 {%k1} {z}
; AVX512F-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX512F-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: shll $31, %eax
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: shll $31, %edx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: shll $31, %ebx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512F-32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-32-NEXT: leal -4(%ebp), %esp
; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-64-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm1
; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm2
; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-64-NEXT: vmovq %rax, %xmm0
; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
; AVX512VL-32-NEXT: movl %esp, %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT: pushl %ebx
; AVX512VL-32-NEXT: andl $-8, %esp
; AVX512VL-32-NEXT: subl $40, %esp
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovss %xmm2, (%esp)
; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: kmovw %edx, %k1
; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: xorl %ebx, %ebx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512VL-32-NEXT: setae %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm1 {%k1} {z}
; AVX512VL-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX512VL-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: shll $31, %ebx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-32-NEXT: leal -4(%ebp), %esp
; AVX512VL-32-NEXT: popl %ebx
; AVX512VL-32-NEXT: popl %ebp
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-64-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2uqq %xmm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i64> %ret
}

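; Conversions from v4f64 to i32 and narrower all start from vcvttpd2dq; the
; narrower result types are then produced with pack or truncate instructions,
; and the unsigned i32 case either adjusts around 2^31 (plain AVX) or uses
; vcvttpd2udq (AVX512), as the checks below show.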
define <4 x i32> @strict_vector_fptosi_v4f64_to_v4i32(<4 x double> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v4f64_to_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i32> %ret
}

define <4 x i32> @strict_vector_fptoui_v4f64_to_v4i32(<4 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2]
; AVX-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-NEXT: vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; AVX-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-NEXT: vblendvpd %ymm2, %ymm4, %ymm1, %ymm1
; AVX-NEXT: vsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX-NEXT: vxorpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2udq %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i32> %ret
}

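; For i16 and i8 results the signed cases use the saturating signed packs
; (vpackssdw / vpacksswb) while the unsigned cases use the unsigned packs
; (vpackusdw / vpackuswb); AVX512VL targets can use vpmovdb for the i8 case
; instead.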
define <4 x i16> @strict_vector_fptosi_v4f64_to_v4i16(<4 x double> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v4f64_to_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f64(<4 x double> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i16> %ret
}

define <4 x i16> @strict_vector_fptoui_v4f64_to_v4i16(<4 x double> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v4f64_to_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f64(<4 x double> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i16> %ret
}

define <4 x i8> @strict_vector_fptosi_v4f64_to_v4i8(<4 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQVL-NEXT: vpmovdb %xmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f64(<4 x double> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i8> %ret
}

define <4 x i8> @strict_vector_fptoui_v4f64_to_v4i8(<4 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQVL-NEXT: vpmovdb %xmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f64(<4 x double> %a,
          metadata !"fpexcept.strict") #0
  ret <4 x i8> %ret
}

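; i1 results are produced by converting to i32 first; AVX512 targets then
; move the value into a mask register (vptestmd / vpmovd2m) and expand it
; back to a vector, while plain AVX simply keeps the vcvttpd2dq result.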
define <4 x i1> @strict_vector_fptosi_v4f64_to_v4i1(<4 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0
; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i1> %ret
}

define <4 x i1> @strict_vector_fptoui_v4f64_to_v4i1(<4 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQVL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0
; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i1> %ret
}

define <8 x i32> @strict_vector_fptosi_v8f32_to_v8i32(<8 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v8f32_to_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i32> %ret
}

define <8 x i32> @strict_vector_fptoui_v8f32_to_v8i32(<8 x float> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX-NEXT: vbroadcastss {{.*#+}} ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX-NEXT: vblendvps %ymm2, %ymm3, %ymm4, %ymm4
; AVX-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1
; AVX-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: vxorps %ymm4, %ymm0, %ymm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i32> %ret
}

define <8 x i16> @strict_vector_fptosi_v8f32_to_v8i16(<8 x float> %a) #0 {
; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i16> %ret
}

define <8 x i16> @strict_vector_fptoui_v8f32_to_v8i16(<8 x float> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i16> %ret
}

define <8 x i8> @strict_vector_fptosi_v8f32_to_v8i8(<8 x float> %a) #0 {
; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovdb %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i8> %ret
}

define <8 x i8> @strict_vector_fptoui_v8f32_to_v8i8(<8 x float> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovdb %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i8> %ret
}

define <8 x i1> @strict_vector_fptosi_v8f32_to_v8i1(<8 x float> %a) #0 {
; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0
; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i1> %ret
}

define <8 x i1> @strict_vector_fptoui_v8f32_to_v8i1(<8 x float> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQVL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0
; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <8 x i1> %ret
}

attributes #0 = { strictfp }