1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefix=SSE-32
3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefix=SSE-64
4 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX-32
5 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX-64
6 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX512F,AVX512F-32
7 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX512F,AVX512F-64
8 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VL,AVX512VL-32
9 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VL,AVX512VL-64
10 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=AVX512DQ,AVX512DQ-32
11 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=AVX512DQ,AVX512DQ-64
12 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VLDQ,AVX512VLDQ-32
13 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VLDQ,AVX512VLDQ-64
; Declarations of the constrained (strict floating-point) fptosi / fptoui
; intrinsics, one signed/unsigned pair per source/destination vector type:
;   <2 x double> and <2 x float> sources to <2 x i64>, <2 x i32>, <2 x i16>,
;   <2 x i8> and <2 x i1>, and <4 x float> sources to <4 x i32>, <4 x i16>,
;   <4 x i8> and <4 x i1>.
; Each intrinsic takes the source vector plus one metadata operand.
15 declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
16 declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
17 declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
18 declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
19 declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
20 declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
21 declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
22 declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
23 declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double>, metadata)
24 declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double>, metadata)
25 declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float>, metadata)
26 declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float>, metadata)
27 declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double>, metadata)
28 declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double>, metadata)
29 declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float>, metadata)
30 declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float>, metadata)
31 declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double>, metadata)
32 declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double>, metadata)
33 declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float>, metadata)
34 declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float>, metadata)
35 declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
36 declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
37 declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f32(<4 x float>, metadata)
38 declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f32(<4 x float>, metadata)
39 declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float>, metadata)
40 declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float>, metadata)
41 declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float>, metadata)
42 declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float>, metadata)
; Strict-FP signed conversion of <2 x double> to <2 x i64>. The IR body is a
; single call to the constrained fptosi intrinsic with "fpexcept.strict"
; exception metadata. The check lines are autogenerated (see the note on the
; first line of the file); regenerate them with update_llc_test_checks.py
; rather than editing them by hand.
;
; Expected lowering as recorded in the check lines below:
;  - for the SSE-32 prefix, both elements are stored to the stack and converted
;    with x87 fldl/fistpll, with the control word saved (fnstcw), modified and
;    reloaded (fldcw) around each fistpll;
;  - for the AVX-32, AVX512F-32 and AVX512VL-32 prefixes, each element goes
;    through x87 fldl/fisttpll and the result is rebuilt with vmovd/vpinsrd;
;  - for the 64-bit SSE, AVX, AVX512F and AVX512VL prefixes, two scalar
;    (v)cvttsd2si conversions are joined with (v)punpcklqdq;
;  - for the AVX512DQ prefix, one vcvttpd2qq on zmm0 followed by vzeroupper;
;  - for the AVX512VLDQ prefix, one vcvttpd2qq on xmm0.
;
; NOTE(review): no `ret` or closing `}` is visible for this function in the
; text under review, and some prefix groups lack the "# %bb.0" and return
; check lines that the AVX512F groups have -- confirm against the checked-in
; test before relying on this copy.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file.
44 define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
45 ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
47 ; SSE-32-NEXT: pushl %ebp
48 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
49 ; SSE-32-NEXT: .cfi_offset %ebp, -8
50 ; SSE-32-NEXT: movl %esp, %ebp
51 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
52 ; SSE-32-NEXT: andl $-8, %esp
53 ; SSE-32-NEXT: subl $24, %esp
54 ; SSE-32-NEXT: movhps %xmm0, {{[0-9]+}}(%esp)
55 ; SSE-32-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
56 ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
58 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
59 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
60 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
61 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
62 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
63 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
64 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
65 ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
67 ; SSE-32-NEXT: fnstcw (%esp)
68 ; SSE-32-NEXT: movzwl (%esp), %eax
69 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
70 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
71 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
72 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
73 ; SSE-32-NEXT: fldcw (%esp)
74 ; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
75 ; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
76 ; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
77 ; SSE-32-NEXT: movl %ebp, %esp
78 ; SSE-32-NEXT: popl %ebp
79 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
82 ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
84 ; SSE-64-NEXT: cvttsd2si %xmm0, %rax
85 ; SSE-64-NEXT: movq %rax, %xmm1
86 ; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
87 ; SSE-64-NEXT: cvttsd2si %xmm0, %rax
88 ; SSE-64-NEXT: movq %rax, %xmm0
89 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
90 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
93 ; AVX-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
95 ; AVX-32-NEXT: pushl %ebp
96 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
97 ; AVX-32-NEXT: .cfi_offset %ebp, -8
98 ; AVX-32-NEXT: movl %esp, %ebp
99 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
100 ; AVX-32-NEXT: andl $-8, %esp
101 ; AVX-32-NEXT: subl $16, %esp
102 ; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
103 ; AVX-32-NEXT: vmovhps %xmm0, (%esp)
104 ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
105 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
106 ; AVX-32-NEXT: fldl (%esp)
107 ; AVX-32-NEXT: fisttpll (%esp)
109 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
110 ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
111 ; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
112 ; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
113 ; AVX-32-NEXT: movl %ebp, %esp
114 ; AVX-32-NEXT: popl %ebp
115 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
118 ; AVX-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
120 ; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
121 ; AVX-64-NEXT: vmovq %rax, %xmm1
122 ; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
123 ; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
124 ; AVX-64-NEXT: vmovq %rax, %xmm0
125 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
128 ; AVX512F-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
129 ; AVX512F-32: # %bb.0:
130 ; AVX512F-32-NEXT: pushl %ebp
131 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
132 ; AVX512F-32-NEXT: .cfi_offset %ebp, -8
133 ; AVX512F-32-NEXT: movl %esp, %ebp
134 ; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
135 ; AVX512F-32-NEXT: andl $-8, %esp
136 ; AVX512F-32-NEXT: subl $16, %esp
137 ; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
138 ; AVX512F-32-NEXT: vmovhps %xmm0, (%esp)
139 ; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
140 ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
141 ; AVX512F-32-NEXT: fldl (%esp)
142 ; AVX512F-32-NEXT: fisttpll (%esp)
143 ; AVX512F-32-NEXT: wait
144 ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
145 ; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
146 ; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
147 ; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
148 ; AVX512F-32-NEXT: movl %ebp, %esp
149 ; AVX512F-32-NEXT: popl %ebp
150 ; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
151 ; AVX512F-32-NEXT: retl
153 ; AVX512F-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
154 ; AVX512F-64: # %bb.0:
155 ; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
156 ; AVX512F-64-NEXT: vmovq %rax, %xmm1
157 ; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
158 ; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
159 ; AVX512F-64-NEXT: vmovq %rax, %xmm0
160 ; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
161 ; AVX512F-64-NEXT: retq
163 ; AVX512VL-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
164 ; AVX512VL-32: # %bb.0:
165 ; AVX512VL-32-NEXT: pushl %ebp
166 ; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
167 ; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
168 ; AVX512VL-32-NEXT: movl %esp, %ebp
169 ; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
170 ; AVX512VL-32-NEXT: andl $-8, %esp
171 ; AVX512VL-32-NEXT: subl $16, %esp
172 ; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
173 ; AVX512VL-32-NEXT: vmovhps %xmm0, (%esp)
174 ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
175 ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
176 ; AVX512VL-32-NEXT: fldl (%esp)
177 ; AVX512VL-32-NEXT: fisttpll (%esp)
178 ; AVX512VL-32-NEXT: wait
179 ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
180 ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
181 ; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
182 ; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
183 ; AVX512VL-32-NEXT: movl %ebp, %esp
184 ; AVX512VL-32-NEXT: popl %ebp
185 ; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
186 ; AVX512VL-32-NEXT: retl
188 ; AVX512VL-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
189 ; AVX512VL-64: # %bb.0:
190 ; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
191 ; AVX512VL-64-NEXT: vmovq %rax, %xmm1
192 ; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
193 ; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
194 ; AVX512VL-64-NEXT: vmovq %rax, %xmm0
195 ; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
196 ; AVX512VL-64-NEXT: retq
198 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
200 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
201 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
202 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
203 ; AVX512DQ-NEXT: vzeroupper
204 ; AVX512DQ-NEXT: ret{{[l|q]}}
206 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
207 ; AVX512VLDQ: # %bb.0:
208 ; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
209 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
210 %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %a,
211 metadata !"fpexcept.strict") #0
; Strict-FP unsigned conversion of <2 x double> to <2 x i64>. The IR body is a
; single call to the constrained fptoui intrinsic with "fpexcept.strict"
; exception metadata. The check lines are autogenerated (see the note on the
; first line of the file); regenerate them with update_llc_test_checks.py
; rather than editing them by hand.
;
; Expected lowering as recorded in the check lines below:
;  - for the SSE-32, SSE-64, AVX-32 and AVX-64 prefixes, each element is
;    compared ((v)comisd) against a value loaded from memory (presumably the
;    2^63 threshold -- the operand itself is not visible in the check lines),
;    either that value or zero is subtracted, the difference goes through the
;    signed conversion path, and the setae result is shifted into the top bit
;    (shll $31 on the high dword, or shlq $63) and xor-ed into the result;
;  - for the AVX512F-32 and AVX512VL-32 prefixes, the same scheme is used but
;    the subtrahend is selected with a zero-masked vmovsd under %k1;
;  - for the AVX512F-64 and AVX512VL-64 prefixes, two scalar vcvttsd2usi
;    conversions are joined with vpunpcklqdq;
;  - for the AVX512DQ prefix, one vcvttpd2uqq on zmm0 followed by vzeroupper;
;  - for the AVX512VLDQ prefix, one vcvttpd2uqq on xmm0.
;
; NOTE(review): no `ret` or closing `}` is visible for this function in the
; text under review, and some prefix groups lack the "# %bb.0" and return
; check lines that the AVX512F groups have -- confirm against the checked-in
; test before relying on this copy.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file.
215 define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
216 ; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
218 ; SSE-32-NEXT: pushl %ebp
219 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
220 ; SSE-32-NEXT: .cfi_offset %ebp, -8
221 ; SSE-32-NEXT: movl %esp, %ebp
222 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
223 ; SSE-32-NEXT: andl $-8, %esp
224 ; SSE-32-NEXT: subl $24, %esp
225 ; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
226 ; SSE-32-NEXT: comisd %xmm1, %xmm0
227 ; SSE-32-NEXT: movapd %xmm1, %xmm2
228 ; SSE-32-NEXT: jae .LBB1_2
229 ; SSE-32-NEXT: # %bb.1:
230 ; SSE-32-NEXT: xorpd %xmm2, %xmm2
231 ; SSE-32-NEXT: .LBB1_2:
232 ; SSE-32-NEXT: movapd %xmm0, %xmm3
233 ; SSE-32-NEXT: subsd %xmm2, %xmm3
234 ; SSE-32-NEXT: movsd %xmm3, {{[0-9]+}}(%esp)
235 ; SSE-32-NEXT: setae %al
236 ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
238 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
239 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
240 ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
241 ; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
242 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
243 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
244 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
245 ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
246 ; SSE-32-NEXT: comisd %xmm1, %xmm0
247 ; SSE-32-NEXT: jae .LBB1_4
248 ; SSE-32-NEXT: # %bb.3:
249 ; SSE-32-NEXT: xorpd %xmm1, %xmm1
250 ; SSE-32-NEXT: .LBB1_4:
251 ; SSE-32-NEXT: subsd %xmm1, %xmm0
252 ; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
253 ; SSE-32-NEXT: setae %cl
254 ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
256 ; SSE-32-NEXT: fnstcw (%esp)
257 ; SSE-32-NEXT: movzwl (%esp), %edx
258 ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
259 ; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
260 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
261 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
262 ; SSE-32-NEXT: fldcw (%esp)
263 ; SSE-32-NEXT: movzbl %al, %eax
264 ; SSE-32-NEXT: shll $31, %eax
265 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
266 ; SSE-32-NEXT: movd %eax, %xmm1
267 ; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
268 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
269 ; SSE-32-NEXT: movzbl %cl, %eax
270 ; SSE-32-NEXT: shll $31, %eax
271 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
272 ; SSE-32-NEXT: movd %eax, %xmm1
273 ; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
274 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
275 ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
276 ; SSE-32-NEXT: movl %ebp, %esp
277 ; SSE-32-NEXT: popl %ebp
278 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
281 ; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
283 ; SSE-64-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
284 ; SSE-64-NEXT: comisd %xmm3, %xmm0
285 ; SSE-64-NEXT: xorpd %xmm2, %xmm2
286 ; SSE-64-NEXT: xorpd %xmm1, %xmm1
287 ; SSE-64-NEXT: jb .LBB1_2
288 ; SSE-64-NEXT: # %bb.1:
289 ; SSE-64-NEXT: movapd %xmm3, %xmm1
290 ; SSE-64-NEXT: .LBB1_2:
291 ; SSE-64-NEXT: movapd %xmm0, %xmm4
292 ; SSE-64-NEXT: subsd %xmm1, %xmm4
293 ; SSE-64-NEXT: cvttsd2si %xmm4, %rax
294 ; SSE-64-NEXT: setae %cl
295 ; SSE-64-NEXT: movzbl %cl, %ecx
296 ; SSE-64-NEXT: shlq $63, %rcx
297 ; SSE-64-NEXT: xorq %rax, %rcx
298 ; SSE-64-NEXT: movq %rcx, %xmm1
299 ; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
300 ; SSE-64-NEXT: comisd %xmm3, %xmm0
301 ; SSE-64-NEXT: jb .LBB1_4
302 ; SSE-64-NEXT: # %bb.3:
303 ; SSE-64-NEXT: movapd %xmm3, %xmm2
304 ; SSE-64-NEXT: .LBB1_4:
305 ; SSE-64-NEXT: subsd %xmm2, %xmm0
306 ; SSE-64-NEXT: cvttsd2si %xmm0, %rax
307 ; SSE-64-NEXT: setae %cl
308 ; SSE-64-NEXT: movzbl %cl, %ecx
309 ; SSE-64-NEXT: shlq $63, %rcx
310 ; SSE-64-NEXT: xorq %rax, %rcx
311 ; SSE-64-NEXT: movq %rcx, %xmm0
312 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
313 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
316 ; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
318 ; AVX-32-NEXT: pushl %ebp
319 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
320 ; AVX-32-NEXT: .cfi_offset %ebp, -8
321 ; AVX-32-NEXT: movl %esp, %ebp
322 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
323 ; AVX-32-NEXT: andl $-8, %esp
324 ; AVX-32-NEXT: subl $16, %esp
325 ; AVX-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
326 ; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
327 ; AVX-32-NEXT: vcomisd %xmm1, %xmm2
328 ; AVX-32-NEXT: vmovapd %xmm1, %xmm3
329 ; AVX-32-NEXT: jae .LBB1_2
330 ; AVX-32-NEXT: # %bb.1:
331 ; AVX-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
332 ; AVX-32-NEXT: .LBB1_2:
333 ; AVX-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
334 ; AVX-32-NEXT: vmovsd %xmm2, (%esp)
335 ; AVX-32-NEXT: fldl (%esp)
336 ; AVX-32-NEXT: fisttpll (%esp)
338 ; AVX-32-NEXT: setae %al
339 ; AVX-32-NEXT: movzbl %al, %eax
340 ; AVX-32-NEXT: shll $31, %eax
341 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
342 ; AVX-32-NEXT: vcomisd %xmm1, %xmm0
343 ; AVX-32-NEXT: jae .LBB1_4
344 ; AVX-32-NEXT: # %bb.3:
345 ; AVX-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
346 ; AVX-32-NEXT: .LBB1_4:
347 ; AVX-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
348 ; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
349 ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
350 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
352 ; AVX-32-NEXT: setae %cl
353 ; AVX-32-NEXT: movzbl %cl, %ecx
354 ; AVX-32-NEXT: shll $31, %ecx
355 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
356 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
357 ; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
358 ; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
359 ; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
360 ; AVX-32-NEXT: movl %ebp, %esp
361 ; AVX-32-NEXT: popl %ebp
362 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
365 ; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
367 ; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
368 ; AVX-64-NEXT: vcomisd %xmm1, %xmm0
369 ; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
370 ; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
371 ; AVX-64-NEXT: jb .LBB1_2
372 ; AVX-64-NEXT: # %bb.1:
373 ; AVX-64-NEXT: vmovapd %xmm1, %xmm3
374 ; AVX-64-NEXT: .LBB1_2:
375 ; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
376 ; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
377 ; AVX-64-NEXT: setae %cl
378 ; AVX-64-NEXT: movzbl %cl, %ecx
379 ; AVX-64-NEXT: shlq $63, %rcx
380 ; AVX-64-NEXT: xorq %rax, %rcx
381 ; AVX-64-NEXT: vmovq %rcx, %xmm3
382 ; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
383 ; AVX-64-NEXT: vcomisd %xmm1, %xmm0
384 ; AVX-64-NEXT: jb .LBB1_4
385 ; AVX-64-NEXT: # %bb.3:
386 ; AVX-64-NEXT: vmovapd %xmm1, %xmm2
387 ; AVX-64-NEXT: .LBB1_4:
388 ; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
389 ; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
390 ; AVX-64-NEXT: setae %cl
391 ; AVX-64-NEXT: movzbl %cl, %ecx
392 ; AVX-64-NEXT: shlq $63, %rcx
393 ; AVX-64-NEXT: xorq %rax, %rcx
394 ; AVX-64-NEXT: vmovq %rcx, %xmm0
395 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
398 ; AVX512F-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
399 ; AVX512F-32: # %bb.0:
400 ; AVX512F-32-NEXT: pushl %ebp
401 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
402 ; AVX512F-32-NEXT: .cfi_offset %ebp, -8
403 ; AVX512F-32-NEXT: movl %esp, %ebp
404 ; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
405 ; AVX512F-32-NEXT: andl $-8, %esp
406 ; AVX512F-32-NEXT: subl $16, %esp
407 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
408 ; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
409 ; AVX512F-32-NEXT: xorl %eax, %eax
410 ; AVX512F-32-NEXT: vcomisd %xmm2, %xmm1
411 ; AVX512F-32-NEXT: setae %al
412 ; AVX512F-32-NEXT: kmovw %eax, %k1
413 ; AVX512F-32-NEXT: vmovsd %xmm2, %xmm2, %xmm3 {%k1} {z}
414 ; AVX512F-32-NEXT: vsubsd %xmm3, %xmm1, %xmm1
415 ; AVX512F-32-NEXT: vmovsd %xmm1, (%esp)
416 ; AVX512F-32-NEXT: xorl %ecx, %ecx
417 ; AVX512F-32-NEXT: vcomisd %xmm2, %xmm0
418 ; AVX512F-32-NEXT: setae %cl
419 ; AVX512F-32-NEXT: kmovw %ecx, %k1
420 ; AVX512F-32-NEXT: vmovsd %xmm2, %xmm2, %xmm1 {%k1} {z}
421 ; AVX512F-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
422 ; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
423 ; AVX512F-32-NEXT: fldl (%esp)
424 ; AVX512F-32-NEXT: fisttpll (%esp)
425 ; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
426 ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
427 ; AVX512F-32-NEXT: wait
428 ; AVX512F-32-NEXT: shll $31, %eax
429 ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
430 ; AVX512F-32-NEXT: shll $31, %ecx
431 ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
432 ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
433 ; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
434 ; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
435 ; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
436 ; AVX512F-32-NEXT: movl %ebp, %esp
437 ; AVX512F-32-NEXT: popl %ebp
438 ; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
439 ; AVX512F-32-NEXT: retl
441 ; AVX512F-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
442 ; AVX512F-64: # %bb.0:
443 ; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
444 ; AVX512F-64-NEXT: vmovq %rax, %xmm1
445 ; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
446 ; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
447 ; AVX512F-64-NEXT: vmovq %rax, %xmm0
448 ; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
449 ; AVX512F-64-NEXT: retq
451 ; AVX512VL-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
452 ; AVX512VL-32: # %bb.0:
453 ; AVX512VL-32-NEXT: pushl %ebp
454 ; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
455 ; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
456 ; AVX512VL-32-NEXT: movl %esp, %ebp
457 ; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
458 ; AVX512VL-32-NEXT: andl $-8, %esp
459 ; AVX512VL-32-NEXT: subl $16, %esp
460 ; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
461 ; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
462 ; AVX512VL-32-NEXT: xorl %eax, %eax
463 ; AVX512VL-32-NEXT: vcomisd %xmm2, %xmm1
464 ; AVX512VL-32-NEXT: setae %al
465 ; AVX512VL-32-NEXT: kmovw %eax, %k1
466 ; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm2, %xmm3 {%k1} {z}
467 ; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm1, %xmm1
468 ; AVX512VL-32-NEXT: vmovsd %xmm1, (%esp)
469 ; AVX512VL-32-NEXT: xorl %ecx, %ecx
470 ; AVX512VL-32-NEXT: vcomisd %xmm2, %xmm0
471 ; AVX512VL-32-NEXT: setae %cl
472 ; AVX512VL-32-NEXT: kmovw %ecx, %k1
473 ; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm2, %xmm1 {%k1} {z}
474 ; AVX512VL-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
475 ; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
476 ; AVX512VL-32-NEXT: fldl (%esp)
477 ; AVX512VL-32-NEXT: fisttpll (%esp)
478 ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
479 ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
480 ; AVX512VL-32-NEXT: wait
481 ; AVX512VL-32-NEXT: shll $31, %eax
482 ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
483 ; AVX512VL-32-NEXT: shll $31, %ecx
484 ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
485 ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
486 ; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
487 ; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
488 ; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
489 ; AVX512VL-32-NEXT: movl %ebp, %esp
490 ; AVX512VL-32-NEXT: popl %ebp
491 ; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
492 ; AVX512VL-32-NEXT: retl
494 ; AVX512VL-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
495 ; AVX512VL-64: # %bb.0:
496 ; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
497 ; AVX512VL-64-NEXT: vmovq %rax, %xmm1
498 ; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
499 ; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
500 ; AVX512VL-64-NEXT: vmovq %rax, %xmm0
501 ; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
502 ; AVX512VL-64-NEXT: retq
504 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
506 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
507 ; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
508 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
509 ; AVX512DQ-NEXT: vzeroupper
510 ; AVX512DQ-NEXT: ret{{[l|q]}}
512 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
513 ; AVX512VLDQ: # %bb.0:
514 ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
515 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
516 %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %a,
517 metadata !"fpexcept.strict") #0
; Strict-FP signed conversion of <2 x float> to <2 x i64>. The IR body is a
; single call to the constrained fptosi intrinsic with "fpexcept.strict"
; exception metadata. The check lines are autogenerated (see the note on the
; first line of the file); regenerate them with update_llc_test_checks.py
; rather than editing them by hand.
;
; Expected lowering as recorded in the check lines below:
;  - for the SSE-32 prefix, both elements are stored to the stack and converted
;    with x87 flds/fistpll, with the control word saved (fnstcw), modified and
;    reloaded (fldcw) around each fistpll;
;  - for the AVX-32, AVX512F-32 and AVX512VL-32 prefixes, each element goes
;    through x87 flds/fisttpll and the result is rebuilt with vmovd/vpinsrd;
;  - for the 64-bit SSE, AVX, AVX512F and AVX512VL prefixes, two scalar
;    (v)cvttss2si conversions are used, with shufps or vmovshdup bringing
;    element 1 into lane 0, and joined with (v)punpcklqdq;
;  - for the AVX512DQ prefix, vmovq clears the lanes above the two inputs, then
;    one vcvttps2qq from ymm0 to zmm0 followed by vzeroupper;
;  - for the AVX512VLDQ prefix, one vcvttps2qq on xmm0.
;
; NOTE(review): no `ret` or closing `}` is visible for this function in the
; text under review, and some prefix groups lack the "# %bb.0" and return
; check lines that the AVX512F groups have -- confirm against the checked-in
; test before relying on this copy.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file.
521 define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
522 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
524 ; SSE-32-NEXT: pushl %ebp
525 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
526 ; SSE-32-NEXT: .cfi_offset %ebp, -8
527 ; SSE-32-NEXT: movl %esp, %ebp
528 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
529 ; SSE-32-NEXT: andl $-8, %esp
530 ; SSE-32-NEXT: subl $24, %esp
531 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
532 ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
533 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
534 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
536 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
537 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
538 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
539 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
540 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
541 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
542 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
543 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
545 ; SSE-32-NEXT: fnstcw (%esp)
546 ; SSE-32-NEXT: movzwl (%esp), %eax
547 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
548 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
549 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
550 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
551 ; SSE-32-NEXT: fldcw (%esp)
552 ; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
553 ; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
554 ; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
555 ; SSE-32-NEXT: movl %ebp, %esp
556 ; SSE-32-NEXT: popl %ebp
557 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
560 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
562 ; SSE-64-NEXT: cvttss2si %xmm0, %rax
563 ; SSE-64-NEXT: movq %rax, %xmm1
564 ; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
565 ; SSE-64-NEXT: cvttss2si %xmm0, %rax
566 ; SSE-64-NEXT: movq %rax, %xmm0
567 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
568 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
571 ; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
573 ; AVX-32-NEXT: pushl %ebp
574 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
575 ; AVX-32-NEXT: .cfi_offset %ebp, -8
576 ; AVX-32-NEXT: movl %esp, %ebp
577 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
578 ; AVX-32-NEXT: andl $-8, %esp
579 ; AVX-32-NEXT: subl $16, %esp
580 ; AVX-32-NEXT: vmovss %xmm0, (%esp)
581 ; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
582 ; AVX-32-NEXT: flds (%esp)
583 ; AVX-32-NEXT: fisttpll (%esp)
584 ; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
585 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
587 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
588 ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
589 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
590 ; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
591 ; AVX-32-NEXT: movl %ebp, %esp
592 ; AVX-32-NEXT: popl %ebp
593 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
596 ; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
598 ; AVX-64-NEXT: vcvttss2si %xmm0, %rax
599 ; AVX-64-NEXT: vmovq %rax, %xmm1
600 ; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
601 ; AVX-64-NEXT: vcvttss2si %xmm0, %rax
602 ; AVX-64-NEXT: vmovq %rax, %xmm0
603 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
606 ; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
607 ; AVX512F-32: # %bb.0:
608 ; AVX512F-32-NEXT: pushl %ebp
609 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
610 ; AVX512F-32-NEXT: .cfi_offset %ebp, -8
611 ; AVX512F-32-NEXT: movl %esp, %ebp
612 ; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
613 ; AVX512F-32-NEXT: andl $-8, %esp
614 ; AVX512F-32-NEXT: subl $16, %esp
615 ; AVX512F-32-NEXT: vmovd %xmm0, (%esp)
616 ; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
617 ; AVX512F-32-NEXT: flds (%esp)
618 ; AVX512F-32-NEXT: fisttpll (%esp)
619 ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
620 ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
621 ; AVX512F-32-NEXT: wait
622 ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
623 ; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
624 ; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
625 ; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
626 ; AVX512F-32-NEXT: movl %ebp, %esp
627 ; AVX512F-32-NEXT: popl %ebp
628 ; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
629 ; AVX512F-32-NEXT: retl
631 ; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
632 ; AVX512F-64: # %bb.0:
633 ; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
634 ; AVX512F-64-NEXT: vmovq %rax, %xmm1
635 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
636 ; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
637 ; AVX512F-64-NEXT: vmovq %rax, %xmm0
638 ; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
639 ; AVX512F-64-NEXT: retq
641 ; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
642 ; AVX512VL-32: # %bb.0:
643 ; AVX512VL-32-NEXT: pushl %ebp
644 ; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
645 ; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
646 ; AVX512VL-32-NEXT: movl %esp, %ebp
647 ; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
648 ; AVX512VL-32-NEXT: andl $-8, %esp
649 ; AVX512VL-32-NEXT: subl $16, %esp
650 ; AVX512VL-32-NEXT: vmovd %xmm0, (%esp)
651 ; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
652 ; AVX512VL-32-NEXT: flds (%esp)
653 ; AVX512VL-32-NEXT: fisttpll (%esp)
654 ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
655 ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
656 ; AVX512VL-32-NEXT: wait
657 ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
658 ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
659 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
660 ; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
661 ; AVX512VL-32-NEXT: movl %ebp, %esp
662 ; AVX512VL-32-NEXT: popl %ebp
663 ; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
664 ; AVX512VL-32-NEXT: retl
666 ; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
667 ; AVX512VL-64: # %bb.0:
668 ; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax
669 ; AVX512VL-64-NEXT: vmovq %rax, %xmm1
670 ; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
671 ; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax
672 ; AVX512VL-64-NEXT: vmovq %rax, %xmm0
673 ; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
674 ; AVX512VL-64-NEXT: retq
676 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
678 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
679 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
680 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
681 ; AVX512DQ-NEXT: vzeroupper
682 ; AVX512DQ-NEXT: ret{{[l|q]}}
684 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
685 ; AVX512VLDQ: # %bb.0:
686 ; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
687 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
688 %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %a,
689 metadata !"fpexcept.strict") #0
693 define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(<4 x float>* %x) strictfp {
694 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
696 ; SSE-32-NEXT: pushl %ebp
697 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
698 ; SSE-32-NEXT: .cfi_offset %ebp, -8
699 ; SSE-32-NEXT: movl %esp, %ebp
700 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
701 ; SSE-32-NEXT: andl $-8, %esp
702 ; SSE-32-NEXT: subl $24, %esp
703 ; SSE-32-NEXT: movl 8(%ebp), %eax
704 ; SSE-32-NEXT: movaps (%eax), %xmm0
705 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
706 ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
707 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
708 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
710 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
711 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
712 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
713 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
714 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
715 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
716 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
717 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
719 ; SSE-32-NEXT: fnstcw (%esp)
720 ; SSE-32-NEXT: movzwl (%esp), %eax
721 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
722 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
723 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
724 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
725 ; SSE-32-NEXT: fldcw (%esp)
726 ; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
727 ; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
728 ; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
729 ; SSE-32-NEXT: movl %ebp, %esp
730 ; SSE-32-NEXT: popl %ebp
731 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
734 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
736 ; SSE-64-NEXT: movaps (%rdi), %xmm1
737 ; SSE-64-NEXT: cvttss2si %xmm1, %rax
738 ; SSE-64-NEXT: movq %rax, %xmm0
739 ; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
740 ; SSE-64-NEXT: cvttss2si %xmm1, %rax
741 ; SSE-64-NEXT: movq %rax, %xmm1
742 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
745 ; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
747 ; AVX-32-NEXT: pushl %ebp
748 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
749 ; AVX-32-NEXT: .cfi_offset %ebp, -8
750 ; AVX-32-NEXT: movl %esp, %ebp
751 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
752 ; AVX-32-NEXT: andl $-8, %esp
753 ; AVX-32-NEXT: subl $16, %esp
754 ; AVX-32-NEXT: movl 8(%ebp), %eax
755 ; AVX-32-NEXT: vmovaps (%eax), %xmm0
756 ; AVX-32-NEXT: vmovss %xmm0, (%esp)
757 ; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
758 ; AVX-32-NEXT: flds (%esp)
759 ; AVX-32-NEXT: fisttpll (%esp)
760 ; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
761 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
763 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
764 ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
765 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
766 ; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
767 ; AVX-32-NEXT: movl %ebp, %esp
768 ; AVX-32-NEXT: popl %ebp
769 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
772 ; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
774 ; AVX-64-NEXT: vcvttss2si 4(%rdi), %rax
775 ; AVX-64-NEXT: vmovq %rax, %xmm0
776 ; AVX-64-NEXT: vcvttss2si (%rdi), %rax
777 ; AVX-64-NEXT: vmovq %rax, %xmm1
778 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
781 ; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
782 ; AVX512F-32: # %bb.0:
783 ; AVX512F-32-NEXT: pushl %ebp
784 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
785 ; AVX512F-32-NEXT: .cfi_offset %ebp, -8
786 ; AVX512F-32-NEXT: movl %esp, %ebp
787 ; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
788 ; AVX512F-32-NEXT: andl $-8, %esp
789 ; AVX512F-32-NEXT: subl $16, %esp
790 ; AVX512F-32-NEXT: movl 8(%ebp), %eax
791 ; AVX512F-32-NEXT: vmovdqa (%eax), %xmm0
792 ; AVX512F-32-NEXT: vmovd %xmm0, (%esp)
793 ; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
794 ; AVX512F-32-NEXT: flds (%esp)
795 ; AVX512F-32-NEXT: fisttpll (%esp)
796 ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
797 ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
798 ; AVX512F-32-NEXT: wait
799 ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
800 ; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
801 ; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
802 ; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
803 ; AVX512F-32-NEXT: movl %ebp, %esp
804 ; AVX512F-32-NEXT: popl %ebp
805 ; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
806 ; AVX512F-32-NEXT: retl
808 ; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
809 ; AVX512F-64: # %bb.0:
810 ; AVX512F-64-NEXT: vcvttss2si 4(%rdi), %rax
811 ; AVX512F-64-NEXT: vmovq %rax, %xmm0
812 ; AVX512F-64-NEXT: vcvttss2si (%rdi), %rax
813 ; AVX512F-64-NEXT: vmovq %rax, %xmm1
814 ; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
815 ; AVX512F-64-NEXT: retq
817 ; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
818 ; AVX512VL-32: # %bb.0:
819 ; AVX512VL-32-NEXT: pushl %ebp
820 ; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
821 ; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
822 ; AVX512VL-32-NEXT: movl %esp, %ebp
823 ; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
824 ; AVX512VL-32-NEXT: andl $-8, %esp
825 ; AVX512VL-32-NEXT: subl $16, %esp
826 ; AVX512VL-32-NEXT: movl 8(%ebp), %eax
827 ; AVX512VL-32-NEXT: vmovdqa (%eax), %xmm0
828 ; AVX512VL-32-NEXT: vmovd %xmm0, (%esp)
829 ; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
830 ; AVX512VL-32-NEXT: flds (%esp)
831 ; AVX512VL-32-NEXT: fisttpll (%esp)
832 ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
833 ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
834 ; AVX512VL-32-NEXT: wait
835 ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
836 ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
837 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
838 ; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
839 ; AVX512VL-32-NEXT: movl %ebp, %esp
840 ; AVX512VL-32-NEXT: popl %ebp
841 ; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
842 ; AVX512VL-32-NEXT: retl
844 ; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
845 ; AVX512VL-64: # %bb.0:
846 ; AVX512VL-64-NEXT: vcvttss2si 4(%rdi), %rax
847 ; AVX512VL-64-NEXT: vmovq %rax, %xmm0
848 ; AVX512VL-64-NEXT: vcvttss2si (%rdi), %rax
849 ; AVX512VL-64-NEXT: vmovq %rax, %xmm1
850 ; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
851 ; AVX512VL-64-NEXT: retq
853 ; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
854 ; AVX512DQ-32: # %bb.0:
855 ; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
856 ; AVX512DQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
857 ; AVX512DQ-32-NEXT: vcvttps2qq %ymm0, %zmm0
858 ; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
859 ; AVX512DQ-32-NEXT: vzeroupper
860 ; AVX512DQ-32-NEXT: retl
862 ; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
863 ; AVX512DQ-64: # %bb.0:
864 ; AVX512DQ-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
865 ; AVX512DQ-64-NEXT: vcvttps2qq %ymm0, %zmm0
866 ; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
867 ; AVX512DQ-64-NEXT: vzeroupper
868 ; AVX512DQ-64-NEXT: retq
870 ; AVX512VLDQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
871 ; AVX512VLDQ-32: # %bb.0:
872 ; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
873 ; AVX512VLDQ-32-NEXT: vcvttps2qq (%eax), %xmm0
874 ; AVX512VLDQ-32-NEXT: retl
876 ; AVX512VLDQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
877 ; AVX512VLDQ-64: # %bb.0:
878 ; AVX512VLDQ-64-NEXT: vcvttps2qq (%rdi), %xmm0
879 ; AVX512VLDQ-64-NEXT: retq
880 %a = load <4 x float>, <4 x float>* %x
881 %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
882 %c = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
; Strict-FP test for converting <2 x float> to unsigned <2 x i64> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception behavior.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; What the expectations below show
;  - sse2 and avx configurations compare each lane (comiss / vcomiss) against a
;    constant loaded from memory, conditionally subtract it, convert with a
;    signed conversion, and xor the setae result back in as the top result bit
;    (shll $31 on the high 32-bit half, or shlq $63 on 64-bit targets).
;    The constant's value is not visible in the checks (presumably 2^63;
;    confirm against the constant pool).
;  - the 32-bit configurations without avx512dq perform the conversion itself
;    through x87 (flds followed by fistpll or fisttpll); the 32-bit avx512f and
;    avx512vl variants select the subtrahend with a mask register instead of a
;    branch.
;  - 64-bit avx512f and avx512vl use one scalar vcvttss2usi per lane.
;  - avx512dq uses vcvttps2uqq on zmm0; with avx512vl added it is a single
;    xmm vcvttps2uqq.
886 define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
887 ; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
889 ; SSE-32-NEXT: pushl %ebp
890 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
891 ; SSE-32-NEXT: .cfi_offset %ebp, -8
892 ; SSE-32-NEXT: movl %esp, %ebp
893 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
894 ; SSE-32-NEXT: andl $-8, %esp
895 ; SSE-32-NEXT: subl $24, %esp
896 ; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
897 ; SSE-32-NEXT: comiss %xmm1, %xmm0
898 ; SSE-32-NEXT: movaps %xmm1, %xmm2
899 ; SSE-32-NEXT: jae .LBB4_2
900 ; SSE-32-NEXT: # %bb.1:
901 ; SSE-32-NEXT: xorps %xmm2, %xmm2
902 ; SSE-32-NEXT: .LBB4_2:
903 ; SSE-32-NEXT: movaps %xmm0, %xmm3
904 ; SSE-32-NEXT: subss %xmm2, %xmm3
905 ; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
906 ; SSE-32-NEXT: setae %al
907 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
909 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
910 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
911 ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
912 ; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
913 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
914 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
915 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
916 ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
917 ; SSE-32-NEXT: comiss %xmm1, %xmm0
918 ; SSE-32-NEXT: jae .LBB4_4
919 ; SSE-32-NEXT: # %bb.3:
920 ; SSE-32-NEXT: xorps %xmm1, %xmm1
921 ; SSE-32-NEXT: .LBB4_4:
922 ; SSE-32-NEXT: subss %xmm1, %xmm0
923 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
924 ; SSE-32-NEXT: setae %cl
925 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
927 ; SSE-32-NEXT: fnstcw (%esp)
928 ; SSE-32-NEXT: movzwl (%esp), %edx
929 ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
930 ; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
931 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
932 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
933 ; SSE-32-NEXT: fldcw (%esp)
934 ; SSE-32-NEXT: movzbl %al, %eax
935 ; SSE-32-NEXT: shll $31, %eax
936 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
937 ; SSE-32-NEXT: movd %eax, %xmm1
938 ; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
939 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
940 ; SSE-32-NEXT: movzbl %cl, %eax
941 ; SSE-32-NEXT: shll $31, %eax
942 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
943 ; SSE-32-NEXT: movd %eax, %xmm1
944 ; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
945 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
946 ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
947 ; SSE-32-NEXT: movl %ebp, %esp
948 ; SSE-32-NEXT: popl %ebp
949 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
952 ; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
954 ; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
955 ; SSE-64-NEXT: comiss %xmm3, %xmm0
956 ; SSE-64-NEXT: xorps %xmm2, %xmm2
957 ; SSE-64-NEXT: xorps %xmm1, %xmm1
958 ; SSE-64-NEXT: jb .LBB4_2
959 ; SSE-64-NEXT: # %bb.1:
960 ; SSE-64-NEXT: movaps %xmm3, %xmm1
961 ; SSE-64-NEXT: .LBB4_2:
962 ; SSE-64-NEXT: movaps %xmm0, %xmm4
963 ; SSE-64-NEXT: subss %xmm1, %xmm4
964 ; SSE-64-NEXT: cvttss2si %xmm4, %rax
965 ; SSE-64-NEXT: setae %cl
966 ; SSE-64-NEXT: movzbl %cl, %ecx
967 ; SSE-64-NEXT: shlq $63, %rcx
968 ; SSE-64-NEXT: xorq %rax, %rcx
969 ; SSE-64-NEXT: movq %rcx, %xmm1
970 ; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
971 ; SSE-64-NEXT: comiss %xmm3, %xmm0
972 ; SSE-64-NEXT: jb .LBB4_4
973 ; SSE-64-NEXT: # %bb.3:
974 ; SSE-64-NEXT: movaps %xmm3, %xmm2
975 ; SSE-64-NEXT: .LBB4_4:
976 ; SSE-64-NEXT: subss %xmm2, %xmm0
977 ; SSE-64-NEXT: cvttss2si %xmm0, %rax
978 ; SSE-64-NEXT: setae %cl
979 ; SSE-64-NEXT: movzbl %cl, %ecx
980 ; SSE-64-NEXT: shlq $63, %rcx
981 ; SSE-64-NEXT: xorq %rax, %rcx
982 ; SSE-64-NEXT: movq %rcx, %xmm0
983 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
984 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
987 ; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
989 ; AVX-32-NEXT: pushl %ebp
990 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
991 ; AVX-32-NEXT: .cfi_offset %ebp, -8
992 ; AVX-32-NEXT: movl %esp, %ebp
993 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
994 ; AVX-32-NEXT: andl $-8, %esp
995 ; AVX-32-NEXT: subl $16, %esp
996 ; AVX-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
997 ; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
998 ; AVX-32-NEXT: vcomiss %xmm1, %xmm2
999 ; AVX-32-NEXT: vmovaps %xmm1, %xmm3
1000 ; AVX-32-NEXT: jae .LBB4_2
1001 ; AVX-32-NEXT: # %bb.1:
1002 ; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
1003 ; AVX-32-NEXT: .LBB4_2:
1004 ; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
1005 ; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
1006 ; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
1007 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
1009 ; AVX-32-NEXT: setae %al
1010 ; AVX-32-NEXT: movzbl %al, %eax
1011 ; AVX-32-NEXT: shll $31, %eax
1012 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
1013 ; AVX-32-NEXT: vcomiss %xmm1, %xmm0
1014 ; AVX-32-NEXT: jae .LBB4_4
1015 ; AVX-32-NEXT: # %bb.3:
1016 ; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
1017 ; AVX-32-NEXT: .LBB4_4:
1018 ; AVX-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
1019 ; AVX-32-NEXT: vmovss %xmm0, (%esp)
1020 ; AVX-32-NEXT: flds (%esp)
1021 ; AVX-32-NEXT: fisttpll (%esp)
1023 ; AVX-32-NEXT: setae %cl
1024 ; AVX-32-NEXT: movzbl %cl, %ecx
1025 ; AVX-32-NEXT: shll $31, %ecx
1026 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
1027 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1028 ; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
1029 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1030 ; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1031 ; AVX-32-NEXT: movl %ebp, %esp
1032 ; AVX-32-NEXT: popl %ebp
1033 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
1036 ; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1038 ; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1039 ; AVX-64-NEXT: vcomiss %xmm1, %xmm0
1040 ; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
1041 ; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
1042 ; AVX-64-NEXT: jb .LBB4_2
1043 ; AVX-64-NEXT: # %bb.1:
1044 ; AVX-64-NEXT: vmovaps %xmm1, %xmm3
1045 ; AVX-64-NEXT: .LBB4_2:
1046 ; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
1047 ; AVX-64-NEXT: vcvttss2si %xmm3, %rax
1048 ; AVX-64-NEXT: setae %cl
1049 ; AVX-64-NEXT: movzbl %cl, %ecx
1050 ; AVX-64-NEXT: shlq $63, %rcx
1051 ; AVX-64-NEXT: xorq %rax, %rcx
1052 ; AVX-64-NEXT: vmovq %rcx, %xmm3
1053 ; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1054 ; AVX-64-NEXT: vcomiss %xmm1, %xmm0
1055 ; AVX-64-NEXT: jb .LBB4_4
1056 ; AVX-64-NEXT: # %bb.3:
1057 ; AVX-64-NEXT: vmovaps %xmm1, %xmm2
1058 ; AVX-64-NEXT: .LBB4_4:
1059 ; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
1060 ; AVX-64-NEXT: vcvttss2si %xmm0, %rax
1061 ; AVX-64-NEXT: setae %cl
1062 ; AVX-64-NEXT: movzbl %cl, %ecx
1063 ; AVX-64-NEXT: shlq $63, %rcx
1064 ; AVX-64-NEXT: xorq %rax, %rcx
1065 ; AVX-64-NEXT: vmovq %rcx, %xmm0
1066 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1069 ; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1070 ; AVX512F-32: # %bb.0:
1071 ; AVX512F-32-NEXT: pushl %ebp
1072 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
1073 ; AVX512F-32-NEXT: .cfi_offset %ebp, -8
1074 ; AVX512F-32-NEXT: movl %esp, %ebp
1075 ; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
1076 ; AVX512F-32-NEXT: andl $-8, %esp
1077 ; AVX512F-32-NEXT: subl $16, %esp
1078 ; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1079 ; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1080 ; AVX512F-32-NEXT: xorl %eax, %eax
1081 ; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1
1082 ; AVX512F-32-NEXT: setae %al
1083 ; AVX512F-32-NEXT: kmovw %eax, %k1
1084 ; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1085 ; AVX512F-32-NEXT: vsubss %xmm3, %xmm1, %xmm1
1086 ; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
1087 ; AVX512F-32-NEXT: xorl %ecx, %ecx
1088 ; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0
1089 ; AVX512F-32-NEXT: setae %cl
1090 ; AVX512F-32-NEXT: kmovw %ecx, %k1
1091 ; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1092 ; AVX512F-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
1093 ; AVX512F-32-NEXT: vmovss %xmm0, (%esp)
1094 ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
1095 ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
1096 ; AVX512F-32-NEXT: flds (%esp)
1097 ; AVX512F-32-NEXT: fisttpll (%esp)
1098 ; AVX512F-32-NEXT: wait
1099 ; AVX512F-32-NEXT: shll $31, %eax
1100 ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
1101 ; AVX512F-32-NEXT: shll $31, %ecx
1102 ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
1103 ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1104 ; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
1105 ; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1106 ; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1107 ; AVX512F-32-NEXT: movl %ebp, %esp
1108 ; AVX512F-32-NEXT: popl %ebp
1109 ; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
1110 ; AVX512F-32-NEXT: retl
1112 ; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1113 ; AVX512F-64: # %bb.0:
1114 ; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
1115 ; AVX512F-64-NEXT: vmovq %rax, %xmm1
1116 ; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1117 ; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
1118 ; AVX512F-64-NEXT: vmovq %rax, %xmm0
1119 ; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1120 ; AVX512F-64-NEXT: retq
1122 ; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1123 ; AVX512VL-32: # %bb.0:
1124 ; AVX512VL-32-NEXT: pushl %ebp
1125 ; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
1126 ; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
1127 ; AVX512VL-32-NEXT: movl %esp, %ebp
1128 ; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
1129 ; AVX512VL-32-NEXT: andl $-8, %esp
1130 ; AVX512VL-32-NEXT: subl $16, %esp
1131 ; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1132 ; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1133 ; AVX512VL-32-NEXT: xorl %eax, %eax
1134 ; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1
1135 ; AVX512VL-32-NEXT: setae %al
1136 ; AVX512VL-32-NEXT: kmovw %eax, %k1
1137 ; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1138 ; AVX512VL-32-NEXT: vsubss %xmm3, %xmm1, %xmm1
1139 ; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
1140 ; AVX512VL-32-NEXT: xorl %ecx, %ecx
1141 ; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0
1142 ; AVX512VL-32-NEXT: setae %cl
1143 ; AVX512VL-32-NEXT: kmovw %ecx, %k1
1144 ; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1145 ; AVX512VL-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
1146 ; AVX512VL-32-NEXT: vmovss %xmm0, (%esp)
1147 ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
1148 ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
1149 ; AVX512VL-32-NEXT: flds (%esp)
1150 ; AVX512VL-32-NEXT: fisttpll (%esp)
1151 ; AVX512VL-32-NEXT: wait
1152 ; AVX512VL-32-NEXT: shll $31, %eax
1153 ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
1154 ; AVX512VL-32-NEXT: shll $31, %ecx
1155 ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
1156 ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1157 ; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
1158 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1159 ; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1160 ; AVX512VL-32-NEXT: movl %ebp, %esp
1161 ; AVX512VL-32-NEXT: popl %ebp
1162 ; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
1163 ; AVX512VL-32-NEXT: retl
1165 ; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1166 ; AVX512VL-64: # %bb.0:
1167 ; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax
1168 ; AVX512VL-64-NEXT: vmovq %rax, %xmm1
1169 ; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1170 ; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax
1171 ; AVX512VL-64-NEXT: vmovq %rax, %xmm0
1172 ; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1173 ; AVX512VL-64-NEXT: retq
1175 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1176 ; AVX512DQ: # %bb.0:
1177 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1178 ; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
1179 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1180 ; AVX512DQ-NEXT: vzeroupper
1181 ; AVX512DQ-NEXT: ret{{[l|q]}}
1183 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
1184 ; AVX512VLDQ: # %bb.0:
1185 ; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
1186 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
1187 %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %a,
1188 metadata !"fpexcept.strict") #0
; Strict-FP test that loads a <4 x float> from %x, keeps lanes 0 and 1 with a
; shufflevector, and converts them to unsigned <2 x i64> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception behavior.
; Here strictfp is written directly on the define, whereas the register-operand
; test strict_vector_fptoui_v2f32_to_v2i64 uses attribute group #0; the call
; itself still carries #0 (its definition is outside this part of the file).
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; What the expectations below show about the load
;  - avx512dq plus avx512vl passes the pointer straight to vcvttps2uqq as a
;    memory operand.
;  - avx512dq alone first loads 64 bits with vmovsd, then converts on zmm0.
;  - 64-bit avx512f and avx512vl convert each float from memory with
;    vcvttss2usi at offsets 0 and 4.
;  - the sse2 configurations load the full vector with movaps, while the avx
;    and 32-bit avx512f / avx512vl configurations use scalar vmovss loads.
1192 define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(<4 x float>* %x) strictfp {
1193 ; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1195 ; SSE-32-NEXT: pushl %ebp
1196 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
1197 ; SSE-32-NEXT: .cfi_offset %ebp, -8
1198 ; SSE-32-NEXT: movl %esp, %ebp
1199 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
1200 ; SSE-32-NEXT: andl $-8, %esp
1201 ; SSE-32-NEXT: subl $24, %esp
1202 ; SSE-32-NEXT: movl 8(%ebp), %eax
1203 ; SSE-32-NEXT: movaps (%eax), %xmm0
1204 ; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1205 ; SSE-32-NEXT: comiss %xmm1, %xmm0
1206 ; SSE-32-NEXT: movaps %xmm1, %xmm2
1207 ; SSE-32-NEXT: jae .LBB5_2
1208 ; SSE-32-NEXT: # %bb.1:
1209 ; SSE-32-NEXT: xorps %xmm2, %xmm2
1210 ; SSE-32-NEXT: .LBB5_2:
1211 ; SSE-32-NEXT: movaps %xmm0, %xmm3
1212 ; SSE-32-NEXT: subss %xmm2, %xmm3
1213 ; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
1214 ; SSE-32-NEXT: setae %al
1215 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
1217 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
1218 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
1219 ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
1220 ; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
1221 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
1222 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
1223 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
1224 ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1225 ; SSE-32-NEXT: comiss %xmm1, %xmm0
1226 ; SSE-32-NEXT: jae .LBB5_4
1227 ; SSE-32-NEXT: # %bb.3:
1228 ; SSE-32-NEXT: xorps %xmm1, %xmm1
1229 ; SSE-32-NEXT: .LBB5_4:
1230 ; SSE-32-NEXT: subss %xmm1, %xmm0
1231 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
1232 ; SSE-32-NEXT: setae %cl
1233 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
1235 ; SSE-32-NEXT: fnstcw (%esp)
1236 ; SSE-32-NEXT: movzwl (%esp), %edx
1237 ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
1238 ; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
1239 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
1240 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
1241 ; SSE-32-NEXT: fldcw (%esp)
1242 ; SSE-32-NEXT: movzbl %al, %eax
1243 ; SSE-32-NEXT: shll $31, %eax
1244 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
1245 ; SSE-32-NEXT: movd %eax, %xmm1
1246 ; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1247 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1248 ; SSE-32-NEXT: movzbl %cl, %eax
1249 ; SSE-32-NEXT: shll $31, %eax
1250 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
1251 ; SSE-32-NEXT: movd %eax, %xmm1
1252 ; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
1253 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1254 ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1255 ; SSE-32-NEXT: movl %ebp, %esp
1256 ; SSE-32-NEXT: popl %ebp
1257 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
1260 ; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1262 ; SSE-64-NEXT: movaps (%rdi), %xmm1
1263 ; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1264 ; SSE-64-NEXT: comiss %xmm3, %xmm1
1265 ; SSE-64-NEXT: xorps %xmm2, %xmm2
1266 ; SSE-64-NEXT: xorps %xmm0, %xmm0
1267 ; SSE-64-NEXT: jb .LBB5_2
1268 ; SSE-64-NEXT: # %bb.1:
1269 ; SSE-64-NEXT: movaps %xmm3, %xmm0
1270 ; SSE-64-NEXT: .LBB5_2:
1271 ; SSE-64-NEXT: movaps %xmm1, %xmm4
1272 ; SSE-64-NEXT: subss %xmm0, %xmm4
1273 ; SSE-64-NEXT: cvttss2si %xmm4, %rax
1274 ; SSE-64-NEXT: setae %cl
1275 ; SSE-64-NEXT: movzbl %cl, %ecx
1276 ; SSE-64-NEXT: shlq $63, %rcx
1277 ; SSE-64-NEXT: xorq %rax, %rcx
1278 ; SSE-64-NEXT: movq %rcx, %xmm0
1279 ; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
1280 ; SSE-64-NEXT: comiss %xmm3, %xmm1
1281 ; SSE-64-NEXT: jb .LBB5_4
1282 ; SSE-64-NEXT: # %bb.3:
1283 ; SSE-64-NEXT: movaps %xmm3, %xmm2
1284 ; SSE-64-NEXT: .LBB5_4:
1285 ; SSE-64-NEXT: subss %xmm2, %xmm1
1286 ; SSE-64-NEXT: cvttss2si %xmm1, %rax
1287 ; SSE-64-NEXT: setae %cl
1288 ; SSE-64-NEXT: movzbl %cl, %ecx
1289 ; SSE-64-NEXT: shlq $63, %rcx
1290 ; SSE-64-NEXT: xorq %rax, %rcx
1291 ; SSE-64-NEXT: movq %rcx, %xmm1
1292 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1295 ; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1297 ; AVX-32-NEXT: pushl %ebp
1298 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
1299 ; AVX-32-NEXT: .cfi_offset %ebp, -8
1300 ; AVX-32-NEXT: movl %esp, %ebp
1301 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
1302 ; AVX-32-NEXT: andl $-8, %esp
1303 ; AVX-32-NEXT: subl $16, %esp
1304 ; AVX-32-NEXT: movl 8(%ebp), %eax
1305 ; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1306 ; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1307 ; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1308 ; AVX-32-NEXT: vcomiss %xmm1, %xmm2
1309 ; AVX-32-NEXT: vmovaps %xmm1, %xmm3
1310 ; AVX-32-NEXT: jae .LBB5_2
1311 ; AVX-32-NEXT: # %bb.1:
1312 ; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
1313 ; AVX-32-NEXT: .LBB5_2:
1314 ; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
1315 ; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
1316 ; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
1317 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
1319 ; AVX-32-NEXT: setae %al
1320 ; AVX-32-NEXT: movzbl %al, %eax
1321 ; AVX-32-NEXT: shll $31, %eax
1322 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
1323 ; AVX-32-NEXT: vcomiss %xmm1, %xmm0
1324 ; AVX-32-NEXT: jae .LBB5_4
1325 ; AVX-32-NEXT: # %bb.3:
1326 ; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
1327 ; AVX-32-NEXT: .LBB5_4:
1328 ; AVX-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
1329 ; AVX-32-NEXT: vmovss %xmm0, (%esp)
1330 ; AVX-32-NEXT: flds (%esp)
1331 ; AVX-32-NEXT: fisttpll (%esp)
1333 ; AVX-32-NEXT: setae %cl
1334 ; AVX-32-NEXT: movzbl %cl, %ecx
1335 ; AVX-32-NEXT: shll $31, %ecx
1336 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
1337 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1338 ; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
1339 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1340 ; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1341 ; AVX-32-NEXT: movl %ebp, %esp
1342 ; AVX-32-NEXT: popl %ebp
1343 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
1346 ; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1348 ; AVX-64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1349 ; AVX-64-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1350 ; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1351 ; AVX-64-NEXT: vcomiss %xmm1, %xmm3
1352 ; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
1353 ; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
1354 ; AVX-64-NEXT: jb .LBB5_2
1355 ; AVX-64-NEXT: # %bb.1:
1356 ; AVX-64-NEXT: vmovaps %xmm1, %xmm4
1357 ; AVX-64-NEXT: .LBB5_2:
1358 ; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3
1359 ; AVX-64-NEXT: vcvttss2si %xmm3, %rax
1360 ; AVX-64-NEXT: setae %cl
1361 ; AVX-64-NEXT: movzbl %cl, %ecx
1362 ; AVX-64-NEXT: shlq $63, %rcx
1363 ; AVX-64-NEXT: xorq %rax, %rcx
1364 ; AVX-64-NEXT: vmovq %rcx, %xmm3
1365 ; AVX-64-NEXT: vcomiss %xmm1, %xmm0
1366 ; AVX-64-NEXT: jb .LBB5_4
1367 ; AVX-64-NEXT: # %bb.3:
1368 ; AVX-64-NEXT: vmovaps %xmm1, %xmm2
1369 ; AVX-64-NEXT: .LBB5_4:
1370 ; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
1371 ; AVX-64-NEXT: vcvttss2si %xmm0, %rax
1372 ; AVX-64-NEXT: setae %cl
1373 ; AVX-64-NEXT: movzbl %cl, %ecx
1374 ; AVX-64-NEXT: shlq $63, %rcx
1375 ; AVX-64-NEXT: xorq %rax, %rcx
1376 ; AVX-64-NEXT: vmovq %rcx, %xmm0
1377 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
1380 ; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1381 ; AVX512F-32: # %bb.0:
1382 ; AVX512F-32-NEXT: pushl %ebp
1383 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
1384 ; AVX512F-32-NEXT: .cfi_offset %ebp, -8
1385 ; AVX512F-32-NEXT: movl %esp, %ebp
1386 ; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
1387 ; AVX512F-32-NEXT: andl $-8, %esp
1388 ; AVX512F-32-NEXT: subl $16, %esp
1389 ; AVX512F-32-NEXT: movl 8(%ebp), %eax
1390 ; AVX512F-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1391 ; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1392 ; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1393 ; AVX512F-32-NEXT: xorl %eax, %eax
1394 ; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1
1395 ; AVX512F-32-NEXT: setae %al
1396 ; AVX512F-32-NEXT: kmovw %eax, %k1
1397 ; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1398 ; AVX512F-32-NEXT: vsubss %xmm3, %xmm1, %xmm1
1399 ; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
1400 ; AVX512F-32-NEXT: xorl %ecx, %ecx
1401 ; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0
1402 ; AVX512F-32-NEXT: setae %cl
1403 ; AVX512F-32-NEXT: kmovw %ecx, %k1
1404 ; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1405 ; AVX512F-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
1406 ; AVX512F-32-NEXT: vmovss %xmm0, (%esp)
1407 ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
1408 ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
1409 ; AVX512F-32-NEXT: flds (%esp)
1410 ; AVX512F-32-NEXT: fisttpll (%esp)
1411 ; AVX512F-32-NEXT: wait
1412 ; AVX512F-32-NEXT: shll $31, %eax
1413 ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
1414 ; AVX512F-32-NEXT: shll $31, %ecx
1415 ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
1416 ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1417 ; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
1418 ; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1419 ; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1420 ; AVX512F-32-NEXT: movl %ebp, %esp
1421 ; AVX512F-32-NEXT: popl %ebp
1422 ; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
1423 ; AVX512F-32-NEXT: retl
1425 ; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1426 ; AVX512F-64: # %bb.0:
1427 ; AVX512F-64-NEXT: vcvttss2usi 4(%rdi), %rax
1428 ; AVX512F-64-NEXT: vmovq %rax, %xmm0
1429 ; AVX512F-64-NEXT: vcvttss2usi (%rdi), %rax
1430 ; AVX512F-64-NEXT: vmovq %rax, %xmm1
1431 ; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1432 ; AVX512F-64-NEXT: retq
1434 ; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1435 ; AVX512VL-32: # %bb.0:
1436 ; AVX512VL-32-NEXT: pushl %ebp
1437 ; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
1438 ; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
1439 ; AVX512VL-32-NEXT: movl %esp, %ebp
1440 ; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
1441 ; AVX512VL-32-NEXT: andl $-8, %esp
1442 ; AVX512VL-32-NEXT: subl $16, %esp
1443 ; AVX512VL-32-NEXT: movl 8(%ebp), %eax
1444 ; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1445 ; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1446 ; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1447 ; AVX512VL-32-NEXT: xorl %eax, %eax
1448 ; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1
1449 ; AVX512VL-32-NEXT: setae %al
1450 ; AVX512VL-32-NEXT: kmovw %eax, %k1
1451 ; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1452 ; AVX512VL-32-NEXT: vsubss %xmm3, %xmm1, %xmm1
1453 ; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
1454 ; AVX512VL-32-NEXT: xorl %ecx, %ecx
1455 ; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0
1456 ; AVX512VL-32-NEXT: setae %cl
1457 ; AVX512VL-32-NEXT: kmovw %ecx, %k1
1458 ; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1459 ; AVX512VL-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
1460 ; AVX512VL-32-NEXT: vmovss %xmm0, (%esp)
1461 ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
1462 ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
1463 ; AVX512VL-32-NEXT: flds (%esp)
1464 ; AVX512VL-32-NEXT: fisttpll (%esp)
1465 ; AVX512VL-32-NEXT: wait
1466 ; AVX512VL-32-NEXT: shll $31, %eax
1467 ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
1468 ; AVX512VL-32-NEXT: shll $31, %ecx
1469 ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
1470 ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1471 ; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
1472 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
1473 ; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
1474 ; AVX512VL-32-NEXT: movl %ebp, %esp
1475 ; AVX512VL-32-NEXT: popl %ebp
1476 ; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
1477 ; AVX512VL-32-NEXT: retl
1479 ; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1480 ; AVX512VL-64: # %bb.0:
1481 ; AVX512VL-64-NEXT: vcvttss2usi 4(%rdi), %rax
1482 ; AVX512VL-64-NEXT: vmovq %rax, %xmm0
1483 ; AVX512VL-64-NEXT: vcvttss2usi (%rdi), %rax
1484 ; AVX512VL-64-NEXT: vmovq %rax, %xmm1
1485 ; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1486 ; AVX512VL-64-NEXT: retq
1488 ; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1489 ; AVX512DQ-32: # %bb.0:
1490 ; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1491 ; AVX512DQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1492 ; AVX512DQ-32-NEXT: vcvttps2uqq %ymm0, %zmm0
1493 ; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1494 ; AVX512DQ-32-NEXT: vzeroupper
1495 ; AVX512DQ-32-NEXT: retl
1497 ; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1498 ; AVX512DQ-64: # %bb.0:
1499 ; AVX512DQ-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1500 ; AVX512DQ-64-NEXT: vcvttps2uqq %ymm0, %zmm0
1501 ; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1502 ; AVX512DQ-64-NEXT: vzeroupper
1503 ; AVX512DQ-64-NEXT: retq
1505 ; AVX512VLDQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1506 ; AVX512VLDQ-32: # %bb.0:
1507 ; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1508 ; AVX512VLDQ-32-NEXT: vcvttps2uqq (%eax), %xmm0
1509 ; AVX512VLDQ-32-NEXT: retl
1511 ; AVX512VLDQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
1512 ; AVX512VLDQ-64: # %bb.0:
1513 ; AVX512VLDQ-64-NEXT: vcvttps2uqq (%rdi), %xmm0
1514 ; AVX512VLDQ-64-NEXT: retq
1515 %a = load <4 x float>, <4 x float>* %x
1516 %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1517 %c = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
; Strict-FP test for converting <2 x double> to signed <2 x i32> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception behavior.
; Every checked configuration expects one packed truncating convert on xmm0
; (cvttpd2dq for sse2, the v-prefixed vcvttpd2dq for avx and the avx512
; variants), with no scalarization or stack traffic.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1521 define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
1522 ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1524 ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
1527 ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1529 ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
1532 ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1534 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
1535 ; AVX-NEXT: ret{{[l|q]}}
1537 ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1539 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
1540 ; AVX512F-NEXT: ret{{[l|q]}}
1542 ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1543 ; AVX512VL: # %bb.0:
1544 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
1545 ; AVX512VL-NEXT: ret{{[l|q]}}
1547 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1548 ; AVX512DQ: # %bb.0:
1549 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
1550 ; AVX512DQ-NEXT: ret{{[l|q]}}
1552 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
1553 ; AVX512VLDQ: # %bb.0:
1554 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
1555 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
1556 %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %a,
1557 metadata !"fpexcept.strict") #0
; Strict-FP unsigned conversion of <2 x double> to <2 x i32> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception metadata.
; The expected lowering in the checks below differs per configuration
;  - SSE2 on i686 handles each lane with a comisd against a double loaded from
;    memory, a conditional subtract of that value, a 32-bit cvttsd2si, and an
;    xor of the result with (setae << 31).
;  - SSE2 and AVX on x86_64 convert each lane with a 64-bit (v)cvttsd2si and
;    keep only the low 32 bits.
;  - AVX on i686 stores both lanes to the stack and converts them with x87
;    fldl/fisttpll before reloading the low dwords.
;  - AVX512F and AVX512DQ without VL use the zmm form of vcvttpd2udq followed
;    by vzeroupper, while the VL configurations use the xmm form directly.
1561 define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
1562 ; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1564 ; SSE-32-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
1565 ; SSE-32-NEXT: comisd %xmm3, %xmm0
1566 ; SSE-32-NEXT: xorpd %xmm2, %xmm2
1567 ; SSE-32-NEXT: xorpd %xmm1, %xmm1
1568 ; SSE-32-NEXT: jb .LBB7_2
1569 ; SSE-32-NEXT: # %bb.1:
1570 ; SSE-32-NEXT: movapd %xmm3, %xmm1
1571 ; SSE-32-NEXT: .LBB7_2:
1572 ; SSE-32-NEXT: setae %al
1573 ; SSE-32-NEXT: movzbl %al, %eax
1574 ; SSE-32-NEXT: shll $31, %eax
1575 ; SSE-32-NEXT: movapd %xmm0, %xmm4
1576 ; SSE-32-NEXT: subsd %xmm1, %xmm4
1577 ; SSE-32-NEXT: cvttsd2si %xmm4, %ecx
1578 ; SSE-32-NEXT: xorl %eax, %ecx
1579 ; SSE-32-NEXT: movd %ecx, %xmm1
1580 ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
1581 ; SSE-32-NEXT: comisd %xmm3, %xmm0
1582 ; SSE-32-NEXT: jb .LBB7_4
1583 ; SSE-32-NEXT: # %bb.3:
1584 ; SSE-32-NEXT: movapd %xmm3, %xmm2
1585 ; SSE-32-NEXT: .LBB7_4:
1586 ; SSE-32-NEXT: setae %al
1587 ; SSE-32-NEXT: movzbl %al, %eax
1588 ; SSE-32-NEXT: shll $31, %eax
1589 ; SSE-32-NEXT: subsd %xmm2, %xmm0
1590 ; SSE-32-NEXT: cvttsd2si %xmm0, %ecx
1591 ; SSE-32-NEXT: xorl %eax, %ecx
1592 ; SSE-32-NEXT: movd %ecx, %xmm0
1593 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1594 ; SSE-32-NEXT: movdqa %xmm1, %xmm0
1597 ; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1599 ; SSE-64-NEXT: cvttsd2si %xmm0, %rax
1600 ; SSE-64-NEXT: movd %eax, %xmm1
1601 ; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
1602 ; SSE-64-NEXT: cvttsd2si %xmm0, %rax
1603 ; SSE-64-NEXT: movd %eax, %xmm0
1604 ; SSE-64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1605 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
1608 ; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1610 ; AVX-32-NEXT: pushl %ebp
1611 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
1612 ; AVX-32-NEXT: .cfi_offset %ebp, -8
1613 ; AVX-32-NEXT: movl %esp, %ebp
1614 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
1615 ; AVX-32-NEXT: andl $-8, %esp
1616 ; AVX-32-NEXT: subl $16, %esp
1617 ; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
1618 ; AVX-32-NEXT: vmovhps %xmm0, (%esp)
1619 ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
1620 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
1621 ; AVX-32-NEXT: fldl (%esp)
1622 ; AVX-32-NEXT: fisttpll (%esp)
1624 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1625 ; AVX-32-NEXT: vpinsrd $1, (%esp), %xmm0, %xmm0
1626 ; AVX-32-NEXT: movl %ebp, %esp
1627 ; AVX-32-NEXT: popl %ebp
1628 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
1631 ; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1633 ; AVX-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1634 ; AVX-64-NEXT: vcvttsd2si %xmm1, %rax
1635 ; AVX-64-NEXT: vcvttsd2si %xmm0, %rcx
1636 ; AVX-64-NEXT: vmovd %ecx, %xmm0
1637 ; AVX-64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
1640 ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1642 ; AVX512F-NEXT: vmovaps %xmm0, %xmm0
1643 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
1644 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1645 ; AVX512F-NEXT: vzeroupper
1646 ; AVX512F-NEXT: ret{{[l|q]}}
1648 ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1649 ; AVX512VL: # %bb.0:
1650 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
1651 ; AVX512VL-NEXT: ret{{[l|q]}}
1653 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1654 ; AVX512DQ: # %bb.0:
1655 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
1656 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
1657 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1658 ; AVX512DQ-NEXT: vzeroupper
1659 ; AVX512DQ-NEXT: ret{{[l|q]}}
1661 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
1662 ; AVX512VLDQ: # %bb.0:
1663 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
1664 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
1665 %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %a,
1666 metadata !"fpexcept.strict") #0
; Strict-FP signed conversion of <2 x float> to <2 x i32> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below expects a (v)movq that keeps the low
; 64 bits of xmm0 and zeroes the rest, followed by a packed (v)cvttps2dq.
1670 define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 {
1671 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1673 ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1674 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
1677 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1679 ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1680 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
1683 ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1685 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1686 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
1687 ; AVX-NEXT: ret{{[l|q]}}
1689 ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1691 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1692 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
1693 ; AVX512F-NEXT: ret{{[l|q]}}
1695 ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1696 ; AVX512VL: # %bb.0:
1697 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1698 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
1699 ; AVX512VL-NEXT: ret{{[l|q]}}
1701 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1702 ; AVX512DQ: # %bb.0:
1703 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1704 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
1705 ; AVX512DQ-NEXT: ret{{[l|q]}}
1707 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
1708 ; AVX512VLDQ: # %bb.0:
1709 ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1710 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
1711 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
1712 %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float> %a,
1713 metadata !"fpexcept.strict") #0
; Strict-FP unsigned conversion of <2 x float> to <2 x i32> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception metadata.
; The expected lowering in the checks below differs per configuration
;  - SSE2 on i686 handles each lane with a comiss against a float loaded from
;    memory, a conditional subtract of that value, a 32-bit cvttss2si, and an
;    xor of the result with (setae << 31).
;  - SSE2 and AVX on x86_64 convert each lane with a 64-bit (v)cvttss2si and
;    keep only the low 32 bits.
;  - AVX on i686 stores both lanes to the stack and converts them with x87
;    flds/fisttpll before reloading the low dwords.
;  - AVX512F and AVX512DQ without VL zero the upper lanes with vmovq and use
;    the zmm form of vcvttps2udq followed by vzeroupper, while the VL
;    configurations use vmovq plus the xmm form of vcvttps2udq.
1717 define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
1718 ; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1720 ; SSE-32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1721 ; SSE-32-NEXT: comiss %xmm3, %xmm0
1722 ; SSE-32-NEXT: xorps %xmm2, %xmm2
1723 ; SSE-32-NEXT: xorps %xmm1, %xmm1
1724 ; SSE-32-NEXT: jb .LBB9_2
1725 ; SSE-32-NEXT: # %bb.1:
1726 ; SSE-32-NEXT: movaps %xmm3, %xmm1
1727 ; SSE-32-NEXT: .LBB9_2:
1728 ; SSE-32-NEXT: setae %al
1729 ; SSE-32-NEXT: movzbl %al, %eax
1730 ; SSE-32-NEXT: shll $31, %eax
1731 ; SSE-32-NEXT: movaps %xmm0, %xmm4
1732 ; SSE-32-NEXT: subss %xmm1, %xmm4
1733 ; SSE-32-NEXT: cvttss2si %xmm4, %ecx
1734 ; SSE-32-NEXT: xorl %eax, %ecx
1735 ; SSE-32-NEXT: movd %ecx, %xmm1
1736 ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1737 ; SSE-32-NEXT: comiss %xmm3, %xmm0
1738 ; SSE-32-NEXT: jb .LBB9_4
1739 ; SSE-32-NEXT: # %bb.3:
1740 ; SSE-32-NEXT: movaps %xmm3, %xmm2
1741 ; SSE-32-NEXT: .LBB9_4:
1742 ; SSE-32-NEXT: setae %al
1743 ; SSE-32-NEXT: movzbl %al, %eax
1744 ; SSE-32-NEXT: shll $31, %eax
1745 ; SSE-32-NEXT: subss %xmm2, %xmm0
1746 ; SSE-32-NEXT: cvttss2si %xmm0, %ecx
1747 ; SSE-32-NEXT: xorl %eax, %ecx
1748 ; SSE-32-NEXT: movd %ecx, %xmm0
1749 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1750 ; SSE-32-NEXT: movdqa %xmm1, %xmm0
1753 ; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1755 ; SSE-64-NEXT: cvttss2si %xmm0, %rax
1756 ; SSE-64-NEXT: movd %eax, %xmm1
1757 ; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1758 ; SSE-64-NEXT: cvttss2si %xmm0, %rax
1759 ; SSE-64-NEXT: movd %eax, %xmm0
1760 ; SSE-64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1761 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
1764 ; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1766 ; AVX-32-NEXT: pushl %ebp
1767 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
1768 ; AVX-32-NEXT: .cfi_offset %ebp, -8
1769 ; AVX-32-NEXT: movl %esp, %ebp
1770 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
1771 ; AVX-32-NEXT: andl $-8, %esp
1772 ; AVX-32-NEXT: subl $16, %esp
1773 ; AVX-32-NEXT: vmovss %xmm0, (%esp)
1774 ; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
1775 ; AVX-32-NEXT: flds (%esp)
1776 ; AVX-32-NEXT: fisttpll (%esp)
1777 ; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
1778 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
1780 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1781 ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
1782 ; AVX-32-NEXT: movl %ebp, %esp
1783 ; AVX-32-NEXT: popl %ebp
1784 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
1787 ; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1789 ; AVX-64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1790 ; AVX-64-NEXT: vcvttss2si %xmm1, %rax
1791 ; AVX-64-NEXT: vcvttss2si %xmm0, %rcx
1792 ; AVX-64-NEXT: vmovd %ecx, %xmm0
1793 ; AVX-64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
1796 ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1798 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1799 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1800 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1801 ; AVX512F-NEXT: vzeroupper
1802 ; AVX512F-NEXT: ret{{[l|q]}}
1804 ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1805 ; AVX512VL: # %bb.0:
1806 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1807 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1808 ; AVX512VL-NEXT: ret{{[l|q]}}
1810 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1811 ; AVX512DQ: # %bb.0:
1812 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1813 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
1814 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1815 ; AVX512DQ-NEXT: vzeroupper
1816 ; AVX512DQ-NEXT: ret{{[l|q]}}
1818 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
1819 ; AVX512VLDQ: # %bb.0:
1820 ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1821 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1822 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
1823 %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float> %a,
1824 metadata !"fpexcept.strict") #0
; Strict-FP signed conversion of <2 x double> to <2 x i16> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below expects a packed (v)cvttpd2dq to 32-bit
; lanes followed by a (v)packssdw that narrows them to 16 bits.
1828 define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 {
1829 ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1831 ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
1832 ; SSE-32-NEXT: packssdw %xmm0, %xmm0
1835 ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1837 ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
1838 ; SSE-64-NEXT: packssdw %xmm0, %xmm0
1841 ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1843 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
1844 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1845 ; AVX-NEXT: ret{{[l|q]}}
1847 ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1849 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
1850 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1851 ; AVX512F-NEXT: ret{{[l|q]}}
1853 ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1854 ; AVX512VL: # %bb.0:
1855 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
1856 ; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1857 ; AVX512VL-NEXT: ret{{[l|q]}}
1859 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1860 ; AVX512DQ: # %bb.0:
1861 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
1862 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1863 ; AVX512DQ-NEXT: ret{{[l|q]}}
1865 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
1866 ; AVX512VLDQ: # %bb.0:
1867 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
1868 ; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1869 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
1870 %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
1871 metadata !"fpexcept.strict") #0
; Strict-FP unsigned conversion of <2 x double> to <2 x i16> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below converts with the signed packed
; (v)cvttpd2dq. The SSE2 configurations then pick the low word of each dword
; with pshuflw, while the AVX and AVX512 configurations narrow with vpackusdw.
1875 define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
1876 ; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1878 ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
1879 ; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1882 ; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1884 ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
1885 ; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1888 ; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1890 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
1891 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
1892 ; AVX-NEXT: ret{{[l|q]}}
1894 ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1896 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
1897 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
1898 ; AVX512F-NEXT: ret{{[l|q]}}
1900 ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1901 ; AVX512VL: # %bb.0:
1902 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
1903 ; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
1904 ; AVX512VL-NEXT: ret{{[l|q]}}
1906 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1907 ; AVX512DQ: # %bb.0:
1908 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
1909 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
1910 ; AVX512DQ-NEXT: ret{{[l|q]}}
1912 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
1913 ; AVX512VLDQ: # %bb.0:
1914 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
1915 ; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
1916 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
1917 %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
1918 metadata !"fpexcept.strict") #0
; Strict-FP signed conversion of <2 x float> to <2 x i16> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below expects a (v)movq that zeroes the upper
; half of xmm0, a packed (v)cvttps2dq, and a (v)packssdw narrowing to 16 bits.
1922 define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
1923 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1925 ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1926 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
1927 ; SSE-32-NEXT: packssdw %xmm0, %xmm0
1930 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1932 ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1933 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
1934 ; SSE-64-NEXT: packssdw %xmm0, %xmm0
1937 ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1939 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1940 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
1941 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1942 ; AVX-NEXT: ret{{[l|q]}}
1944 ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1946 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1947 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
1948 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1949 ; AVX512F-NEXT: ret{{[l|q]}}
1951 ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1952 ; AVX512VL: # %bb.0:
1953 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1954 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
1955 ; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1956 ; AVX512VL-NEXT: ret{{[l|q]}}
1958 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1959 ; AVX512DQ: # %bb.0:
1960 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1961 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
1962 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1963 ; AVX512DQ-NEXT: ret{{[l|q]}}
1965 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
1966 ; AVX512VLDQ: # %bb.0:
1967 ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1968 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
1969 ; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
1970 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
1971 %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
1972 metadata !"fpexcept.strict") #0
; Strict-FP unsigned conversion of <2 x float> to <2 x i16> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below zeroes the upper half of xmm0 with (v)movq
; and converts with the signed packed (v)cvttps2dq. The SSE2 configurations
; then pick the low word of each dword with pshuflw, while the AVX and AVX512
; configurations narrow with vpackusdw.
1976 define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
1977 ; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
1979 ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1980 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
1981 ; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1984 ; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
1986 ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1987 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
1988 ; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1991 ; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
1993 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1994 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
1995 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
1996 ; AVX-NEXT: ret{{[l|q]}}
1998 ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
2000 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2001 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2002 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2003 ; AVX512F-NEXT: ret{{[l|q]}}
2005 ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
2006 ; AVX512VL: # %bb.0:
2007 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2008 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2009 ; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2010 ; AVX512VL-NEXT: ret{{[l|q]}}
2012 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
2013 ; AVX512DQ: # %bb.0:
2014 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2015 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2016 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2017 ; AVX512DQ-NEXT: ret{{[l|q]}}
2019 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
2020 ; AVX512VLDQ: # %bb.0:
2021 ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2022 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2023 ; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2024 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2025 %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
2026 metadata !"fpexcept.strict") #0
; Strict-FP signed conversion of <2 x double> to <2 x i8> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below converts with a packed (v)cvttpd2dq. The
; configurations without AVX512VL then narrow in two steps with (v)packssdw
; and (v)packsswb, while the AVX512VL configurations narrow with one vpmovdb.
2030 define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
2031 ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2033 ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
2034 ; SSE-32-NEXT: packssdw %xmm0, %xmm0
2035 ; SSE-32-NEXT: packsswb %xmm0, %xmm0
2038 ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2040 ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
2041 ; SSE-64-NEXT: packssdw %xmm0, %xmm0
2042 ; SSE-64-NEXT: packsswb %xmm0, %xmm0
2045 ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2047 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2048 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2049 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2050 ; AVX-NEXT: ret{{[l|q]}}
2052 ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2054 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2055 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2056 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2057 ; AVX512F-NEXT: ret{{[l|q]}}
2059 ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2060 ; AVX512VL: # %bb.0:
2061 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2062 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2063 ; AVX512VL-NEXT: ret{{[l|q]}}
2065 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2066 ; AVX512DQ: # %bb.0:
2067 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2068 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2069 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2070 ; AVX512DQ-NEXT: ret{{[l|q]}}
2072 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
2073 ; AVX512VLDQ: # %bb.0:
2074 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2075 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2076 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2077 %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double> %a,
2078 metadata !"fpexcept.strict") #0
; Strict-FP unsigned conversion of <2 x double> to <2 x i8> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below converts with the signed packed
; (v)cvttpd2dq. The SSE2 configurations then apply packuswb twice, the AVX,
; AVX512F and AVX512DQ configurations apply vpackusdw then vpackuswb, and the
; AVX512VL configurations narrow with one vpmovdb.
2082 define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
2083 ; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2085 ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
2086 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
2087 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
2090 ; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2092 ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
2093 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
2094 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
2097 ; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2099 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
2100 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2101 ; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2102 ; AVX-NEXT: ret{{[l|q]}}
2104 ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2106 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2107 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2108 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2109 ; AVX512F-NEXT: ret{{[l|q]}}
2111 ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2112 ; AVX512VL: # %bb.0:
2113 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2114 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2115 ; AVX512VL-NEXT: ret{{[l|q]}}
2117 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2118 ; AVX512DQ: # %bb.0:
2119 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2120 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2121 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2122 ; AVX512DQ-NEXT: ret{{[l|q]}}
2124 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
2125 ; AVX512VLDQ: # %bb.0:
2126 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2127 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2128 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2129 %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double> %a,
2130 metadata !"fpexcept.strict") #0
; Strict-FP signed conversion of <2 x float> to <2 x i8> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below zeroes the upper half of xmm0 with (v)movq
; and converts with a packed (v)cvttps2dq. The configurations without AVX512VL
; then narrow with (v)packssdw and (v)packsswb, while the AVX512VL
; configurations narrow with one vpmovdb.
2134 define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
2135 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2137 ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2138 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
2139 ; SSE-32-NEXT: packssdw %xmm0, %xmm0
2140 ; SSE-32-NEXT: packsswb %xmm0, %xmm0
2143 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2145 ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2146 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
2147 ; SSE-64-NEXT: packssdw %xmm0, %xmm0
2148 ; SSE-64-NEXT: packsswb %xmm0, %xmm0
2151 ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2153 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2154 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2155 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2156 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2157 ; AVX-NEXT: ret{{[l|q]}}
2159 ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2161 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2162 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2163 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2164 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2165 ; AVX512F-NEXT: ret{{[l|q]}}
2167 ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2168 ; AVX512VL: # %bb.0:
2169 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2170 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2171 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2172 ; AVX512VL-NEXT: ret{{[l|q]}}
2174 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2175 ; AVX512DQ: # %bb.0:
2176 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2177 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2178 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
2179 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
2180 ; AVX512DQ-NEXT: ret{{[l|q]}}
2182 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
2183 ; AVX512VLDQ: # %bb.0:
2184 ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2185 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2186 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2187 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2188 %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float> %a,
2189 metadata !"fpexcept.strict") #0
; Strict-FP unsigned conversion of <2 x float> to <2 x i8> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception metadata.
; Every checked configuration below zeroes the upper half of xmm0 with (v)movq
; and converts with the signed packed (v)cvttps2dq. The SSE2 configurations
; then apply packuswb twice, the AVX, AVX512F and AVX512DQ configurations
; apply vpackusdw then vpackuswb, and the AVX512VL configurations narrow with
; one vpmovdb.
2193 define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
2194 ; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2196 ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2197 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
2198 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
2199 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
2202 ; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2204 ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2205 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
2206 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
2207 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
2210 ; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2212 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2213 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
2214 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2215 ; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2216 ; AVX-NEXT: ret{{[l|q]}}
2218 ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2220 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2221 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
2222 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2223 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2224 ; AVX512F-NEXT: ret{{[l|q]}}
2226 ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2227 ; AVX512VL: # %bb.0:
2228 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2229 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
2230 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
2231 ; AVX512VL-NEXT: ret{{[l|q]}}
2233 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2234 ; AVX512DQ: # %bb.0:
2235 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2236 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
2237 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
2238 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2239 ; AVX512DQ-NEXT: ret{{[l|q]}}
2241 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
2242 ; AVX512VLDQ: # %bb.0:
2243 ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2244 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
2245 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
2246 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2247 %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float> %a,
2248 metadata !"fpexcept.strict") #0
; Strict-FP signed conversion of <2 x double> to <2 x i1> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception metadata.
; The expected lowering in the checks below differs per configuration
;  - SSE2 on i686 stores both lanes to the stack and converts each one on the
;    x87 stack with fistpll, bracketed by fnstcw/fldcw control-word updates
;    that or 0xC00 into the saved control word.
;  - AVX on i686 also goes through the stack but converts with fisttpll and
;    rebuilds the vector with vmovd and vpinsrd.
;  - SSE2 and AVX on x86_64 convert each lane with a 64-bit (v)cvttsd2si and
;    recombine the two 64-bit results with (v)punpcklqdq.
;  - The AVX512 configurations convert with vcvttpd2dq, move bit 0 of each
;    result into the sign position with vpslld $31, form a k-register mask
;    (vptestmd, or vpmovd2m when DQ is available), and expand that mask back
;    into xmm0.
2252 define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 {
2253 ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2255 ; SSE-32-NEXT: pushl %ebp
2256 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
2257 ; SSE-32-NEXT: .cfi_offset %ebp, -8
2258 ; SSE-32-NEXT: movl %esp, %ebp
2259 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
2260 ; SSE-32-NEXT: andl $-8, %esp
2261 ; SSE-32-NEXT: subl $24, %esp
2262 ; SSE-32-NEXT: movhps %xmm0, {{[0-9]+}}(%esp)
2263 ; SSE-32-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
2264 ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
2266 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
2267 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2268 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
2269 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
2270 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2271 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
2272 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2273 ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
2275 ; SSE-32-NEXT: fnstcw (%esp)
2276 ; SSE-32-NEXT: movzwl (%esp), %eax
2277 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
2278 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
2279 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2280 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
2281 ; SSE-32-NEXT: fldcw (%esp)
2282 ; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2283 ; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2284 ; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2285 ; SSE-32-NEXT: movl %ebp, %esp
2286 ; SSE-32-NEXT: popl %ebp
2287 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
2290 ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2292 ; SSE-64-NEXT: cvttsd2si %xmm0, %rax
2293 ; SSE-64-NEXT: movq %rax, %xmm1
2294 ; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
2295 ; SSE-64-NEXT: cvttsd2si %xmm0, %rax
2296 ; SSE-64-NEXT: movq %rax, %xmm0
2297 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2298 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
2301 ; AVX-32-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2303 ; AVX-32-NEXT: pushl %ebp
2304 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
2305 ; AVX-32-NEXT: .cfi_offset %ebp, -8
2306 ; AVX-32-NEXT: movl %esp, %ebp
2307 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
2308 ; AVX-32-NEXT: andl $-8, %esp
2309 ; AVX-32-NEXT: subl $16, %esp
2310 ; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
2311 ; AVX-32-NEXT: vmovhps %xmm0, (%esp)
2312 ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
2313 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
2314 ; AVX-32-NEXT: fldl (%esp)
2315 ; AVX-32-NEXT: fisttpll (%esp)
2317 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2318 ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
2319 ; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
2320 ; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
2321 ; AVX-32-NEXT: movl %ebp, %esp
2322 ; AVX-32-NEXT: popl %ebp
2323 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
2326 ; AVX-64-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2328 ; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
2329 ; AVX-64-NEXT: vmovq %rax, %xmm1
2330 ; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
2331 ; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
2332 ; AVX-64-NEXT: vmovq %rax, %xmm0
2333 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2336 ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2338 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
2339 ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
2340 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
2341 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2342 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2343 ; AVX512F-NEXT: vzeroupper
2344 ; AVX512F-NEXT: ret{{[l|q]}}
2346 ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2347 ; AVX512VL: # %bb.0:
2348 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
2349 ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
2350 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
2351 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
2352 ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
2353 ; AVX512VL-NEXT: ret{{[l|q]}}
2355 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2356 ; AVX512DQ: # %bb.0:
2357 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2358 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
2359 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2360 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2361 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2362 ; AVX512DQ-NEXT: vzeroupper
2363 ; AVX512DQ-NEXT: ret{{[l|q]}}
2365 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
2366 ; AVX512VLDQ: # %bb.0:
2367 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
2368 ; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
2369 ; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
2370 ; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
2371 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2372 %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double> %a,
2373 metadata !"fpexcept.strict") #0
2377 define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
2378 ; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2380 ; SSE-32-NEXT: pushl %ebp
2381 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
2382 ; SSE-32-NEXT: .cfi_offset %ebp, -8
2383 ; SSE-32-NEXT: movl %esp, %ebp
2384 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
2385 ; SSE-32-NEXT: andl $-8, %esp
2386 ; SSE-32-NEXT: subl $24, %esp
2387 ; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2388 ; SSE-32-NEXT: comisd %xmm1, %xmm0
2389 ; SSE-32-NEXT: movapd %xmm1, %xmm2
2390 ; SSE-32-NEXT: jae .LBB19_2
2391 ; SSE-32-NEXT: # %bb.1:
2392 ; SSE-32-NEXT: xorpd %xmm2, %xmm2
2393 ; SSE-32-NEXT: .LBB19_2:
2394 ; SSE-32-NEXT: movapd %xmm0, %xmm3
2395 ; SSE-32-NEXT: subsd %xmm2, %xmm3
2396 ; SSE-32-NEXT: movsd %xmm3, {{[0-9]+}}(%esp)
2397 ; SSE-32-NEXT: setae %al
2398 ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
2400 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
2401 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
2402 ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
2403 ; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
2404 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2405 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
2406 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2407 ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
2408 ; SSE-32-NEXT: comisd %xmm1, %xmm0
2409 ; SSE-32-NEXT: jae .LBB19_4
2410 ; SSE-32-NEXT: # %bb.3:
2411 ; SSE-32-NEXT: xorpd %xmm1, %xmm1
2412 ; SSE-32-NEXT: .LBB19_4:
2413 ; SSE-32-NEXT: subsd %xmm1, %xmm0
2414 ; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
2415 ; SSE-32-NEXT: setae %cl
2416 ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
2418 ; SSE-32-NEXT: fnstcw (%esp)
2419 ; SSE-32-NEXT: movzwl (%esp), %edx
2420 ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
2421 ; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
2422 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2423 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
2424 ; SSE-32-NEXT: fldcw (%esp)
2425 ; SSE-32-NEXT: movzbl %al, %eax
2426 ; SSE-32-NEXT: shll $31, %eax
2427 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
2428 ; SSE-32-NEXT: movd %eax, %xmm1
2429 ; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2430 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2431 ; SSE-32-NEXT: movzbl %cl, %eax
2432 ; SSE-32-NEXT: shll $31, %eax
2433 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
2434 ; SSE-32-NEXT: movd %eax, %xmm1
2435 ; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2436 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2437 ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2438 ; SSE-32-NEXT: movl %ebp, %esp
2439 ; SSE-32-NEXT: popl %ebp
2440 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
2443 ; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2445 ; SSE-64-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
2446 ; SSE-64-NEXT: comisd %xmm3, %xmm0
2447 ; SSE-64-NEXT: xorpd %xmm2, %xmm2
2448 ; SSE-64-NEXT: xorpd %xmm1, %xmm1
2449 ; SSE-64-NEXT: jb .LBB19_2
2450 ; SSE-64-NEXT: # %bb.1:
2451 ; SSE-64-NEXT: movapd %xmm3, %xmm1
2452 ; SSE-64-NEXT: .LBB19_2:
2453 ; SSE-64-NEXT: movapd %xmm0, %xmm4
2454 ; SSE-64-NEXT: subsd %xmm1, %xmm4
2455 ; SSE-64-NEXT: cvttsd2si %xmm4, %rax
2456 ; SSE-64-NEXT: setae %cl
2457 ; SSE-64-NEXT: movzbl %cl, %ecx
2458 ; SSE-64-NEXT: shlq $63, %rcx
2459 ; SSE-64-NEXT: xorq %rax, %rcx
2460 ; SSE-64-NEXT: movq %rcx, %xmm1
2461 ; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
2462 ; SSE-64-NEXT: comisd %xmm3, %xmm0
2463 ; SSE-64-NEXT: jb .LBB19_4
2464 ; SSE-64-NEXT: # %bb.3:
2465 ; SSE-64-NEXT: movapd %xmm3, %xmm2
2466 ; SSE-64-NEXT: .LBB19_4:
2467 ; SSE-64-NEXT: subsd %xmm2, %xmm0
2468 ; SSE-64-NEXT: cvttsd2si %xmm0, %rax
2469 ; SSE-64-NEXT: setae %cl
2470 ; SSE-64-NEXT: movzbl %cl, %ecx
2471 ; SSE-64-NEXT: shlq $63, %rcx
2472 ; SSE-64-NEXT: xorq %rax, %rcx
2473 ; SSE-64-NEXT: movq %rcx, %xmm0
2474 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2475 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
2478 ; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2480 ; AVX-32-NEXT: pushl %ebp
2481 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
2482 ; AVX-32-NEXT: .cfi_offset %ebp, -8
2483 ; AVX-32-NEXT: movl %esp, %ebp
2484 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
2485 ; AVX-32-NEXT: andl $-8, %esp
2486 ; AVX-32-NEXT: subl $16, %esp
2487 ; AVX-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
2488 ; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
2489 ; AVX-32-NEXT: vcomisd %xmm1, %xmm2
2490 ; AVX-32-NEXT: vmovapd %xmm1, %xmm3
2491 ; AVX-32-NEXT: jae .LBB19_2
2492 ; AVX-32-NEXT: # %bb.1:
2493 ; AVX-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2494 ; AVX-32-NEXT: .LBB19_2:
2495 ; AVX-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2
2496 ; AVX-32-NEXT: vmovsd %xmm2, (%esp)
2497 ; AVX-32-NEXT: fldl (%esp)
2498 ; AVX-32-NEXT: fisttpll (%esp)
2500 ; AVX-32-NEXT: setae %al
2501 ; AVX-32-NEXT: movzbl %al, %eax
2502 ; AVX-32-NEXT: shll $31, %eax
2503 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
2504 ; AVX-32-NEXT: vcomisd %xmm1, %xmm0
2505 ; AVX-32-NEXT: jae .LBB19_4
2506 ; AVX-32-NEXT: # %bb.3:
2507 ; AVX-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2508 ; AVX-32-NEXT: .LBB19_4:
2509 ; AVX-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
2510 ; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
2511 ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
2512 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
2514 ; AVX-32-NEXT: setae %cl
2515 ; AVX-32-NEXT: movzbl %cl, %ecx
2516 ; AVX-32-NEXT: shll $31, %ecx
2517 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
2518 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2519 ; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
2520 ; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
2521 ; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
2522 ; AVX-32-NEXT: movl %ebp, %esp
2523 ; AVX-32-NEXT: popl %ebp
2524 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
2527 ; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2529 ; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
2530 ; AVX-64-NEXT: vcomisd %xmm1, %xmm0
2531 ; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2532 ; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2533 ; AVX-64-NEXT: jb .LBB19_2
2534 ; AVX-64-NEXT: # %bb.1:
2535 ; AVX-64-NEXT: vmovapd %xmm1, %xmm3
2536 ; AVX-64-NEXT: .LBB19_2:
2537 ; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
2538 ; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
2539 ; AVX-64-NEXT: setae %cl
2540 ; AVX-64-NEXT: movzbl %cl, %ecx
2541 ; AVX-64-NEXT: shlq $63, %rcx
2542 ; AVX-64-NEXT: xorq %rax, %rcx
2543 ; AVX-64-NEXT: vmovq %rcx, %xmm3
2544 ; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
2545 ; AVX-64-NEXT: vcomisd %xmm1, %xmm0
2546 ; AVX-64-NEXT: jb .LBB19_4
2547 ; AVX-64-NEXT: # %bb.3:
2548 ; AVX-64-NEXT: vmovapd %xmm1, %xmm2
2549 ; AVX-64-NEXT: .LBB19_4:
2550 ; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
2551 ; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
2552 ; AVX-64-NEXT: setae %cl
2553 ; AVX-64-NEXT: movzbl %cl, %ecx
2554 ; AVX-64-NEXT: shlq $63, %rcx
2555 ; AVX-64-NEXT: xorq %rax, %rcx
2556 ; AVX-64-NEXT: vmovq %rcx, %xmm0
2557 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
2560 ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2562 ; AVX512F-NEXT: vmovaps %xmm0, %xmm0
2563 ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
2564 ; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
2565 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
2566 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2567 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2568 ; AVX512F-NEXT: vzeroupper
2569 ; AVX512F-NEXT: ret{{[l|q]}}
2571 ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2572 ; AVX512VL: # %bb.0:
2573 ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
2574 ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
2575 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
2576 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
2577 ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
2578 ; AVX512VL-NEXT: ret{{[l|q]}}
2580 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2581 ; AVX512DQ: # %bb.0:
2582 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
2583 ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
2584 ; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
2585 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
2586 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2587 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2588 ; AVX512DQ-NEXT: vzeroupper
2589 ; AVX512DQ-NEXT: ret{{[l|q]}}
2591 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
2592 ; AVX512VLDQ: # %bb.0:
2593 ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
2594 ; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
2595 ; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
2596 ; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
2597 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2598 %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double> %a,
2599 metadata !"fpexcept.strict") #0
; Strict (constrained) signed conversion of <2 x float> to <2 x i1>, called
; with "fpexcept.strict" metadata from a strictfp function (attribute group #0).
; The expected assembly below is autogenerated by update_llc_test_checks.py;
; regenerate it with that script instead of editing it by hand.
; Expected lowering, as recorded in the checks below:
;   - 32-bit SSE and AVX go through the x87 unit (flds + fistpll or fisttpll).
;   - 64-bit SSE and AVX use one scalar cvttss2si per element.
;   - The AVX512 variants convert each element with scalar vcvttss2si, merge
;     the two results into a mask register, and expand that mask to a vector.
2603 define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 {
2604 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2606 ; SSE-32-NEXT: pushl %ebp
2607 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
2608 ; SSE-32-NEXT: .cfi_offset %ebp, -8
2609 ; SSE-32-NEXT: movl %esp, %ebp
2610 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
2611 ; SSE-32-NEXT: andl $-8, %esp
2612 ; SSE-32-NEXT: subl $24, %esp
2613 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
2614 ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
2615 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
2616 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
2618 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
2619 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
2620 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
2621 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
2622 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2623 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
2624 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2625 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
2627 ; SSE-32-NEXT: fnstcw (%esp)
2628 ; SSE-32-NEXT: movzwl (%esp), %eax
2629 ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
2630 ; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
2631 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2632 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
2633 ; SSE-32-NEXT: fldcw (%esp)
2634 ; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2635 ; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2636 ; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2637 ; SSE-32-NEXT: movl %ebp, %esp
2638 ; SSE-32-NEXT: popl %ebp
2639 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
2642 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2644 ; SSE-64-NEXT: cvttss2si %xmm0, %rax
2645 ; SSE-64-NEXT: movq %rax, %xmm1
2646 ; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
2647 ; SSE-64-NEXT: cvttss2si %xmm0, %rax
2648 ; SSE-64-NEXT: movq %rax, %xmm0
2649 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2650 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
2653 ; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2655 ; AVX-32-NEXT: pushl %ebp
2656 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
2657 ; AVX-32-NEXT: .cfi_offset %ebp, -8
2658 ; AVX-32-NEXT: movl %esp, %ebp
2659 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
2660 ; AVX-32-NEXT: andl $-8, %esp
2661 ; AVX-32-NEXT: subl $16, %esp
2662 ; AVX-32-NEXT: vmovss %xmm0, (%esp)
2663 ; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
2664 ; AVX-32-NEXT: flds (%esp)
2665 ; AVX-32-NEXT: fisttpll (%esp)
2666 ; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
2667 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
2669 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2670 ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
2671 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
2672 ; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
2673 ; AVX-32-NEXT: movl %ebp, %esp
2674 ; AVX-32-NEXT: popl %ebp
2675 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
2678 ; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2680 ; AVX-64-NEXT: vcvttss2si %xmm0, %rax
2681 ; AVX-64-NEXT: vmovq %rax, %xmm1
2682 ; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2683 ; AVX-64-NEXT: vcvttss2si %xmm0, %rax
2684 ; AVX-64-NEXT: vmovq %rax, %xmm0
2685 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2688 ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2690 ; AVX512F-NEXT: vcvttss2si %xmm0, %eax
2691 ; AVX512F-NEXT: andl $1, %eax
2692 ; AVX512F-NEXT: kmovw %eax, %k0
2693 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2694 ; AVX512F-NEXT: vcvttss2si %xmm0, %eax
2695 ; AVX512F-NEXT: kmovw %eax, %k1
2696 ; AVX512F-NEXT: kshiftlw $1, %k1, %k1
2697 ; AVX512F-NEXT: korw %k1, %k0, %k1
2698 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2699 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2700 ; AVX512F-NEXT: vzeroupper
2701 ; AVX512F-NEXT: ret{{[l|q]}}
2703 ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2704 ; AVX512VL: # %bb.0:
2705 ; AVX512VL-NEXT: vcvttss2si %xmm0, %eax
2706 ; AVX512VL-NEXT: andl $1, %eax
2707 ; AVX512VL-NEXT: kmovw %eax, %k0
2708 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2709 ; AVX512VL-NEXT: vcvttss2si %xmm0, %eax
2710 ; AVX512VL-NEXT: kmovw %eax, %k1
2711 ; AVX512VL-NEXT: kshiftlw $1, %k1, %k1
2712 ; AVX512VL-NEXT: korw %k1, %k0, %k1
2713 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
2714 ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
2715 ; AVX512VL-NEXT: ret{{[l|q]}}
2717 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2718 ; AVX512DQ: # %bb.0:
2719 ; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
2720 ; AVX512DQ-NEXT: vcvttss2si %xmm1, %eax
2721 ; AVX512DQ-NEXT: kmovw %eax, %k0
2722 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
2723 ; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax
2724 ; AVX512DQ-NEXT: kmovw %eax, %k1
2725 ; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
2726 ; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
2727 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
2728 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2729 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2730 ; AVX512DQ-NEXT: vzeroupper
2731 ; AVX512DQ-NEXT: ret{{[l|q]}}
2733 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
2734 ; AVX512VLDQ: # %bb.0:
2735 ; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
2736 ; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
2737 ; AVX512VLDQ-NEXT: kmovw %eax, %k0
2738 ; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0
2739 ; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax
2740 ; AVX512VLDQ-NEXT: kmovw %eax, %k1
2741 ; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1
2742 ; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1
2743 ; AVX512VLDQ-NEXT: korw %k0, %k1, %k0
2744 ; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
2745 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2746 %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float> %a,
2747 metadata !"fpexcept.strict") #0
; Strict (constrained) unsigned conversion of <2 x float> to <2 x i1>, called
; with "fpexcept.strict" metadata from a strictfp function (attribute group #0).
; The expected assembly below is autogenerated by update_llc_test_checks.py;
; regenerate it with that script instead of editing it by hand.
; Expected lowering, as recorded in the checks below:
;   - SSE and AVX (32- and 64-bit) compare each element against a constant
;     loaded from memory (comiss + branch), subtract either that constant or
;     zero, run a signed conversion (fistpll, fisttpll or cvttss2si), and xor
;     in the setae bit shifted to the top of the result.
;   - The AVX512 variants convert each element with scalar vcvttss2si, merge
;     the two results into a mask register, and expand that mask to a vector.
2751 define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
2752 ; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2754 ; SSE-32-NEXT: pushl %ebp
2755 ; SSE-32-NEXT: .cfi_def_cfa_offset 8
2756 ; SSE-32-NEXT: .cfi_offset %ebp, -8
2757 ; SSE-32-NEXT: movl %esp, %ebp
2758 ; SSE-32-NEXT: .cfi_def_cfa_register %ebp
2759 ; SSE-32-NEXT: andl $-8, %esp
2760 ; SSE-32-NEXT: subl $24, %esp
2761 ; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2762 ; SSE-32-NEXT: comiss %xmm1, %xmm0
2763 ; SSE-32-NEXT: movaps %xmm1, %xmm2
2764 ; SSE-32-NEXT: jae .LBB21_2
2765 ; SSE-32-NEXT: # %bb.1:
2766 ; SSE-32-NEXT: xorps %xmm2, %xmm2
2767 ; SSE-32-NEXT: .LBB21_2:
2768 ; SSE-32-NEXT: movaps %xmm0, %xmm3
2769 ; SSE-32-NEXT: subss %xmm2, %xmm3
2770 ; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
2771 ; SSE-32-NEXT: setae %al
2772 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
2774 ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
2775 ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
2776 ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
2777 ; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
2778 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2779 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
2780 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2781 ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
2782 ; SSE-32-NEXT: comiss %xmm1, %xmm0
2783 ; SSE-32-NEXT: jae .LBB21_4
2784 ; SSE-32-NEXT: # %bb.3:
2785 ; SSE-32-NEXT: xorps %xmm1, %xmm1
2786 ; SSE-32-NEXT: .LBB21_4:
2787 ; SSE-32-NEXT: subss %xmm1, %xmm0
2788 ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
2789 ; SSE-32-NEXT: setae %cl
2790 ; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
2792 ; SSE-32-NEXT: fnstcw (%esp)
2793 ; SSE-32-NEXT: movzwl (%esp), %edx
2794 ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
2795 ; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
2796 ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
2797 ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
2798 ; SSE-32-NEXT: fldcw (%esp)
2799 ; SSE-32-NEXT: movzbl %al, %eax
2800 ; SSE-32-NEXT: shll $31, %eax
2801 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
2802 ; SSE-32-NEXT: movd %eax, %xmm1
2803 ; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2804 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2805 ; SSE-32-NEXT: movzbl %cl, %eax
2806 ; SSE-32-NEXT: shll $31, %eax
2807 ; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
2808 ; SSE-32-NEXT: movd %eax, %xmm1
2809 ; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2810 ; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2811 ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2812 ; SSE-32-NEXT: movl %ebp, %esp
2813 ; SSE-32-NEXT: popl %ebp
2814 ; SSE-32-NEXT: .cfi_def_cfa %esp, 4
2817 ; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2819 ; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
2820 ; SSE-64-NEXT: comiss %xmm3, %xmm0
2821 ; SSE-64-NEXT: xorps %xmm2, %xmm2
2822 ; SSE-64-NEXT: xorps %xmm1, %xmm1
2823 ; SSE-64-NEXT: jb .LBB21_2
2824 ; SSE-64-NEXT: # %bb.1:
2825 ; SSE-64-NEXT: movaps %xmm3, %xmm1
2826 ; SSE-64-NEXT: .LBB21_2:
2827 ; SSE-64-NEXT: movaps %xmm0, %xmm4
2828 ; SSE-64-NEXT: subss %xmm1, %xmm4
2829 ; SSE-64-NEXT: cvttss2si %xmm4, %rax
2830 ; SSE-64-NEXT: setae %cl
2831 ; SSE-64-NEXT: movzbl %cl, %ecx
2832 ; SSE-64-NEXT: shlq $63, %rcx
2833 ; SSE-64-NEXT: xorq %rax, %rcx
2834 ; SSE-64-NEXT: movq %rcx, %xmm1
2835 ; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
2836 ; SSE-64-NEXT: comiss %xmm3, %xmm0
2837 ; SSE-64-NEXT: jb .LBB21_4
2838 ; SSE-64-NEXT: # %bb.3:
2839 ; SSE-64-NEXT: movaps %xmm3, %xmm2
2840 ; SSE-64-NEXT: .LBB21_4:
2841 ; SSE-64-NEXT: subss %xmm2, %xmm0
2842 ; SSE-64-NEXT: cvttss2si %xmm0, %rax
2843 ; SSE-64-NEXT: setae %cl
2844 ; SSE-64-NEXT: movzbl %cl, %ecx
2845 ; SSE-64-NEXT: shlq $63, %rcx
2846 ; SSE-64-NEXT: xorq %rax, %rcx
2847 ; SSE-64-NEXT: movq %rcx, %xmm0
2848 ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2849 ; SSE-64-NEXT: movdqa %xmm1, %xmm0
2852 ; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2854 ; AVX-32-NEXT: pushl %ebp
2855 ; AVX-32-NEXT: .cfi_def_cfa_offset 8
2856 ; AVX-32-NEXT: .cfi_offset %ebp, -8
2857 ; AVX-32-NEXT: movl %esp, %ebp
2858 ; AVX-32-NEXT: .cfi_def_cfa_register %ebp
2859 ; AVX-32-NEXT: andl $-8, %esp
2860 ; AVX-32-NEXT: subl $16, %esp
2861 ; AVX-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
2862 ; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2863 ; AVX-32-NEXT: vcomiss %xmm1, %xmm2
2864 ; AVX-32-NEXT: vmovaps %xmm1, %xmm3
2865 ; AVX-32-NEXT: jae .LBB21_2
2866 ; AVX-32-NEXT: # %bb.1:
2867 ; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
2868 ; AVX-32-NEXT: .LBB21_2:
2869 ; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2
2870 ; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
2871 ; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
2872 ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
2874 ; AVX-32-NEXT: setae %al
2875 ; AVX-32-NEXT: movzbl %al, %eax
2876 ; AVX-32-NEXT: shll $31, %eax
2877 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
2878 ; AVX-32-NEXT: vcomiss %xmm1, %xmm0
2879 ; AVX-32-NEXT: jae .LBB21_4
2880 ; AVX-32-NEXT: # %bb.3:
2881 ; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
2882 ; AVX-32-NEXT: .LBB21_4:
2883 ; AVX-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
2884 ; AVX-32-NEXT: vmovss %xmm0, (%esp)
2885 ; AVX-32-NEXT: flds (%esp)
2886 ; AVX-32-NEXT: fisttpll (%esp)
2888 ; AVX-32-NEXT: setae %cl
2889 ; AVX-32-NEXT: movzbl %cl, %ecx
2890 ; AVX-32-NEXT: shll $31, %ecx
2891 ; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
2892 ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2893 ; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
2894 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
2895 ; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
2896 ; AVX-32-NEXT: movl %ebp, %esp
2897 ; AVX-32-NEXT: popl %ebp
2898 ; AVX-32-NEXT: .cfi_def_cfa %esp, 4
2901 ; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2903 ; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2904 ; AVX-64-NEXT: vcomiss %xmm1, %xmm0
2905 ; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
2906 ; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
2907 ; AVX-64-NEXT: jb .LBB21_2
2908 ; AVX-64-NEXT: # %bb.1:
2909 ; AVX-64-NEXT: vmovaps %xmm1, %xmm3
2910 ; AVX-64-NEXT: .LBB21_2:
2911 ; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
2912 ; AVX-64-NEXT: vcvttss2si %xmm3, %rax
2913 ; AVX-64-NEXT: setae %cl
2914 ; AVX-64-NEXT: movzbl %cl, %ecx
2915 ; AVX-64-NEXT: shlq $63, %rcx
2916 ; AVX-64-NEXT: xorq %rax, %rcx
2917 ; AVX-64-NEXT: vmovq %rcx, %xmm3
2918 ; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2919 ; AVX-64-NEXT: vcomiss %xmm1, %xmm0
2920 ; AVX-64-NEXT: jb .LBB21_4
2921 ; AVX-64-NEXT: # %bb.3:
2922 ; AVX-64-NEXT: vmovaps %xmm1, %xmm2
2923 ; AVX-64-NEXT: .LBB21_4:
2924 ; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
2925 ; AVX-64-NEXT: vcvttss2si %xmm0, %rax
2926 ; AVX-64-NEXT: setae %cl
2927 ; AVX-64-NEXT: movzbl %cl, %ecx
2928 ; AVX-64-NEXT: shlq $63, %rcx
2929 ; AVX-64-NEXT: xorq %rax, %rcx
2930 ; AVX-64-NEXT: vmovq %rcx, %xmm0
2931 ; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
2934 ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2936 ; AVX512F-NEXT: vcvttss2si %xmm0, %eax
2937 ; AVX512F-NEXT: andl $1, %eax
2938 ; AVX512F-NEXT: kmovw %eax, %k0
2939 ; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2940 ; AVX512F-NEXT: vcvttss2si %xmm0, %eax
2941 ; AVX512F-NEXT: kmovw %eax, %k1
2942 ; AVX512F-NEXT: kshiftlw $1, %k1, %k1
2943 ; AVX512F-NEXT: korw %k1, %k0, %k1
2944 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
2945 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2946 ; AVX512F-NEXT: vzeroupper
2947 ; AVX512F-NEXT: ret{{[l|q]}}
2949 ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2950 ; AVX512VL: # %bb.0:
2951 ; AVX512VL-NEXT: vcvttss2si %xmm0, %eax
2952 ; AVX512VL-NEXT: andl $1, %eax
2953 ; AVX512VL-NEXT: kmovw %eax, %k0
2954 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2955 ; AVX512VL-NEXT: vcvttss2si %xmm0, %eax
2956 ; AVX512VL-NEXT: kmovw %eax, %k1
2957 ; AVX512VL-NEXT: kshiftlw $1, %k1, %k1
2958 ; AVX512VL-NEXT: korw %k1, %k0, %k1
2959 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
2960 ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
2961 ; AVX512VL-NEXT: ret{{[l|q]}}
2963 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2964 ; AVX512DQ: # %bb.0:
2965 ; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
2966 ; AVX512DQ-NEXT: vcvttss2si %xmm1, %eax
2967 ; AVX512DQ-NEXT: kmovw %eax, %k0
2968 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
2969 ; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax
2970 ; AVX512DQ-NEXT: kmovw %eax, %k1
2971 ; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
2972 ; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
2973 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
2974 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
2975 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
2976 ; AVX512DQ-NEXT: vzeroupper
2977 ; AVX512DQ-NEXT: ret{{[l|q]}}
2979 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
2980 ; AVX512VLDQ: # %bb.0:
2981 ; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
2982 ; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
2983 ; AVX512VLDQ-NEXT: kmovw %eax, %k0
2984 ; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0
2985 ; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax
2986 ; AVX512VLDQ-NEXT: kmovw %eax, %k1
2987 ; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1
2988 ; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1
2989 ; AVX512VLDQ-NEXT: korw %k0, %k1, %k0
2990 ; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
2991 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
2992 %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float> %a,
2993 metadata !"fpexcept.strict") #0
; Strict (constrained) signed conversion of <4 x float> to <4 x i32>, called
; with "fpexcept.strict" metadata from a strictfp function (attribute group #0).
; The expected assembly below is autogenerated by update_llc_test_checks.py;
; regenerate it with that script instead of editing it by hand.
; Every configuration is expected to emit a single packed truncating convert
; (cvttps2dq, or vcvttps2dq on the AVX and AVX512 variants).
2997 define <4 x i32> @strict_vector_fptosi_v4f32_to_v4i32(<4 x float> %a) #0 {
2998 ; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3000 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
3003 ; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3005 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
3008 ; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3010 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
3011 ; AVX-NEXT: ret{{[l|q]}}
3013 ; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3015 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
3016 ; AVX512F-NEXT: ret{{[l|q]}}
3018 ; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3019 ; AVX512VL: # %bb.0:
3020 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
3021 ; AVX512VL-NEXT: ret{{[l|q]}}
3023 ; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3024 ; AVX512DQ: # %bb.0:
3025 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
3026 ; AVX512DQ-NEXT: ret{{[l|q]}}
3028 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
3029 ; AVX512VLDQ: # %bb.0:
3030 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
3031 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
3032 %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %a,
3033 metadata !"fpexcept.strict") #0
; Strict (constrained) unsigned conversion of <4 x float> to <4 x i32>, called
; with "fpexcept.strict" metadata from a strictfp function (attribute group #0).
; The expected assembly below is autogenerated by update_llc_test_checks.py;
; regenerate it with that script instead of editing it by hand.
; Expected lowering, as recorded in the checks below:
;   - SSE and AVX compare the input against a splat of 2.14748365E+9, subtract
;     a value selected by that compare, run the signed cvttps2dq, and xor the
;     result with a second value selected by the same compare.
;   - The AVX512 variants use vcvttps2udq. Without VL the operand is widened
;     to zmm, so those variants also expect vzeroupper before returning.
3037 define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
3038 ; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3040 ; SSE-32-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
3041 ; SSE-32-NEXT: movaps %xmm0, %xmm3
3042 ; SSE-32-NEXT: cmpltps %xmm2, %xmm3
3043 ; SSE-32-NEXT: movaps %xmm3, %xmm1
3044 ; SSE-32-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
3045 ; SSE-32-NEXT: andnps %xmm2, %xmm3
3046 ; SSE-32-NEXT: subps %xmm3, %xmm0
3047 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
3048 ; SSE-32-NEXT: xorps %xmm0, %xmm1
3049 ; SSE-32-NEXT: movaps %xmm1, %xmm0
3052 ; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3054 ; SSE-64-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
3055 ; SSE-64-NEXT: movaps %xmm0, %xmm3
3056 ; SSE-64-NEXT: cmpltps %xmm2, %xmm3
3057 ; SSE-64-NEXT: movaps %xmm3, %xmm1
3058 ; SSE-64-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3059 ; SSE-64-NEXT: andnps %xmm2, %xmm3
3060 ; SSE-64-NEXT: subps %xmm3, %xmm0
3061 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
3062 ; SSE-64-NEXT: xorps %xmm0, %xmm1
3063 ; SSE-64-NEXT: movaps %xmm1, %xmm0
3066 ; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3068 ; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
3069 ; AVX-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
3070 ; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
3071 ; AVX-NEXT: vmovaps {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
3072 ; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm4
3073 ; AVX-NEXT: vblendvps %xmm2, %xmm3, %xmm1, %xmm1
3074 ; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0
3075 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
3076 ; AVX-NEXT: vxorps %xmm4, %xmm0, %xmm0
3077 ; AVX-NEXT: ret{{[l|q]}}
3079 ; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3081 ; AVX512F-NEXT: vmovaps %xmm0, %xmm0
3082 ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
3083 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3084 ; AVX512F-NEXT: vzeroupper
3085 ; AVX512F-NEXT: ret{{[l|q]}}
3087 ; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3088 ; AVX512VL: # %bb.0:
3089 ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
3090 ; AVX512VL-NEXT: ret{{[l|q]}}
3092 ; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3093 ; AVX512DQ: # %bb.0:
3094 ; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
3095 ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
3096 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3097 ; AVX512DQ-NEXT: vzeroupper
3098 ; AVX512DQ-NEXT: ret{{[l|q]}}
3100 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
3101 ; AVX512VLDQ: # %bb.0:
3102 ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
3103 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
3104 %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %a,
3105 metadata !"fpexcept.strict") #0
; Strict (constrained) signed conversion of <4 x float> to <4 x i8>, called
; with "fpexcept.strict" metadata from a strictfp function (attribute group #0).
; The expected assembly below is autogenerated by update_llc_test_checks.py;
; regenerate it with that script instead of editing it by hand.
; Every configuration is expected to convert with (v)cvttps2dq and then narrow
; the result. SSE, AVX, AVX512F and AVX512DQ narrow with packssdw + packsswb;
; the two variants that include VL narrow with a single vpmovdb.
3109 define <4 x i8> @strict_vector_fptosi_v4f32_to_v4i8(<4 x float> %a) #0 {
3110 ; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3112 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
3113 ; SSE-32-NEXT: packssdw %xmm0, %xmm0
3114 ; SSE-32-NEXT: packsswb %xmm0, %xmm0
3117 ; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3119 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
3120 ; SSE-64-NEXT: packssdw %xmm0, %xmm0
3121 ; SSE-64-NEXT: packsswb %xmm0, %xmm0
3124 ; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3126 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
3127 ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
3128 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
3129 ; AVX-NEXT: ret{{[l|q]}}
3131 ; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3133 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
3134 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
3135 ; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
3136 ; AVX512F-NEXT: ret{{[l|q]}}
3138 ; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3139 ; AVX512VL: # %bb.0:
3140 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
3141 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
3142 ; AVX512VL-NEXT: ret{{[l|q]}}
3144 ; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3145 ; AVX512DQ: # %bb.0:
3146 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
3147 ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
3148 ; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
3149 ; AVX512DQ-NEXT: ret{{[l|q]}}
3151 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
3152 ; AVX512VLDQ: # %bb.0:
3153 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
3154 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
3155 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
3156 %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float> %a,
3157 metadata !"fpexcept.strict") #0
; Strict (constrained) unsigned conversion of <4 x float> to <4 x i8>, called
; with "fpexcept.strict" metadata from a strictfp function (attribute group #0).
; The expected assembly below is autogenerated by update_llc_test_checks.py;
; regenerate it with that script instead of editing it by hand.
; Every configuration is expected to convert with the signed (v)cvttps2dq and
; then narrow the result. SSE narrows with two packuswb; AVX, AVX512F and
; AVX512DQ narrow with vpackusdw + vpackuswb; the two variants that include VL
; narrow with a single vpmovdb.
3161 define <4 x i8> @strict_vector_fptoui_v4f32_to_v4i8(<4 x float> %a) #0 {
3162 ; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3164 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
3165 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
3166 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
3169 ; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3171 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
3172 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
3173 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
3176 ; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3178 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
3179 ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
3180 ; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
3181 ; AVX-NEXT: ret{{[l|q]}}
3183 ; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3185 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
3186 ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
3187 ; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
3188 ; AVX512F-NEXT: ret{{[l|q]}}
3190 ; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3191 ; AVX512VL: # %bb.0:
3192 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
3193 ; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
3194 ; AVX512VL-NEXT: ret{{[l|q]}}
3196 ; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3197 ; AVX512DQ: # %bb.0:
3198 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
3199 ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
3200 ; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
3201 ; AVX512DQ-NEXT: ret{{[l|q]}}
3203 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
3204 ; AVX512VLDQ: # %bb.0:
3205 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
3206 ; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0
3207 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
3208 %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float> %a,
3209 metadata !"fpexcept.strict") #0
; Strict (constrained) signed conversion of <4 x float> to <4 x i1>, called
; with "fpexcept.strict" metadata from a strictfp function (attribute group #0).
; The expected assembly below is autogenerated by update_llc_test_checks.py;
; regenerate it with that script instead of editing it by hand.
; Expected lowering, as recorded in the checks below:
;   - SSE and AVX emit only the packed (v)cvttps2dq.
;   - The AVX512 variants follow the convert with a round trip through a mask
;     register: vptestmd (F, VL) or vpmovd2m (DQ, VLDQ) builds the mask, which
;     is then expanded back into a vector.
3213 define <4 x i1> @strict_vector_fptosi_v4f32_to_v4i1(<4 x float> %a) #0 {
3214 ; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3216 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
3219 ; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3221 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
3224 ; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3226 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
3227 ; AVX-NEXT: ret{{[l|q]}}
3229 ; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3231 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
3232 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
3233 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
3234 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3235 ; AVX512F-NEXT: vzeroupper
3236 ; AVX512F-NEXT: ret{{[l|q]}}
3238 ; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3239 ; AVX512VL: # %bb.0:
3240 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
3241 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
3242 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
3243 ; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
3244 ; AVX512VL-NEXT: ret{{[l|q]}}
3246 ; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3247 ; AVX512DQ: # %bb.0:
3248 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
3249 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
3250 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
3251 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3252 ; AVX512DQ-NEXT: vzeroupper
3253 ; AVX512DQ-NEXT: ret{{[l|q]}}
3255 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
3256 ; AVX512VLDQ: # %bb.0:
3257 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
3258 ; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
3259 ; AVX512VLDQ-NEXT: vpmovm2d %k0, %xmm0
3260 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
3261 %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float> %a,
3262 metadata !"fpexcept.strict") #0
; Strict (constrained) unsigned conversion of <4 x float> to <4 x i1>, called
; with "fpexcept.strict" metadata from a strictfp function (attribute group #0).
; The expected assembly below is autogenerated by update_llc_test_checks.py;
; regenerate it with that script instead of editing it by hand.
; Expected lowering, as recorded in the checks below:
;   - SSE and AVX emit only the packed (v)cvttps2dq.
;   - The AVX512 variants follow the convert with vpslld $31 and then a round
;     trip through a mask register: vptestmd (F, VL) or vpmovd2m (DQ, VLDQ)
;     builds the mask, which is then expanded back into a vector.
3266 define <4 x i1> @strict_vector_fptoui_v4f32_to_v4i1(<4 x float> %a) #0 {
3267 ; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3269 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
3272 ; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3274 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
3277 ; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3279 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
3280 ; AVX-NEXT: ret{{[l|q]}}
3282 ; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3284 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
3285 ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
3286 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
3287 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
3288 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3289 ; AVX512F-NEXT: vzeroupper
3290 ; AVX512F-NEXT: ret{{[l|q]}}
3292 ; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3293 ; AVX512VL: # %bb.0:
3294 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
3295 ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
3296 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
3297 ; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
3298 ; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
3299 ; AVX512VL-NEXT: ret{{[l|q]}}
3301 ; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3302 ; AVX512DQ: # %bb.0:
3303 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
3304 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
3305 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
3306 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
3307 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3308 ; AVX512DQ-NEXT: vzeroupper
3309 ; AVX512DQ-NEXT: ret{{[l|q]}}
3311 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
3312 ; AVX512VLDQ: # %bb.0:
3313 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
3314 ; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
3315 ; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
3316 ; AVX512VLDQ-NEXT: vpmovm2d %k0, %xmm0
3317 ; AVX512VLDQ-NEXT: ret{{[l|q]}}
3318 %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float> %a,
3319 metadata !"fpexcept.strict") #0
; Attribute group #0 carries the strictfp attribute. The test functions in this
; file and their constrained-intrinsic call sites reference it as "#0".
3323 attributes #0 = { strictfp }