1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
5 ; 32-bit float to unsigned integer
8 declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float>)
9 declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float>)
10 declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float>)
11 declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float>)
12 declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float>)
13 declare <4 x i128> @llvm.fptoui.sat.v4i128.v4f32(<4 x float>)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v4i1.v4f32.
; The expected code handles the four float lanes one at a time: each lane is
; clamped with maxss against a zeroed register and minss against 1.0, converted
; with cvttss2si, and the results are repacked with punpckldq/punpcklqdq.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing them.
15 define <4 x i1> @test_unsigned_v4i1_v4f32(<4 x float> %f) nounwind {
16 ; CHECK-LABEL: test_unsigned_v4i1_v4f32:
18 ; CHECK-NEXT: movaps %xmm0, %xmm1
19 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
20 ; CHECK-NEXT: xorps %xmm2, %xmm2
21 ; CHECK-NEXT: maxss %xmm2, %xmm1
22 ; CHECK-NEXT: movss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
23 ; CHECK-NEXT: minss %xmm3, %xmm1
24 ; CHECK-NEXT: cvttss2si %xmm1, %eax
25 ; CHECK-NEXT: movd %eax, %xmm1
26 ; CHECK-NEXT: movaps %xmm0, %xmm4
27 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
28 ; CHECK-NEXT: maxss %xmm2, %xmm4
29 ; CHECK-NEXT: minss %xmm3, %xmm4
30 ; CHECK-NEXT: cvttss2si %xmm4, %eax
31 ; CHECK-NEXT: movd %eax, %xmm4
32 ; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
33 ; CHECK-NEXT: movaps %xmm0, %xmm1
34 ; CHECK-NEXT: maxss %xmm2, %xmm1
35 ; CHECK-NEXT: minss %xmm3, %xmm1
36 ; CHECK-NEXT: cvttss2si %xmm1, %eax
37 ; CHECK-NEXT: movd %eax, %xmm1
38 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
39 ; CHECK-NEXT: maxss %xmm2, %xmm0
40 ; CHECK-NEXT: minss %xmm3, %xmm0
41 ; CHECK-NEXT: cvttss2si %xmm0, %eax
42 ; CHECK-NEXT: movd %eax, %xmm0
43 ; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
44 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
45 ; CHECK-NEXT: movdqa %xmm1, %xmm0
47 %x = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v4i8.v4f32.
; Each lane is clamped with maxss against zero and minss against 2.55E+2, then
; converted with cvttss2si. The four byte results are merged into one 32-bit
; value with movzbl/shll/orl (shifts of 8, 16 and 24) and moved to xmm0 by movd.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
51 define <4 x i8> @test_unsigned_v4i8_v4f32(<4 x float> %f) nounwind {
52 ; CHECK-LABEL: test_unsigned_v4i8_v4f32:
54 ; CHECK-NEXT: xorps %xmm1, %xmm1
55 ; CHECK-NEXT: xorps %xmm3, %xmm3
56 ; CHECK-NEXT: maxss %xmm0, %xmm3
57 ; CHECK-NEXT: movss {{.*#+}} xmm2 = [2.55E+2,0.0E+0,0.0E+0,0.0E+0]
58 ; CHECK-NEXT: movaps %xmm2, %xmm4
59 ; CHECK-NEXT: minss %xmm3, %xmm4
60 ; CHECK-NEXT: cvttss2si %xmm4, %eax
61 ; CHECK-NEXT: movzbl %al, %eax
62 ; CHECK-NEXT: movaps %xmm0, %xmm3
63 ; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
64 ; CHECK-NEXT: xorps %xmm4, %xmm4
65 ; CHECK-NEXT: maxss %xmm3, %xmm4
66 ; CHECK-NEXT: movaps %xmm2, %xmm3
67 ; CHECK-NEXT: minss %xmm4, %xmm3
68 ; CHECK-NEXT: cvttss2si %xmm3, %ecx
69 ; CHECK-NEXT: movzbl %cl, %ecx
70 ; CHECK-NEXT: shll $8, %ecx
71 ; CHECK-NEXT: orl %eax, %ecx
72 ; CHECK-NEXT: movaps %xmm0, %xmm3
73 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
74 ; CHECK-NEXT: xorps %xmm4, %xmm4
75 ; CHECK-NEXT: maxss %xmm3, %xmm4
76 ; CHECK-NEXT: movaps %xmm2, %xmm3
77 ; CHECK-NEXT: minss %xmm4, %xmm3
78 ; CHECK-NEXT: cvttss2si %xmm3, %eax
79 ; CHECK-NEXT: movzbl %al, %eax
80 ; CHECK-NEXT: shll $16, %eax
81 ; CHECK-NEXT: orl %ecx, %eax
82 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
83 ; CHECK-NEXT: maxss %xmm0, %xmm1
84 ; CHECK-NEXT: minss %xmm1, %xmm2
85 ; CHECK-NEXT: cvttss2si %xmm2, %ecx
86 ; CHECK-NEXT: shll $24, %ecx
87 ; CHECK-NEXT: orl %eax, %ecx
88 ; CHECK-NEXT: movd %ecx, %xmm0
90 %x = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v4i16.v4f32.
; Each lane is clamped with maxss against zero and minss against 6.5535E+4,
; then converted with cvttss2si. Lane 0 is placed with movd and lanes 1-3 are
; inserted with pinsrw $1/$2/$3 before the result is copied to xmm0.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
94 define <4 x i16> @test_unsigned_v4i16_v4f32(<4 x float> %f) nounwind {
95 ; CHECK-LABEL: test_unsigned_v4i16_v4f32:
97 ; CHECK-NEXT: movaps %xmm0, %xmm1
98 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
99 ; CHECK-NEXT: xorps %xmm2, %xmm2
100 ; CHECK-NEXT: xorps %xmm3, %xmm3
101 ; CHECK-NEXT: maxss %xmm1, %xmm3
102 ; CHECK-NEXT: movss {{.*#+}} xmm4 = [6.5535E+4,0.0E+0,0.0E+0,0.0E+0]
103 ; CHECK-NEXT: movaps %xmm4, %xmm1
104 ; CHECK-NEXT: minss %xmm3, %xmm1
105 ; CHECK-NEXT: cvttss2si %xmm1, %eax
106 ; CHECK-NEXT: xorps %xmm1, %xmm1
107 ; CHECK-NEXT: maxss %xmm0, %xmm1
108 ; CHECK-NEXT: movaps %xmm4, %xmm3
109 ; CHECK-NEXT: minss %xmm1, %xmm3
110 ; CHECK-NEXT: cvttss2si %xmm3, %ecx
111 ; CHECK-NEXT: movd %ecx, %xmm1
112 ; CHECK-NEXT: pinsrw $1, %eax, %xmm1
113 ; CHECK-NEXT: movaps %xmm0, %xmm3
114 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
115 ; CHECK-NEXT: xorps %xmm5, %xmm5
116 ; CHECK-NEXT: maxss %xmm3, %xmm5
117 ; CHECK-NEXT: movaps %xmm4, %xmm3
118 ; CHECK-NEXT: minss %xmm5, %xmm3
119 ; CHECK-NEXT: cvttss2si %xmm3, %eax
120 ; CHECK-NEXT: pinsrw $2, %eax, %xmm1
121 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
122 ; CHECK-NEXT: maxss %xmm0, %xmm2
123 ; CHECK-NEXT: minss %xmm2, %xmm4
124 ; CHECK-NEXT: cvttss2si %xmm4, %eax
125 ; CHECK-NEXT: pinsrw $3, %eax, %xmm1
126 ; CHECK-NEXT: movdqa %xmm1, %xmm0
128 %x = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v4i32.v4f32.
; Unlike the narrower cases, no maxss/minss clamp is expected here. Each lane is
; converted with cvttss2si into a 64-bit register (%rdx), then the low 32 bits
; are replaced by 0 (cmovbl from %eax) after a ucomiss against zero, and by -1
; (cmoval from %ecx) after a ucomiss against the 4.29496704E+9 constant.
; The lanes are repacked with movd/punpckldq/punpcklqdq.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
132 define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind {
133 ; CHECK-LABEL: test_unsigned_v4i32_v4f32:
135 ; CHECK-NEXT: movaps %xmm0, %xmm1
136 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
137 ; CHECK-NEXT: cvttss2si %xmm1, %rdx
138 ; CHECK-NEXT: xorl %eax, %eax
139 ; CHECK-NEXT: xorps %xmm2, %xmm2
140 ; CHECK-NEXT: ucomiss %xmm2, %xmm1
141 ; CHECK-NEXT: cmovbl %eax, %edx
142 ; CHECK-NEXT: movss {{.*#+}} xmm3 = [4.29496704E+9,0.0E+0,0.0E+0,0.0E+0]
143 ; CHECK-NEXT: ucomiss %xmm3, %xmm1
144 ; CHECK-NEXT: movl $-1, %ecx
145 ; CHECK-NEXT: cmoval %ecx, %edx
146 ; CHECK-NEXT: movd %edx, %xmm1
147 ; CHECK-NEXT: movaps %xmm0, %xmm4
148 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
149 ; CHECK-NEXT: cvttss2si %xmm4, %rdx
150 ; CHECK-NEXT: ucomiss %xmm2, %xmm4
151 ; CHECK-NEXT: cmovbl %eax, %edx
152 ; CHECK-NEXT: ucomiss %xmm3, %xmm4
153 ; CHECK-NEXT: cmoval %ecx, %edx
154 ; CHECK-NEXT: movd %edx, %xmm4
155 ; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
156 ; CHECK-NEXT: cvttss2si %xmm0, %rdx
157 ; CHECK-NEXT: ucomiss %xmm2, %xmm0
158 ; CHECK-NEXT: cmovbl %eax, %edx
159 ; CHECK-NEXT: ucomiss %xmm3, %xmm0
160 ; CHECK-NEXT: cmoval %ecx, %edx
161 ; CHECK-NEXT: movd %edx, %xmm1
162 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
163 ; CHECK-NEXT: cvttss2si %xmm0, %rdx
164 ; CHECK-NEXT: ucomiss %xmm2, %xmm0
165 ; CHECK-NEXT: cmovbl %eax, %edx
166 ; CHECK-NEXT: ucomiss %xmm3, %xmm0
167 ; CHECK-NEXT: cmoval %ecx, %edx
168 ; CHECK-NEXT: movd %edx, %xmm0
169 ; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
170 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
171 ; CHECK-NEXT: movdqa %xmm1, %xmm0
173 %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v4i64.v4f32.
; For every lane the expected code runs cvttss2si twice: once on the value and
; once on the value minus the 9.22337203E+18 constant. It merges the two results
; with sarq $63 / andq / orq. The merged value is then replaced by 0 (cmovbq)
; after a ucomiss against zero and by -1 (cmovaq) after a ucomiss against the
; 1.8446743E+19 constant. The four 64-bit results are paired with punpcklqdq
; and end up in xmm0 (copied from xmm2) and xmm1.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
177 define <4 x i64> @test_unsigned_v4i64_v4f32(<4 x float> %f) nounwind {
178 ; CHECK-LABEL: test_unsigned_v4i64_v4f32:
180 ; CHECK-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
181 ; CHECK-NEXT: movaps %xmm0, %xmm2
182 ; CHECK-NEXT: subss %xmm1, %xmm2
183 ; CHECK-NEXT: cvttss2si %xmm2, %rax
184 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
185 ; CHECK-NEXT: movq %rcx, %rdx
186 ; CHECK-NEXT: sarq $63, %rdx
187 ; CHECK-NEXT: andq %rax, %rdx
188 ; CHECK-NEXT: orq %rcx, %rdx
189 ; CHECK-NEXT: xorl %eax, %eax
190 ; CHECK-NEXT: xorps %xmm3, %xmm3
191 ; CHECK-NEXT: ucomiss %xmm3, %xmm0
192 ; CHECK-NEXT: cmovbq %rax, %rdx
193 ; CHECK-NEXT: movss {{.*#+}} xmm4 = [1.8446743E+19,0.0E+0,0.0E+0,0.0E+0]
194 ; CHECK-NEXT: ucomiss %xmm4, %xmm0
195 ; CHECK-NEXT: movq $-1, %rcx
196 ; CHECK-NEXT: cmovaq %rcx, %rdx
197 ; CHECK-NEXT: movq %rdx, %xmm2
198 ; CHECK-NEXT: movaps %xmm0, %xmm5
199 ; CHECK-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[1,1]
200 ; CHECK-NEXT: movaps %xmm5, %xmm6
201 ; CHECK-NEXT: subss %xmm1, %xmm6
202 ; CHECK-NEXT: cvttss2si %xmm6, %rdx
203 ; CHECK-NEXT: cvttss2si %xmm5, %rsi
204 ; CHECK-NEXT: movq %rsi, %rdi
205 ; CHECK-NEXT: sarq $63, %rdi
206 ; CHECK-NEXT: andq %rdx, %rdi
207 ; CHECK-NEXT: orq %rsi, %rdi
208 ; CHECK-NEXT: ucomiss %xmm3, %xmm5
209 ; CHECK-NEXT: cmovbq %rax, %rdi
210 ; CHECK-NEXT: ucomiss %xmm4, %xmm5
211 ; CHECK-NEXT: cmovaq %rcx, %rdi
212 ; CHECK-NEXT: movq %rdi, %xmm5
213 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
214 ; CHECK-NEXT: movaps %xmm0, %xmm5
215 ; CHECK-NEXT: shufps {{.*#+}} xmm5 = xmm5[3,3],xmm0[3,3]
216 ; CHECK-NEXT: movaps %xmm5, %xmm6
217 ; CHECK-NEXT: subss %xmm1, %xmm6
218 ; CHECK-NEXT: cvttss2si %xmm6, %rdx
219 ; CHECK-NEXT: cvttss2si %xmm5, %rsi
220 ; CHECK-NEXT: movq %rsi, %rdi
221 ; CHECK-NEXT: sarq $63, %rdi
222 ; CHECK-NEXT: andq %rdx, %rdi
223 ; CHECK-NEXT: orq %rsi, %rdi
224 ; CHECK-NEXT: ucomiss %xmm3, %xmm5
225 ; CHECK-NEXT: cmovbq %rax, %rdi
226 ; CHECK-NEXT: ucomiss %xmm4, %xmm5
227 ; CHECK-NEXT: cmovaq %rcx, %rdi
228 ; CHECK-NEXT: movq %rdi, %xmm5
229 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
230 ; CHECK-NEXT: movaps %xmm0, %xmm6
231 ; CHECK-NEXT: subss %xmm1, %xmm6
232 ; CHECK-NEXT: cvttss2si %xmm6, %rdx
233 ; CHECK-NEXT: cvttss2si %xmm0, %rsi
234 ; CHECK-NEXT: movq %rsi, %rdi
235 ; CHECK-NEXT: sarq $63, %rdi
236 ; CHECK-NEXT: andq %rdx, %rdi
237 ; CHECK-NEXT: orq %rsi, %rdi
238 ; CHECK-NEXT: ucomiss %xmm3, %xmm0
239 ; CHECK-NEXT: cmovbq %rax, %rdi
240 ; CHECK-NEXT: ucomiss %xmm4, %xmm0
241 ; CHECK-NEXT: cmovaq %rcx, %rdi
242 ; CHECK-NEXT: movq %rdi, %xmm1
243 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
244 ; CHECK-NEXT: movdqa %xmm2, %xmm0
246 %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v4i128.v4f32.
; There is no inline conversion here: each lane is passed to a call of
; __fixunssfti@PLT, and the two 64-bit halves left in %rax/%rdx are then
; replaced by 0 (cmovbq) or by -1 (cmovaq) depending on two ucomiss compares.
; The input vector and intermediate halves are spilled and reloaded around the
; calls. The incoming %rdi is saved in %rbx, the eight 64-bit halves are stored
; at offsets 0..56 from it, and %rbx is copied to %rax before the epilogue.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
250 define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
251 ; CHECK-LABEL: test_unsigned_v4i128_v4f32:
253 ; CHECK-NEXT: pushq %rbp
254 ; CHECK-NEXT: pushq %r15
255 ; CHECK-NEXT: pushq %r14
256 ; CHECK-NEXT: pushq %r13
257 ; CHECK-NEXT: pushq %r12
258 ; CHECK-NEXT: pushq %rbx
259 ; CHECK-NEXT: subq $56, %rsp
260 ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
261 ; CHECK-NEXT: movq %rdi, %rbx
262 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
263 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
264 ; CHECK-NEXT: callq __fixunssfti@PLT
265 ; CHECK-NEXT: movq %rdx, %r15
266 ; CHECK-NEXT: xorl %r14d, %r14d
267 ; CHECK-NEXT: xorps %xmm0, %xmm0
268 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
269 ; CHECK-NEXT: ucomiss %xmm0, %xmm1
270 ; CHECK-NEXT: cmovbq %r14, %r15
271 ; CHECK-NEXT: cmovbq %r14, %rax
272 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
273 ; CHECK-NEXT: movq $-1, %rbp
274 ; CHECK-NEXT: cmovaq %rbp, %rax
275 ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
276 ; CHECK-NEXT: cmovaq %rbp, %r15
277 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
278 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
279 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
280 ; CHECK-NEXT: callq __fixunssfti@PLT
281 ; CHECK-NEXT: movq %rax, %r12
282 ; CHECK-NEXT: movq %rdx, %r13
283 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
284 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
285 ; CHECK-NEXT: cmovbq %r14, %r13
286 ; CHECK-NEXT: cmovbq %r14, %r12
287 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
288 ; CHECK-NEXT: cmovaq %rbp, %r12
289 ; CHECK-NEXT: cmovaq %rbp, %r13
290 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
291 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
292 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
293 ; CHECK-NEXT: callq __fixunssfti@PLT
294 ; CHECK-NEXT: movq %rax, %rbp
295 ; CHECK-NEXT: movq %rdx, %r14
296 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
297 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
298 ; CHECK-NEXT: movl $0, %eax
299 ; CHECK-NEXT: cmovbq %rax, %r14
300 ; CHECK-NEXT: cmovbq %rax, %rbp
301 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
302 ; CHECK-NEXT: movq $-1, %rax
303 ; CHECK-NEXT: cmovaq %rax, %rbp
304 ; CHECK-NEXT: cmovaq %rax, %r14
305 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
306 ; CHECK-NEXT: callq __fixunssfti@PLT
307 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
308 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
309 ; CHECK-NEXT: movl $0, %ecx
310 ; CHECK-NEXT: cmovbq %rcx, %rdx
311 ; CHECK-NEXT: cmovbq %rcx, %rax
312 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
313 ; CHECK-NEXT: movq $-1, %rcx
314 ; CHECK-NEXT: cmovaq %rcx, %rax
315 ; CHECK-NEXT: cmovaq %rcx, %rdx
316 ; CHECK-NEXT: movq %rdx, 8(%rbx)
317 ; CHECK-NEXT: movq %rax, (%rbx)
318 ; CHECK-NEXT: movq %r14, 56(%rbx)
319 ; CHECK-NEXT: movq %rbp, 48(%rbx)
320 ; CHECK-NEXT: movq %r13, 40(%rbx)
321 ; CHECK-NEXT: movq %r12, 32(%rbx)
322 ; CHECK-NEXT: movq %r15, 24(%rbx)
323 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
324 ; CHECK-NEXT: movq %rax, 16(%rbx)
325 ; CHECK-NEXT: movq %rbx, %rax
326 ; CHECK-NEXT: addq $56, %rsp
327 ; CHECK-NEXT: popq %rbx
328 ; CHECK-NEXT: popq %r12
329 ; CHECK-NEXT: popq %r13
330 ; CHECK-NEXT: popq %r14
331 ; CHECK-NEXT: popq %r15
332 ; CHECK-NEXT: popq %rbp
334 %x = call <4 x i128> @llvm.fptoui.sat.v4i128.v4f32(<4 x float> %f)
339 ; 64-bit float to unsigned integer
342 declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double>)
343 declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double>)
344 declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double>)
345 declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double>)
346 declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double>)
347 declare <2 x i128> @llvm.fptoui.sat.v2i128.v2f64(<2 x double>)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v2i1.v2f64.
; Each double lane is clamped with maxsd against a zeroed register and minsd
; against 1.0, converted with cvttsd2si into %rax, and the two results are
; joined with punpcklqdq before being copied to xmm0.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
349 define <2 x i1> @test_unsigned_v2i1_v2f64(<2 x double> %f) nounwind {
350 ; CHECK-LABEL: test_unsigned_v2i1_v2f64:
352 ; CHECK-NEXT: xorpd %xmm2, %xmm2
353 ; CHECK-NEXT: movapd %xmm0, %xmm1
354 ; CHECK-NEXT: maxsd %xmm2, %xmm1
355 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0]
356 ; CHECK-NEXT: minsd %xmm3, %xmm1
357 ; CHECK-NEXT: cvttsd2si %xmm1, %rax
358 ; CHECK-NEXT: movq %rax, %xmm1
359 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
360 ; CHECK-NEXT: maxsd %xmm2, %xmm0
361 ; CHECK-NEXT: minsd %xmm3, %xmm0
362 ; CHECK-NEXT: cvttsd2si %xmm0, %rax
363 ; CHECK-NEXT: movq %rax, %xmm0
364 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
365 ; CHECK-NEXT: movdqa %xmm1, %xmm0
367 %x = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v2i8.v2f64.
; Each double lane is clamped with maxsd against zero and minsd against
; 2.55E+2, then converted with cvttsd2si. The two byte results are merged with
; movzbl / shll $8 / orl and moved to xmm0 by movd.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
371 define <2 x i8> @test_unsigned_v2i8_v2f64(<2 x double> %f) nounwind {
372 ; CHECK-LABEL: test_unsigned_v2i8_v2f64:
374 ; CHECK-NEXT: xorpd %xmm1, %xmm1
375 ; CHECK-NEXT: xorpd %xmm2, %xmm2
376 ; CHECK-NEXT: maxsd %xmm0, %xmm2
377 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = [2.55E+2,0.0E+0]
378 ; CHECK-NEXT: movapd %xmm3, %xmm4
379 ; CHECK-NEXT: minsd %xmm2, %xmm4
380 ; CHECK-NEXT: cvttsd2si %xmm4, %eax
381 ; CHECK-NEXT: movzbl %al, %eax
382 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
383 ; CHECK-NEXT: maxsd %xmm0, %xmm1
384 ; CHECK-NEXT: minsd %xmm1, %xmm3
385 ; CHECK-NEXT: cvttsd2si %xmm3, %ecx
386 ; CHECK-NEXT: shll $8, %ecx
387 ; CHECK-NEXT: orl %eax, %ecx
388 ; CHECK-NEXT: movd %ecx, %xmm0
390 %x = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v2i16.v2f64.
; Each double lane is clamped with maxsd against zero and minsd against
; 6.5535E+4, then converted with cvttsd2si. Lane 0 is placed with movd and
; lane 1 is inserted with pinsrw $1.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
394 define <2 x i16> @test_unsigned_v2i16_v2f64(<2 x double> %f) nounwind {
395 ; CHECK-LABEL: test_unsigned_v2i16_v2f64:
397 ; CHECK-NEXT: xorpd %xmm1, %xmm1
398 ; CHECK-NEXT: maxsd %xmm0, %xmm1
399 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
400 ; CHECK-NEXT: xorpd %xmm2, %xmm2
401 ; CHECK-NEXT: maxsd %xmm0, %xmm2
402 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = [6.5535E+4,0.0E+0]
403 ; CHECK-NEXT: movapd %xmm0, %xmm3
404 ; CHECK-NEXT: minsd %xmm2, %xmm3
405 ; CHECK-NEXT: cvttsd2si %xmm3, %eax
406 ; CHECK-NEXT: minsd %xmm1, %xmm0
407 ; CHECK-NEXT: cvttsd2si %xmm0, %ecx
408 ; CHECK-NEXT: movd %ecx, %xmm0
409 ; CHECK-NEXT: pinsrw $1, %eax, %xmm0
411 %x = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v2i32.v2f64.
; Each double lane is clamped with maxsd against zero and minsd against
; 4.294967295E+9, then converted with cvttsd2si into the 64-bit %rax. The low
; 32 bits (%eax) are moved with movd and the two lanes are joined with punpckldq.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
415 define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %f) nounwind {
416 ; CHECK-LABEL: test_unsigned_v2i32_v2f64:
418 ; CHECK-NEXT: xorpd %xmm2, %xmm2
419 ; CHECK-NEXT: xorpd %xmm1, %xmm1
420 ; CHECK-NEXT: maxsd %xmm0, %xmm1
421 ; CHECK-NEXT: movsd {{.*#+}} xmm3 = [4.294967295E+9,0.0E+0]
422 ; CHECK-NEXT: movapd %xmm3, %xmm4
423 ; CHECK-NEXT: minsd %xmm1, %xmm4
424 ; CHECK-NEXT: cvttsd2si %xmm4, %rax
425 ; CHECK-NEXT: movd %eax, %xmm1
426 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
427 ; CHECK-NEXT: maxsd %xmm0, %xmm2
428 ; CHECK-NEXT: minsd %xmm2, %xmm3
429 ; CHECK-NEXT: cvttsd2si %xmm3, %rax
430 ; CHECK-NEXT: movd %eax, %xmm0
431 ; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
432 ; CHECK-NEXT: movdqa %xmm1, %xmm0
434 %x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v2i64.v2f64.
; For each lane the expected code runs cvttsd2si twice: once on the value and
; once on the value minus the 9.2233720368547758E+18 constant. It merges the
; two results with sarq $63 / andq / orq. The merged value is then replaced by
; 0 (cmovbq) after a ucomisd against zero and by -1 (cmovaq) after a ucomisd
; against the 1.844674407370955E+19 constant. The lanes are joined with
; punpcklqdq.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
438 define <2 x i64> @test_unsigned_v2i64_v2f64(<2 x double> %f) nounwind {
439 ; CHECK-LABEL: test_unsigned_v2i64_v2f64:
441 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
442 ; CHECK-NEXT: movapd %xmm0, %xmm1
443 ; CHECK-NEXT: subsd %xmm2, %xmm1
444 ; CHECK-NEXT: cvttsd2si %xmm1, %rax
445 ; CHECK-NEXT: cvttsd2si %xmm0, %rcx
446 ; CHECK-NEXT: movq %rcx, %rdx
447 ; CHECK-NEXT: sarq $63, %rdx
448 ; CHECK-NEXT: andq %rax, %rdx
449 ; CHECK-NEXT: orq %rcx, %rdx
450 ; CHECK-NEXT: xorl %eax, %eax
451 ; CHECK-NEXT: xorpd %xmm3, %xmm3
452 ; CHECK-NEXT: ucomisd %xmm3, %xmm0
453 ; CHECK-NEXT: cmovbq %rax, %rdx
454 ; CHECK-NEXT: movsd {{.*#+}} xmm4 = [1.844674407370955E+19,0.0E+0]
455 ; CHECK-NEXT: ucomisd %xmm4, %xmm0
456 ; CHECK-NEXT: movq $-1, %rcx
457 ; CHECK-NEXT: cmovaq %rcx, %rdx
458 ; CHECK-NEXT: movq %rdx, %xmm1
459 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
460 ; CHECK-NEXT: movapd %xmm0, %xmm5
461 ; CHECK-NEXT: subsd %xmm2, %xmm5
462 ; CHECK-NEXT: cvttsd2si %xmm5, %rdx
463 ; CHECK-NEXT: cvttsd2si %xmm0, %rsi
464 ; CHECK-NEXT: movq %rsi, %rdi
465 ; CHECK-NEXT: sarq $63, %rdi
466 ; CHECK-NEXT: andq %rdx, %rdi
467 ; CHECK-NEXT: orq %rsi, %rdi
468 ; CHECK-NEXT: ucomisd %xmm3, %xmm0
469 ; CHECK-NEXT: cmovbq %rax, %rdi
470 ; CHECK-NEXT: ucomisd %xmm4, %xmm0
471 ; CHECK-NEXT: cmovaq %rcx, %rdi
472 ; CHECK-NEXT: movq %rdi, %xmm0
473 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
474 ; CHECK-NEXT: movdqa %xmm1, %xmm0
476 %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v2i128.v2f64.
; Each double lane is passed to a call of __fixunsdfti@PLT, and the two 64-bit
; halves left in %rax/%rdx are then replaced by 0 (cmovbq from %r12) or by -1
; (cmovaq from %r13) depending on two ucomisd compares. The incoming %rdi is
; saved in %rbx, the four 64-bit halves are stored at offsets 0..24 from it,
; and %rbx is copied to %rax before the epilogue.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
480 define <2 x i128> @test_unsigned_v2i128_v2f64(<2 x double> %f) nounwind {
481 ; CHECK-LABEL: test_unsigned_v2i128_v2f64:
483 ; CHECK-NEXT: pushq %r15
484 ; CHECK-NEXT: pushq %r14
485 ; CHECK-NEXT: pushq %r13
486 ; CHECK-NEXT: pushq %r12
487 ; CHECK-NEXT: pushq %rbx
488 ; CHECK-NEXT: subq $32, %rsp
489 ; CHECK-NEXT: movapd %xmm0, (%rsp) # 16-byte Spill
490 ; CHECK-NEXT: movq %rdi, %rbx
491 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
492 ; CHECK-NEXT: movapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
493 ; CHECK-NEXT: callq __fixunsdfti@PLT
494 ; CHECK-NEXT: movq %rax, %r14
495 ; CHECK-NEXT: movq %rdx, %r15
496 ; CHECK-NEXT: xorl %r12d, %r12d
497 ; CHECK-NEXT: xorpd %xmm0, %xmm0
498 ; CHECK-NEXT: movapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
499 ; CHECK-NEXT: ucomisd %xmm0, %xmm1
500 ; CHECK-NEXT: cmovbq %r12, %r15
501 ; CHECK-NEXT: cmovbq %r12, %r14
502 ; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
503 ; CHECK-NEXT: movq $-1, %r13
504 ; CHECK-NEXT: cmovaq %r13, %r14
505 ; CHECK-NEXT: cmovaq %r13, %r15
506 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
507 ; CHECK-NEXT: callq __fixunsdfti@PLT
508 ; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
509 ; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
510 ; CHECK-NEXT: cmovbq %r12, %rdx
511 ; CHECK-NEXT: cmovbq %r12, %rax
512 ; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
513 ; CHECK-NEXT: cmovaq %r13, %rax
514 ; CHECK-NEXT: cmovaq %r13, %rdx
515 ; CHECK-NEXT: movq %rdx, 8(%rbx)
516 ; CHECK-NEXT: movq %rax, (%rbx)
517 ; CHECK-NEXT: movq %r15, 24(%rbx)
518 ; CHECK-NEXT: movq %r14, 16(%rbx)
519 ; CHECK-NEXT: movq %rbx, %rax
520 ; CHECK-NEXT: addq $32, %rsp
521 ; CHECK-NEXT: popq %rbx
522 ; CHECK-NEXT: popq %r12
523 ; CHECK-NEXT: popq %r13
524 ; CHECK-NEXT: popq %r14
525 ; CHECK-NEXT: popq %r15
527 %x = call <2 x i128> @llvm.fptoui.sat.v2i128.v2f64(<2 x double> %f)
532 ; 16-bit float to unsigned integer
535 declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half>)
536 declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half>)
537 declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half>)
538 declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half>)
539 declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half>)
540 declare <8 x i128> @llvm.fptoui.sat.v8i128.v8f16(<8 x half>)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v8i1.v8f16.
; The input vector is spilled to (%rsp). For each of the eight half lanes the
; expected code reloads it, moves the lane to the bottom of xmm0 (psrldq,
; shufps, movhlps, psrlq or psrld; lane 0 needs no shuffle), and calls
; __extendhfsf2@PLT. The result is converted with cvttss2si, then replaced by
; 0 (cmovbl from %ebx) or by 1 (cmoval from %ebp) depending on two ucomiss
; compares. Partial results are spilled and merged pairwise with punpcklwd,
; punpckldq and punpcklqdq.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
542 define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
543 ; CHECK-LABEL: test_unsigned_v8i1_v8f16:
545 ; CHECK-NEXT: pushq %rbp
546 ; CHECK-NEXT: pushq %rbx
547 ; CHECK-NEXT: subq $72, %rsp
548 ; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
549 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
550 ; CHECK-NEXT: callq __extendhfsf2@PLT
551 ; CHECK-NEXT: cvttss2si %xmm0, %eax
552 ; CHECK-NEXT: xorl %ebx, %ebx
553 ; CHECK-NEXT: xorps %xmm1, %xmm1
554 ; CHECK-NEXT: ucomiss %xmm1, %xmm0
555 ; CHECK-NEXT: cmovbl %ebx, %eax
556 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
557 ; CHECK-NEXT: movl $1, %ebp
558 ; CHECK-NEXT: cmoval %ebp, %eax
559 ; CHECK-NEXT: movd %eax, %xmm0
560 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
561 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
562 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
563 ; CHECK-NEXT: callq __extendhfsf2@PLT
564 ; CHECK-NEXT: cvttss2si %xmm0, %eax
565 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
566 ; CHECK-NEXT: cmovbl %ebx, %eax
567 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
568 ; CHECK-NEXT: cmoval %ebp, %eax
569 ; CHECK-NEXT: movd %eax, %xmm0
570 ; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
571 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
572 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
573 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
574 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
575 ; CHECK-NEXT: callq __extendhfsf2@PLT
576 ; CHECK-NEXT: cvttss2si %xmm0, %eax
577 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
578 ; CHECK-NEXT: cmovbl %ebx, %eax
579 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
580 ; CHECK-NEXT: cmoval %ebp, %eax
581 ; CHECK-NEXT: movd %eax, %xmm0
582 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
583 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
584 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
585 ; CHECK-NEXT: callq __extendhfsf2@PLT
586 ; CHECK-NEXT: cvttss2si %xmm0, %eax
587 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
588 ; CHECK-NEXT: cmovbl %ebx, %eax
589 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
590 ; CHECK-NEXT: cmoval %ebp, %eax
591 ; CHECK-NEXT: movd %eax, %xmm0
592 ; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
593 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
594 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
595 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
596 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
597 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
598 ; CHECK-NEXT: psrlq $48, %xmm0
599 ; CHECK-NEXT: callq __extendhfsf2@PLT
600 ; CHECK-NEXT: cvttss2si %xmm0, %eax
601 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
602 ; CHECK-NEXT: cmovbl %ebx, %eax
603 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
604 ; CHECK-NEXT: cmoval %ebp, %eax
605 ; CHECK-NEXT: movd %eax, %xmm0
606 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
607 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
608 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
609 ; CHECK-NEXT: callq __extendhfsf2@PLT
610 ; CHECK-NEXT: cvttss2si %xmm0, %eax
611 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
612 ; CHECK-NEXT: cmovbl %ebx, %eax
613 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
614 ; CHECK-NEXT: cmoval %ebp, %eax
615 ; CHECK-NEXT: movd %eax, %xmm0
616 ; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
617 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
618 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
619 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
620 ; CHECK-NEXT: callq __extendhfsf2@PLT
621 ; CHECK-NEXT: cvttss2si %xmm0, %eax
622 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
623 ; CHECK-NEXT: cmovbl %ebx, %eax
624 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
625 ; CHECK-NEXT: cmoval %ebp, %eax
626 ; CHECK-NEXT: movd %eax, %xmm0
627 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
628 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
629 ; CHECK-NEXT: psrld $16, %xmm0
630 ; CHECK-NEXT: callq __extendhfsf2@PLT
631 ; CHECK-NEXT: cvttss2si %xmm0, %eax
632 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
633 ; CHECK-NEXT: cmovbl %ebx, %eax
634 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
635 ; CHECK-NEXT: cmoval %ebp, %eax
636 ; CHECK-NEXT: movd %eax, %xmm1
637 ; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
638 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
639 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
640 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
641 ; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
642 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
643 ; CHECK-NEXT: addq $72, %rsp
644 ; CHECK-NEXT: popq %rbx
645 ; CHECK-NEXT: popq %rbp
647 %x = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> %f)
; Checks the x86-64 lowering of @llvm.fptoui.sat.v8i8.v8f16.
; The input vector is spilled to (%rsp). For each of the eight half lanes the
; expected code reloads it, moves the lane to the bottom of xmm0, and calls
; __extendhfsf2@PLT. The result is converted with cvttss2si, then replaced by
; 0 (cmovbl from %ebx) or by 255 (cmoval from %ebp) depending on two ucomiss
; compares. The byte results are merged in general-purpose registers with
; movzbl/shll/orl, then placed in the vector with movd and pinsrw $2 / $3.
; The CHECK lines are autogenerated; regenerate instead of hand-editing.
651 define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
652 ; CHECK-LABEL: test_unsigned_v8i8_v8f16:
654 ; CHECK-NEXT: pushq %rbp
655 ; CHECK-NEXT: pushq %r15
656 ; CHECK-NEXT: pushq %r14
657 ; CHECK-NEXT: pushq %r12
658 ; CHECK-NEXT: pushq %rbx
659 ; CHECK-NEXT: subq $32, %rsp
660 ; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
661 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
662 ; CHECK-NEXT: callq __extendhfsf2@PLT
663 ; CHECK-NEXT: cvttss2si %xmm0, %r15d
664 ; CHECK-NEXT: xorl %ebx, %ebx
665 ; CHECK-NEXT: xorps %xmm1, %xmm1
666 ; CHECK-NEXT: ucomiss %xmm1, %xmm0
667 ; CHECK-NEXT: cmovbl %ebx, %r15d
668 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
669 ; CHECK-NEXT: movl $255, %ebp
670 ; CHECK-NEXT: cmoval %ebp, %r15d
671 ; CHECK-NEXT: shll $8, %r15d
672 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
673 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
674 ; CHECK-NEXT: callq __extendhfsf2@PLT
675 ; CHECK-NEXT: cvttss2si %xmm0, %eax
676 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
677 ; CHECK-NEXT: cmovbl %ebx, %eax
678 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
679 ; CHECK-NEXT: cmoval %ebp, %eax
680 ; CHECK-NEXT: movzbl %al, %r14d
681 ; CHECK-NEXT: orl %r15d, %r14d
682 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
683 ; CHECK-NEXT: callq __extendhfsf2@PLT
684 ; CHECK-NEXT: cvttss2si %xmm0, %eax
685 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
686 ; CHECK-NEXT: cmovbl %ebx, %eax
687 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
688 ; CHECK-NEXT: cmoval %ebp, %eax
689 ; CHECK-NEXT: movzbl %al, %r15d
690 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
691 ; CHECK-NEXT: psrld $16, %xmm0
692 ; CHECK-NEXT: callq __extendhfsf2@PLT
693 ; CHECK-NEXT: cvttss2si %xmm0, %eax
694 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
695 ; CHECK-NEXT: cmovbl %ebx, %eax
696 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
697 ; CHECK-NEXT: cmoval %ebp, %eax
698 ; CHECK-NEXT: movzbl %al, %r12d
699 ; CHECK-NEXT: shll $8, %r12d
700 ; CHECK-NEXT: orl %r15d, %r12d
701 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
702 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
703 ; CHECK-NEXT: callq __extendhfsf2@PLT
704 ; CHECK-NEXT: cvttss2si %xmm0, %eax
705 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
706 ; CHECK-NEXT: cmovbl %ebx, %eax
707 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
708 ; CHECK-NEXT: cmoval %ebp, %eax
709 ; CHECK-NEXT: movzbl %al, %r15d
710 ; CHECK-NEXT: shll $16, %r15d
711 ; CHECK-NEXT: orl %r12d, %r15d
712 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
713 ; CHECK-NEXT: psrlq $48, %xmm0
714 ; CHECK-NEXT: callq __extendhfsf2@PLT
715 ; CHECK-NEXT: cvttss2si %xmm0, %eax
716 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
717 ; CHECK-NEXT: cmovbl %ebx, %eax
718 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
719 ; CHECK-NEXT: cmoval %ebp, %eax
720 ; CHECK-NEXT: shll $24, %eax
721 ; CHECK-NEXT: orl %r15d, %eax
722 ; CHECK-NEXT: movd %eax, %xmm0
723 ; CHECK-NEXT: pinsrw $2, %r14d, %xmm0
724 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
725 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
726 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
727 ; CHECK-NEXT: callq __extendhfsf2@PLT
728 ; CHECK-NEXT: cvttss2si %xmm0, %r14d
729 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
730 ; CHECK-NEXT: cmovbl %ebx, %r14d
731 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
732 ; CHECK-NEXT: cmoval %ebp, %r14d
733 ; CHECK-NEXT: shll $8, %r14d
734 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
735 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
736 ; CHECK-NEXT: callq __extendhfsf2@PLT
737 ; CHECK-NEXT: cvttss2si %xmm0, %eax
738 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
739 ; CHECK-NEXT: cmovbl %ebx, %eax
740 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
741 ; CHECK-NEXT: cmoval %ebp, %eax
742 ; CHECK-NEXT: movzbl %al, %eax
743 ; CHECK-NEXT: orl %r14d, %eax
744 ; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
745 ; CHECK-NEXT: pinsrw $3, %eax, %xmm0
746 ; CHECK-NEXT: addq $32, %rsp
747 ; CHECK-NEXT: popq %rbx
748 ; CHECK-NEXT: popq %r12
749 ; CHECK-NEXT: popq %r14
750 ; CHECK-NEXT: popq %r15
751 ; CHECK-NEXT: popq %rbp
753 %x = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> %f)
; Saturating unsigned conversion of <8 x half> to <8 x i16>.
; The expected code works one lane at a time: the half is widened through a
; __extendhfsf2 libcall, truncated with cvttss2si, then clamped by a cmovb to
; zero (%ebx) and a cmova to 65535 (%ebp). The eight scalar results are
; repacked into xmm0 with punpcklwd / punpckldq / punpcklqdq.
757 define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
758 ; CHECK-LABEL: test_unsigned_v8i16_v8f16:
760 ; CHECK-NEXT: pushq %rbp
761 ; CHECK-NEXT: pushq %rbx
762 ; CHECK-NEXT: subq $72, %rsp
763 ; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
764 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
765 ; CHECK-NEXT: callq __extendhfsf2@PLT
766 ; CHECK-NEXT: cvttss2si %xmm0, %eax
767 ; CHECK-NEXT: xorl %ebx, %ebx
768 ; CHECK-NEXT: xorps %xmm1, %xmm1
769 ; CHECK-NEXT: ucomiss %xmm1, %xmm0
770 ; CHECK-NEXT: cmovbl %ebx, %eax
771 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
772 ; CHECK-NEXT: movl $65535, %ebp # imm = 0xFFFF
773 ; CHECK-NEXT: cmoval %ebp, %eax
774 ; CHECK-NEXT: movd %eax, %xmm0
775 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
776 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
777 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
778 ; CHECK-NEXT: callq __extendhfsf2@PLT
779 ; CHECK-NEXT: cvttss2si %xmm0, %eax
780 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
781 ; CHECK-NEXT: cmovbl %ebx, %eax
782 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
783 ; CHECK-NEXT: cmoval %ebp, %eax
784 ; CHECK-NEXT: movd %eax, %xmm0
785 ; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
786 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
787 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
788 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
789 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
790 ; CHECK-NEXT: callq __extendhfsf2@PLT
791 ; CHECK-NEXT: cvttss2si %xmm0, %eax
792 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
793 ; CHECK-NEXT: cmovbl %ebx, %eax
794 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
795 ; CHECK-NEXT: cmoval %ebp, %eax
796 ; CHECK-NEXT: movd %eax, %xmm0
797 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
798 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
799 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
800 ; CHECK-NEXT: callq __extendhfsf2@PLT
801 ; CHECK-NEXT: cvttss2si %xmm0, %eax
802 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
803 ; CHECK-NEXT: cmovbl %ebx, %eax
804 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
805 ; CHECK-NEXT: cmoval %ebp, %eax
806 ; CHECK-NEXT: movd %eax, %xmm0
807 ; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
808 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
809 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
810 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
811 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
812 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
813 ; CHECK-NEXT: psrlq $48, %xmm0
814 ; CHECK-NEXT: callq __extendhfsf2@PLT
815 ; CHECK-NEXT: cvttss2si %xmm0, %eax
816 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
817 ; CHECK-NEXT: cmovbl %ebx, %eax
818 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
819 ; CHECK-NEXT: cmoval %ebp, %eax
820 ; CHECK-NEXT: movd %eax, %xmm0
821 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
822 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
823 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
824 ; CHECK-NEXT: callq __extendhfsf2@PLT
825 ; CHECK-NEXT: cvttss2si %xmm0, %eax
826 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
827 ; CHECK-NEXT: cmovbl %ebx, %eax
828 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
829 ; CHECK-NEXT: cmoval %ebp, %eax
830 ; CHECK-NEXT: movd %eax, %xmm0
831 ; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
832 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
833 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
834 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
835 ; CHECK-NEXT: callq __extendhfsf2@PLT
836 ; CHECK-NEXT: cvttss2si %xmm0, %eax
837 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
838 ; CHECK-NEXT: cmovbl %ebx, %eax
839 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
840 ; CHECK-NEXT: cmoval %ebp, %eax
841 ; CHECK-NEXT: movd %eax, %xmm0
842 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
843 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
844 ; CHECK-NEXT: psrld $16, %xmm0
845 ; CHECK-NEXT: callq __extendhfsf2@PLT
846 ; CHECK-NEXT: cvttss2si %xmm0, %eax
847 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
848 ; CHECK-NEXT: cmovbl %ebx, %eax
849 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
850 ; CHECK-NEXT: cmoval %ebp, %eax
851 ; CHECK-NEXT: movd %eax, %xmm1
852 ; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
853 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
854 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
855 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
856 ; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
857 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
858 ; CHECK-NEXT: addq $72, %rsp
859 ; CHECK-NEXT: popq %rbx
860 ; CHECK-NEXT: popq %rbp
862 %x = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %f)
; Saturating unsigned conversion of <8 x half> to <8 x i32>.
; Per lane the expected code calls __extendhfsf2, converts with the 64-bit
; form of cvttss2si (the result is then consumed through %eax), and clamps
; with a cmovb to zero (%ebx) and a cmova to -1 (%ebp). Lanes are merged with
; punpckldq / punpcklqdq and the result is handed back in xmm0 and xmm1.
866 define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
867 ; CHECK-LABEL: test_unsigned_v8i32_v8f16:
869 ; CHECK-NEXT: pushq %rbp
870 ; CHECK-NEXT: pushq %rbx
871 ; CHECK-NEXT: subq $72, %rsp
872 ; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
873 ; CHECK-NEXT: psrlq $48, %xmm0
874 ; CHECK-NEXT: callq __extendhfsf2@PLT
875 ; CHECK-NEXT: cvttss2si %xmm0, %rax
876 ; CHECK-NEXT: xorl %ebx, %ebx
877 ; CHECK-NEXT: xorps %xmm1, %xmm1
878 ; CHECK-NEXT: ucomiss %xmm1, %xmm0
879 ; CHECK-NEXT: cmovbl %ebx, %eax
880 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
881 ; CHECK-NEXT: movl $-1, %ebp
882 ; CHECK-NEXT: cmoval %ebp, %eax
883 ; CHECK-NEXT: movd %eax, %xmm0
884 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
885 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
886 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
887 ; CHECK-NEXT: callq __extendhfsf2@PLT
888 ; CHECK-NEXT: cvttss2si %xmm0, %rax
889 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
890 ; CHECK-NEXT: cmovbl %ebx, %eax
891 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
892 ; CHECK-NEXT: cmoval %ebp, %eax
893 ; CHECK-NEXT: movd %eax, %xmm0
894 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
895 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
896 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
897 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
898 ; CHECK-NEXT: callq __extendhfsf2@PLT
899 ; CHECK-NEXT: cvttss2si %xmm0, %rax
900 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
901 ; CHECK-NEXT: cmovbl %ebx, %eax
902 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
903 ; CHECK-NEXT: cmoval %ebp, %eax
904 ; CHECK-NEXT: movd %eax, %xmm0
905 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
906 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
907 ; CHECK-NEXT: psrld $16, %xmm0
908 ; CHECK-NEXT: callq __extendhfsf2@PLT
909 ; CHECK-NEXT: cvttss2si %xmm0, %rax
910 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
911 ; CHECK-NEXT: cmovbl %ebx, %eax
912 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
913 ; CHECK-NEXT: cmoval %ebp, %eax
914 ; CHECK-NEXT: movd %eax, %xmm0
915 ; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
916 ; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
917 ; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
918 ; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
919 ; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
920 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
921 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
922 ; CHECK-NEXT: callq __extendhfsf2@PLT
923 ; CHECK-NEXT: cvttss2si %xmm0, %rax
924 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
925 ; CHECK-NEXT: cmovbl %ebx, %eax
926 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
927 ; CHECK-NEXT: cmoval %ebp, %eax
928 ; CHECK-NEXT: movd %eax, %xmm0
929 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
930 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
931 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
932 ; CHECK-NEXT: callq __extendhfsf2@PLT
933 ; CHECK-NEXT: cvttss2si %xmm0, %rax
934 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
935 ; CHECK-NEXT: cmovbl %ebx, %eax
936 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
937 ; CHECK-NEXT: cmoval %ebp, %eax
938 ; CHECK-NEXT: movd %eax, %xmm0
939 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
940 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
941 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
942 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
943 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
944 ; CHECK-NEXT: callq __extendhfsf2@PLT
945 ; CHECK-NEXT: cvttss2si %xmm0, %rax
946 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
947 ; CHECK-NEXT: cmovbl %ebx, %eax
948 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
949 ; CHECK-NEXT: cmoval %ebp, %eax
950 ; CHECK-NEXT: movd %eax, %xmm0
951 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
952 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
953 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
954 ; CHECK-NEXT: callq __extendhfsf2@PLT
955 ; CHECK-NEXT: cvttss2si %xmm0, %rax
956 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
957 ; CHECK-NEXT: cmovbl %ebx, %eax
958 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
959 ; CHECK-NEXT: cmoval %ebp, %eax
960 ; CHECK-NEXT: movd %eax, %xmm1
961 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
962 ; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
963 ; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
964 ; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
965 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
966 ; CHECK-NEXT: addq $72, %rsp
967 ; CHECK-NEXT: popq %rbx
968 ; CHECK-NEXT: popq %rbp
970 %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> %f)
; Saturating unsigned conversion of <8 x half> to <8 x i64>.
; Per lane the expected code calls __extendhfsf2 and then issues two
; cvttss2si conversions: one of the value itself and one of the value after a
; subss with a constant-pool operand. The two results are merged with
; sarq $63 / andq / orq, then clamped by a cmovb to zero (%rbx) and a cmova to
; -1 (%r14). Pairs of lanes are joined with punpcklqdq and the result is
; handed back in xmm0..xmm3.
975 define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
976 ; CHECK-LABEL: test_unsigned_v8i64_v8f16:
977 ; CHECK-NEXT: pushq %r14
978 ; CHECK-NEXT: pushq %rbx
979 ; CHECK-NEXT: subq $88, %rsp
980 ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
981 ; CHECK-NEXT: callq __extendhfsf2@PLT
982 ; CHECK-NEXT: movaps %xmm0, %xmm1
983 ; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
984 ; CHECK-NEXT: cvttss2si %xmm1, %rax
985 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
986 ; CHECK-NEXT: movq %rcx, %rdx
987 ; CHECK-NEXT: sarq $63, %rdx
988 ; CHECK-NEXT: andq %rax, %rdx
989 ; CHECK-NEXT: orq %rcx, %rdx
990 ; CHECK-NEXT: xorl %ebx, %ebx
991 ; CHECK-NEXT: xorps %xmm1, %xmm1
992 ; CHECK-NEXT: ucomiss %xmm1, %xmm0
993 ; CHECK-NEXT: cmovbq %rbx, %rdx
994 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
995 ; CHECK-NEXT: movq $-1, %r14
996 ; CHECK-NEXT: cmovaq %r14, %rdx
997 ; CHECK-NEXT: movq %rdx, %xmm0
998 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
999 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
1000 ; CHECK-NEXT: psrld $16, %xmm0
1001 ; CHECK-NEXT: callq __extendhfsf2@PLT
1002 ; CHECK-NEXT: movdqa %xmm0, %xmm1
1003 ; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1004 ; CHECK-NEXT: cvttss2si %xmm1, %rax
1005 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
1006 ; CHECK-NEXT: movq %rcx, %rdx
1007 ; CHECK-NEXT: sarq $63, %rdx
1008 ; CHECK-NEXT: andq %rax, %rdx
1009 ; CHECK-NEXT: orq %rcx, %rdx
1010 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1011 ; CHECK-NEXT: cmovbq %rbx, %rdx
1012 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1013 ; CHECK-NEXT: cmovaq %r14, %rdx
1014 ; CHECK-NEXT: movq %rdx, %xmm0
1015 ; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1016 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1017 ; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1018 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
1019 ; CHECK-NEXT: psrlq $48, %xmm0
1020 ; CHECK-NEXT: callq __extendhfsf2@PLT
1021 ; CHECK-NEXT: movdqa %xmm0, %xmm1
1022 ; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1023 ; CHECK-NEXT: cvttss2si %xmm1, %rax
1024 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
1025 ; CHECK-NEXT: movq %rcx, %rdx
1026 ; CHECK-NEXT: sarq $63, %rdx
1027 ; CHECK-NEXT: andq %rax, %rdx
1028 ; CHECK-NEXT: orq %rcx, %rdx
1029 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1030 ; CHECK-NEXT: cmovbq %rbx, %rdx
1031 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1032 ; CHECK-NEXT: cmovaq %r14, %rdx
1033 ; CHECK-NEXT: movq %rdx, %xmm0
1034 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1035 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1036 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1037 ; CHECK-NEXT: callq __extendhfsf2@PLT
1038 ; CHECK-NEXT: movaps %xmm0, %xmm1
1039 ; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1040 ; CHECK-NEXT: cvttss2si %xmm1, %rax
1041 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
1042 ; CHECK-NEXT: movq %rcx, %rdx
1043 ; CHECK-NEXT: sarq $63, %rdx
1044 ; CHECK-NEXT: andq %rax, %rdx
1045 ; CHECK-NEXT: orq %rcx, %rdx
1046 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1047 ; CHECK-NEXT: cmovbq %rbx, %rdx
1048 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1049 ; CHECK-NEXT: cmovaq %r14, %rdx
1050 ; CHECK-NEXT: movq %rdx, %xmm0
1051 ; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1052 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
1053 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1054 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
1055 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1056 ; CHECK-NEXT: callq __extendhfsf2@PLT
1057 ; CHECK-NEXT: movdqa %xmm0, %xmm1
1058 ; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1059 ; CHECK-NEXT: cvttss2si %xmm1, %rax
1060 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
1061 ; CHECK-NEXT: movq %rcx, %rdx
1062 ; CHECK-NEXT: sarq $63, %rdx
1063 ; CHECK-NEXT: andq %rax, %rdx
1064 ; CHECK-NEXT: orq %rcx, %rdx
1065 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1066 ; CHECK-NEXT: cmovbq %rbx, %rdx
1067 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1068 ; CHECK-NEXT: cmovaq %r14, %rdx
1069 ; CHECK-NEXT: movq %rdx, %xmm0
1070 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1071 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1072 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1073 ; CHECK-NEXT: callq __extendhfsf2@PLT
1074 ; CHECK-NEXT: movaps %xmm0, %xmm1
1075 ; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1076 ; CHECK-NEXT: cvttss2si %xmm1, %rax
1077 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
1078 ; CHECK-NEXT: movq %rcx, %rdx
1079 ; CHECK-NEXT: sarq $63, %rdx
1080 ; CHECK-NEXT: andq %rax, %rdx
1081 ; CHECK-NEXT: orq %rcx, %rdx
1082 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1083 ; CHECK-NEXT: cmovbq %rbx, %rdx
1084 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1085 ; CHECK-NEXT: cmovaq %r14, %rdx
1086 ; CHECK-NEXT: movq %rdx, %xmm0
1087 ; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1088 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
1089 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1090 ; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
1091 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1092 ; CHECK-NEXT: callq __extendhfsf2@PLT
1093 ; CHECK-NEXT: movdqa %xmm0, %xmm1
1094 ; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1095 ; CHECK-NEXT: cvttss2si %xmm1, %rax
1096 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
1097 ; CHECK-NEXT: movq %rcx, %rdx
1098 ; CHECK-NEXT: sarq $63, %rdx
1099 ; CHECK-NEXT: andq %rax, %rdx
1100 ; CHECK-NEXT: orq %rcx, %rdx
1101 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1102 ; CHECK-NEXT: cmovbq %rbx, %rdx
1103 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1104 ; CHECK-NEXT: cmovaq %r14, %rdx
1105 ; CHECK-NEXT: movq %rdx, %xmm0
1106 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1107 ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
1108 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1109 ; CHECK-NEXT: callq __extendhfsf2@PLT
1110 ; CHECK-NEXT: movaps %xmm0, %xmm1
1111 ; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1112 ; CHECK-NEXT: cvttss2si %xmm1, %rax
1113 ; CHECK-NEXT: cvttss2si %xmm0, %rcx
1114 ; CHECK-NEXT: movq %rcx, %rdx
1115 ; CHECK-NEXT: sarq $63, %rdx
1116 ; CHECK-NEXT: andq %rax, %rdx
1117 ; CHECK-NEXT: orq %rcx, %rdx
1118 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1119 ; CHECK-NEXT: cmovbq %rbx, %rdx
1120 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1121 ; CHECK-NEXT: cmovaq %r14, %rdx
1122 ; CHECK-NEXT: movq %rdx, %xmm3
1123 ; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
1124 ; CHECK-NEXT: # xmm3 = xmm3[0],mem[0]
1125 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1126 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1127 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
1128 ; CHECK-NEXT: addq $88, %rsp
1129 ; CHECK-NEXT: popq %rbx
1130 ; CHECK-NEXT: popq %r14
1132 %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> %f)
1136 define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
1137 ; CHECK-LABEL: test_unsigned_v8i128_v8f16:
1139 ; CHECK-NEXT: pushq %rbp
1140 ; CHECK-NEXT: pushq %r15
1141 ; CHECK-NEXT: pushq %r14
1142 ; CHECK-NEXT: pushq %r13
1143 ; CHECK-NEXT: pushq %r12
1144 ; CHECK-NEXT: pushq %rbx
1145 ; CHECK-NEXT: subq $104, %rsp
1146 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1147 ; CHECK-NEXT: movq %rdi, %rbx
1148 ; CHECK-NEXT: psrld $16, %xmm0
1149 ; CHECK-NEXT: callq __extendhfsf2@PLT
1150 ; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1151 ; CHECK-NEXT: callq __fixunssfti@PLT
1152 ; CHECK-NEXT: xorl %r12d, %r12d
1153 ; CHECK-NEXT: pxor %xmm0, %xmm0
1154 ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1155 ; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
1156 ; CHECK-NEXT: ucomiss %xmm0, %xmm1
1157 ; CHECK-NEXT: cmovbq %r12, %rdx
1158 ; CHECK-NEXT: cmovbq %r12, %rax
1159 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1160 ; CHECK-NEXT: movq $-1, %r13
1161 ; CHECK-NEXT: cmovaq %r13, %rax
1162 ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1163 ; CHECK-NEXT: cmovaq %r13, %rdx
1164 ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1165 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1166 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1167 ; CHECK-NEXT: callq __extendhfsf2@PLT
1168 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1169 ; CHECK-NEXT: callq __fixunssfti@PLT
1170 ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1171 ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
1172 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1173 ; CHECK-NEXT: cmovbq %r12, %rdx
1174 ; CHECK-NEXT: cmovbq %r12, %rax
1175 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1176 ; CHECK-NEXT: cmovaq %r13, %rax
1177 ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1178 ; CHECK-NEXT: cmovaq %r13, %rdx
1179 ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1180 ; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1181 ; CHECK-NEXT: psrlq $48, %xmm0
1182 ; CHECK-NEXT: callq __extendhfsf2@PLT
1183 ; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1184 ; CHECK-NEXT: callq __fixunssfti@PLT
1185 ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1186 ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
1187 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1188 ; CHECK-NEXT: cmovbq %r12, %rdx
1189 ; CHECK-NEXT: cmovbq %r12, %rax
1190 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1191 ; CHECK-NEXT: cmovaq %r13, %rax
1192 ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1193 ; CHECK-NEXT: cmovaq %r13, %rdx
1194 ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1195 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1196 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
1197 ; CHECK-NEXT: callq __extendhfsf2@PLT
1198 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1199 ; CHECK-NEXT: callq __fixunssfti@PLT
1200 ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1201 ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
1202 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1203 ; CHECK-NEXT: cmovbq %r12, %rdx
1204 ; CHECK-NEXT: cmovbq %r12, %rax
1205 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1206 ; CHECK-NEXT: cmovaq %r13, %rax
1207 ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1208 ; CHECK-NEXT: cmovaq %r13, %rdx
1209 ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1210 ; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1211 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1212 ; CHECK-NEXT: callq __extendhfsf2@PLT
1213 ; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1214 ; CHECK-NEXT: callq __fixunssfti@PLT
1215 ; CHECK-NEXT: movq %rdx, %rbp
1216 ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1217 ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
1218 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1219 ; CHECK-NEXT: cmovbq %r12, %rbp
1220 ; CHECK-NEXT: cmovbq %r12, %rax
1221 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1222 ; CHECK-NEXT: cmovaq %r13, %rax
1223 ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1224 ; CHECK-NEXT: cmovaq %r13, %rbp
1225 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1226 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1227 ; CHECK-NEXT: callq __extendhfsf2@PLT
1228 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1229 ; CHECK-NEXT: callq __fixunssfti@PLT
1230 ; CHECK-NEXT: movq %rax, %r14
1231 ; CHECK-NEXT: movq %rdx, %r15
1232 ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1233 ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
1234 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1235 ; CHECK-NEXT: cmovbq %r12, %r15
1236 ; CHECK-NEXT: cmovbq %r12, %r14
1237 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1238 ; CHECK-NEXT: cmovaq %r13, %r14
1239 ; CHECK-NEXT: cmovaq %r13, %r15
1240 ; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1241 ; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1242 ; CHECK-NEXT: callq __extendhfsf2@PLT
1243 ; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1244 ; CHECK-NEXT: callq __fixunssfti@PLT
1245 ; CHECK-NEXT: movq %rax, %r12
1246 ; CHECK-NEXT: movq %rdx, %r13
1247 ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1248 ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
1249 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1250 ; CHECK-NEXT: movl $0, %eax
1251 ; CHECK-NEXT: cmovbq %rax, %r13
1252 ; CHECK-NEXT: cmovbq %rax, %r12
1253 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1254 ; CHECK-NEXT: movq $-1, %rax
1255 ; CHECK-NEXT: cmovaq %rax, %r12
1256 ; CHECK-NEXT: cmovaq %rax, %r13
1257 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1258 ; CHECK-NEXT: callq __extendhfsf2@PLT
1259 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1260 ; CHECK-NEXT: callq __fixunssfti@PLT
1261 ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1262 ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
1263 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1264 ; CHECK-NEXT: movl $0, %ecx
1265 ; CHECK-NEXT: cmovbq %rcx, %rdx
1266 ; CHECK-NEXT: cmovbq %rcx, %rax
1267 ; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1268 ; CHECK-NEXT: movq $-1, %rcx
1269 ; CHECK-NEXT: cmovaq %rcx, %rax
1270 ; CHECK-NEXT: cmovaq %rcx, %rdx
1271 ; CHECK-NEXT: movq %rdx, 8(%rbx)
1272 ; CHECK-NEXT: movq %rax, (%rbx)
1273 ; CHECK-NEXT: movq %r13, 120(%rbx)
1274 ; CHECK-NEXT: movq %r12, 112(%rbx)
1275 ; CHECK-NEXT: movq %r15, 104(%rbx)
1276 ; CHECK-NEXT: movq %r14, 96(%rbx)
1277 ; CHECK-NEXT: movq %rbp, 88(%rbx)
1278 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1279 ; CHECK-NEXT: movq %rax, 80(%rbx)
1280 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1281 ; CHECK-NEXT: movq %rax, 72(%rbx)
1282 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1283 ; CHECK-NEXT: movq %rax, 64(%rbx)
1284 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1285 ; CHECK-NEXT: movq %rax, 56(%rbx)
1286 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1287 ; CHECK-NEXT: movq %rax, 48(%rbx)
1288 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1289 ; CHECK-NEXT: movq %rax, 40(%rbx)
1290 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1291 ; CHECK-NEXT: movq %rax, 32(%rbx)
1292 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1293 ; CHECK-NEXT: movq %rax, 24(%rbx)
1294 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1295 ; CHECK-NEXT: movq %rax, 16(%rbx)
1296 ; CHECK-NEXT: movq %rbx, %rax
1297 ; CHECK-NEXT: addq $104, %rsp
1298 ; CHECK-NEXT: popq %rbx
1299 ; CHECK-NEXT: popq %r12
1300 ; CHECK-NEXT: popq %r13
1301 ; CHECK-NEXT: popq %r14
1302 ; CHECK-NEXT: popq %r15
1303 ; CHECK-NEXT: popq %rbp
1305 %x = call <8 x i128> @llvm.fptoui.sat.v8i128.v8f16(<8 x half> %f)