1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
; Declarations of the llvm.udiv.fix.sat overloads called by the test functions
; in this file: scalar i4, i15, i16, i18 and i64, plus the <4 x i32> vector
; form. Every overload takes two operands of the overloaded type followed by
; an i32 (the scale operand, per the LLVM LangRef) and returns that same type.
5 declare i4 @llvm.udiv.fix.sat.i4 (i4, i4, i32)
6 declare i15 @llvm.udiv.fix.sat.i15 (i15, i15, i32)
7 declare i16 @llvm.udiv.fix.sat.i16 (i16, i16, i32)
8 declare i18 @llvm.udiv.fix.sat.i18 (i18, i18, i32)
9 declare i64 @llvm.udiv.fix.sat.i64 (i64, i64, i32)
10 declare <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32)
; func: passes both i16 arguments unchanged to llvm.udiv.fix.sat.i16 with an
; i32 operand of 7.
; The assertions visible for both targets zero-extend the two 16-bit operands,
; shift the value in %eax left by 8, and then compare %eax against 0x1FFFF,
; replacing it with 0x1FFFF via cmovae when it is above or equal.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them rather than editing by hand.
12 define i16 @func(i16 %x, i16 %y) nounwind {
15 ; X64-NEXT: movzwl %si, %ecx
16 ; X64-NEXT: movzwl %di, %eax
17 ; X64-NEXT: shll $8, %eax
18 ; X64-NEXT: xorl %edx, %edx
20 ; X64-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
21 ; X64-NEXT: movl $131071, %ecx # imm = 0x1FFFF
22 ; X64-NEXT: cmovael %ecx, %eax
24 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
29 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
30 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
31 ; X86-NEXT: movzwl %ax, %eax
32 ; X86-NEXT: shll $8, %eax
33 ; X86-NEXT: xorl %edx, %edx
35 ; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
36 ; X86-NEXT: movl $131071, %ecx # imm = 0x1FFFF
37 ; X86-NEXT: cmovael %ecx, %eax
39 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
41 %tmp = call i16 @llvm.udiv.fix.sat.i16(i16 %x, i16 %y, i32 7)
; func2: sign-extends both i8 arguments to the non-power-of-two type i15,
; calls llvm.udiv.fix.sat.i15 with an i32 operand of 14, and sign-extends the
; i15 result to the i16 return type.
; The assertions visible for both targets sign-extend each argument with
; movsbl, mask it to 15 bits (0x7FFF), shift the value in %eax left by 14, and
; later use cmovb to pick between %eax and the constant 0x7FFF.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them rather than editing by hand.
45 define i16 @func2(i8 %x, i8 %y) nounwind {
48 ; X64-NEXT: movsbl %dil, %eax
49 ; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
50 ; X64-NEXT: movsbl %sil, %ecx
51 ; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
52 ; X64-NEXT: shll $14, %eax
53 ; X64-NEXT: xorl %edx, %edx
55 ; X64-NEXT: cmpl $32767, %eax # imm = 0x7FFF
56 ; X64-NEXT: movl $32767, %ecx # imm = 0x7FFF
57 ; X64-NEXT: cmovbl %eax, %ecx
58 ; X64-NEXT: addl %ecx, %ecx
59 ; X64-NEXT: movswl %cx, %eax
61 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
66 ; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
67 ; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF
68 ; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
69 ; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
70 ; X86-NEXT: shll $14, %eax
71 ; X86-NEXT: xorl %edx, %edx
73 ; X86-NEXT: cmpl $32767, %eax # imm = 0x7FFF
74 ; X86-NEXT: movl $32767, %ecx # imm = 0x7FFF
75 ; X86-NEXT: cmovbl %eax, %ecx
76 ; X86-NEXT: addl %ecx, %ecx
77 ; X86-NEXT: movswl %cx, %eax
79 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
81 %x2 = sext i8 %x to i15
82 %y2 = sext i8 %y to i15
83 %tmp = call i15 @llvm.udiv.fix.sat.i15(i15 %x2, i15 %y2, i32 14)
84 %tmp2 = sext i15 %tmp to i16
; func3: takes an i15 %x directly and an i8 %y that is sign-extended to i15,
; calls llvm.udiv.fix.sat.i15 on %x and %y3 with an i32 operand of 4, and
; sign-extends the i15 result to the i16 return type.
; The assertions visible for both targets double the first operand
; (leal/addl), zero-extend the low byte of the second operand and shift it
; left by 4, and later use cmovb to pick between %eax and the constant 0x7FFF.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them rather than editing by hand.
88 define i16 @func3(i15 %x, i8 %y) nounwind {
91 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
92 ; X64-NEXT: leal (%rdi,%rdi), %eax
93 ; X64-NEXT: movzbl %sil, %ecx
94 ; X64-NEXT: shll $4, %ecx
95 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
96 ; X64-NEXT: xorl %edx, %edx
98 ; X64-NEXT: # kill: def $ax killed $ax def $eax
99 ; X64-NEXT: movzwl %ax, %ecx
100 ; X64-NEXT: cmpl $32767, %ecx # imm = 0x7FFF
101 ; X64-NEXT: movl $32767, %ecx # imm = 0x7FFF
102 ; X64-NEXT: cmovbl %eax, %ecx
103 ; X64-NEXT: addl %ecx, %ecx
104 ; X64-NEXT: movswl %cx, %eax
105 ; X64-NEXT: shrl %eax
106 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
111 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
112 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
113 ; X86-NEXT: addl %eax, %eax
114 ; X86-NEXT: movzbl %cl, %ecx
115 ; X86-NEXT: shll $4, %ecx
116 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
117 ; X86-NEXT: xorl %edx, %edx
119 ; X86-NEXT: # kill: def $ax killed $ax def $eax
120 ; X86-NEXT: movzwl %ax, %ecx
121 ; X86-NEXT: cmpl $32767, %ecx # imm = 0x7FFF
122 ; X86-NEXT: movl $32767, %ecx # imm = 0x7FFF
123 ; X86-NEXT: cmovbl %eax, %ecx
124 ; X86-NEXT: addl %ecx, %ecx
125 ; X86-NEXT: movswl %cx, %eax
126 ; X86-NEXT: shrl %eax
127 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
129 %y2 = sext i8 %y to i15
131 %tmp = call i15 @llvm.udiv.fix.sat.i15(i15 %x, i15 %y3, i32 4)
132 %tmp2 = sext i15 %tmp to i16
; func4: passes both i4 arguments unchanged to llvm.udiv.fix.sat.i4 with an
; i32 operand of 2.
; The assertions visible for both targets mask each operand to its low four
; bits (andb $15), shift one operand left by 2, zero-extend it into %eax, and
; finally use cmovb to pick between the zero-extended %al value and the
; constant 15. The x86-64 sequence shows the 8-bit divide (divb %sil).
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them rather than editing by hand.
136 define i4 @func4(i4 %x, i4 %y) nounwind {
139 ; X64-NEXT: andb $15, %sil
140 ; X64-NEXT: andb $15, %dil
141 ; X64-NEXT: shlb $2, %dil
142 ; X64-NEXT: movzbl %dil, %eax
143 ; X64-NEXT: divb %sil
144 ; X64-NEXT: movzbl %al, %ecx
145 ; X64-NEXT: cmpb $15, %cl
146 ; X64-NEXT: movl $15, %eax
147 ; X64-NEXT: cmovbl %ecx, %eax
148 ; X64-NEXT: # kill: def $al killed $al killed $eax
153 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
154 ; X86-NEXT: andb $15, %cl
155 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
156 ; X86-NEXT: andb $15, %al
157 ; X86-NEXT: shlb $2, %al
158 ; X86-NEXT: movzbl %al, %eax
160 ; X86-NEXT: movzbl %al, %ecx
161 ; X86-NEXT: cmpb $15, %al
162 ; X86-NEXT: movl $15, %eax
163 ; X86-NEXT: cmovbl %ecx, %eax
164 ; X86-NEXT: # kill: def $al killed $al killed $eax
166 %tmp = call i4 @llvm.udiv.fix.sat.i4(i4 %x, i4 %y, i32 2)
; func5: passes both i64 arguments unchanged to llvm.udiv.fix.sat.i64 with an
; i32 operand of 31.
; Neither target divides inline in the visible assertions; both call the
; __udivti3 library routine (through the PLT on x86-64).
;   - x86-64: after the call, %rdx is compared against 2; cmovae replaces
;     %rax with -1 and cmovb picks between %rdx and 1 before a shrdq by 1
;     forms the value left in %rax.
;   - i686: the function sets up an %ebp frame with a 16-byte-aligned stack,
;     pushes the operands plus the address held in %esi (copied from %esp)
;     before the call, then ORs two stack words together and branches
;     (jne .LBB4_2) with -1 preloaded in %eax and %edx.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them rather than editing by hand.
170 define i64 @func5(i64 %x, i64 %y) nounwind {
173 ; X64-NEXT: pushq %rax
174 ; X64-NEXT: movq %rsi, %rdx
175 ; X64-NEXT: leaq (%rdi,%rdi), %rax
176 ; X64-NEXT: movq %rdi, %rsi
177 ; X64-NEXT: shrq $63, %rsi
178 ; X64-NEXT: shldq $31, %rax, %rsi
179 ; X64-NEXT: shlq $32, %rdi
180 ; X64-NEXT: xorl %ecx, %ecx
181 ; X64-NEXT: callq __udivti3@PLT
182 ; X64-NEXT: cmpq $2, %rdx
183 ; X64-NEXT: movq $-1, %rcx
184 ; X64-NEXT: cmovaeq %rcx, %rax
185 ; X64-NEXT: movl $1, %ecx
186 ; X64-NEXT: cmovbq %rdx, %rcx
187 ; X64-NEXT: shrdq $1, %rcx, %rax
188 ; X64-NEXT: popq %rcx
193 ; X86-NEXT: pushl %ebp
194 ; X86-NEXT: movl %esp, %ebp
195 ; X86-NEXT: pushl %esi
196 ; X86-NEXT: andl $-16, %esp
197 ; X86-NEXT: subl $32, %esp
198 ; X86-NEXT: movl 8(%ebp), %eax
199 ; X86-NEXT: movl 12(%ebp), %ecx
200 ; X86-NEXT: movl %ecx, %edx
201 ; X86-NEXT: shrl %edx
202 ; X86-NEXT: shldl $31, %eax, %ecx
203 ; X86-NEXT: shll $31, %eax
204 ; X86-NEXT: movl %esp, %esi
207 ; X86-NEXT: pushl 20(%ebp)
208 ; X86-NEXT: pushl 16(%ebp)
210 ; X86-NEXT: pushl %edx
211 ; X86-NEXT: pushl %ecx
212 ; X86-NEXT: pushl %eax
213 ; X86-NEXT: pushl %esi
214 ; X86-NEXT: calll __udivti3
215 ; X86-NEXT: addl $32, %esp
216 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
217 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
218 ; X86-NEXT: movl $-1, %eax
219 ; X86-NEXT: movl $-1, %edx
220 ; X86-NEXT: jne .LBB4_2
222 ; X86-NEXT: movl (%esp), %eax
223 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
225 ; X86-NEXT: leal -4(%ebp), %esp
226 ; X86-NEXT: popl %esi
227 ; X86-NEXT: popl %ebp
229 %tmp = call i64 @llvm.udiv.fix.sat.i64(i64 %x, i64 %y, i32 31)
; func6: sign-extends both i16 arguments to the non-power-of-two type i18 and
; calls llvm.udiv.fix.sat.i18 with an i32 operand of 7; the function's return
; type is i18.
; The assertions for both targets sign-extend each argument with movswl, mask
; it to 18 bits (0x3FFFF), shift the value in %eax left by 7, clear %edx,
; divide with a 32-bit divl by %ecx, and then replace %eax with 0x3FFFF via
; cmovae when it compares above or equal to that constant.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them rather than editing by hand.
233 define i18 @func6(i16 %x, i16 %y) nounwind {
236 ; X64-NEXT: movswl %di, %eax
237 ; X64-NEXT: andl $262143, %eax # imm = 0x3FFFF
238 ; X64-NEXT: movswl %si, %ecx
239 ; X64-NEXT: andl $262143, %ecx # imm = 0x3FFFF
240 ; X64-NEXT: shll $7, %eax
241 ; X64-NEXT: xorl %edx, %edx
242 ; X64-NEXT: divl %ecx
243 ; X64-NEXT: cmpl $262143, %eax # imm = 0x3FFFF
244 ; X64-NEXT: movl $262143, %ecx # imm = 0x3FFFF
245 ; X64-NEXT: cmovael %ecx, %eax
250 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
251 ; X86-NEXT: andl $262143, %ecx # imm = 0x3FFFF
252 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
253 ; X86-NEXT: andl $262143, %eax # imm = 0x3FFFF
254 ; X86-NEXT: shll $7, %eax
255 ; X86-NEXT: xorl %edx, %edx
256 ; X86-NEXT: divl %ecx
257 ; X86-NEXT: cmpl $262143, %eax # imm = 0x3FFFF
258 ; X86-NEXT: movl $262143, %ecx # imm = 0x3FFFF
259 ; X86-NEXT: cmovael %ecx, %eax
261 %x2 = sext i16 %x to i18
262 %y2 = sext i16 %y to i18
263 %tmp = call i18 @llvm.udiv.fix.sat.i18(i18 %x2, i18 %y2, i32 7)
; func7: passes both i16 arguments unchanged to llvm.udiv.fix.sat.i16 with an
; i32 operand of 16, i.e. an operand equal to the bit width of the type.
; The two targets take different routes in the visible assertions:
;   - x86-64 zero-extends the operands, doubles %eax, shifts %rax left by 16
;     and divides inline with a 64-bit divq, then clamps %rax to 0x1FFFF with
;     cmovae before a final shrl by 1.
;   - i686 pushes the operands and calls the __udivdi3 library routine, then
;     replaces %eax with 0x1FFFF either when %eax compares above or equal to
;     it (cmovae) or when %edx is non-zero (cmovne), before a final shrl by 1.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them rather than editing by hand.
267 define i16 @func7(i16 %x, i16 %y) nounwind {
270 ; X64-NEXT: movzwl %si, %ecx
271 ; X64-NEXT: movzwl %di, %eax
272 ; X64-NEXT: addl %eax, %eax
273 ; X64-NEXT: shlq $16, %rax
274 ; X64-NEXT: xorl %edx, %edx
275 ; X64-NEXT: divq %rcx
276 ; X64-NEXT: cmpq $131071, %rax # imm = 0x1FFFF
277 ; X64-NEXT: movl $131071, %ecx # imm = 0x1FFFF
278 ; X64-NEXT: cmovaeq %rcx, %rax
279 ; X64-NEXT: shrl %eax
280 ; X64-NEXT: # kill: def $ax killed $ax killed $rax
285 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
286 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
287 ; X86-NEXT: movl %ecx, %edx
288 ; X86-NEXT: shll $17, %edx
289 ; X86-NEXT: shrl $15, %ecx
290 ; X86-NEXT: andl $1, %ecx
292 ; X86-NEXT: pushl %eax
293 ; X86-NEXT: pushl %ecx
294 ; X86-NEXT: pushl %edx
295 ; X86-NEXT: calll __udivdi3
296 ; X86-NEXT: addl $16, %esp
297 ; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
298 ; X86-NEXT: movl $131071, %ecx # imm = 0x1FFFF
299 ; X86-NEXT: cmovael %ecx, %eax
300 ; X86-NEXT: testl %edx, %edx
301 ; X86-NEXT: cmovnel %ecx, %eax
302 ; X86-NEXT: shrl %eax
303 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
305 %tmp = call i16 @llvm.udiv.fix.sat.i16(i16 %x, i16 %y, i32 16)
; vec: passes both <4 x i32> arguments unchanged to llvm.udiv.fix.sat.v4i32
; with an i32 operand of 31.
; The visible assertions handle the four lanes one at a time on both targets:
;   - x86-64 moves each pair of operands from the xmm registers into %rax and
;     %rcx and issues four separate 64-bit divq instructions, then rebuilds
;     the two halves with punpcklqdq, applies a compare/mask sequence
;     (pcmpeqd, pcmpgtd, pand, por) and psrlq by 1 to each half, and merges
;     them with a final shufps into %xmm0.
;   - i686 makes four calls to the __udivdi3 library routine, keeping earlier
;     results in callee-saved registers and two 4-byte stack spill slots.
;     After each call the high word is compared against 2 and cmovae/cmovb
;     select -1 or 1 before a shldl by 31; the four 32-bit results are stored
;     at offsets 0, 4, 8 and 12 from the pointer loaded into %eax.
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them rather than editing by hand.
309 define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
312 ; X64-NEXT: pxor %xmm2, %xmm2
313 ; X64-NEXT: pxor %xmm3, %xmm3
314 ; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
315 ; X64-NEXT: movq %xmm3, %rax
316 ; X64-NEXT: movdqa %xmm1, %xmm4
317 ; X64-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm2[2],xmm4[3],xmm2[3]
318 ; X64-NEXT: movq %xmm4, %rcx
319 ; X64-NEXT: xorl %edx, %edx
320 ; X64-NEXT: divq %rcx
321 ; X64-NEXT: movq %rax, %xmm8
322 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
323 ; X64-NEXT: movq %xmm3, %rax
324 ; X64-NEXT: movdqa %xmm1, %xmm3
325 ; X64-NEXT: psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
326 ; X64-NEXT: movq %xmm3, %rcx
327 ; X64-NEXT: xorl %edx, %edx
328 ; X64-NEXT: divq %rcx
329 ; X64-NEXT: movq %rax, %xmm3
330 ; X64-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm3[0]
331 ; X64-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
332 ; X64-NEXT: movdqa %xmm8, %xmm3
333 ; X64-NEXT: pxor %xmm4, %xmm3
334 ; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,1,3,3]
335 ; X64-NEXT: movdqa {{.*#+}} xmm7 = [2147483649,2147483649,2147483649,2147483649]
336 ; X64-NEXT: pcmpeqd %xmm7, %xmm6
337 ; X64-NEXT: movdqa {{.*#+}} xmm5 = [9223372043297226751,9223372043297226751]
338 ; X64-NEXT: movdqa %xmm5, %xmm9
339 ; X64-NEXT: pcmpgtd %xmm3, %xmm9
340 ; X64-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
341 ; X64-NEXT: pand %xmm6, %xmm10
342 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
343 ; X64-NEXT: por %xmm10, %xmm3
344 ; X64-NEXT: pcmpeqd %xmm6, %xmm6
345 ; X64-NEXT: pand %xmm3, %xmm8
346 ; X64-NEXT: pxor %xmm6, %xmm3
347 ; X64-NEXT: por %xmm8, %xmm3
348 ; X64-NEXT: psrlq $1, %xmm3
349 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
350 ; X64-NEXT: movq %xmm2, %rax
351 ; X64-NEXT: movd %xmm1, %ecx
352 ; X64-NEXT: xorl %edx, %edx
353 ; X64-NEXT: divq %rcx
354 ; X64-NEXT: movq %rax, %xmm8
355 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
356 ; X64-NEXT: movq %xmm0, %rax
357 ; X64-NEXT: psrlq $32, %xmm1
358 ; X64-NEXT: movq %xmm1, %rcx
359 ; X64-NEXT: xorl %edx, %edx
360 ; X64-NEXT: divq %rcx
361 ; X64-NEXT: movq %rax, %xmm0
362 ; X64-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm0[0]
363 ; X64-NEXT: pxor %xmm8, %xmm4
364 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
365 ; X64-NEXT: pcmpeqd %xmm7, %xmm0
366 ; X64-NEXT: pcmpgtd %xmm4, %xmm5
367 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
368 ; X64-NEXT: pand %xmm0, %xmm1
369 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
370 ; X64-NEXT: por %xmm1, %xmm0
371 ; X64-NEXT: pxor %xmm0, %xmm6
372 ; X64-NEXT: pand %xmm8, %xmm0
373 ; X64-NEXT: por %xmm6, %xmm0
374 ; X64-NEXT: psrlq $1, %xmm0
375 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
380 ; X86-NEXT: pushl %ebp
381 ; X86-NEXT: pushl %ebx
382 ; X86-NEXT: pushl %edi
383 ; X86-NEXT: pushl %esi
384 ; X86-NEXT: subl $8, %esp
385 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
386 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
387 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
388 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
389 ; X86-NEXT: leal (%eax,%eax), %ecx
390 ; X86-NEXT: shrl $31, %eax
391 ; X86-NEXT: shldl $31, %ecx, %eax
393 ; X86-NEXT: pushl {{[0-9]+}}(%esp)
394 ; X86-NEXT: pushl %eax
396 ; X86-NEXT: calll __udivdi3
397 ; X86-NEXT: addl $16, %esp
398 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
399 ; X86-NEXT: movl %edx, %edi
400 ; X86-NEXT: leal (%ebx,%ebx), %eax
401 ; X86-NEXT: shrl $31, %ebx
402 ; X86-NEXT: shldl $31, %eax, %ebx
404 ; X86-NEXT: pushl %ebp
405 ; X86-NEXT: pushl %ebx
407 ; X86-NEXT: calll __udivdi3
408 ; X86-NEXT: addl $16, %esp
409 ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
410 ; X86-NEXT: movl %edx, %ebx
411 ; X86-NEXT: leal (%esi,%esi), %eax
412 ; X86-NEXT: shrl $31, %esi
413 ; X86-NEXT: shldl $31, %eax, %esi
415 ; X86-NEXT: pushl {{[0-9]+}}(%esp)
416 ; X86-NEXT: pushl %esi
418 ; X86-NEXT: calll __udivdi3
419 ; X86-NEXT: addl $16, %esp
420 ; X86-NEXT: movl %edx, %esi
421 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
422 ; X86-NEXT: leal (%edx,%edx), %ecx
423 ; X86-NEXT: shrl $31, %edx
424 ; X86-NEXT: shldl $31, %ecx, %edx
425 ; X86-NEXT: movl %edx, %ecx
426 ; X86-NEXT: cmpl $2, %esi
427 ; X86-NEXT: movl $-1, %edx
428 ; X86-NEXT: cmovael %edx, %eax
429 ; X86-NEXT: movl $1, %ebp
430 ; X86-NEXT: cmovael %ebp, %esi
431 ; X86-NEXT: shldl $31, %eax, %esi
432 ; X86-NEXT: cmpl $2, %ebx
433 ; X86-NEXT: movl (%esp), %eax # 4-byte Reload
434 ; X86-NEXT: cmovael %edx, %eax
435 ; X86-NEXT: cmovael %ebp, %ebx
436 ; X86-NEXT: shldl $31, %eax, %ebx
437 ; X86-NEXT: cmpl $2, %edi
438 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
439 ; X86-NEXT: cmovael %edx, %eax
440 ; X86-NEXT: cmovael %ebp, %edi
441 ; X86-NEXT: shldl $31, %eax, %edi
443 ; X86-NEXT: pushl {{[0-9]+}}(%esp)
444 ; X86-NEXT: pushl %ecx
446 ; X86-NEXT: calll __udivdi3
447 ; X86-NEXT: addl $16, %esp
448 ; X86-NEXT: cmpl $2, %edx
449 ; X86-NEXT: movl $-1, %ecx
450 ; X86-NEXT: cmovael %ecx, %eax
451 ; X86-NEXT: cmovbl %edx, %ebp
452 ; X86-NEXT: shldl $31, %eax, %ebp
453 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
454 ; X86-NEXT: movl %ebp, 12(%eax)
455 ; X86-NEXT: movl %edi, 8(%eax)
456 ; X86-NEXT: movl %ebx, 4(%eax)
457 ; X86-NEXT: movl %esi, (%eax)
458 ; X86-NEXT: addl $8, %esp
459 ; X86-NEXT: popl %esi
460 ; X86-NEXT: popl %edi
461 ; X86-NEXT: popl %ebx
462 ; X86-NEXT: popl %ebp
464 %tmp = call <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 31)