1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
5 declare i4 @llvm.umul.fix.i4 (i4, i4, i32)
6 declare i32 @llvm.umul.fix.i32 (i32, i32, i32)
7 declare i64 @llvm.umul.fix.i64 (i64, i64, i32)
8 declare <4 x i32> @llvm.umul.fix.v4i32(<4 x i32>, <4 x i32>, i32)
; func: lowering of @llvm.umul.fix.i32(x, y, scale=2) for x86-64 (X64 prefix)
; and i686+cmov (X86 prefix). CHECK lines are autogenerated — regenerate with
; update_llc_test_checks.py rather than editing by hand.
; X64 widens to 64 bits (imulq) then extracts the scaled result; X86 uses
; mull and shrdl $2 on the edx:eax product pair.
10 define i32 @func(i32 %x, i32 %y) nounwind {
13 ; X64-NEXT: movl %esi, %eax
14 ; X64-NEXT: movl %edi, %ecx
15 ; X64-NEXT: imulq %rax, %rcx
16 ; X64-NEXT: movq %rcx, %rax
17 ; X64-NEXT: shrq $32, %rax
18 ; X64-NEXT: shldl $30, %ecx, %eax
19 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
24 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
25 ; X86-NEXT: mull {{[0-9]+}}(%esp)
26 ; X86-NEXT: shrdl $2, %edx, %eax
28 %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 2)
; func2: lowering of @llvm.umul.fix.i64(x, y, scale=2). X64 gets a single
; mulq + shrdq $2; X86 must synthesize the 64x64->128 multiply from four
; 32-bit mull/imull pieces, then funnel-shift (shldl $30) to apply the scale.
32 define i64 @func2(i64 %x, i64 %y) nounwind {
35 ; X64-NEXT: movq %rdi, %rax
37 ; X64-NEXT: shrdq $2, %rdx, %rax
42 ; X86-NEXT: pushl %ebp
43 ; X86-NEXT: pushl %ebx
44 ; X86-NEXT: pushl %edi
45 ; X86-NEXT: pushl %esi
46 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
47 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
48 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
49 ; X86-NEXT: movl %esi, %eax
50 ; X86-NEXT: mull {{[0-9]+}}(%esp)
51 ; X86-NEXT: movl %edx, %edi
52 ; X86-NEXT: movl %eax, %ebx
53 ; X86-NEXT: movl %esi, %eax
55 ; X86-NEXT: movl %eax, %esi
56 ; X86-NEXT: addl %edx, %ebx
57 ; X86-NEXT: adcl $0, %edi
58 ; X86-NEXT: movl %ecx, %eax
60 ; X86-NEXT: addl %ebx, %eax
61 ; X86-NEXT: adcl %edi, %edx
62 ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
63 ; X86-NEXT: addl %ecx, %edx
64 ; X86-NEXT: shldl $30, %eax, %edx
65 ; X86-NEXT: shldl $30, %esi, %eax
71 %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 2)
; func3: lowering of @llvm.umul.fix.i4(x, y, scale=2) — a non-power-of-8
; type. Both targets mask the inputs to 4 bits (andb $15) before the byte
; multiply, then shift right by the scale (shrb $2).
75 define i4 @func3(i4 %x, i4 %y) nounwind {
78 ; X64-NEXT: movl %edi, %eax
79 ; X64-NEXT: andb $15, %al
80 ; X64-NEXT: andb $15, %sil
81 ; X64-NEXT: # kill: def $al killed $al killed $eax
83 ; X64-NEXT: shrb $2, %al
88 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
89 ; X86-NEXT: andb $15, %al
90 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
91 ; X86-NEXT: andb $15, %cl
93 ; X86-NEXT: shrb $2, %al
95 %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 2)
; vec: lowering of the vector form @llvm.umul.fix.v4i32 with scale 2.
; X64 uses SSE2 pmuludq/pshufd shuffles to form low and high product halves,
; then psrld $2 / pslld $30 / por to stitch the scaled result. X86 (no SSE in
; this RUN line) scalarizes into four mull + shldl $30 sequences and stores
; through the sret pointer in %ecx.
99 define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
102 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
103 ; X64-NEXT: pmuludq %xmm1, %xmm0
104 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
105 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
106 ; X64-NEXT: pmuludq %xmm2, %xmm1
107 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
108 ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
109 ; X64-NEXT: psrld $2, %xmm3
110 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
111 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
112 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
113 ; X64-NEXT: pslld $30, %xmm0
114 ; X64-NEXT: por %xmm3, %xmm0
119 ; X86-NEXT: pushl %ebp
120 ; X86-NEXT: pushl %ebx
121 ; X86-NEXT: pushl %edi
122 ; X86-NEXT: pushl %esi
123 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
124 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
125 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
126 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
127 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
128 ; X86-NEXT: mull {{[0-9]+}}(%esp)
129 ; X86-NEXT: movl %edx, %esi
130 ; X86-NEXT: shldl $30, %eax, %esi
131 ; X86-NEXT: movl %ebx, %eax
132 ; X86-NEXT: mull {{[0-9]+}}(%esp)
133 ; X86-NEXT: movl %edx, %ebx
134 ; X86-NEXT: shldl $30, %eax, %ebx
135 ; X86-NEXT: movl %ebp, %eax
136 ; X86-NEXT: mull {{[0-9]+}}(%esp)
137 ; X86-NEXT: movl %edx, %ebp
138 ; X86-NEXT: shldl $30, %eax, %ebp
139 ; X86-NEXT: movl %edi, %eax
140 ; X86-NEXT: mull {{[0-9]+}}(%esp)
141 ; X86-NEXT: shldl $30, %eax, %edx
142 ; X86-NEXT: movl %edx, 12(%ecx)
143 ; X86-NEXT: movl %ebp, 8(%ecx)
144 ; X86-NEXT: movl %ebx, 4(%ecx)
145 ; X86-NEXT: movl %esi, (%ecx)
146 ; X86-NEXT: movl %ecx, %eax
147 ; X86-NEXT: popl %esi
148 ; X86-NEXT: popl %edi
149 ; X86-NEXT: popl %ebx
150 ; X86-NEXT: popl %ebp
152 %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2)
156 ; The following cases use scale 0, so they lower to regular integer multiplication.
; func4: umul.fix.i32 with scale 0 — degenerates to a plain imull on both
; targets.
157 define i32 @func4(i32 %x, i32 %y) nounwind {
160 ; X64-NEXT: movl %edi, %eax
161 ; X64-NEXT: imull %esi, %eax
166 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
167 ; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
169 %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 0)
; func5: umul.fix.i64 with scale 0 — X64 is a single imulq; X86 builds the
; 64-bit product from one mull plus two cross-term imulls folded into %edx.
173 define i64 @func5(i64 %x, i64 %y) nounwind {
176 ; X64-NEXT: movq %rdi, %rax
177 ; X64-NEXT: imulq %rsi, %rax
182 ; X86-NEXT: pushl %esi
183 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
184 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
185 ; X86-NEXT: movl %ecx, %eax
186 ; X86-NEXT: mull %esi
187 ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
188 ; X86-NEXT: addl %ecx, %edx
189 ; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
190 ; X86-NEXT: addl %esi, %edx
191 ; X86-NEXT: popl %esi
193 %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 0)
; func6: umul.fix.i4 with scale 0 — inputs are masked to 4 bits (andb $15)
; and multiplied with a plain byte multiply; no post-shift since scale is 0.
197 define i4 @func6(i4 %x, i4 %y) nounwind {
200 ; X64-NEXT: movl %edi, %eax
201 ; X64-NEXT: andb $15, %al
202 ; X64-NEXT: andb $15, %sil
203 ; X64-NEXT: # kill: def $al killed $al killed $eax
204 ; X64-NEXT: mulb %sil
209 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
210 ; X86-NEXT: andb $15, %al
211 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
212 ; X86-NEXT: andb $15, %cl
215 %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 0)
; vec2: umul.fix.v4i32 with scale 0 — plain vector multiply. X64 uses the
; SSE2 pmuludq/pshufd/punpckldq expansion of a 32-bit vector multiply; X86
; scalarizes into four imulls and stores through the sret pointer in %eax.
219 define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
222 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
223 ; X64-NEXT: pmuludq %xmm1, %xmm0
224 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
225 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
226 ; X64-NEXT: pmuludq %xmm2, %xmm1
227 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
228 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
233 ; X86-NEXT: pushl %edi
234 ; X86-NEXT: pushl %esi
235 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
236 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
237 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
238 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
239 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
240 ; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
241 ; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
242 ; X86-NEXT: imull {{[0-9]+}}(%esp), %edx
243 ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
244 ; X86-NEXT: movl %ecx, 12(%eax)
245 ; X86-NEXT: movl %edx, 8(%eax)
246 ; X86-NEXT: movl %esi, 4(%eax)
247 ; X86-NEXT: movl %edi, (%eax)
248 ; X86-NEXT: popl %esi
249 ; X86-NEXT: popl %edi
251 %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
; func7: umul.fix.i64 with scale 32 (half the bit width). X64 uses the full
; widening mulq and shrdq $32 to pick the middle 64 bits of the 128-bit
; product; X86 expands via 32-bit partial products with add/adc carries.
255 define i64 @func7(i64 %x, i64 %y) nounwind {
258 ; X64-NEXT: movq %rdi, %rax
259 ; X64-NEXT: mulq %rsi
260 ; X64-NEXT: shrdq $32, %rdx, %rax
265 ; X86-NEXT: pushl %ebp
266 ; X86-NEXT: pushl %ebx
267 ; X86-NEXT: pushl %edi
268 ; X86-NEXT: pushl %esi
269 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
270 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
271 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
272 ; X86-NEXT: movl %ebx, %eax
273 ; X86-NEXT: mull {{[0-9]+}}(%esp)
274 ; X86-NEXT: movl %edx, %esi
275 ; X86-NEXT: movl %eax, %edi
276 ; X86-NEXT: movl %ebx, %eax
277 ; X86-NEXT: mull %ebp
278 ; X86-NEXT: addl %edx, %edi
279 ; X86-NEXT: adcl $0, %esi
280 ; X86-NEXT: movl %ecx, %eax
281 ; X86-NEXT: mull %ebp
282 ; X86-NEXT: addl %edi, %eax
283 ; X86-NEXT: adcl %esi, %edx
284 ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
285 ; X86-NEXT: addl %ecx, %edx
286 ; X86-NEXT: popl %esi
287 ; X86-NEXT: popl %edi
288 ; X86-NEXT: popl %ebx
289 ; X86-NEXT: popl %ebp
291 %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 32)
; func8: umul.fix.i64 with scale 63 (bit width - 1). X64: mulq then
; shrdq $63 funnel-shifts the 128-bit product right by 63. X86 needs all
; four 32-bit partial products plus shldl $1 / shrdl $31 to realign.
295 define i64 @func8(i64 %x, i64 %y) nounwind {
298 ; X64-NEXT: movq %rdi, %rax
299 ; X64-NEXT: mulq %rsi
300 ; X64-NEXT: shrdq $63, %rdx, %rax
305 ; X86-NEXT: pushl %ebp
306 ; X86-NEXT: pushl %ebx
307 ; X86-NEXT: pushl %edi
308 ; X86-NEXT: pushl %esi
309 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
310 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
311 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
312 ; X86-NEXT: movl %esi, %eax
313 ; X86-NEXT: mull %ebp
314 ; X86-NEXT: movl %edx, %ecx
315 ; X86-NEXT: movl %eax, %edi
316 ; X86-NEXT: movl %esi, %eax
317 ; X86-NEXT: mull %ebx
318 ; X86-NEXT: addl %edx, %edi
319 ; X86-NEXT: adcl $0, %ecx
320 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
321 ; X86-NEXT: mull %ebp
322 ; X86-NEXT: movl %edx, %esi
323 ; X86-NEXT: movl %eax, %ebp
324 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
325 ; X86-NEXT: mull %ebx
326 ; X86-NEXT: addl %edi, %eax
327 ; X86-NEXT: adcl %edx, %ecx
328 ; X86-NEXT: adcl $0, %esi
329 ; X86-NEXT: addl %ebp, %ecx
330 ; X86-NEXT: adcl $0, %esi
331 ; X86-NEXT: shldl $1, %ecx, %esi
332 ; X86-NEXT: shrdl $31, %ecx, %eax
333 ; X86-NEXT: movl %esi, %edx
334 ; X86-NEXT: popl %esi
335 ; X86-NEXT: popl %edi
336 ; X86-NEXT: popl %ebx
337 ; X86-NEXT: popl %ebp
339 %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 63)
; func9: umul.fix.i64 with scale 64 (== bit width), i.e. the result is the
; high 64 bits of the 128-bit product. X64: mulq then return %rdx directly.
; X86 computes the four partial products and returns only the upper halves.
343 define i64 @func9(i64 %x, i64 %y) nounwind {
346 ; X64-NEXT: movq %rdi, %rax
347 ; X64-NEXT: mulq %rsi
348 ; X64-NEXT: movq %rdx, %rax
353 ; X86-NEXT: pushl %ebp
354 ; X86-NEXT: pushl %ebx
355 ; X86-NEXT: pushl %edi
356 ; X86-NEXT: pushl %esi
357 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
358 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
359 ; X86-NEXT: movl %edi, %eax
360 ; X86-NEXT: mull %esi
361 ; X86-NEXT: movl %edx, %ecx
362 ; X86-NEXT: movl %eax, %ebp
363 ; X86-NEXT: movl %edi, %eax
364 ; X86-NEXT: mull {{[0-9]+}}(%esp)
365 ; X86-NEXT: movl %edx, %ebx
366 ; X86-NEXT: addl %ebp, %ebx
367 ; X86-NEXT: adcl $0, %ecx
368 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
369 ; X86-NEXT: mull %esi
370 ; X86-NEXT: movl %edx, %edi
371 ; X86-NEXT: movl %eax, %ebp
372 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
373 ; X86-NEXT: mull {{[0-9]+}}(%esp)
374 ; X86-NEXT: addl %ebx, %eax
375 ; X86-NEXT: adcl %edx, %ecx
376 ; X86-NEXT: adcl $0, %edi
377 ; X86-NEXT: addl %ebp, %ecx
378 ; X86-NEXT: adcl $0, %edi
379 ; X86-NEXT: movl %ecx, %eax
380 ; X86-NEXT: movl %edi, %edx
381 ; X86-NEXT: popl %esi
382 ; X86-NEXT: popl %edi
383 ; X86-NEXT: popl %ebx
384 ; X86-NEXT: popl %ebp
386 %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 64)