1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
3 ; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,LINUX,FAST
4 ; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
5 ; RUN: llc -disable-peephole -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefixes=CHECK,WIN64
6 ; RUN: llc -disable-peephole -mtriple=i386-pc-win32 < %s | FileCheck %s --check-prefix=WIN32
; umul.with.overflow.i64 on the constants 9 and 8. The expected output
; materializes 72 directly and zeroes the remaining result registers with
; xorl (two of them in the i386 run).
8 define {i64, i1} @t1() nounwind {
11 ; CHECK-NEXT: movl $72, %eax
12 ; CHECK-NEXT: xorl %edx, %edx
17 ; WIN32-NEXT: movl $72, %eax
18 ; WIN32-NEXT: xorl %edx, %edx
19 ; WIN32-NEXT: xorl %ecx, %ecx
21 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 8)
; umul.with.overflow.i64 on the constants 9 and 0. Every result register in
; the expected output is zeroed with xorl; no multiply instruction is
; expected.
25 define {i64, i1} @t2() nounwind {
28 ; CHECK-NEXT: xorl %eax, %eax
29 ; CHECK-NEXT: xorl %edx, %edx
34 ; WIN32-NEXT: xorl %eax, %eax
35 ; WIN32-NEXT: xorl %edx, %edx
36 ; WIN32-NEXT: xorl %ecx, %ecx
38 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 0)
; umul.with.overflow.i64 on the constants 9 and -1. The expected output loads
; the constant -9 (as the 32-bit halves -9 and -1 in the i386 run) and sets a
; byte register to 1 with movb, presumably the i1 overflow result.
42 define {i64, i1} @t3() nounwind {
45 ; CHECK-NEXT: movq $-9, %rax
46 ; CHECK-NEXT: movb $1, %dl
51 ; WIN32-NEXT: movl $-9, %eax
52 ; WIN32-NEXT: movl $-1, %edx
53 ; WIN32-NEXT: movb $1, %cl
55 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 -1)
; smul.with.overflow.i8 on %v1 and %v2; the product is stored through %res
; and the overflow bit is extracted as %obit. The checks expect a one-operand
; imulb with the product taken from %al. The fast-isel run additionally
; expects the flag byte to be masked with andb $1 and widened with movzbl.
60 define zeroext i1 @smuloi8(i8 %v1, i8 %v2, i8* %res) {
61 ; SDAG-LABEL: smuloi8:
63 ; SDAG-NEXT: movl %edi, %eax
64 ; SDAG-NEXT: # kill: def $al killed $al killed $eax
65 ; SDAG-NEXT: imulb %sil
67 ; SDAG-NEXT: movb %al, (%rdx)
68 ; SDAG-NEXT: movl %ecx, %eax
71 ; FAST-LABEL: smuloi8:
73 ; FAST-NEXT: movl %edi, %eax
74 ; FAST-NEXT: # kill: def $al killed $al killed $eax
75 ; FAST-NEXT: imulb %sil
77 ; FAST-NEXT: movb %al, (%rdx)
78 ; FAST-NEXT: andb $1, %cl
79 ; FAST-NEXT: movzbl %cl, %eax
82 ; WIN64-LABEL: smuloi8:
84 ; WIN64-NEXT: movl %ecx, %eax
85 ; WIN64-NEXT: imulb %dl
86 ; WIN64-NEXT: seto %cl
87 ; WIN64-NEXT: movb %al, (%r8)
88 ; WIN64-NEXT: movl %ecx, %eax
91 ; WIN32-LABEL: smuloi8:
93 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
94 ; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
95 ; WIN32-NEXT: imulb {{[0-9]+}}(%esp)
96 ; WIN32-NEXT: seto %cl
97 ; WIN32-NEXT: movb %al, (%edx)
98 ; WIN32-NEXT: movl %ecx, %eax
100 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
101 %val = extractvalue {i8, i1} %t, 0
102 %obit = extractvalue {i8, i1} %t, 1
103 store i8 %val, i8* %res
; smul.with.overflow.i16 on %v1 and %v2; the product is stored through %res
; and the overflow bit is extracted as %obit. The checks expect a two-operand
; imulw followed by seto; the fast-isel run also masks the flag byte with
; andb $1 and widens it with movzbl.
107 define zeroext i1 @smuloi16(i16 %v1, i16 %v2, i16* %res) {
108 ; SDAG-LABEL: smuloi16:
110 ; SDAG-NEXT: imulw %si, %di
111 ; SDAG-NEXT: seto %al
112 ; SDAG-NEXT: movw %di, (%rdx)
115 ; FAST-LABEL: smuloi16:
117 ; FAST-NEXT: imulw %si, %di
118 ; FAST-NEXT: seto %al
119 ; FAST-NEXT: movw %di, (%rdx)
120 ; FAST-NEXT: andb $1, %al
121 ; FAST-NEXT: movzbl %al, %eax
124 ; WIN64-LABEL: smuloi16:
126 ; WIN64-NEXT: imulw %dx, %cx
127 ; WIN64-NEXT: seto %al
128 ; WIN64-NEXT: movw %cx, (%r8)
131 ; WIN32-LABEL: smuloi16:
133 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
134 ; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
135 ; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %dx
136 ; WIN32-NEXT: seto %al
137 ; WIN32-NEXT: movw %dx, (%ecx)
139 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
140 %val = extractvalue {i16, i1} %t, 0
141 %obit = extractvalue {i16, i1} %t, 1
142 store i16 %val, i16* %res
; smul.with.overflow.i32 on %v1 and %v2; the product is stored through %res
; and the overflow bit is extracted as %obit. The checks expect a two-operand
; imull followed by seto; the fast-isel run also masks the flag byte with
; andb $1 and widens it with movzbl.
146 define zeroext i1 @smuloi32(i32 %v1, i32 %v2, i32* %res) {
147 ; SDAG-LABEL: smuloi32:
149 ; SDAG-NEXT: imull %esi, %edi
150 ; SDAG-NEXT: seto %al
151 ; SDAG-NEXT: movl %edi, (%rdx)
154 ; FAST-LABEL: smuloi32:
156 ; FAST-NEXT: imull %esi, %edi
157 ; FAST-NEXT: seto %al
158 ; FAST-NEXT: movl %edi, (%rdx)
159 ; FAST-NEXT: andb $1, %al
160 ; FAST-NEXT: movzbl %al, %eax
163 ; WIN64-LABEL: smuloi32:
165 ; WIN64-NEXT: imull %edx, %ecx
166 ; WIN64-NEXT: seto %al
167 ; WIN64-NEXT: movl %ecx, (%r8)
170 ; WIN32-LABEL: smuloi32:
172 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
173 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
174 ; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx
175 ; WIN32-NEXT: seto %al
176 ; WIN32-NEXT: movl %edx, (%ecx)
178 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
179 %val = extractvalue {i32, i1} %t, 0
180 %obit = extractvalue {i32, i1} %t, 1
181 store i32 %val, i32* %res
; smul.with.overflow.i64 on %v1 and %v2; the product is stored through %res
; and the overflow bit is extracted as %obit. The 64-bit runs expect imulq
; followed by seto. The i386 run instead expects a call to ___mulodi4 that is
; passed the address of a zero-initialized stack slot (movl $0, (%esp), then
; %esp is copied and pushed); that slot is compared against 0 after the call
; to form the flag with setne.
185 define zeroext i1 @smuloi64(i64 %v1, i64 %v2, i64* %res) {
186 ; SDAG-LABEL: smuloi64:
188 ; SDAG-NEXT: imulq %rsi, %rdi
189 ; SDAG-NEXT: seto %al
190 ; SDAG-NEXT: movq %rdi, (%rdx)
193 ; FAST-LABEL: smuloi64:
195 ; FAST-NEXT: imulq %rsi, %rdi
196 ; FAST-NEXT: seto %al
197 ; FAST-NEXT: movq %rdi, (%rdx)
198 ; FAST-NEXT: andb $1, %al
199 ; FAST-NEXT: movzbl %al, %eax
202 ; WIN64-LABEL: smuloi64:
204 ; WIN64-NEXT: imulq %rdx, %rcx
205 ; WIN64-NEXT: seto %al
206 ; WIN64-NEXT: movq %rcx, (%r8)
209 ; WIN32-LABEL: smuloi64:
211 ; WIN32-NEXT: pushl %ebx
212 ; WIN32-NEXT: pushl %edi
213 ; WIN32-NEXT: pushl %esi
214 ; WIN32-NEXT: pushl %eax
215 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
216 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
217 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
218 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
219 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
220 ; WIN32-NEXT: movl $0, (%esp)
221 ; WIN32-NEXT: movl %esp, %ebx
222 ; WIN32-NEXT: pushl %ebx
223 ; WIN32-NEXT: pushl %edi
224 ; WIN32-NEXT: pushl %edx
225 ; WIN32-NEXT: pushl %ecx
226 ; WIN32-NEXT: pushl %eax
227 ; WIN32-NEXT: calll ___mulodi4
228 ; WIN32-NEXT: addl $20, %esp
229 ; WIN32-NEXT: cmpl $0, (%esp)
230 ; WIN32-NEXT: setne %cl
231 ; WIN32-NEXT: movl %edx, 4(%esi)
232 ; WIN32-NEXT: movl %eax, (%esi)
233 ; WIN32-NEXT: movl %ecx, %eax
234 ; WIN32-NEXT: addl $4, %esp
235 ; WIN32-NEXT: popl %esi
236 ; WIN32-NEXT: popl %edi
237 ; WIN32-NEXT: popl %ebx
239 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
240 %val = extractvalue {i64, i1} %t, 0
241 %obit = extractvalue {i64, i1} %t, 1
242 store i64 %val, i64* %res
; umul.with.overflow.i8 on %v1 and %v2; the product is stored through %res
; and the overflow bit is extracted as %obit. The checks expect a one-operand
; mulb followed by seto, with the product taken from %al. The fast-isel run
; also masks the flag byte with andb $1 and widens it with movzbl.
247 define zeroext i1 @umuloi8(i8 %v1, i8 %v2, i8* %res) {
248 ; SDAG-LABEL: umuloi8:
250 ; SDAG-NEXT: movl %edi, %eax
251 ; SDAG-NEXT: # kill: def $al killed $al killed $eax
252 ; SDAG-NEXT: mulb %sil
253 ; SDAG-NEXT: seto %cl
254 ; SDAG-NEXT: movb %al, (%rdx)
255 ; SDAG-NEXT: movl %ecx, %eax
258 ; FAST-LABEL: umuloi8:
260 ; FAST-NEXT: movl %edi, %eax
261 ; FAST-NEXT: # kill: def $al killed $al killed $eax
262 ; FAST-NEXT: mulb %sil
263 ; FAST-NEXT: seto %cl
264 ; FAST-NEXT: movb %al, (%rdx)
265 ; FAST-NEXT: andb $1, %cl
266 ; FAST-NEXT: movzbl %cl, %eax
269 ; WIN64-LABEL: umuloi8:
271 ; WIN64-NEXT: movl %ecx, %eax
272 ; WIN64-NEXT: mulb %dl
273 ; WIN64-NEXT: seto %cl
274 ; WIN64-NEXT: movb %al, (%r8)
275 ; WIN64-NEXT: movl %ecx, %eax
278 ; WIN32-LABEL: umuloi8:
280 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
281 ; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
282 ; WIN32-NEXT: mulb {{[0-9]+}}(%esp)
283 ; WIN32-NEXT: seto %cl
284 ; WIN32-NEXT: movb %al, (%edx)
285 ; WIN32-NEXT: movl %ecx, %eax
287 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
288 %val = extractvalue {i8, i1} %t, 0
289 %obit = extractvalue {i8, i1} %t, 1
290 store i8 %val, i8* %res
; umul.with.overflow.i16 on %v1 and %v2; the product is stored through %res
; and the overflow bit is extracted as %obit. The checks expect a one-operand
; mulw followed by seto. In the Linux runs %rdx is first copied to %rcx and
; the store goes through %rcx, presumably because the one-operand multiply
; writes its high half into %dx.
294 define zeroext i1 @umuloi16(i16 %v1, i16 %v2, i16* %res) {
295 ; SDAG-LABEL: umuloi16:
297 ; SDAG-NEXT: movq %rdx, %rcx
298 ; SDAG-NEXT: movl %edi, %eax
299 ; SDAG-NEXT: # kill: def $ax killed $ax killed $eax
300 ; SDAG-NEXT: mulw %si
301 ; SDAG-NEXT: seto %dl
302 ; SDAG-NEXT: movw %ax, (%rcx)
303 ; SDAG-NEXT: movl %edx, %eax
306 ; FAST-LABEL: umuloi16:
308 ; FAST-NEXT: movq %rdx, %rcx
309 ; FAST-NEXT: movl %edi, %eax
310 ; FAST-NEXT: # kill: def $ax killed $ax killed $eax
311 ; FAST-NEXT: mulw %si
312 ; FAST-NEXT: seto %dl
313 ; FAST-NEXT: movw %ax, (%rcx)
314 ; FAST-NEXT: andb $1, %dl
315 ; FAST-NEXT: movzbl %dl, %eax
318 ; WIN64-LABEL: umuloi16:
320 ; WIN64-NEXT: movl %ecx, %eax
321 ; WIN64-NEXT: mulw %dx
322 ; WIN64-NEXT: seto %cl
323 ; WIN64-NEXT: movw %ax, (%r8)
324 ; WIN64-NEXT: movl %ecx, %eax
327 ; WIN32-LABEL: umuloi16:
329 ; WIN32-NEXT: pushl %esi
330 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
331 ; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
332 ; WIN32-NEXT: mulw {{[0-9]+}}(%esp)
333 ; WIN32-NEXT: seto %cl
334 ; WIN32-NEXT: movw %ax, (%esi)
335 ; WIN32-NEXT: movl %ecx, %eax
336 ; WIN32-NEXT: popl %esi
338 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
339 %val = extractvalue {i16, i1} %t, 0
340 %obit = extractvalue {i16, i1} %t, 1
341 store i16 %val, i16* %res
; umul.with.overflow.i32 on %v1 and %v2; the product is stored through %res
; and the overflow bit is extracted as %obit. The checks expect a one-operand
; mull followed by seto. In the Linux runs %rdx is first copied to %rcx and
; the store goes through %rcx, presumably because the one-operand multiply
; writes its high half into %edx.
345 define zeroext i1 @umuloi32(i32 %v1, i32 %v2, i32* %res) {
346 ; SDAG-LABEL: umuloi32:
348 ; SDAG-NEXT: movq %rdx, %rcx
349 ; SDAG-NEXT: movl %edi, %eax
350 ; SDAG-NEXT: mull %esi
351 ; SDAG-NEXT: seto %dl
352 ; SDAG-NEXT: movl %eax, (%rcx)
353 ; SDAG-NEXT: movl %edx, %eax
356 ; FAST-LABEL: umuloi32:
358 ; FAST-NEXT: movq %rdx, %rcx
359 ; FAST-NEXT: movl %edi, %eax
360 ; FAST-NEXT: mull %esi
361 ; FAST-NEXT: seto %dl
362 ; FAST-NEXT: movl %eax, (%rcx)
363 ; FAST-NEXT: andb $1, %dl
364 ; FAST-NEXT: movzbl %dl, %eax
367 ; WIN64-LABEL: umuloi32:
369 ; WIN64-NEXT: movl %ecx, %eax
370 ; WIN64-NEXT: mull %edx
371 ; WIN64-NEXT: seto %cl
372 ; WIN64-NEXT: movl %eax, (%r8)
373 ; WIN64-NEXT: movl %ecx, %eax
376 ; WIN32-LABEL: umuloi32:
378 ; WIN32-NEXT: pushl %esi
379 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
380 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
381 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
382 ; WIN32-NEXT: seto %cl
383 ; WIN32-NEXT: movl %eax, (%esi)
384 ; WIN32-NEXT: movl %ecx, %eax
385 ; WIN32-NEXT: popl %esi
387 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
388 %val = extractvalue {i32, i1} %t, 0
389 %obit = extractvalue {i32, i1} %t, 1
390 store i32 %val, i32* %res
; umul.with.overflow.i64 on %v1 and %v2; the product is stored through %res
; and the overflow bit is extracted as %obit. The 64-bit runs expect a
; one-operand mulq followed by seto. The i386 run expects an inline expansion
; with three mull instructions; the flag is assembled by or-ing together two
; seto results, one setb result, and the and of two setne results.
394 define zeroext i1 @umuloi64(i64 %v1, i64 %v2, i64* %res) {
395 ; SDAG-LABEL: umuloi64:
397 ; SDAG-NEXT: movq %rdx, %rcx
398 ; SDAG-NEXT: movq %rdi, %rax
399 ; SDAG-NEXT: mulq %rsi
400 ; SDAG-NEXT: seto %dl
401 ; SDAG-NEXT: movq %rax, (%rcx)
402 ; SDAG-NEXT: movl %edx, %eax
405 ; FAST-LABEL: umuloi64:
407 ; FAST-NEXT: movq %rdx, %rcx
408 ; FAST-NEXT: movq %rdi, %rax
409 ; FAST-NEXT: mulq %rsi
410 ; FAST-NEXT: seto %dl
411 ; FAST-NEXT: movq %rax, (%rcx)
412 ; FAST-NEXT: andb $1, %dl
413 ; FAST-NEXT: movzbl %dl, %eax
416 ; WIN64-LABEL: umuloi64:
418 ; WIN64-NEXT: movq %rcx, %rax
419 ; WIN64-NEXT: mulq %rdx
420 ; WIN64-NEXT: seto %cl
421 ; WIN64-NEXT: movq %rax, (%r8)
422 ; WIN64-NEXT: movl %ecx, %eax
425 ; WIN32-LABEL: umuloi64:
427 ; WIN32-NEXT: pushl %ebp
428 ; WIN32-NEXT: pushl %ebx
429 ; WIN32-NEXT: pushl %edi
430 ; WIN32-NEXT: pushl %esi
431 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
432 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
433 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
434 ; WIN32-NEXT: testl %esi, %esi
435 ; WIN32-NEXT: setne %dl
436 ; WIN32-NEXT: testl %eax, %eax
437 ; WIN32-NEXT: setne %bl
438 ; WIN32-NEXT: andb %dl, %bl
439 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
440 ; WIN32-NEXT: movl %eax, %edi
441 ; WIN32-NEXT: seto %cl
442 ; WIN32-NEXT: movl %esi, %eax
443 ; WIN32-NEXT: mull %ebp
444 ; WIN32-NEXT: movl %eax, %esi
445 ; WIN32-NEXT: seto %ch
446 ; WIN32-NEXT: orb %cl, %ch
447 ; WIN32-NEXT: addl %edi, %esi
448 ; WIN32-NEXT: movl %ebp, %eax
449 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
450 ; WIN32-NEXT: addl %esi, %edx
451 ; WIN32-NEXT: setb %cl
452 ; WIN32-NEXT: orb %ch, %cl
453 ; WIN32-NEXT: orb %bl, %cl
454 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
455 ; WIN32-NEXT: movl %eax, (%esi)
456 ; WIN32-NEXT: movl %edx, 4(%esi)
457 ; WIN32-NEXT: movl %ecx, %eax
458 ; WIN32-NEXT: popl %esi
459 ; WIN32-NEXT: popl %edi
460 ; WIN32-NEXT: popl %ebx
461 ; WIN32-NEXT: popl %ebp
463 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
464 %val = extractvalue {i64, i1} %t, 0
465 %obit = extractvalue {i64, i1} %t, 1
466 store i64 %val, i64* %res
471 ; Check the use of the overflow bit in combination with a select instruction.
; Selects %v1 when smul.with.overflow.i32 reports overflow and %v2 otherwise.
; The 64-bit runs expect the select to become cmovol directly after imull;
; the i386 run expects a jo branch around a register move instead.
473 define i32 @smuloselecti32(i32 %v1, i32 %v2) {
474 ; LINUX-LABEL: smuloselecti32:
476 ; LINUX-NEXT: movl %esi, %eax
477 ; LINUX-NEXT: movl %edi, %ecx
478 ; LINUX-NEXT: imull %esi, %ecx
479 ; LINUX-NEXT: cmovol %edi, %eax
482 ; WIN64-LABEL: smuloselecti32:
484 ; WIN64-NEXT: movl %edx, %eax
485 ; WIN64-NEXT: movl %ecx, %edx
486 ; WIN64-NEXT: imull %eax, %edx
487 ; WIN64-NEXT: cmovol %ecx, %eax
490 ; WIN32-LABEL: smuloselecti32:
492 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
493 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
494 ; WIN32-NEXT: movl %eax, %edx
495 ; WIN32-NEXT: imull %ecx, %edx
496 ; WIN32-NEXT: jo LBB11_2
497 ; WIN32-NEXT: # %bb.1:
498 ; WIN32-NEXT: movl %ecx, %eax
499 ; WIN32-NEXT: LBB11_2:
501 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
502 %obit = extractvalue {i32, i1} %t, 1
503 %ret = select i1 %obit, i32 %v1, i32 %v2
; Selects %v1 when smul.with.overflow.i64 reports overflow and %v2 otherwise.
; The 64-bit runs expect imulq followed by cmovoq. The i386 run expects a call
; to ___mulodi4 with the address of a zeroed stack slot, then cmpl $0 on that
; slot and a jne branch that picks which register pair is returned.
507 define i64 @smuloselecti64(i64 %v1, i64 %v2) {
508 ; LINUX-LABEL: smuloselecti64:
510 ; LINUX-NEXT: movq %rsi, %rax
511 ; LINUX-NEXT: movq %rdi, %rcx
512 ; LINUX-NEXT: imulq %rsi, %rcx
513 ; LINUX-NEXT: cmovoq %rdi, %rax
516 ; WIN64-LABEL: smuloselecti64:
518 ; WIN64-NEXT: movq %rdx, %rax
519 ; WIN64-NEXT: movq %rcx, %rdx
520 ; WIN64-NEXT: imulq %rax, %rdx
521 ; WIN64-NEXT: cmovoq %rcx, %rax
524 ; WIN32-LABEL: smuloselecti64:
526 ; WIN32-NEXT: pushl %ebp
527 ; WIN32-NEXT: pushl %ebx
528 ; WIN32-NEXT: pushl %edi
529 ; WIN32-NEXT: pushl %esi
530 ; WIN32-NEXT: pushl %eax
531 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
532 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
533 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
534 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
535 ; WIN32-NEXT: movl $0, (%esp)
536 ; WIN32-NEXT: movl %esp, %eax
537 ; WIN32-NEXT: pushl %eax
538 ; WIN32-NEXT: pushl %ebp
539 ; WIN32-NEXT: pushl %ebx
540 ; WIN32-NEXT: pushl %edi
541 ; WIN32-NEXT: pushl %esi
542 ; WIN32-NEXT: calll ___mulodi4
543 ; WIN32-NEXT: addl $20, %esp
544 ; WIN32-NEXT: cmpl $0, (%esp)
545 ; WIN32-NEXT: jne LBB12_2
546 ; WIN32-NEXT: # %bb.1:
547 ; WIN32-NEXT: movl %ebx, %esi
548 ; WIN32-NEXT: movl %ebp, %edi
549 ; WIN32-NEXT: LBB12_2:
550 ; WIN32-NEXT: movl %esi, %eax
551 ; WIN32-NEXT: movl %edi, %edx
552 ; WIN32-NEXT: addl $4, %esp
553 ; WIN32-NEXT: popl %esi
554 ; WIN32-NEXT: popl %edi
555 ; WIN32-NEXT: popl %ebx
556 ; WIN32-NEXT: popl %ebp
558 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
559 %obit = extractvalue {i64, i1} %t, 1
560 %ret = select i1 %obit, i64 %v1, i64 %v2
; Selects %v1 when umul.with.overflow.i32 reports overflow and %v2 otherwise.
; The 64-bit runs expect a one-operand mull followed by cmovol and a final
; move into %eax; the i386 run expects a jo branch around a register move.
564 define i32 @umuloselecti32(i32 %v1, i32 %v2) {
565 ; LINUX-LABEL: umuloselecti32:
567 ; LINUX-NEXT: movl %edi, %eax
568 ; LINUX-NEXT: mull %esi
569 ; LINUX-NEXT: cmovol %edi, %esi
570 ; LINUX-NEXT: movl %esi, %eax
573 ; WIN64-LABEL: umuloselecti32:
575 ; WIN64-NEXT: movl %edx, %r8d
576 ; WIN64-NEXT: movl %ecx, %eax
577 ; WIN64-NEXT: mull %edx
578 ; WIN64-NEXT: cmovol %ecx, %r8d
579 ; WIN64-NEXT: movl %r8d, %eax
582 ; WIN32-LABEL: umuloselecti32:
584 ; WIN32-NEXT: pushl %esi
585 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
586 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
587 ; WIN32-NEXT: movl %ecx, %eax
588 ; WIN32-NEXT: mull %esi
589 ; WIN32-NEXT: jo LBB13_2
590 ; WIN32-NEXT: # %bb.1:
591 ; WIN32-NEXT: movl %esi, %ecx
592 ; WIN32-NEXT: LBB13_2:
593 ; WIN32-NEXT: movl %ecx, %eax
594 ; WIN32-NEXT: popl %esi
596 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
597 %obit = extractvalue {i32, i1} %t, 1
598 %ret = select i1 %obit, i32 %v1, i32 %v2
; Selects %v1 when umul.with.overflow.i64 reports overflow and %v2 otherwise.
; The 64-bit runs expect a one-operand mulq followed by cmovoq. The i386 run
; expects an inline expansion with three mull instructions; one seto result
; is spilled to the stack and reloaded by an orb, and the combined flag is
; tested with testb before a jne branch chooses the returned register pair.
602 define i64 @umuloselecti64(i64 %v1, i64 %v2) {
603 ; LINUX-LABEL: umuloselecti64:
605 ; LINUX-NEXT: movq %rdi, %rax
606 ; LINUX-NEXT: mulq %rsi
607 ; LINUX-NEXT: cmovoq %rdi, %rsi
608 ; LINUX-NEXT: movq %rsi, %rax
611 ; WIN64-LABEL: umuloselecti64:
613 ; WIN64-NEXT: movq %rdx, %r8
614 ; WIN64-NEXT: movq %rcx, %rax
615 ; WIN64-NEXT: mulq %rdx
616 ; WIN64-NEXT: cmovoq %rcx, %r8
617 ; WIN64-NEXT: movq %r8, %rax
620 ; WIN32-LABEL: umuloselecti64:
622 ; WIN32-NEXT: pushl %ebp
623 ; WIN32-NEXT: pushl %ebx
624 ; WIN32-NEXT: pushl %edi
625 ; WIN32-NEXT: pushl %esi
626 ; WIN32-NEXT: pushl %eax
627 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
628 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
629 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
630 ; WIN32-NEXT: testl %ebp, %ebp
631 ; WIN32-NEXT: setne %al
632 ; WIN32-NEXT: testl %esi, %esi
633 ; WIN32-NEXT: setne %bl
634 ; WIN32-NEXT: andb %al, %bl
635 ; WIN32-NEXT: movl %esi, %eax
636 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
637 ; WIN32-NEXT: movl %eax, %edi
638 ; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
639 ; WIN32-NEXT: movl %ebp, %eax
640 ; WIN32-NEXT: mull %ecx
641 ; WIN32-NEXT: movl %eax, %ebp
642 ; WIN32-NEXT: seto %bh
643 ; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
644 ; WIN32-NEXT: addl %edi, %ebp
645 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
646 ; WIN32-NEXT: movl %ecx, %eax
647 ; WIN32-NEXT: mull %edi
648 ; WIN32-NEXT: addl %ebp, %edx
649 ; WIN32-NEXT: setb %al
650 ; WIN32-NEXT: orb %bh, %al
651 ; WIN32-NEXT: orb %bl, %al
652 ; WIN32-NEXT: testb %al, %al
653 ; WIN32-NEXT: jne LBB14_2
654 ; WIN32-NEXT: # %bb.1:
655 ; WIN32-NEXT: movl %edi, %ecx
656 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
657 ; WIN32-NEXT: LBB14_2:
658 ; WIN32-NEXT: movl %ecx, %eax
659 ; WIN32-NEXT: movl %esi, %edx
660 ; WIN32-NEXT: addl $4, %esp
661 ; WIN32-NEXT: popl %esi
662 ; WIN32-NEXT: popl %edi
663 ; WIN32-NEXT: popl %ebx
664 ; WIN32-NEXT: popl %ebp
666 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
667 %obit = extractvalue {i64, i1} %t, 1
668 %ret = select i1 %obit, i64 %v1, i64 %v2
673 ; Check the use of the overflow bit in combination with a branch instruction.
; Branches to %overflow or %continue on the overflow bit of
; smul.with.overflow.i8 (the branch carries !prof !0 metadata). The
; SelectionDAG, Win64 and i386 runs expect jo directly after imulb; the
; fast-isel run expects the flag to be materialized with seto and re-tested
; with testb $1 before a jne.
675 define zeroext i1 @smulobri8(i8 %v1, i8 %v2) {
676 ; SDAG-LABEL: smulobri8:
678 ; SDAG-NEXT: movl %edi, %eax
679 ; SDAG-NEXT: # kill: def $al killed $al killed $eax
680 ; SDAG-NEXT: imulb %sil
681 ; SDAG-NEXT: jo .LBB15_1
682 ; SDAG-NEXT: # %bb.2: # %continue
683 ; SDAG-NEXT: movb $1, %al
685 ; SDAG-NEXT: .LBB15_1: # %overflow
686 ; SDAG-NEXT: xorl %eax, %eax
689 ; FAST-LABEL: smulobri8:
691 ; FAST-NEXT: movl %edi, %eax
692 ; FAST-NEXT: # kill: def $al killed $al killed $eax
693 ; FAST-NEXT: imulb %sil
694 ; FAST-NEXT: seto %al
695 ; FAST-NEXT: testb $1, %al
696 ; FAST-NEXT: jne .LBB15_1
697 ; FAST-NEXT: # %bb.2: # %continue
698 ; FAST-NEXT: movb $1, %al
699 ; FAST-NEXT: andb $1, %al
700 ; FAST-NEXT: movzbl %al, %eax
702 ; FAST-NEXT: .LBB15_1: # %overflow
703 ; FAST-NEXT: xorl %eax, %eax
704 ; FAST-NEXT: andb $1, %al
705 ; FAST-NEXT: movzbl %al, %eax
708 ; WIN64-LABEL: smulobri8:
710 ; WIN64-NEXT: movl %ecx, %eax
711 ; WIN64-NEXT: imulb %dl
712 ; WIN64-NEXT: jo .LBB15_1
713 ; WIN64-NEXT: # %bb.2: # %continue
714 ; WIN64-NEXT: movb $1, %al
716 ; WIN64-NEXT: .LBB15_1: # %overflow
717 ; WIN64-NEXT: xorl %eax, %eax
720 ; WIN32-LABEL: smulobri8:
722 ; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
723 ; WIN32-NEXT: imulb {{[0-9]+}}(%esp)
724 ; WIN32-NEXT: jo LBB15_1
725 ; WIN32-NEXT: # %bb.2: # %continue
726 ; WIN32-NEXT: movb $1, %al
728 ; WIN32-NEXT: LBB15_1: # %overflow
729 ; WIN32-NEXT: xorl %eax, %eax
731 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
732 %val = extractvalue {i8, i1} %t, 0
733 %obit = extractvalue {i8, i1} %t, 1
734 br i1 %obit, label %overflow, label %continue, !prof !0
; Branches to %overflow or %continue on the overflow bit of
; smul.with.overflow.i16 (the branch carries !prof !0 metadata). The
; SelectionDAG, Win64 and i386 runs expect jo directly after imulw; the
; fast-isel run expects seto, testb $1 and jne instead.
743 define zeroext i1 @smulobri16(i16 %v1, i16 %v2) {
744 ; SDAG-LABEL: smulobri16:
746 ; SDAG-NEXT: imulw %si, %di
747 ; SDAG-NEXT: jo .LBB16_1
748 ; SDAG-NEXT: # %bb.2: # %continue
749 ; SDAG-NEXT: movb $1, %al
751 ; SDAG-NEXT: .LBB16_1: # %overflow
752 ; SDAG-NEXT: xorl %eax, %eax
755 ; FAST-LABEL: smulobri16:
757 ; FAST-NEXT: imulw %si, %di
758 ; FAST-NEXT: seto %al
759 ; FAST-NEXT: testb $1, %al
760 ; FAST-NEXT: jne .LBB16_1
761 ; FAST-NEXT: # %bb.2: # %continue
762 ; FAST-NEXT: movb $1, %al
763 ; FAST-NEXT: andb $1, %al
764 ; FAST-NEXT: movzbl %al, %eax
766 ; FAST-NEXT: .LBB16_1: # %overflow
767 ; FAST-NEXT: xorl %eax, %eax
768 ; FAST-NEXT: andb $1, %al
769 ; FAST-NEXT: movzbl %al, %eax
772 ; WIN64-LABEL: smulobri16:
774 ; WIN64-NEXT: imulw %dx, %cx
775 ; WIN64-NEXT: jo .LBB16_1
776 ; WIN64-NEXT: # %bb.2: # %continue
777 ; WIN64-NEXT: movb $1, %al
779 ; WIN64-NEXT: .LBB16_1: # %overflow
780 ; WIN64-NEXT: xorl %eax, %eax
783 ; WIN32-LABEL: smulobri16:
785 ; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
786 ; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %ax
787 ; WIN32-NEXT: jo LBB16_1
788 ; WIN32-NEXT: # %bb.2: # %continue
789 ; WIN32-NEXT: movb $1, %al
791 ; WIN32-NEXT: LBB16_1: # %overflow
792 ; WIN32-NEXT: xorl %eax, %eax
794 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
795 %val = extractvalue {i16, i1} %t, 0
796 %obit = extractvalue {i16, i1} %t, 1
797 br i1 %obit, label %overflow, label %continue, !prof !0
; Branches to %overflow or %continue on the overflow bit of
; smul.with.overflow.i32 (the branch carries !prof !0 metadata). Every run,
; including fast-isel, expects jo directly after imull; the fast-isel run
; differs only in masking and widening the returned constant.
806 define zeroext i1 @smulobri32(i32 %v1, i32 %v2) {
807 ; SDAG-LABEL: smulobri32:
809 ; SDAG-NEXT: imull %esi, %edi
810 ; SDAG-NEXT: jo .LBB17_1
811 ; SDAG-NEXT: # %bb.2: # %continue
812 ; SDAG-NEXT: movb $1, %al
814 ; SDAG-NEXT: .LBB17_1: # %overflow
815 ; SDAG-NEXT: xorl %eax, %eax
818 ; FAST-LABEL: smulobri32:
820 ; FAST-NEXT: imull %esi, %edi
821 ; FAST-NEXT: jo .LBB17_1
822 ; FAST-NEXT: # %bb.2: # %continue
823 ; FAST-NEXT: movb $1, %al
824 ; FAST-NEXT: andb $1, %al
825 ; FAST-NEXT: movzbl %al, %eax
827 ; FAST-NEXT: .LBB17_1: # %overflow
828 ; FAST-NEXT: xorl %eax, %eax
829 ; FAST-NEXT: andb $1, %al
830 ; FAST-NEXT: movzbl %al, %eax
833 ; WIN64-LABEL: smulobri32:
835 ; WIN64-NEXT: imull %edx, %ecx
836 ; WIN64-NEXT: jo .LBB17_1
837 ; WIN64-NEXT: # %bb.2: # %continue
838 ; WIN64-NEXT: movb $1, %al
840 ; WIN64-NEXT: .LBB17_1: # %overflow
841 ; WIN64-NEXT: xorl %eax, %eax
844 ; WIN32-LABEL: smulobri32:
846 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
847 ; WIN32-NEXT: imull {{[0-9]+}}(%esp), %eax
848 ; WIN32-NEXT: jo LBB17_1
849 ; WIN32-NEXT: # %bb.2: # %continue
850 ; WIN32-NEXT: movb $1, %al
852 ; WIN32-NEXT: LBB17_1: # %overflow
853 ; WIN32-NEXT: xorl %eax, %eax
855 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
856 %val = extractvalue {i32, i1} %t, 0
857 %obit = extractvalue {i32, i1} %t, 1
858 br i1 %obit, label %overflow, label %continue, !prof !0
; Branches to %overflow or %continue on the overflow bit of
; smul.with.overflow.i64 (the branch carries !prof !0 metadata). The 64-bit
; runs expect jo directly after imulq. The i386 run expects a call to
; ___mulodi4 with the address of a zeroed stack slot, then cmpl $0 on that
; slot and a jne to the overflow block, which jumps back to a shared epilogue.
867 define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
868 ; SDAG-LABEL: smulobri64:
870 ; SDAG-NEXT: imulq %rsi, %rdi
871 ; SDAG-NEXT: jo .LBB18_1
872 ; SDAG-NEXT: # %bb.2: # %continue
873 ; SDAG-NEXT: movb $1, %al
875 ; SDAG-NEXT: .LBB18_1: # %overflow
876 ; SDAG-NEXT: xorl %eax, %eax
879 ; FAST-LABEL: smulobri64:
881 ; FAST-NEXT: imulq %rsi, %rdi
882 ; FAST-NEXT: jo .LBB18_1
883 ; FAST-NEXT: # %bb.2: # %continue
884 ; FAST-NEXT: movb $1, %al
885 ; FAST-NEXT: andb $1, %al
886 ; FAST-NEXT: movzbl %al, %eax
888 ; FAST-NEXT: .LBB18_1: # %overflow
889 ; FAST-NEXT: xorl %eax, %eax
890 ; FAST-NEXT: andb $1, %al
891 ; FAST-NEXT: movzbl %al, %eax
894 ; WIN64-LABEL: smulobri64:
896 ; WIN64-NEXT: imulq %rdx, %rcx
897 ; WIN64-NEXT: jo .LBB18_1
898 ; WIN64-NEXT: # %bb.2: # %continue
899 ; WIN64-NEXT: movb $1, %al
901 ; WIN64-NEXT: .LBB18_1: # %overflow
902 ; WIN64-NEXT: xorl %eax, %eax
905 ; WIN32-LABEL: smulobri64:
907 ; WIN32-NEXT: pushl %edi
908 ; WIN32-NEXT: pushl %esi
909 ; WIN32-NEXT: pushl %eax
910 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
911 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
912 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
913 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
914 ; WIN32-NEXT: movl $0, (%esp)
915 ; WIN32-NEXT: movl %esp, %edi
916 ; WIN32-NEXT: pushl %edi
917 ; WIN32-NEXT: pushl %esi
918 ; WIN32-NEXT: pushl %edx
919 ; WIN32-NEXT: pushl %ecx
920 ; WIN32-NEXT: pushl %eax
921 ; WIN32-NEXT: calll ___mulodi4
922 ; WIN32-NEXT: addl $20, %esp
923 ; WIN32-NEXT: cmpl $0, (%esp)
924 ; WIN32-NEXT: jne LBB18_1
925 ; WIN32-NEXT: # %bb.3: # %continue
926 ; WIN32-NEXT: movb $1, %al
927 ; WIN32-NEXT: LBB18_2: # %overflow
928 ; WIN32-NEXT: addl $4, %esp
929 ; WIN32-NEXT: popl %esi
930 ; WIN32-NEXT: popl %edi
932 ; WIN32-NEXT: LBB18_1: # %overflow
933 ; WIN32-NEXT: xorl %eax, %eax
934 ; WIN32-NEXT: jmp LBB18_2
935 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
936 %val = extractvalue {i64, i1} %t, 0
937 %obit = extractvalue {i64, i1} %t, 1
938 br i1 %obit, label %overflow, label %continue, !prof !0
; Branches to %overflow or %continue on the overflow bit of
; umul.with.overflow.i8 (the branch carries !prof !0 metadata). The
; SelectionDAG, Win64 and i386 runs expect jo directly after mulb; the
; fast-isel run expects seto, testb $1 and jne instead.
947 define zeroext i1 @umulobri8(i8 %v1, i8 %v2) {
948 ; SDAG-LABEL: umulobri8:
950 ; SDAG-NEXT: movl %edi, %eax
951 ; SDAG-NEXT: # kill: def $al killed $al killed $eax
952 ; SDAG-NEXT: mulb %sil
953 ; SDAG-NEXT: jo .LBB19_1
954 ; SDAG-NEXT: # %bb.2: # %continue
955 ; SDAG-NEXT: movb $1, %al
957 ; SDAG-NEXT: .LBB19_1: # %overflow
958 ; SDAG-NEXT: xorl %eax, %eax
961 ; FAST-LABEL: umulobri8:
963 ; FAST-NEXT: movl %edi, %eax
964 ; FAST-NEXT: # kill: def $al killed $al killed $eax
965 ; FAST-NEXT: mulb %sil
966 ; FAST-NEXT: seto %al
967 ; FAST-NEXT: testb $1, %al
968 ; FAST-NEXT: jne .LBB19_1
969 ; FAST-NEXT: # %bb.2: # %continue
970 ; FAST-NEXT: movb $1, %al
971 ; FAST-NEXT: andb $1, %al
972 ; FAST-NEXT: movzbl %al, %eax
974 ; FAST-NEXT: .LBB19_1: # %overflow
975 ; FAST-NEXT: xorl %eax, %eax
976 ; FAST-NEXT: andb $1, %al
977 ; FAST-NEXT: movzbl %al, %eax
980 ; WIN64-LABEL: umulobri8:
982 ; WIN64-NEXT: movl %ecx, %eax
983 ; WIN64-NEXT: mulb %dl
984 ; WIN64-NEXT: jo .LBB19_1
985 ; WIN64-NEXT: # %bb.2: # %continue
986 ; WIN64-NEXT: movb $1, %al
988 ; WIN64-NEXT: .LBB19_1: # %overflow
989 ; WIN64-NEXT: xorl %eax, %eax
992 ; WIN32-LABEL: umulobri8:
994 ; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
995 ; WIN32-NEXT: mulb {{[0-9]+}}(%esp)
996 ; WIN32-NEXT: jo LBB19_1
997 ; WIN32-NEXT: # %bb.2: # %continue
998 ; WIN32-NEXT: movb $1, %al
1000 ; WIN32-NEXT: LBB19_1: # %overflow
1001 ; WIN32-NEXT: xorl %eax, %eax
1003 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
1004 %val = extractvalue {i8, i1} %t, 0
1005 %obit = extractvalue {i8, i1} %t, 1
1006 br i1 %obit, label %overflow, label %continue, !prof !0
; Branches to %overflow or %continue on the overflow bit of
; umul.with.overflow.i16 (the branch carries !prof !0 metadata). The
; SelectionDAG, Win64 and i386 runs expect jo directly after a one-operand
; mulw; the fast-isel run expects seto, testb $1 and jne instead.
1015 define zeroext i1 @umulobri16(i16 %v1, i16 %v2) {
1016 ; SDAG-LABEL: umulobri16:
1018 ; SDAG-NEXT: movl %edi, %eax
1019 ; SDAG-NEXT: # kill: def $ax killed $ax killed $eax
1020 ; SDAG-NEXT: mulw %si
1021 ; SDAG-NEXT: jo .LBB20_1
1022 ; SDAG-NEXT: # %bb.2: # %continue
1023 ; SDAG-NEXT: movb $1, %al
1025 ; SDAG-NEXT: .LBB20_1: # %overflow
1026 ; SDAG-NEXT: xorl %eax, %eax
1029 ; FAST-LABEL: umulobri16:
1031 ; FAST-NEXT: movl %edi, %eax
1032 ; FAST-NEXT: # kill: def $ax killed $ax killed $eax
1033 ; FAST-NEXT: mulw %si
1034 ; FAST-NEXT: seto %al
1035 ; FAST-NEXT: testb $1, %al
1036 ; FAST-NEXT: jne .LBB20_1
1037 ; FAST-NEXT: # %bb.2: # %continue
1038 ; FAST-NEXT: movb $1, %al
1039 ; FAST-NEXT: andb $1, %al
1040 ; FAST-NEXT: movzbl %al, %eax
1042 ; FAST-NEXT: .LBB20_1: # %overflow
1043 ; FAST-NEXT: xorl %eax, %eax
1044 ; FAST-NEXT: andb $1, %al
1045 ; FAST-NEXT: movzbl %al, %eax
1048 ; WIN64-LABEL: umulobri16:
1050 ; WIN64-NEXT: movl %ecx, %eax
1051 ; WIN64-NEXT: mulw %dx
1052 ; WIN64-NEXT: jo .LBB20_1
1053 ; WIN64-NEXT: # %bb.2: # %continue
1054 ; WIN64-NEXT: movb $1, %al
1056 ; WIN64-NEXT: .LBB20_1: # %overflow
1057 ; WIN64-NEXT: xorl %eax, %eax
1060 ; WIN32-LABEL: umulobri16:
1062 ; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1063 ; WIN32-NEXT: mulw {{[0-9]+}}(%esp)
1064 ; WIN32-NEXT: jo LBB20_1
1065 ; WIN32-NEXT: # %bb.2: # %continue
1066 ; WIN32-NEXT: movb $1, %al
1068 ; WIN32-NEXT: LBB20_1: # %overflow
1069 ; WIN32-NEXT: xorl %eax, %eax
1071 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
1072 %val = extractvalue {i16, i1} %t, 0
1073 %obit = extractvalue {i16, i1} %t, 1
1074 br i1 %obit, label %overflow, label %continue, !prof !0
; Branches to %overflow or %continue on the overflow bit of
; umul.with.overflow.i32 (the branch carries !prof !0 metadata). Every run,
; including fast-isel, expects jo directly after a one-operand mull; the
; fast-isel run differs only in masking and widening the returned constant.
1083 define zeroext i1 @umulobri32(i32 %v1, i32 %v2) {
1084 ; SDAG-LABEL: umulobri32:
1086 ; SDAG-NEXT: movl %edi, %eax
1087 ; SDAG-NEXT: mull %esi
1088 ; SDAG-NEXT: jo .LBB21_1
1089 ; SDAG-NEXT: # %bb.2: # %continue
1090 ; SDAG-NEXT: movb $1, %al
1092 ; SDAG-NEXT: .LBB21_1: # %overflow
1093 ; SDAG-NEXT: xorl %eax, %eax
1096 ; FAST-LABEL: umulobri32:
1098 ; FAST-NEXT: movl %edi, %eax
1099 ; FAST-NEXT: mull %esi
1100 ; FAST-NEXT: jo .LBB21_1
1101 ; FAST-NEXT: # %bb.2: # %continue
1102 ; FAST-NEXT: movb $1, %al
1103 ; FAST-NEXT: andb $1, %al
1104 ; FAST-NEXT: movzbl %al, %eax
1106 ; FAST-NEXT: .LBB21_1: # %overflow
1107 ; FAST-NEXT: xorl %eax, %eax
1108 ; FAST-NEXT: andb $1, %al
1109 ; FAST-NEXT: movzbl %al, %eax
1112 ; WIN64-LABEL: umulobri32:
1114 ; WIN64-NEXT: movl %ecx, %eax
1115 ; WIN64-NEXT: mull %edx
1116 ; WIN64-NEXT: jo .LBB21_1
1117 ; WIN64-NEXT: # %bb.2: # %continue
1118 ; WIN64-NEXT: movb $1, %al
1120 ; WIN64-NEXT: .LBB21_1: # %overflow
1121 ; WIN64-NEXT: xorl %eax, %eax
1124 ; WIN32-LABEL: umulobri32:
1126 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1127 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
1128 ; WIN32-NEXT: jo LBB21_1
1129 ; WIN32-NEXT: # %bb.2: # %continue
1130 ; WIN32-NEXT: movb $1, %al
1132 ; WIN32-NEXT: LBB21_1: # %overflow
1133 ; WIN32-NEXT: xorl %eax, %eax
1135 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
1136 %val = extractvalue {i32, i1} %t, 0
1137 %obit = extractvalue {i32, i1} %t, 1
1138 br i1 %obit, label %overflow, label %continue, !prof !0
; Branches to %overflow or %continue on the overflow bit of
; umul.with.overflow.i64 (the branch carries !prof !0 metadata). The 64-bit
; runs expect jo directly after a one-operand mulq. The i386 run expects an
; inline expansion with three mull instructions whose seto/setb results are
; or-ed into %al, followed by subb $1, %al and je to the overflow block.
1147 define zeroext i1 @umulobri64(i64 %v1, i64 %v2) {
1148 ; SDAG-LABEL: umulobri64:
1150 ; SDAG-NEXT: movq %rdi, %rax
1151 ; SDAG-NEXT: mulq %rsi
1152 ; SDAG-NEXT: jo .LBB22_1
1153 ; SDAG-NEXT: # %bb.2: # %continue
1154 ; SDAG-NEXT: movb $1, %al
1156 ; SDAG-NEXT: .LBB22_1: # %overflow
1157 ; SDAG-NEXT: xorl %eax, %eax
1160 ; FAST-LABEL: umulobri64:
1162 ; FAST-NEXT: movq %rdi, %rax
1163 ; FAST-NEXT: mulq %rsi
1164 ; FAST-NEXT: jo .LBB22_1
1165 ; FAST-NEXT: # %bb.2: # %continue
1166 ; FAST-NEXT: movb $1, %al
1167 ; FAST-NEXT: andb $1, %al
1168 ; FAST-NEXT: movzbl %al, %eax
1170 ; FAST-NEXT: .LBB22_1: # %overflow
1171 ; FAST-NEXT: xorl %eax, %eax
1172 ; FAST-NEXT: andb $1, %al
1173 ; FAST-NEXT: movzbl %al, %eax
1176 ; WIN64-LABEL: umulobri64:
1178 ; WIN64-NEXT: movq %rcx, %rax
1179 ; WIN64-NEXT: mulq %rdx
1180 ; WIN64-NEXT: jo .LBB22_1
1181 ; WIN64-NEXT: # %bb.2: # %continue
1182 ; WIN64-NEXT: movb $1, %al
1184 ; WIN64-NEXT: .LBB22_1: # %overflow
1185 ; WIN64-NEXT: xorl %eax, %eax
1188 ; WIN32-LABEL: umulobri64:
1190 ; WIN32-NEXT: pushl %ebp
1191 ; WIN32-NEXT: pushl %ebx
1192 ; WIN32-NEXT: pushl %edi
1193 ; WIN32-NEXT: pushl %esi
1194 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
1195 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1196 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
1197 ; WIN32-NEXT: testl %esi, %esi
1198 ; WIN32-NEXT: setne %dl
1199 ; WIN32-NEXT: testl %eax, %eax
1200 ; WIN32-NEXT: setne %bl
1201 ; WIN32-NEXT: andb %dl, %bl
1202 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
1203 ; WIN32-NEXT: movl %eax, %edi
1204 ; WIN32-NEXT: seto %bh
1205 ; WIN32-NEXT: movl %esi, %eax
1206 ; WIN32-NEXT: mull %ebp
1207 ; WIN32-NEXT: movl %eax, %esi
1208 ; WIN32-NEXT: seto %cl
1209 ; WIN32-NEXT: orb %bh, %cl
1210 ; WIN32-NEXT: addl %edi, %esi
1211 ; WIN32-NEXT: movl %ebp, %eax
1212 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
1213 ; WIN32-NEXT: addl %esi, %edx
1214 ; WIN32-NEXT: setb %al
1215 ; WIN32-NEXT: orb %cl, %al
1216 ; WIN32-NEXT: orb %bl, %al
1217 ; WIN32-NEXT: subb $1, %al
1218 ; WIN32-NEXT: je LBB22_1
1219 ; WIN32-NEXT: # %bb.3: # %continue
1220 ; WIN32-NEXT: movb $1, %al
1221 ; WIN32-NEXT: LBB22_2: # %overflow
1222 ; WIN32-NEXT: popl %esi
1223 ; WIN32-NEXT: popl %edi
1224 ; WIN32-NEXT: popl %ebx
1225 ; WIN32-NEXT: popl %ebp
1227 ; WIN32-NEXT: LBB22_1: # %overflow
1228 ; WIN32-NEXT: xorl %eax, %eax
1229 ; WIN32-NEXT: jmp LBB22_2
1230 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
1231 %val = extractvalue {i64, i1} %t, 0
1232 %obit = extractvalue {i64, i1} %t, 1
1233 br i1 %obit, label %overflow, label %continue, !prof !0
; umul.with.overflow.i64 of %c1 by the constant 160, with the overflow bit
; or-ed into %c2 (presumably a regression test for the bug number in the
; function name). The 64-bit runs expect 160 to be loaded into %ecx, then
; mulq %rcx, seto and orb. The i386 run expects two mull instructions by 160
; whose seto and setb results are or-ed together with the stack argument.
1242 define i1 @bug27873(i64 %c1, i1 %c2) {
1243 ; LINUX-LABEL: bug27873:
1245 ; LINUX-NEXT: movq %rdi, %rax
1246 ; LINUX-NEXT: movl $160, %ecx
1247 ; LINUX-NEXT: mulq %rcx
1248 ; LINUX-NEXT: seto %al
1249 ; LINUX-NEXT: orb %sil, %al
1252 ; WIN64-LABEL: bug27873:
1254 ; WIN64-NEXT: movl %edx, %r8d
1255 ; WIN64-NEXT: movq %rcx, %rax
1256 ; WIN64-NEXT: movl $160, %ecx
1257 ; WIN64-NEXT: mulq %rcx
1258 ; WIN64-NEXT: seto %al
1259 ; WIN64-NEXT: orb %r8b, %al
1262 ; WIN32-LABEL: bug27873:
1264 ; WIN32-NEXT: pushl %ebx
1265 ; WIN32-NEXT: movl $160, %eax
1266 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
1267 ; WIN32-NEXT: movl %eax, %ecx
1268 ; WIN32-NEXT: seto %bl
1269 ; WIN32-NEXT: movl $160, %eax
1270 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
1271 ; WIN32-NEXT: addl %ecx, %edx
1272 ; WIN32-NEXT: setb %al
1273 ; WIN32-NEXT: orb %bl, %al
1274 ; WIN32-NEXT: orb {{[0-9]+}}(%esp), %al
1275 ; WIN32-NEXT: popl %ebx
1277 %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160)
1278 %mul.overflow = extractvalue { i64, i1 } %mul, 1
1279 %x1 = or i1 %c2, %mul.overflow
; smuloi8_load: signed i8 multiply-with-overflow whose first operand is loaded
; from %ptr1. The product is stored through %res and the overflow bit is
; extracted as %obit.
; Expected code: the default-isel x86-64 Linux run and the Win64 run use the
; loaded value as the memory operand of imulb; the fast-isel run loads it into
; %al first and multiplies by %sil; the i386 run loads the pointee into %al and
; multiplies by a stack operand.
1283 define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
1284 ; SDAG-LABEL: smuloi8_load:
1286 ; SDAG-NEXT: movl %esi, %eax
1287 ; SDAG-NEXT: # kill: def $al killed $al killed $eax
1288 ; SDAG-NEXT: imulb (%rdi)
1289 ; SDAG-NEXT: seto %cl
1290 ; SDAG-NEXT: movb %al, (%rdx)
1291 ; SDAG-NEXT: movl %ecx, %eax
1294 ; FAST-LABEL: smuloi8_load:
1296 ; FAST-NEXT: movb (%rdi), %al
1297 ; FAST-NEXT: imulb %sil
1298 ; FAST-NEXT: seto %cl
1299 ; FAST-NEXT: movb %al, (%rdx)
1300 ; FAST-NEXT: andb $1, %cl
1301 ; FAST-NEXT: movzbl %cl, %eax
1304 ; WIN64-LABEL: smuloi8_load:
1306 ; WIN64-NEXT: movl %edx, %eax
1307 ; WIN64-NEXT: imulb (%rcx)
1308 ; WIN64-NEXT: seto %cl
1309 ; WIN64-NEXT: movb %al, (%r8)
1310 ; WIN64-NEXT: movl %ecx, %eax
1313 ; WIN32-LABEL: smuloi8_load:
1315 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
1316 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1317 ; WIN32-NEXT: movb (%eax), %al
1318 ; WIN32-NEXT: imulb {{[0-9]+}}(%esp)
1319 ; WIN32-NEXT: seto %cl
1320 ; WIN32-NEXT: movb %al, (%edx)
1321 ; WIN32-NEXT: movl %ecx, %eax
; IR under test: the loaded value is the first intrinsic operand.
1323 %v1 = load i8, i8* %ptr1
1324 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
1325 %val = extractvalue {i8, i1} %t, 0
1326 %obit = extractvalue {i8, i1} %t, 1
1327 store i8 %val, i8* %res
; smuloi8_load2: signed i8 multiply-with-overflow whose second operand is
; loaded from %ptr2. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: every run configuration places %v1 in %al and uses the loaded
; value directly as the memory operand of imulb; the fast-isel run additionally
; masks the seto result with andb/movzbl.
1331 define zeroext i1 @smuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
1332 ; SDAG-LABEL: smuloi8_load2:
1334 ; SDAG-NEXT: movl %edi, %eax
1335 ; SDAG-NEXT: # kill: def $al killed $al killed $eax
1336 ; SDAG-NEXT: imulb (%rsi)
1337 ; SDAG-NEXT: seto %cl
1338 ; SDAG-NEXT: movb %al, (%rdx)
1339 ; SDAG-NEXT: movl %ecx, %eax
1342 ; FAST-LABEL: smuloi8_load2:
1344 ; FAST-NEXT: movl %edi, %eax
1345 ; FAST-NEXT: # kill: def $al killed $al killed $eax
1346 ; FAST-NEXT: imulb (%rsi)
1347 ; FAST-NEXT: seto %cl
1348 ; FAST-NEXT: movb %al, (%rdx)
1349 ; FAST-NEXT: andb $1, %cl
1350 ; FAST-NEXT: movzbl %cl, %eax
1353 ; WIN64-LABEL: smuloi8_load2:
1355 ; WIN64-NEXT: movl %ecx, %eax
1356 ; WIN64-NEXT: imulb (%rdx)
1357 ; WIN64-NEXT: seto %cl
1358 ; WIN64-NEXT: movb %al, (%r8)
1359 ; WIN64-NEXT: movl %ecx, %eax
1362 ; WIN32-LABEL: smuloi8_load2:
1364 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
1365 ; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
1366 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1367 ; WIN32-NEXT: imulb (%ecx)
1368 ; WIN32-NEXT: seto %cl
1369 ; WIN32-NEXT: movb %al, (%edx)
1370 ; WIN32-NEXT: movl %ecx, %eax
; IR under test: the loaded value is the second intrinsic operand.
1372 %v2 = load i8, i8* %ptr2
1373 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
1374 %val = extractvalue {i8, i1} %t, 0
1375 %obit = extractvalue {i8, i1} %t, 1
1376 store i8 %val, i8* %res
; smuloi16_load: signed i16 multiply-with-overflow whose first operand is
; loaded from %ptr1. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: all three 64-bit run configurations use the loaded value as
; the memory operand of a two-operand imulw; the i386 run loads the pointee
; with movzwl and multiplies it by a stack operand instead.
1380 define zeroext i1 @smuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
1381 ; SDAG-LABEL: smuloi16_load:
1383 ; SDAG-NEXT: imulw (%rdi), %si
1384 ; SDAG-NEXT: seto %al
1385 ; SDAG-NEXT: movw %si, (%rdx)
1388 ; FAST-LABEL: smuloi16_load:
1390 ; FAST-NEXT: imulw (%rdi), %si
1391 ; FAST-NEXT: seto %al
1392 ; FAST-NEXT: movw %si, (%rdx)
1393 ; FAST-NEXT: andb $1, %al
1394 ; FAST-NEXT: movzbl %al, %eax
1397 ; WIN64-LABEL: smuloi16_load:
1399 ; WIN64-NEXT: imulw (%rcx), %dx
1400 ; WIN64-NEXT: seto %al
1401 ; WIN64-NEXT: movw %dx, (%r8)
1404 ; WIN32-LABEL: smuloi16_load:
1406 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1407 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1408 ; WIN32-NEXT: movzwl (%eax), %edx
1409 ; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %dx
1410 ; WIN32-NEXT: seto %al
1411 ; WIN32-NEXT: movw %dx, (%ecx)
; IR under test: the loaded value is the first intrinsic operand.
1413 %v1 = load i16, i16* %ptr1
1414 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
1415 %val = extractvalue {i16, i1} %t, 0
1416 %obit = extractvalue {i16, i1} %t, 1
1417 store i16 %val, i16* %res
; smuloi16_load2: signed i16 multiply-with-overflow whose second operand is
; loaded from %ptr2. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: every run configuration uses the loaded value as the memory
; operand of a two-operand imulw; the i386 run first fetches %v1 from the
; stack with movzwl.
1421 define zeroext i1 @smuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
1422 ; SDAG-LABEL: smuloi16_load2:
1424 ; SDAG-NEXT: imulw (%rsi), %di
1425 ; SDAG-NEXT: seto %al
1426 ; SDAG-NEXT: movw %di, (%rdx)
1429 ; FAST-LABEL: smuloi16_load2:
1431 ; FAST-NEXT: imulw (%rsi), %di
1432 ; FAST-NEXT: seto %al
1433 ; FAST-NEXT: movw %di, (%rdx)
1434 ; FAST-NEXT: andb $1, %al
1435 ; FAST-NEXT: movzbl %al, %eax
1438 ; WIN64-LABEL: smuloi16_load2:
1440 ; WIN64-NEXT: imulw (%rdx), %cx
1441 ; WIN64-NEXT: seto %al
1442 ; WIN64-NEXT: movw %cx, (%r8)
1445 ; WIN32-LABEL: smuloi16_load2:
1447 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1448 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1449 ; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
1450 ; WIN32-NEXT: imulw (%eax), %dx
1451 ; WIN32-NEXT: seto %al
1452 ; WIN32-NEXT: movw %dx, (%ecx)
; IR under test: the loaded value is the second intrinsic operand.
1454 %v2 = load i16, i16* %ptr2
1455 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
1456 %val = extractvalue {i16, i1} %t, 0
1457 %obit = extractvalue {i16, i1} %t, 1
1458 store i16 %val, i16* %res
; smuloi32_load: signed i32 multiply-with-overflow whose first operand is
; loaded from %ptr1. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: all three 64-bit run configurations use the loaded value as
; the memory operand of a two-operand imull; the i386 run loads the pointee
; into %edx and multiplies it by a stack operand instead.
1462 define zeroext i1 @smuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
1463 ; SDAG-LABEL: smuloi32_load:
1465 ; SDAG-NEXT: imull (%rdi), %esi
1466 ; SDAG-NEXT: seto %al
1467 ; SDAG-NEXT: movl %esi, (%rdx)
1470 ; FAST-LABEL: smuloi32_load:
1472 ; FAST-NEXT: imull (%rdi), %esi
1473 ; FAST-NEXT: seto %al
1474 ; FAST-NEXT: movl %esi, (%rdx)
1475 ; FAST-NEXT: andb $1, %al
1476 ; FAST-NEXT: movzbl %al, %eax
1479 ; WIN64-LABEL: smuloi32_load:
1481 ; WIN64-NEXT: imull (%rcx), %edx
1482 ; WIN64-NEXT: seto %al
1483 ; WIN64-NEXT: movl %edx, (%r8)
1486 ; WIN32-LABEL: smuloi32_load:
1488 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1489 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1490 ; WIN32-NEXT: movl (%eax), %edx
1491 ; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx
1492 ; WIN32-NEXT: seto %al
1493 ; WIN32-NEXT: movl %edx, (%ecx)
; IR under test: the loaded value is the first intrinsic operand.
1495 %v1 = load i32, i32* %ptr1
1496 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
1497 %val = extractvalue {i32, i1} %t, 0
1498 %obit = extractvalue {i32, i1} %t, 1
1499 store i32 %val, i32* %res
; smuloi32_load2: signed i32 multiply-with-overflow whose second operand is
; loaded from %ptr2. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: every run configuration uses the loaded value as the memory
; operand of a two-operand imull; the i386 run first fetches %v1 from the
; stack into %edx.
1503 define zeroext i1 @smuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
1504 ; SDAG-LABEL: smuloi32_load2:
1506 ; SDAG-NEXT: imull (%rsi), %edi
1507 ; SDAG-NEXT: seto %al
1508 ; SDAG-NEXT: movl %edi, (%rdx)
1511 ; FAST-LABEL: smuloi32_load2:
1513 ; FAST-NEXT: imull (%rsi), %edi
1514 ; FAST-NEXT: seto %al
1515 ; FAST-NEXT: movl %edi, (%rdx)
1516 ; FAST-NEXT: andb $1, %al
1517 ; FAST-NEXT: movzbl %al, %eax
1520 ; WIN64-LABEL: smuloi32_load2:
1522 ; WIN64-NEXT: imull (%rdx), %ecx
1523 ; WIN64-NEXT: seto %al
1524 ; WIN64-NEXT: movl %ecx, (%r8)
1527 ; WIN32-LABEL: smuloi32_load2:
1529 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1530 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1531 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
1532 ; WIN32-NEXT: imull (%eax), %edx
1533 ; WIN32-NEXT: seto %al
1534 ; WIN32-NEXT: movl %edx, (%ecx)
; IR under test: the loaded value is the second intrinsic operand.
1536 %v2 = load i32, i32* %ptr2
1537 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
1538 %val = extractvalue {i32, i1} %t, 0
1539 %obit = extractvalue {i32, i1} %t, 1
1540 store i32 %val, i32* %res
; smuloi64_load: signed i64 multiply-with-overflow whose first operand is
; loaded from %ptr1. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: all three 64-bit run configurations use the loaded value as
; the memory operand of a two-operand imulq. The i386 run instead calls
; ___mulodi4: it zeroes a stack slot, pushes that slot's address plus the four
; operand halves (20 bytes in total), and afterwards derives the overflow bit
; by comparing the slot against zero (cmpl/setne). The loaded halves are
; pushed last.
1544 define zeroext i1 @smuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
1545 ; SDAG-LABEL: smuloi64_load:
1547 ; SDAG-NEXT: imulq (%rdi), %rsi
1548 ; SDAG-NEXT: seto %al
1549 ; SDAG-NEXT: movq %rsi, (%rdx)
1552 ; FAST-LABEL: smuloi64_load:
1554 ; FAST-NEXT: imulq (%rdi), %rsi
1555 ; FAST-NEXT: seto %al
1556 ; FAST-NEXT: movq %rsi, (%rdx)
1557 ; FAST-NEXT: andb $1, %al
1558 ; FAST-NEXT: movzbl %al, %eax
1561 ; WIN64-LABEL: smuloi64_load:
1563 ; WIN64-NEXT: imulq (%rcx), %rdx
1564 ; WIN64-NEXT: seto %al
1565 ; WIN64-NEXT: movq %rdx, (%r8)
1568 ; WIN32-LABEL: smuloi64_load:
1570 ; WIN32-NEXT: pushl %ebx
1571 ; WIN32-NEXT: pushl %edi
1572 ; WIN32-NEXT: pushl %esi
1573 ; WIN32-NEXT: pushl %eax
1574 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
1575 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1576 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1577 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
1578 ; WIN32-NEXT: movl (%edx), %edi
1579 ; WIN32-NEXT: movl 4(%edx), %edx
1580 ; WIN32-NEXT: movl $0, (%esp)
1581 ; WIN32-NEXT: movl %esp, %ebx
1582 ; WIN32-NEXT: pushl %ebx
1583 ; WIN32-NEXT: pushl %ecx
1584 ; WIN32-NEXT: pushl %eax
1585 ; WIN32-NEXT: pushl %edx
1586 ; WIN32-NEXT: pushl %edi
1587 ; WIN32-NEXT: calll ___mulodi4
1588 ; WIN32-NEXT: addl $20, %esp
1589 ; WIN32-NEXT: cmpl $0, (%esp)
1590 ; WIN32-NEXT: setne %cl
1591 ; WIN32-NEXT: movl %eax, (%esi)
1592 ; WIN32-NEXT: movl %edx, 4(%esi)
1593 ; WIN32-NEXT: movl %ecx, %eax
1594 ; WIN32-NEXT: addl $4, %esp
1595 ; WIN32-NEXT: popl %esi
1596 ; WIN32-NEXT: popl %edi
1597 ; WIN32-NEXT: popl %ebx
; IR under test: the loaded value is the first intrinsic operand.
1599 %v1 = load i64, i64* %ptr1
1600 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
1601 %val = extractvalue {i64, i1} %t, 0
1602 %obit = extractvalue {i64, i1} %t, 1
1603 store i64 %val, i64* %res
; smuloi64_load2: signed i64 multiply-with-overflow whose second operand is
; loaded from %ptr2. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: all three 64-bit run configurations use the loaded value as
; the memory operand of a two-operand imulq. The i386 run instead calls
; ___mulodi4: it zeroes a stack slot, pushes that slot's address plus the four
; operand halves (20 bytes in total), and afterwards derives the overflow bit
; by comparing the slot against zero (cmpl/setne). Here the loaded halves are
; pushed before the halves of %v1, the reverse of the first-operand variant.
1607 define zeroext i1 @smuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
1608 ; SDAG-LABEL: smuloi64_load2:
1610 ; SDAG-NEXT: imulq (%rsi), %rdi
1611 ; SDAG-NEXT: seto %al
1612 ; SDAG-NEXT: movq %rdi, (%rdx)
1615 ; FAST-LABEL: smuloi64_load2:
1617 ; FAST-NEXT: imulq (%rsi), %rdi
1618 ; FAST-NEXT: seto %al
1619 ; FAST-NEXT: movq %rdi, (%rdx)
1620 ; FAST-NEXT: andb $1, %al
1621 ; FAST-NEXT: movzbl %al, %eax
1624 ; WIN64-LABEL: smuloi64_load2:
1626 ; WIN64-NEXT: imulq (%rdx), %rcx
1627 ; WIN64-NEXT: seto %al
1628 ; WIN64-NEXT: movq %rcx, (%r8)
1631 ; WIN32-LABEL: smuloi64_load2:
1633 ; WIN32-NEXT: pushl %ebx
1634 ; WIN32-NEXT: pushl %edi
1635 ; WIN32-NEXT: pushl %esi
1636 ; WIN32-NEXT: pushl %eax
1637 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
1638 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1639 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1640 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
1641 ; WIN32-NEXT: movl (%edx), %edi
1642 ; WIN32-NEXT: movl 4(%edx), %edx
1643 ; WIN32-NEXT: movl $0, (%esp)
1644 ; WIN32-NEXT: movl %esp, %ebx
1645 ; WIN32-NEXT: pushl %ebx
1646 ; WIN32-NEXT: pushl %edx
1647 ; WIN32-NEXT: pushl %edi
1648 ; WIN32-NEXT: pushl %ecx
1649 ; WIN32-NEXT: pushl %eax
1650 ; WIN32-NEXT: calll ___mulodi4
1651 ; WIN32-NEXT: addl $20, %esp
1652 ; WIN32-NEXT: cmpl $0, (%esp)
1653 ; WIN32-NEXT: setne %cl
1654 ; WIN32-NEXT: movl %eax, (%esi)
1655 ; WIN32-NEXT: movl %edx, 4(%esi)
1656 ; WIN32-NEXT: movl %ecx, %eax
1657 ; WIN32-NEXT: addl $4, %esp
1658 ; WIN32-NEXT: popl %esi
1659 ; WIN32-NEXT: popl %edi
1660 ; WIN32-NEXT: popl %ebx
; IR under test: the loaded value is the second intrinsic operand.
1662 %v2 = load i64, i64* %ptr2
1663 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
1664 %val = extractvalue {i64, i1} %t, 0
1665 %obit = extractvalue {i64, i1} %t, 1
1666 store i64 %val, i64* %res
; umuloi8_load: unsigned i8 multiply-with-overflow whose first operand is
; loaded from %ptr1. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: the default-isel x86-64 Linux run and the Win64 run use the
; loaded value as the memory operand of mulb; the fast-isel run loads it into
; %al first and multiplies by %sil; the i386 run loads the pointee into %al and
; multiplies by a stack operand.
1670 define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
1671 ; SDAG-LABEL: umuloi8_load:
1673 ; SDAG-NEXT: movl %esi, %eax
1674 ; SDAG-NEXT: # kill: def $al killed $al killed $eax
1675 ; SDAG-NEXT: mulb (%rdi)
1676 ; SDAG-NEXT: seto %cl
1677 ; SDAG-NEXT: movb %al, (%rdx)
1678 ; SDAG-NEXT: movl %ecx, %eax
1681 ; FAST-LABEL: umuloi8_load:
1683 ; FAST-NEXT: movb (%rdi), %al
1684 ; FAST-NEXT: mulb %sil
1685 ; FAST-NEXT: seto %cl
1686 ; FAST-NEXT: movb %al, (%rdx)
1687 ; FAST-NEXT: andb $1, %cl
1688 ; FAST-NEXT: movzbl %cl, %eax
1691 ; WIN64-LABEL: umuloi8_load:
1693 ; WIN64-NEXT: movl %edx, %eax
1694 ; WIN64-NEXT: mulb (%rcx)
1695 ; WIN64-NEXT: seto %cl
1696 ; WIN64-NEXT: movb %al, (%r8)
1697 ; WIN64-NEXT: movl %ecx, %eax
1700 ; WIN32-LABEL: umuloi8_load:
1702 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
1703 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1704 ; WIN32-NEXT: movb (%eax), %al
1705 ; WIN32-NEXT: mulb {{[0-9]+}}(%esp)
1706 ; WIN32-NEXT: seto %cl
1707 ; WIN32-NEXT: movb %al, (%edx)
1708 ; WIN32-NEXT: movl %ecx, %eax
; IR under test: the loaded value is the first intrinsic operand.
1710 %v1 = load i8, i8* %ptr1
1711 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
1712 %val = extractvalue {i8, i1} %t, 0
1713 %obit = extractvalue {i8, i1} %t, 1
1714 store i8 %val, i8* %res
; umuloi8_load2: unsigned i8 multiply-with-overflow whose second operand is
; loaded from %ptr2. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: every run configuration places %v1 in %al and uses the loaded
; value directly as the memory operand of mulb; the fast-isel run additionally
; masks the seto result with andb/movzbl.
1718 define zeroext i1 @umuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
1719 ; SDAG-LABEL: umuloi8_load2:
1721 ; SDAG-NEXT: movl %edi, %eax
1722 ; SDAG-NEXT: # kill: def $al killed $al killed $eax
1723 ; SDAG-NEXT: mulb (%rsi)
1724 ; SDAG-NEXT: seto %cl
1725 ; SDAG-NEXT: movb %al, (%rdx)
1726 ; SDAG-NEXT: movl %ecx, %eax
1729 ; FAST-LABEL: umuloi8_load2:
1731 ; FAST-NEXT: movl %edi, %eax
1732 ; FAST-NEXT: # kill: def $al killed $al killed $eax
1733 ; FAST-NEXT: mulb (%rsi)
1734 ; FAST-NEXT: seto %cl
1735 ; FAST-NEXT: movb %al, (%rdx)
1736 ; FAST-NEXT: andb $1, %cl
1737 ; FAST-NEXT: movzbl %cl, %eax
1740 ; WIN64-LABEL: umuloi8_load2:
1742 ; WIN64-NEXT: movl %ecx, %eax
1743 ; WIN64-NEXT: mulb (%rdx)
1744 ; WIN64-NEXT: seto %cl
1745 ; WIN64-NEXT: movb %al, (%r8)
1746 ; WIN64-NEXT: movl %ecx, %eax
1749 ; WIN32-LABEL: umuloi8_load2:
1751 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
1752 ; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
1753 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1754 ; WIN32-NEXT: mulb (%ecx)
1755 ; WIN32-NEXT: seto %cl
1756 ; WIN32-NEXT: movb %al, (%edx)
1757 ; WIN32-NEXT: movl %ecx, %eax
; IR under test: the loaded value is the second intrinsic operand.
1759 %v2 = load i8, i8* %ptr2
1760 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
1761 %val = extractvalue {i8, i1} %t, 0
1762 %obit = extractvalue {i8, i1} %t, 1
1763 store i8 %val, i8* %res
; umuloi16_load: unsigned i16 multiply-with-overflow whose first operand is
; loaded from %ptr1. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: the default-isel x86-64 Linux run and the Win64 run use the
; loaded value as the memory operand of mulw; the fast-isel run loads it with
; movzwl and multiplies by %si; the i386 run loads the pointee with movzwl and
; multiplies by a stack operand. On x86-64 Linux the %res pointer is copied
; from %rdx to %rcx before the multiply and the store goes through %rcx.
1767 define zeroext i1 @umuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
1768 ; SDAG-LABEL: umuloi16_load:
1770 ; SDAG-NEXT: movq %rdx, %rcx
1771 ; SDAG-NEXT: movl %esi, %eax
1772 ; SDAG-NEXT: # kill: def $ax killed $ax killed $eax
1773 ; SDAG-NEXT: mulw (%rdi)
1774 ; SDAG-NEXT: seto %dl
1775 ; SDAG-NEXT: movw %ax, (%rcx)
1776 ; SDAG-NEXT: movl %edx, %eax
1779 ; FAST-LABEL: umuloi16_load:
1781 ; FAST-NEXT: movq %rdx, %rcx
1782 ; FAST-NEXT: movzwl (%rdi), %eax
1783 ; FAST-NEXT: mulw %si
1784 ; FAST-NEXT: seto %dl
1785 ; FAST-NEXT: movw %ax, (%rcx)
1786 ; FAST-NEXT: andb $1, %dl
1787 ; FAST-NEXT: movzbl %dl, %eax
1790 ; WIN64-LABEL: umuloi16_load:
1792 ; WIN64-NEXT: movl %edx, %eax
1793 ; WIN64-NEXT: mulw (%rcx)
1794 ; WIN64-NEXT: seto %cl
1795 ; WIN64-NEXT: movw %ax, (%r8)
1796 ; WIN64-NEXT: movl %ecx, %eax
1799 ; WIN32-LABEL: umuloi16_load:
1801 ; WIN32-NEXT: pushl %esi
1802 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
1803 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1804 ; WIN32-NEXT: movzwl (%eax), %eax
1805 ; WIN32-NEXT: mulw {{[0-9]+}}(%esp)
1806 ; WIN32-NEXT: seto %cl
1807 ; WIN32-NEXT: movw %ax, (%esi)
1808 ; WIN32-NEXT: movl %ecx, %eax
1809 ; WIN32-NEXT: popl %esi
; IR under test: the loaded value is the first intrinsic operand.
1811 %v1 = load i16, i16* %ptr1
1812 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
1813 %val = extractvalue {i16, i1} %t, 0
1814 %obit = extractvalue {i16, i1} %t, 1
1815 store i16 %val, i16* %res
; umuloi16_load2: unsigned i16 multiply-with-overflow whose second operand is
; loaded from %ptr2. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: every run configuration places %v1 in %ax and uses the loaded
; value directly as the memory operand of mulw. On x86-64 Linux the %res
; pointer is copied from %rdx to %rcx before the multiply and the store goes
; through %rcx.
1819 define zeroext i1 @umuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
1820 ; SDAG-LABEL: umuloi16_load2:
1822 ; SDAG-NEXT: movq %rdx, %rcx
1823 ; SDAG-NEXT: movl %edi, %eax
1824 ; SDAG-NEXT: # kill: def $ax killed $ax killed $eax
1825 ; SDAG-NEXT: mulw (%rsi)
1826 ; SDAG-NEXT: seto %dl
1827 ; SDAG-NEXT: movw %ax, (%rcx)
1828 ; SDAG-NEXT: movl %edx, %eax
1831 ; FAST-LABEL: umuloi16_load2:
1833 ; FAST-NEXT: movq %rdx, %rcx
1834 ; FAST-NEXT: movl %edi, %eax
1835 ; FAST-NEXT: # kill: def $ax killed $ax killed $eax
1836 ; FAST-NEXT: mulw (%rsi)
1837 ; FAST-NEXT: seto %dl
1838 ; FAST-NEXT: movw %ax, (%rcx)
1839 ; FAST-NEXT: andb $1, %dl
1840 ; FAST-NEXT: movzbl %dl, %eax
1843 ; WIN64-LABEL: umuloi16_load2:
1845 ; WIN64-NEXT: movl %ecx, %eax
1846 ; WIN64-NEXT: mulw (%rdx)
1847 ; WIN64-NEXT: seto %cl
1848 ; WIN64-NEXT: movw %ax, (%r8)
1849 ; WIN64-NEXT: movl %ecx, %eax
1852 ; WIN32-LABEL: umuloi16_load2:
1854 ; WIN32-NEXT: pushl %esi
1855 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
1856 ; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1857 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1858 ; WIN32-NEXT: mulw (%ecx)
1859 ; WIN32-NEXT: seto %cl
1860 ; WIN32-NEXT: movw %ax, (%esi)
1861 ; WIN32-NEXT: movl %ecx, %eax
1862 ; WIN32-NEXT: popl %esi
; IR under test: the loaded value is the second intrinsic operand.
1864 %v2 = load i16, i16* %ptr2
1865 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
1866 %val = extractvalue {i16, i1} %t, 0
1867 %obit = extractvalue {i16, i1} %t, 1
1868 store i16 %val, i16* %res
; umuloi32_load: unsigned i32 multiply-with-overflow whose first operand is
; loaded from %ptr1. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: the default-isel x86-64 Linux run and the Win64 run use the
; loaded value as the memory operand of mull; the fast-isel run loads it into
; %eax first and multiplies by %esi; the i386 run loads the pointee into %eax
; and multiplies by a stack operand. On x86-64 Linux the %res pointer is
; copied from %rdx to %rcx before the multiply and the store goes through %rcx.
1872 define zeroext i1 @umuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
1873 ; SDAG-LABEL: umuloi32_load:
1875 ; SDAG-NEXT: movq %rdx, %rcx
1876 ; SDAG-NEXT: movl %esi, %eax
1877 ; SDAG-NEXT: mull (%rdi)
1878 ; SDAG-NEXT: seto %dl
1879 ; SDAG-NEXT: movl %eax, (%rcx)
1880 ; SDAG-NEXT: movl %edx, %eax
1883 ; FAST-LABEL: umuloi32_load:
1885 ; FAST-NEXT: movq %rdx, %rcx
1886 ; FAST-NEXT: movl (%rdi), %eax
1887 ; FAST-NEXT: mull %esi
1888 ; FAST-NEXT: seto %dl
1889 ; FAST-NEXT: movl %eax, (%rcx)
1890 ; FAST-NEXT: andb $1, %dl
1891 ; FAST-NEXT: movzbl %dl, %eax
1894 ; WIN64-LABEL: umuloi32_load:
1896 ; WIN64-NEXT: movl %edx, %eax
1897 ; WIN64-NEXT: mull (%rcx)
1898 ; WIN64-NEXT: seto %cl
1899 ; WIN64-NEXT: movl %eax, (%r8)
1900 ; WIN64-NEXT: movl %ecx, %eax
1903 ; WIN32-LABEL: umuloi32_load:
1905 ; WIN32-NEXT: pushl %esi
1906 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
1907 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1908 ; WIN32-NEXT: movl (%eax), %eax
1909 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
1910 ; WIN32-NEXT: seto %cl
1911 ; WIN32-NEXT: movl %eax, (%esi)
1912 ; WIN32-NEXT: movl %ecx, %eax
1913 ; WIN32-NEXT: popl %esi
; IR under test: the loaded value is the first intrinsic operand.
1915 %v1 = load i32, i32* %ptr1
1916 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
1917 %val = extractvalue {i32, i1} %t, 0
1918 %obit = extractvalue {i32, i1} %t, 1
1919 store i32 %val, i32* %res
; umuloi32_load2: unsigned i32 multiply-with-overflow whose second operand is
; loaded from %ptr2. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: every run configuration places %v1 in %eax and uses the loaded
; value directly as the memory operand of mull. On x86-64 Linux the %res
; pointer is copied from %rdx to %rcx before the multiply and the store goes
; through %rcx.
1923 define zeroext i1 @umuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
1924 ; SDAG-LABEL: umuloi32_load2:
1926 ; SDAG-NEXT: movq %rdx, %rcx
1927 ; SDAG-NEXT: movl %edi, %eax
1928 ; SDAG-NEXT: mull (%rsi)
1929 ; SDAG-NEXT: seto %dl
1930 ; SDAG-NEXT: movl %eax, (%rcx)
1931 ; SDAG-NEXT: movl %edx, %eax
1934 ; FAST-LABEL: umuloi32_load2:
1936 ; FAST-NEXT: movq %rdx, %rcx
1937 ; FAST-NEXT: movl %edi, %eax
1938 ; FAST-NEXT: mull (%rsi)
1939 ; FAST-NEXT: seto %dl
1940 ; FAST-NEXT: movl %eax, (%rcx)
1941 ; FAST-NEXT: andb $1, %dl
1942 ; FAST-NEXT: movzbl %dl, %eax
1945 ; WIN64-LABEL: umuloi32_load2:
1947 ; WIN64-NEXT: movl %ecx, %eax
1948 ; WIN64-NEXT: mull (%rdx)
1949 ; WIN64-NEXT: seto %cl
1950 ; WIN64-NEXT: movl %eax, (%r8)
1951 ; WIN64-NEXT: movl %ecx, %eax
1954 ; WIN32-LABEL: umuloi32_load2:
1956 ; WIN32-NEXT: pushl %esi
1957 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
1958 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
1959 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1960 ; WIN32-NEXT: mull (%ecx)
1961 ; WIN32-NEXT: seto %cl
1962 ; WIN32-NEXT: movl %eax, (%esi)
1963 ; WIN32-NEXT: movl %ecx, %eax
1964 ; WIN32-NEXT: popl %esi
; IR under test: the loaded value is the second intrinsic operand.
1966 %v2 = load i32, i32* %ptr2
1967 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
1968 %val = extractvalue {i32, i1} %t, 0
1969 %obit = extractvalue {i32, i1} %t, 1
1970 store i32 %val, i32* %res
; umuloi64_load: unsigned i64 multiply-with-overflow whose first operand is
; loaded from %ptr1. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: the default-isel x86-64 Linux run and the Win64 run use the
; loaded value as the memory operand of mulq; the fast-isel run loads it into
; %rax first and multiplies by %rsi. The i386 run expands the multiply inline
; into three 32-bit mull instructions and ORs together several flag bytes
; (two seto results, one setb after the high-word add, and the AND of two
; "high half is non-zero" tests); one seto result is spilled to the stack and
; reloaded by the orb.
1974 define zeroext i1 @umuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
1975 ; SDAG-LABEL: umuloi64_load:
1977 ; SDAG-NEXT: movq %rdx, %rcx
1978 ; SDAG-NEXT: movq %rsi, %rax
1979 ; SDAG-NEXT: mulq (%rdi)
1980 ; SDAG-NEXT: seto %dl
1981 ; SDAG-NEXT: movq %rax, (%rcx)
1982 ; SDAG-NEXT: movl %edx, %eax
1985 ; FAST-LABEL: umuloi64_load:
1987 ; FAST-NEXT: movq %rdx, %rcx
1988 ; FAST-NEXT: movq (%rdi), %rax
1989 ; FAST-NEXT: mulq %rsi
1990 ; FAST-NEXT: seto %dl
1991 ; FAST-NEXT: movq %rax, (%rcx)
1992 ; FAST-NEXT: andb $1, %dl
1993 ; FAST-NEXT: movzbl %dl, %eax
1996 ; WIN64-LABEL: umuloi64_load:
1998 ; WIN64-NEXT: movq %rdx, %rax
1999 ; WIN64-NEXT: mulq (%rcx)
2000 ; WIN64-NEXT: seto %cl
2001 ; WIN64-NEXT: movq %rax, (%r8)
2002 ; WIN64-NEXT: movl %ecx, %eax
2005 ; WIN32-LABEL: umuloi64_load:
2007 ; WIN32-NEXT: pushl %ebp
2008 ; WIN32-NEXT: pushl %ebx
2009 ; WIN32-NEXT: pushl %edi
2010 ; WIN32-NEXT: pushl %esi
2011 ; WIN32-NEXT: pushl %eax
2012 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
2013 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
2014 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
2015 ; WIN32-NEXT: movl (%eax), %esi
2016 ; WIN32-NEXT: movl 4(%eax), %eax
2017 ; WIN32-NEXT: testl %ecx, %ecx
2018 ; WIN32-NEXT: setne %dl
2019 ; WIN32-NEXT: testl %eax, %eax
2020 ; WIN32-NEXT: setne %bl
2021 ; WIN32-NEXT: andb %dl, %bl
2022 ; WIN32-NEXT: mull %ebp
2023 ; WIN32-NEXT: movl %eax, %edi
2024 ; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
2025 ; WIN32-NEXT: movl %ecx, %eax
2026 ; WIN32-NEXT: mull %esi
2027 ; WIN32-NEXT: movl %eax, %ecx
2028 ; WIN32-NEXT: seto %bh
2029 ; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
2030 ; WIN32-NEXT: addl %edi, %ecx
2031 ; WIN32-NEXT: movl %esi, %eax
2032 ; WIN32-NEXT: mull %ebp
2033 ; WIN32-NEXT: addl %ecx, %edx
2034 ; WIN32-NEXT: setb %cl
2035 ; WIN32-NEXT: orb %bh, %cl
2036 ; WIN32-NEXT: orb %bl, %cl
2037 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
2038 ; WIN32-NEXT: movl %eax, (%esi)
2039 ; WIN32-NEXT: movl %edx, 4(%esi)
2040 ; WIN32-NEXT: movl %ecx, %eax
2041 ; WIN32-NEXT: addl $4, %esp
2042 ; WIN32-NEXT: popl %esi
2043 ; WIN32-NEXT: popl %edi
2044 ; WIN32-NEXT: popl %ebx
2045 ; WIN32-NEXT: popl %ebp
; IR under test: the loaded value is the first intrinsic operand.
2047 %v1 = load i64, i64* %ptr1
2048 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
2049 %val = extractvalue {i64, i1} %t, 0
2050 %obit = extractvalue {i64, i1} %t, 1
2051 store i64 %val, i64* %res
; umuloi64_load2: unsigned i64 multiply-with-overflow whose second operand is
; loaded from %ptr2. The product is stored through %res and the overflow bit
; is extracted as %obit.
; Expected code: all three 64-bit run configurations place %v1 in %rax and use
; the loaded value directly as the memory operand of mulq. The i386 run
; expands the multiply inline into three 32-bit mull instructions and ORs
; together several flag bytes (two seto results, one setb after the high-word
; add, and the AND of two "high half is non-zero" tests); here the flags stay
; in byte registers (%cl/%ch/%bl) with no stack spill.
2055 define zeroext i1 @umuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
2056 ; SDAG-LABEL: umuloi64_load2:
2058 ; SDAG-NEXT: movq %rdx, %rcx
2059 ; SDAG-NEXT: movq %rdi, %rax
2060 ; SDAG-NEXT: mulq (%rsi)
2061 ; SDAG-NEXT: seto %dl
2062 ; SDAG-NEXT: movq %rax, (%rcx)
2063 ; SDAG-NEXT: movl %edx, %eax
2066 ; FAST-LABEL: umuloi64_load2:
2068 ; FAST-NEXT: movq %rdx, %rcx
2069 ; FAST-NEXT: movq %rdi, %rax
2070 ; FAST-NEXT: mulq (%rsi)
2071 ; FAST-NEXT: seto %dl
2072 ; FAST-NEXT: movq %rax, (%rcx)
2073 ; FAST-NEXT: andb $1, %dl
2074 ; FAST-NEXT: movzbl %dl, %eax
2077 ; WIN64-LABEL: umuloi64_load2:
2079 ; WIN64-NEXT: movq %rcx, %rax
2080 ; WIN64-NEXT: mulq (%rdx)
2081 ; WIN64-NEXT: seto %cl
2082 ; WIN64-NEXT: movq %rax, (%r8)
2083 ; WIN64-NEXT: movl %ecx, %eax
2086 ; WIN32-LABEL: umuloi64_load2:
2088 ; WIN32-NEXT: pushl %ebp
2089 ; WIN32-NEXT: pushl %ebx
2090 ; WIN32-NEXT: pushl %edi
2091 ; WIN32-NEXT: pushl %esi
2092 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
2093 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
2094 ; WIN32-NEXT: movl (%edx), %ebp
2095 ; WIN32-NEXT: movl 4(%edx), %esi
2096 ; WIN32-NEXT: testl %eax, %eax
2097 ; WIN32-NEXT: setne %dl
2098 ; WIN32-NEXT: testl %esi, %esi
2099 ; WIN32-NEXT: setne %bl
2100 ; WIN32-NEXT: andb %dl, %bl
2101 ; WIN32-NEXT: mull %ebp
2102 ; WIN32-NEXT: movl %eax, %edi
2103 ; WIN32-NEXT: seto %cl
2104 ; WIN32-NEXT: movl %esi, %eax
2105 ; WIN32-NEXT: mull {{[0-9]+}}(%esp)
2106 ; WIN32-NEXT: movl %eax, %esi
2107 ; WIN32-NEXT: seto %ch
2108 ; WIN32-NEXT: orb %cl, %ch
2109 ; WIN32-NEXT: addl %edi, %esi
2110 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
2111 ; WIN32-NEXT: mull %ebp
2112 ; WIN32-NEXT: addl %esi, %edx
2113 ; WIN32-NEXT: setb %cl
2114 ; WIN32-NEXT: orb %ch, %cl
2115 ; WIN32-NEXT: orb %bl, %cl
2116 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
2117 ; WIN32-NEXT: movl %eax, (%esi)
2118 ; WIN32-NEXT: movl %edx, 4(%esi)
2119 ; WIN32-NEXT: movl %ecx, %eax
2120 ; WIN32-NEXT: popl %esi
2121 ; WIN32-NEXT: popl %edi
2122 ; WIN32-NEXT: popl %ebx
2123 ; WIN32-NEXT: popl %ebp
; IR under test: the loaded value is the second intrinsic operand.
2125 %v2 = load i64, i64* %ptr2
2126 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
2127 %val = extractvalue {i64, i1} %t, 0
2128 %obit = extractvalue {i64, i1} %t, 1
2129 store i64 %val, i64* %res
; Declarations of the multiply-with-overflow intrinsics called by the tests in
; this file: signed (smul) and unsigned (umul) variants for i8, i16, i32 and
; i64. Each takes two integers of the same width and returns a
; {result, overflow-bit} pair; all are marked nounwind readnone.
2133 declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone
2134 declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
2135 declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
2136 declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
2137 declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone
2138 declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
2139 declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
2140 declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
; Branch-weight metadata referenced through "!prof !0" by conditional branches
; in this file. The two weights are 0 and 2147483647, which per the LLVM
; branch_weights convention apply to the branch's first and second successor
; respectively, i.e. the first-listed target is marked as never taken.
2142 !0 = !{!"branch_weights", i32 0, i32 2147483647}