; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-64,ENABLE-64
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-64,DISABLE-64
; RUN: llc -mtriple=powerpc-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-32,ENABLE-32
; RUN: llc -mtriple=powerpc-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-32,DISABLE-32
; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-64,ENABLE-64
; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-64,DISABLE-64
; Note: Lots of tests use inline asm instead of regular calls.
; This gives us better control over what the register allocator will do.
; Otherwise, we may have spills right in the entry block, defeating
; shrink-wrapping. Moreover, some of the inline asm statements (nop)
; are here to ensure that the related paths do not end up as critical
; edges.
; Initial motivating example: Simple diamond with a call just on one side.
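;
; For reference, a rough C equivalent of @foo below. This is a sketch inferred
; from the IR in this file, not the original source; names mirror the IR.
;
;   int doSomething(int, int *);
;
;   int foo(int a, int b) {
;     int tmp;
;     if (a < b) {
;       tmp = a;
;       return doSomething(0, &tmp);
;     }
;     return a;
;   }
;
; Only the 'then' side of the diamond needs a call (and thus a frame), which is
; what makes shrink-wrapping profitable here.
;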
; CHECK-LABEL: {{.*}}foo:
; Compare the arguments and return
; ENABLE-NEXT: bgelr 0
; At a minimum, we save/restore the link register; other registers may be
; saved as well.
; Compare the arguments and jump to exit.
; This happens after the prologue is set up.
; DISABLE-NEXT: bge 0, {{.*}}[[EXIT_LABEL:BB[0-9_]+]]
; Store %a on the stack
; CHECK: stw 3, {{[0-9]+\([0-9]+\)}}
; Set the alloca address in the second argument.
; CHECK-NEXT: addi 4, 1, {{[0-9]+}}
; Set the first argument to zero.
; CHECK-NEXT: bl {{.*}}doSomething
; With shrink-wrapping, the epilogue is emitted just after the call.
; Restore the link register and return.
; Note that there could be other epilogue code before the link register is
; restored, but we do not check for it here.
; DISABLE: [[EXIT_LABEL]]:
; Without shrink-wrapping, the epilogue is in the exit block.
; Epilogue code. (What we restore does not matter.)
; DISABLE: mtlr {{[0-9]+}}
define i32 @foo(i32 %a, i32 %b) {
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
; Function Attrs: optsize
declare i32 @doSomething(i32, i32*)
; Check that we do not perform the restore inside the loop whereas the save
; is outside.
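;
; For reference, a rough C equivalent of @freqSaveAndRestoreOutsideLoop below.
; This is a sketch inferred from the IR; the inline-asm nop in the preheader is
; omitted.
;
;   extern int something();
;
;   int freqSaveAndRestoreOutsideLoop(int cond, int N) {
;     if (cond == 0)
;       return N << 1;
;     int sum = 0;
;     for (int i = 0; i < 10; i++)
;       sum += something();
;     return sum << 3;
;   }
;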
; CHECK-LABEL: {{.*}}freqSaveAndRestoreOutsideLoop:
; Shrink-wrapping allows us to skip the prologue in the else case.
; ENABLE: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
; DISABLE: cmplwi 3, 0
; DISABLE: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-DAG: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
; CHECK: slwi 3, [[SUM]], 3
; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; DISABLE: slwi 3, 4, 1
; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
; CHECK: mtlr {{[0-9]+}}
; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; ENABLE: slwi 3, 4, 1
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader
  tail call void asm "nop", ""()
for.body: ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
  %shl = shl i32 %add, 3
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
declare i32 @something(...)
; Check that we do not perform the shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
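;
; For reference, a rough C equivalent of @freqSaveAndRestoreOutsideLoop2 below.
; This is a sketch inferred from the IR; the inline-asm nops are omitted and
; the value returned after the loop is not shown in this excerpt, so the sum is
; used as a placeholder.
;
;   extern int something();
;
;   int freqSaveAndRestoreOutsideLoop2(int cond) {
;     int sum = 0;
;     for (int i = 0; i < 10; i++)
;       sum += something();
;     return sum; /* placeholder */
;   }
;
; Because the call is executed unconditionally, saving/restoring inside the
; loop would repeat on every iteration; keeping the save/restore in the
; prologue and epilogue is what the cost model is expected to choose.
;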
; CHECK-LABEL: {{.*}}freqSaveAndRestoreOutsideLoop2:
; Make sure we save the link register before the call
; CHECK: mflr {{[0-9]+}}
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-DAG: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
; CHECK: mtlr {{[0-9]+}}
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
  br label %for.preheader
  tail call void asm "nop", ""()
for.body: ; preds = %for.body, %entry
  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body
  tail call void asm "nop", ""()
for.end: ; preds = %for.body
; Check with a more complex case that we do not have save within the loop and
; restore outside.
; CHECK-LABEL: {{.*}}loopInfoSaveOutsideLoop:
; ENABLE: cmplwi 3, 0
; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: stdu 1,
; DISABLE-64-DAG: cmplwi 3, 0
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stwu 1,
; DISABLE-32-DAG: cmplwi 3, 0
; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-DAG: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
; CHECK: bl {{.*}}somethingElse
; CHECK: slwi 3, [[SUM]], 3
; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; DISABLE: slwi 3, 4, 1
; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
; CHECK: mtlr {{[0-9]+}}
; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; ENABLE: slwi 3, 4, 1
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader
  tail call void asm "nop", ""()
for.body: ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
  %shl = shl i32 %add, 3
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
declare void @somethingElse(...)
; Check with a more complex case that we do not have restore within the loop
; and save outside.
; CHECK-LABEL: {{.*}}loopInfoRestoreOutsideLoop:
; ENABLE: cmplwi 3, 0
; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: stdu 1,
; DISABLE-64-DAG: cmplwi 3, 0
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stwu 1,
; DISABLE-32-DAG: cmplwi 3, 0
; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; CHECK: bl {{.*}}somethingElse
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-DAG: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
; CHECK: slwi 3, [[SUM]], 3
; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; DISABLE: slwi 3, 4, 1
; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
; CHECK: mtlr {{[0-9]+}}
; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; ENABLE: slwi 3, 4, 1
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
for.body: ; preds = %for.body, %if.then
  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
  %shl = shl i32 %add, 3
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
; Check that we handle a function with no frame information correctly.
; CHECK-LABEL: {{.*}}emptyFrame:
; CHECK-NEXT: li 3, 0
define i32 @emptyFrame() {
; Check that we handle inline asm correctly.
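;
; For reference, a rough C equivalent of @inlineAsm below. This is a sketch
; inferred from the IR; the exact asm syntax is an approximation of the IR's
; "addi 14, 14, 1" with an r14 clobber, which is what forces the callee-saved
; register to be spilled and reloaded.
;
;   int inlineAsm(int cond, int N) {
;     if (cond == 0)
;       return N << 1;
;     for (int i = 0; i < 10; i++)
;       __asm__ volatile("addi 14, 14, 1" : : : "r14");
;     return 0;
;   }
;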
; CHECK-LABEL: {{.*}}inlineAsm:
; ENABLE: cmplwi 3, 0
; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Make sure we save the CSR used in the inline asm: r14
; ENABLE-DAG: li [[IV:[0-9]+]], 10
; ENABLE-64-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
; ENABLE-32-DAG: stw 14, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
; DISABLE: cmplwi 3, 0
; DISABLE-64-NEXT: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
; DISABLE-32-NEXT: stw 14, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; DISABLE: li [[IV:[0-9]+]], 10
; CHECK: mtctr [[IV]]
; CHECK: {{.*}}[[LOOP_LABEL:BB[0-9_]+]]: # %for.body
; Inline asm statement.
; CHECK: addi 14, 14, 1
; CHECK: bdnz {{.*}}[[LOOP_LABEL]]
; CHECK-64-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
; CHECK-32-DAG: lwz 14, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
; CHECK: [[ELSE_LABEL]]
; CHECK-NEXT: slwi 3, 4, 1
; DISABLE-64-NEXT: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
; DISABLE-32-NEXT: lwz 14, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
define i32 @inlineAsm(i32 %cond, i32 %N) {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader
  tail call void asm "nop", ""()
for.body: ; preds = %entry, %for.body
  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  tail call void asm "addi 14, 14, 1", "~{r14}"()
  %inc = add nuw nsw i32 %i.03, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body
  tail call void asm "nop", ""()
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %for.body, %if.else
  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
; Check that we handle calls to variadic functions correctly.
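;
; For reference, a rough C equivalent of @callVariadicFunc below (a sketch
; inferred from the IR):
;
;   extern int someVariadicFunc(int, ...);
;
;   int callVariadicFunc(int cond, int N) {
;     if (cond == 0)
;       return N << 1;
;     return someVariadicFunc(N, N, N, N, N, N, N) << 3;
;   }
;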
; CHECK-LABEL: {{.*}}callVariadicFunc:
; ENABLE: cmplwi 3, 0
; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; CHECK: mflr {{[0-9]+}}
; DISABLE: cmplwi 3, 0
; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Set up the varargs.
; CHECK-64-NEXT: mr 5, 3
; CHECK-64-NEXT: mr 6, 3
; CHECK-64-NEXT: mr 7, 3
; CHECK-64-NEXT: mr 8, 3
; CHECK-64-NEXT: mr 9, 3
; CHECK-32-NEXT: mr 5, 4
; CHECK-32-NEXT: mr 6, 4
; CHECK-32-NEXT: mr 7, 4
; CHECK-32-NEXT: mr 8, 4
; CHECK-32-NEXT: mr 9, 4
; CHECK-NEXT: bl {{.*}}someVariadicFunc
; CHECK: slwi 3, 3, 3
; DISABLE: b {{.*}}[[EPILOGUE_BB:BB[0-9_]+]]
; ENABLE: mtlr {{[0-9]+}}
; CHECK: {{.*}}[[ELSE_LABEL]]: # %if.else
; CHECK-NEXT: slwi 3, 4, 1
; DISABLE: {{.*}}[[EPILOGUE_BB]]: # %if.end
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
  %shl = shl i32 %call, 3
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %if.else, %if.then
  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
declare i32 @someVariadicFunc(i32, ...)
; Make sure we do not insert unreachable code after a noreturn function.
; Although it is not incorrect to insert such code, it is useless
; and it hurts the binary size.
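;
; For reference, a rough C equivalent of @noreturn below. This is a sketch
; inferred from the IR; the value returned on the normal path is not shown in
; this excerpt, so 0 is used as a placeholder.
;
;   extern void abort(void) __attribute__((noreturn));
;
;   int noreturn(signed char bad_thing) {
;     if (bad_thing != 0)
;       abort();
;     return 0; /* placeholder */
;   }
;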
; CHECK-LABEL: {{.*}}noreturn:
; DISABLE: mflr {{[0-9]+}}
; CHECK-NEXT: bne{{[-]?}} 0, {{.*}}[[ABORT:BB[0-9_]+]]
; DISABLE: mtlr {{[0-9]+}}
; CHECK: {{.*}}[[ABORT]]: # %if.abort
; ENABLE: mflr {{[0-9]+}}
; CHECK: bl {{.*}}abort
; ENABLE-NOT: mtlr {{[0-9]+}}
define i32 @noreturn(i8 signext %bad_thing) {
  %tobool = icmp eq i8 %bad_thing, 0
  br i1 %tobool, label %if.end, label %if.abort
  tail call void @abort() #0
declare void @abort() #0
attributes #0 = { noreturn nounwind }
; Make sure that we handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator of the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop, the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink-wrapping, but we
; should return gracefully and continue compilation.
; The only requirement for this test is that the compilation finishes correctly.
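;
; For reference, a rough C shape of @infiniteloop below. This is a sketch; the
; IR branches on undef, which is written here as an opaque condition.
;
;   extern int something();
;   extern int opaque_condition; /* stands in for the undef branch */
;
;   void infiniteloop() {
;     if (opaque_condition) {
;       int ptr[4];
;       int sum = 0;
;       for (;;) { /* no exit, so the body has no post-dominator but itself */
;         sum += something();
;         ptr[0] = sum;
;       }
;     }
;   }
;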
; CHECK-LABEL: {{.*}}infiniteloop
define void @infiniteloop() {
  br i1 undef, label %if.then, label %if.end
  %ptr = alloca i32, i32 4
for.body: ; preds = %for.body, %entry
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
; Another infinite loop test, this time with a body bigger than just one block.
; CHECK-LABEL: {{.*}}infiniteloop2
define void @infiniteloop2() {
  br i1 undef, label %if.then, label %if.end
  %ptr = alloca i32, i32 4
for.body: ; preds = %for.body, %entry
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2 ]
  %call = tail call i32 asm "mftb $0, 268", "=r,~{r14}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2
  tail call void asm sideeffect "nop", "~{r14}"()
  tail call void asm sideeffect "nop", "~{r14}"()
; Another infinite loop test, this time with two nested infinite loops.
; CHECK-LABEL: {{.*}}infiniteloop3
define void @infiniteloop3() {
  br i1 undef, label %loop2a, label %body
body: ; preds = %entry
  br i1 undef, label %loop2a, label %end
loop1: ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b
loop2a: ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
loop2b: ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
@columns = external global [0 x i32], align 4
@lock = common global i32 0, align 4
@htindex = common global i32 0, align 4
@stride = common global i32 0, align 4
@ht = common global i32* null, align 8
@he = common global i8* null, align 8
; Test for a bug that was triggered when the save point was equal to the
; restore point.
; Function Attrs: nounwind
; CHECK-LABEL: {{.*}}transpose
; Store of the callee-saved register saved by shrink-wrapping.
; FIXME: Test disabled: improved scheduling no longer needs any spills/reloads!
; CHECKXX: std [[CSR:[0-9]+]], -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
; Reload of the callee-saved register.
; CHECKXX: ld [[CSR]], -[[STACK_OFFSET]](1) # 8-byte Folded Reload
; Ensure there are no subsequent uses of the callee-saved register before the
; end of the function.
; CHECKXX-NOT: {{[a-z]+}} [[CSR]]
define signext i32 @transpose() {
  %0 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 1), align 4
  %shl.i = shl i32 %0, 7
  %1 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 2), align 4
  %or.i = or i32 %shl.i, %1
  %shl1.i = shl i32 %or.i, 7
  %2 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 3), align 4
  %or2.i = or i32 %shl1.i, %2
  %3 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 7), align 4
  %shl3.i = shl i32 %3, 7
  %4 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 6), align 4
  %or4.i = or i32 %shl3.i, %4
  %shl5.i = shl i32 %or4.i, 7
  %5 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 5), align 4
  %or6.i = or i32 %shl5.i, %5
  %cmp.i = icmp ugt i32 %or2.i, %or6.i
  br i1 %cmp.i, label %cond.true.i, label %cond.false.i
  %shl7.i = shl i32 %or2.i, 7
  %6 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
  %or8.i = or i32 %6, %shl7.i
  %conv.i = zext i32 %or8.i to i64
  %shl9.i = shl nuw nsw i64 %conv.i, 21
  %conv10.i = zext i32 %or6.i to i64
  %or11.i = or i64 %shl9.i, %conv10.i
  %shl12.i = shl i32 %or6.i, 7
  %7 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
  %or13.i = or i32 %7, %shl12.i
  %conv14.i = zext i32 %or13.i to i64
  %shl15.i = shl nuw nsw i64 %conv14.i, 21
  %conv16.i = zext i32 %or2.i to i64
  %or17.i = or i64 %shl15.i, %conv16.i
  %cond.i = phi i64 [ %or11.i, %cond.true.i ], [ %or17.i, %cond.false.i ]
  %shr.29.i = lshr i64 %cond.i, 17
  %conv18.i = trunc i64 %shr.29.i to i32
  store i32 %conv18.i, i32* @lock, align 4
  %rem.i = srem i64 %cond.i, 1050011
  %conv19.i = trunc i64 %rem.i to i32
  store i32 %conv19.i, i32* @htindex, align 4
  %rem20.i = urem i32 %conv18.i, 179
  %add.i = or i32 %rem20.i, 131072
  store i32 %add.i, i32* @stride, align 4
  %8 = load i32*, i32** @ht, align 8
  %arrayidx = getelementptr inbounds i32, i32* %8, i64 %rem.i
  %9 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp eq i32 %9, %conv18.i
  br i1 %cmp1, label %if.then, label %if.end
  %idxprom.lcssa = phi i64 [ %rem.i, %hash.exit ], [ %idxprom.1, %if.end ], [ %idxprom.2, %if.end.1 ], [ %idxprom.3, %if.end.2 ], [ %idxprom.4, %if.end.3 ], [ %idxprom.5, %if.end.4 ], [ %idxprom.6, %if.end.5 ], [ %idxprom.7, %if.end.6 ]
  %10 = load i8*, i8** @he, align 8
  %arrayidx3 = getelementptr inbounds i8, i8* %10, i64 %idxprom.lcssa
  %11 = load i8, i8* %arrayidx3, align 1
  %conv = sext i8 %11 to i32
  %add = add nsw i32 %add.i, %conv19.i
  %cmp4 = icmp sgt i32 %add, 1050010
  %sub = add nsw i32 %add, -1050011
  %sub.add = select i1 %cmp4, i32 %sub, i32 %add
  %idxprom.1 = sext i32 %sub.add to i64
  %arrayidx.1 = getelementptr inbounds i32, i32* %8, i64 %idxprom.1
  %12 = load i32, i32* %arrayidx.1, align 4
  %cmp1.1 = icmp eq i32 %12, %conv18.i
  br i1 %cmp1.1, label %if.then, label %if.end.1
  %retval.0 = phi i32 [ %conv, %if.then ], [ -128, %if.end.6 ]
  %add.1 = add nsw i32 %add.i, %sub.add
  %cmp4.1 = icmp sgt i32 %add.1, 1050010
  %sub.1 = add nsw i32 %add.1, -1050011
  %sub.add.1 = select i1 %cmp4.1, i32 %sub.1, i32 %add.1
  %idxprom.2 = sext i32 %sub.add.1 to i64
  %arrayidx.2 = getelementptr inbounds i32, i32* %8, i64 %idxprom.2
  %13 = load i32, i32* %arrayidx.2, align 4
  %cmp1.2 = icmp eq i32 %13, %conv18.i
  br i1 %cmp1.2, label %if.then, label %if.end.2
  %add.2 = add nsw i32 %add.i, %sub.add.1
  %cmp4.2 = icmp sgt i32 %add.2, 1050010
  %sub.2 = add nsw i32 %add.2, -1050011
  %sub.add.2 = select i1 %cmp4.2, i32 %sub.2, i32 %add.2
  %idxprom.3 = sext i32 %sub.add.2 to i64
  %arrayidx.3 = getelementptr inbounds i32, i32* %8, i64 %idxprom.3
  %14 = load i32, i32* %arrayidx.3, align 4
  %cmp1.3 = icmp eq i32 %14, %conv18.i
  br i1 %cmp1.3, label %if.then, label %if.end.3
  %add.3 = add nsw i32 %add.i, %sub.add.2
  %cmp4.3 = icmp sgt i32 %add.3, 1050010
  %sub.3 = add nsw i32 %add.3, -1050011
  %sub.add.3 = select i1 %cmp4.3, i32 %sub.3, i32 %add.3
  %idxprom.4 = sext i32 %sub.add.3 to i64
  %arrayidx.4 = getelementptr inbounds i32, i32* %8, i64 %idxprom.4
  %15 = load i32, i32* %arrayidx.4, align 4
  %cmp1.4 = icmp eq i32 %15, %conv18.i
  br i1 %cmp1.4, label %if.then, label %if.end.4
  %add.4 = add nsw i32 %add.i, %sub.add.3
  %cmp4.4 = icmp sgt i32 %add.4, 1050010
  %sub.4 = add nsw i32 %add.4, -1050011
  %sub.add.4 = select i1 %cmp4.4, i32 %sub.4, i32 %add.4
  %idxprom.5 = sext i32 %sub.add.4 to i64
  %arrayidx.5 = getelementptr inbounds i32, i32* %8, i64 %idxprom.5
  %16 = load i32, i32* %arrayidx.5, align 4
  %cmp1.5 = icmp eq i32 %16, %conv18.i
  br i1 %cmp1.5, label %if.then, label %if.end.5
  %add.5 = add nsw i32 %add.i, %sub.add.4
  %cmp4.5 = icmp sgt i32 %add.5, 1050010
  %sub.5 = add nsw i32 %add.5, -1050011
  %sub.add.5 = select i1 %cmp4.5, i32 %sub.5, i32 %add.5
  %idxprom.6 = sext i32 %sub.add.5 to i64
  %arrayidx.6 = getelementptr inbounds i32, i32* %8, i64 %idxprom.6
  %17 = load i32, i32* %arrayidx.6, align 4
  %cmp1.6 = icmp eq i32 %17, %conv18.i
  br i1 %cmp1.6, label %if.then, label %if.end.6
  %add.6 = add nsw i32 %add.i, %sub.add.5
  %cmp4.6 = icmp sgt i32 %add.6, 1050010
  %sub.6 = add nsw i32 %add.6, -1050011
  %sub.add.6 = select i1 %cmp4.6, i32 %sub.6, i32 %add.6
  %idxprom.7 = sext i32 %sub.add.6 to i64
  %arrayidx.7 = getelementptr inbounds i32, i32* %8, i64 %idxprom.7
  %18 = load i32, i32* %arrayidx.7, align 4
  %cmp1.7 = icmp eq i32 %18, %conv18.i
  br i1 %cmp1.7, label %if.then, label %cleanup