1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck -check-prefixes=CHECKLX %s
3 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck -check-prefixes=CHECKAIX %s
4 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck -check-prefixes=CHECKAIX32 %s
6 ; The instructions ADDIStocHA8/LDtocL are used to calculate the address of
7 ; globals. The ones in bb.3.if.end could not be hoisted by Machine LICM
8 ; because of the BCTRL8_LDinto_toc in bb.2.if.then. That call makes the
9 ; compiler save the TOC pointer to the stack before the call and reload it
10 ; into X2 afterwards. By communicating to Machine LICM that X2 is guaranteed
11 ; to have the same value before and after BCTRL8_LDinto_toc, these
12 ; instructions can be hoisted out of bb.3.if.end to outside of the loop.
14 ; Pre Machine LICM MIR
18 ; successors: %bb.2.if.then(0x40000000), %bb.3.if.end(0x40000000)
22 ; %5 = ADDIStocHA8 %x2, @ga
23 ; %6 = LDtocL @ga, killed %5 :: (load (s64) from got)
24 ; %7 = LWZ 0, %6 :: (volatile dereferenceable load (s32) from @ga)
25 ; %8 = ADDIStocHA8 %x2, @gb
26 ; %9 = LDtocL @gb, killed %8 :: (load (s64) from got)
27 ; %10 = LWZ 0, killed %9 :: (volatile dereferenceable load (s32) from @gb)
28 ; %0 = LWZ 0, %6 :: (volatile dereferenceable load (s32) from @ga)
29 ; %11 = CMPW killed %7, killed %10
30 ; BCC 44, killed %11, %bb.2.if.then
34 ; %1 = PHI %0, %bb.0.entry, %3, %bb.3.if.end
35 ; ADJCALLSTACKDOWN 32, 0, implicit-def dead %r1, implicit %r1
37 ; STD %20, 24, %x1 :: (store (s64) into stack + 24)
38 ; %21 = EXTSW_32_64 %1
41 ; MTCTR8 %4, implicit-def %ctr8
42 ; BCTRL8_LDinto_toc 24, %x1, csr_ppc64_altivec, implicit-def dead %lr8, implicit-def dead %x2, implicit %ctr8, implicit %rm, implicit %x3, implicit %x12, implicit %x2, implicit-def %r1, implicit-def %x3
43 ; ADJCALLSTACKUP 32, 0, implicit-def dead %r1, implicit %r1
46 ; BLR8 implicit %lr8, implicit %rm, implicit %x3
49 ; successors: %bb.2.if.then(0x04000000), %bb.3.if.end(0x7c000000)
51 ; %2 = PHI %0, %bb.0.entry, %3, %bb.3.if.end
53 ; %13 = ADDIStocHA8 %x2, @ga
54 ; %14 = LDtocL @ga, killed %13 :: (load (s64) from got)
55 ; STW killed %12, 0, %14 :: (volatile store (s32) into @ga)
56 ; %15 = LWZ 0, %14 :: (volatile dereferenceable load (s32) from @ga)
57 ; %16 = ADDIStocHA8 %x2, @gb
58 ; %17 = LDtocL @gb, killed %16 :: (load (s64) from got)
59 ; %18 = LWZ 0, killed %17 :: (volatile dereferenceable load (s32) from @gb)
60 ; %3 = LWZ 0, %14 :: (volatile dereferenceable load (s32) from @ga)
61 ; %19 = CMPW killed %15, killed %18
62 ; BCC 44, killed %19, %bb.2.if.then
65 @ga = external global i32, align 4 ; read and written with volatile accesses in @test
66 @gb = external global i32, align 4 ; read with volatile loads in @test
67 define signext i32 @test(ptr nocapture %FP) local_unnamed_addr #0 {
68 ; CHECKLX-LABEL: test:
69 ; CHECKLX: # %bb.0: # %entry
70 ; CHECKLX-NEXT: mr 12, 3
71 ; CHECKLX-NEXT: addis 3, 2, .LC0@toc@ha
72 ; CHECKLX-NEXT: addis 4, 2, .LC1@toc@ha
73 ; CHECKLX-NEXT: ld 3, .LC0@toc@l(3)
74 ; CHECKLX-NEXT: ld 5, .LC1@toc@l(4)
75 ; CHECKLX-NEXT: lwz 6, 0(3)
76 ; CHECKLX-NEXT: .p2align 5
77 ; CHECKLX-NEXT: .LBB0_1: # %if.end
79 ; CHECKLX-NEXT: lwz 7, 0(5)
80 ; CHECKLX-NEXT: lwz 4, 0(3)
81 ; CHECKLX-NEXT: cmpw 6, 7
82 ; CHECKLX-NEXT: bgt 0, .LBB0_3
83 ; CHECKLX-NEXT: # %bb.2: # %if.end
85 ; CHECKLX-NEXT: addi 4, 4, 1
86 ; CHECKLX-NEXT: stw 4, 0(3)
87 ; CHECKLX-NEXT: lwz 6, 0(3)
88 ; CHECKLX-NEXT: b .LBB0_1
89 ; CHECKLX-NEXT: .LBB0_3: # %if.then
90 ; CHECKLX-NEXT: mflr 0
91 ; CHECKLX-NEXT: stdu 1, -32(1)
92 ; CHECKLX-NEXT: std 2, 24(1)
93 ; CHECKLX-NEXT: std 0, 48(1)
94 ; CHECKLX-NEXT: .cfi_def_cfa_offset 32
95 ; CHECKLX-NEXT: .cfi_offset lr, 16
96 ; CHECKLX-NEXT: mtctr 12
97 ; CHECKLX-NEXT: extsw 3, 4
99 ; CHECKLX-NEXT: ld 2, 24(1)
100 ; CHECKLX-NEXT: addi 1, 1, 32
101 ; CHECKLX-NEXT: ld 0, 16(1)
102 ; CHECKLX-NEXT: mtlr 0
105 ; CHECKAIX-LABEL: test:
106 ; CHECKAIX: # %bb.0: # %entry
107 ; CHECKAIX-NEXT: ld 5, L..C0(2) # @ga
108 ; CHECKAIX-NEXT: ld 6, L..C1(2) # @gb
109 ; CHECKAIX-NEXT: L..BB0_1: # %if.end
111 ; CHECKAIX-NEXT: lwz 4, 0(5)
112 ; CHECKAIX-NEXT: lwz 7, 0(6)
113 ; CHECKAIX-NEXT: cmpw 4, 7
114 ; CHECKAIX-NEXT: lwz 4, 0(5)
115 ; CHECKAIX-NEXT: bgt 0, L..BB0_3
116 ; CHECKAIX-NEXT: # %bb.2: # %if.end
118 ; CHECKAIX-NEXT: addi 4, 4, 1
119 ; CHECKAIX-NEXT: stw 4, 0(5)
120 ; CHECKAIX-NEXT: b L..BB0_1
121 ; CHECKAIX-NEXT: L..BB0_3: # %if.then
122 ; CHECKAIX-NEXT: mflr 0
123 ; CHECKAIX-NEXT: stdu 1, -112(1)
124 ; CHECKAIX-NEXT: ld 5, 0(3)
125 ; CHECKAIX-NEXT: std 0, 128(1)
126 ; CHECKAIX-NEXT: ld 11, 16(3)
127 ; CHECKAIX-NEXT: std 2, 40(1)
128 ; CHECKAIX-NEXT: ld 2, 8(3)
129 ; CHECKAIX-NEXT: extsw 3, 4
130 ; CHECKAIX-NEXT: mtctr 5
131 ; CHECKAIX-NEXT: bctrl
132 ; CHECKAIX-NEXT: ld 2, 40(1)
133 ; CHECKAIX-NEXT: addi 1, 1, 112
134 ; CHECKAIX-NEXT: ld 0, 16(1)
135 ; CHECKAIX-NEXT: mtlr 0
138 ; CHECKAIX32-LABEL: test:
139 ; CHECKAIX32: # %bb.0: # %entry
140 ; CHECKAIX32-NEXT: lwz 5, L..C0(2) # @ga
141 ; CHECKAIX32-NEXT: lwz 6, L..C1(2) # @gb
142 ; CHECKAIX32-NEXT: L..BB0_1: # %if.end
144 ; CHECKAIX32-NEXT: lwz 4, 0(5)
145 ; CHECKAIX32-NEXT: lwz 7, 0(6)
146 ; CHECKAIX32-NEXT: cmpw 4, 7
147 ; CHECKAIX32-NEXT: lwz 4, 0(5)
148 ; CHECKAIX32-NEXT: bgt 0, L..BB0_3
149 ; CHECKAIX32-NEXT: # %bb.2: # %if.end
151 ; CHECKAIX32-NEXT: addi 4, 4, 1
152 ; CHECKAIX32-NEXT: stw 4, 0(5)
153 ; CHECKAIX32-NEXT: b L..BB0_1
154 ; CHECKAIX32-NEXT: L..BB0_3: # %if.then
155 ; CHECKAIX32-NEXT: mflr 0
156 ; CHECKAIX32-NEXT: stwu 1, -64(1)
157 ; CHECKAIX32-NEXT: lwz 5, 0(3)
158 ; CHECKAIX32-NEXT: stw 0, 72(1)
159 ; CHECKAIX32-NEXT: stw 2, 20(1)
160 ; CHECKAIX32-NEXT: mtctr 5
161 ; CHECKAIX32-NEXT: lwz 11, 8(3)
162 ; CHECKAIX32-NEXT: lwz 2, 4(3)
163 ; CHECKAIX32-NEXT: mr 3, 4
164 ; CHECKAIX32-NEXT: bctrl
165 ; CHECKAIX32-NEXT: lwz 2, 20(1)
166 ; CHECKAIX32-NEXT: addi 1, 1, 64
167 ; CHECKAIX32-NEXT: lwz 0, 8(1)
168 ; CHECKAIX32-NEXT: mtlr 0
169 ; CHECKAIX32-NEXT: blr
171 %0 = load volatile i32, ptr @ga, align 4
172 %1 = load volatile i32, ptr @gb, align 4
173 %cmp1 = icmp sgt i32 %0, %1
174 %2 = load volatile i32, ptr @ga, align 4
175 br i1 %cmp1, label %if.then, label %if.end
177 if.then: ; preds = %if.end, %entry
178 %.lcssa = phi i32 [ %2, %entry ], [ %6, %if.end ]
179 %call = tail call signext i32 %FP(i32 signext %.lcssa) #1
182 if.end: ; preds = %entry, %if.end
183 %3 = phi i32 [ %6, %if.end ], [ %2, %entry ]
184 %inc = add nsw i32 %3, 1
185 store volatile i32 %inc, ptr @ga, align 4
186 %4 = load volatile i32, ptr @ga, align 4
187 %5 = load volatile i32, ptr @gb, align 4
188 %cmp = icmp sgt i32 %4, %5
189 %6 = load volatile i32, ptr @ga, align 4
190 br i1 %cmp, label %if.then, label %if.end