1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -O2 -o - %s | FileCheck %s
3 target datalayout = "e-m:e-i64:64-n32:64"
4 target triple = "powerpc64le-grtev4-linux-gnu"
7 ; The code for tail-duplication during layout will produce the layout:
10 ; body1 (with copy of test2)
14 define void @tail_dup_break_cfg(i32 %tag) {
15 ; CHECK-LABEL: tail_dup_break_cfg:
16 ; CHECK: # %bb.0: # %entry
18 ; CHECK-NEXT: .cfi_def_cfa_offset 48
19 ; CHECK-NEXT: .cfi_offset lr, 16
20 ; CHECK-NEXT: .cfi_offset r30, -16
21 ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
22 ; CHECK-NEXT: stdu 1, -48(1)
23 ; CHECK-NEXT: mr 30, 3
24 ; CHECK-NEXT: std 0, 64(1)
25 ; CHECK-NEXT: andi. 3, 30, 1
26 ; CHECK-NEXT: bc 12, 1, .LBB0_3
27 ; CHECK-NEXT: # %bb.1: # %test2
28 ; CHECK-NEXT: andi. 3, 30, 2
29 ; CHECK-NEXT: bne 0, .LBB0_4
30 ; CHECK-NEXT: .LBB0_2: # %exit
31 ; CHECK-NEXT: addi 1, 1, 48
32 ; CHECK-NEXT: ld 0, 16(1)
33 ; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
36 ; CHECK-NEXT: .LBB0_3: # %body1
45 ; CHECK-NEXT: andi. 3, 30, 2
46 ; CHECK-NEXT: beq 0, .LBB0_2
47 ; CHECK-NEXT: .LBB0_4: # %body2
56 ; CHECK-NEXT: b .LBB0_2
60 %tagbit1 = and i32 %tag, 1
61 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
62 br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
70 %tagbit2 = and i32 %tag, 2
71 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
72 br i1 %tagbit2eq0, label %exit, label %body2, !prof !1 ; %exit more likely
83 ; The branch weights here hint that we shouldn't tail duplicate in this case.
84 define void @tail_dup_dont_break_cfg(i32 %tag) {
85 ; CHECK-LABEL: tail_dup_dont_break_cfg:
86 ; CHECK: # %bb.0: # %entry
88 ; CHECK-NEXT: .cfi_def_cfa_offset 48
89 ; CHECK-NEXT: .cfi_offset lr, 16
90 ; CHECK-NEXT: .cfi_offset r30, -16
91 ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
92 ; CHECK-NEXT: stdu 1, -48(1)
93 ; CHECK-NEXT: mr 30, 3
94 ; CHECK-NEXT: std 0, 64(1)
95 ; CHECK-NEXT: andi. 3, 30, 1
96 ; CHECK-NEXT: bc 4, 1, .LBB1_2
97 ; CHECK-NEXT: # %bb.1: # %body1
106 ; CHECK-NEXT: .LBB1_2: # %test2
107 ; CHECK-NEXT: andi. 3, 30, 2
108 ; CHECK-NEXT: beq 0, .LBB1_4
109 ; CHECK-NEXT: # %bb.3: # %body2
118 ; CHECK-NEXT: .LBB1_4: # %exit
119 ; CHECK-NEXT: addi 1, 1, 48
120 ; CHECK-NEXT: ld 0, 16(1)
121 ; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
127 %tagbit1 = and i32 %tag, 1
128 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
129 br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
137 %tagbit2 = and i32 %tag, 2
138 %tagbit2eq0 = icmp ne i32 %tagbit2, 0 ; NOTE: despite the "eq0" name this is `icmp ne`: true when bit 1 of %tag is set
139 br i1 %tagbit2eq0, label %body2, label %exit, !prof !3 ; %body2 more likely
155 ; This function arranges for the successors of %succ to have already been laid
156 ; out. When we consider whether to lay out %succ after %bb and to tail-duplicate
157 ; it, %v and %ret have already been placed, so we tail-duplicate as it removes a
158 ; branch and strictly increases fallthrough.
159 define void @tail_dup_no_succ(i32 %tag) {
160 ; CHECK-LABEL: tail_dup_no_succ:
161 ; CHECK: # %bb.0: # %entry
163 ; CHECK-NEXT: .cfi_def_cfa_offset 48
164 ; CHECK-NEXT: .cfi_offset lr, 16
165 ; CHECK-NEXT: .cfi_offset r30, -16
166 ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
167 ; CHECK-NEXT: stdu 1, -48(1)
168 ; CHECK-NEXT: andi. 4, 3, 1
169 ; CHECK-NEXT: std 0, 64(1)
170 ; CHECK-NEXT: bc 12, 1, .LBB2_3
171 ; CHECK-NEXT: .LBB2_1: # %v
176 ; CHECK-NEXT: .LBB2_2: # %ret
177 ; CHECK-NEXT: addi 1, 1, 48
178 ; CHECK-NEXT: ld 0, 16(1)
179 ; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
182 ; CHECK-NEXT: .LBB2_3: # %bb
183 ; CHECK-NEXT: andi. 4, 3, 2
184 ; CHECK-NEXT: bne 0, .LBB2_5
185 ; CHECK-NEXT: # %bb.4: # %succ
186 ; CHECK-NEXT: andi. 3, 3, 4
187 ; CHECK-NEXT: beq 0, .LBB2_2
188 ; CHECK-NEXT: b .LBB2_1
189 ; CHECK-NEXT: .LBB2_5: # %c
190 ; CHECK-NEXT: mr 30, 3
195 ; CHECK-NEXT: mr 3, 30
196 ; CHECK-NEXT: andi. 3, 3, 4
197 ; CHECK-NEXT: beq 0, .LBB2_2
198 ; CHECK-NEXT: b .LBB2_1
200 %tagbit1 = and i32 %tag, 1
201 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
202 br i1 %tagbit1eq0, label %v, label %bb, !prof !2 ; %v very much more likely
204 %tagbit2 = and i32 %tag, 2
205 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
206 br i1 %tagbit2eq0, label %succ, label %c, !prof !3 ; %succ more likely
212 %tagbit3 = and i32 %tag, 4
213 %tagbit3eq0 = icmp eq i32 %tagbit3, 0
214 br i1 %tagbit3eq0, label %ret, label %v, !prof !1 ; %ret more likely
223 !1 = !{!"branch_weights", i32 5, i32 3}
224 !2 = !{!"branch_weights", i32 95, i32 5}
225 !3 = !{!"branch_weights", i32 8, i32 3}