1 ; REQUIRES: aarch64-registered-target
2 ; RUN: opt -codegenprepare < %s -mtriple=aarch64-none-linux-gnu -S | FileCheck %s
4 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5 target triple = "aarch64--linux-gnu"
7 ; Expect to skip merging the two empty blocks (sw.bb and sw.bb2) into sw.epilog,
8 ; as both of them are unlikely to be executed.
; @f_switch: switch on %c whose case blocks all feed the phi in %sw.epilog.
; The expected output below still routes cases 10 and 20 to %sw.bb and
; %sw.bb2, and the expected phi is identical to the input phi.
9 define i32 @f_switch(i32 %c) {
10 ; CHECK-LABEL: @f_switch
12 ; CHECK: i32 10, label %sw.bb
13 ; CHECK: i32 20, label %sw.bb2
; Values of %c with no matching case go to %sw.default.
15 switch i32 %c, label %sw.default [
22 sw.bb: ; preds = %entry
25 sw.bb2: ; preds = %entry
28 sw.bb3: ; preds = %entry
29 call void @callcase3()
32 sw.bb4: ; preds = %entry
33 call void @callcase4()
36 sw.default: ; preds = %entry
37 call void @calldefault()
40 ; CHECK-LABEL: sw.epilog:
41 ; CHECK: %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
42 sw.epilog: ; preds = %sw.default, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb
; Selects one of @FD/@F4/@F3/@F2/@F1 depending on the predecessor block;
; %sw.bb and %sw.bb2 contribute different values (@F1 vs. @F2).
43 %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
48 ; Expect not to merge sw.bb2 because of the conflict in the incoming value from
49 ; sw.bb which is already merged.
; @f_switch2: same block layout as @f_switch. The expected output redirects
; case 10 straight to %sw.epilog (so @F1 arrives from %entry in the phi),
; while case 20 still targets %sw.bb2.
50 define i32 @f_switch2(i32 %c) {
51 ; CHECK-LABEL: @f_switch2
53 ; CHECK: i32 10, label %sw.epilog
54 ; CHECK: i32 20, label %sw.bb2
; Values of %c with no matching case go to %sw.default.
56 switch i32 %c, label %sw.default [
63 sw.bb: ; preds = %entry
66 sw.bb2: ; preds = %entry
69 sw.bb3: ; preds = %entry
70 call void @callcase3()
73 sw.bb4: ; preds = %entry
74 call void @callcase4()
77 sw.default: ; preds = %entry
78 call void @calldefault()
81 ; CHECK-LABEL: sw.epilog:
82 ; CHECK: %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %entry ]
83 sw.epilog: ; preds = %sw.default, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb
; %sw.bb supplies @F1 and %sw.bb2 supplies @F2. Once %sw.bb is folded into
; %entry, folding %sw.bb2 as well would give %entry two different incoming
; values, which is the conflict mentioned in the test description.
84 %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
89 ; Multiple empty blocks should be considered together if all incoming values
90 ; from them are the same. We expect to merge both empty blocks (sw.bb and sw.bb2)
91 ; because the sum of their frequencies is higher than the threshold.
; @f_switch3: same block layout as @f_switch, except that %sw.bb and %sw.bb2
; both supply @F1 to the phi. The expected output redirects cases 10 and 20
; straight to %sw.epilog, giving two [ @F1, %entry ] entries in the phi.
92 define i32 @f_switch3(i32 %c) {
93 ; CHECK-LABEL: @f_switch3
95 ; CHECK: i32 10, label %sw.epilog
96 ; CHECK: i32 20, label %sw.epilog
; Values of %c with no matching case go to %sw.default.
98 switch i32 %c, label %sw.default [
100 i32 20, label %sw.bb2
101 i32 30, label %sw.bb3
102 i32 40, label %sw.bb4
105 sw.bb: ; preds = %entry
108 sw.bb2: ; preds = %entry
111 sw.bb3: ; preds = %entry
112 call void @callcase3()
115 sw.bb4: ; preds = %entry
116 call void @callcase4()
119 sw.default: ; preds = %entry
120 call void @calldefault()
123 ; CHECK-LABEL: sw.epilog:
124 ; CHECK: %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F1, %entry ], [ @F1, %entry ]
125 sw.epilog: ; preds = %sw.default, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb
; Both %sw.bb2 and %sw.bb contribute the same value (@F1), so redirecting
; both edges to come from %entry introduces no conflicting incoming value.
126 %fp.0 = phi ptr [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F1, %sw.bb2 ], [ @F1, %sw.bb ]
; External symbols used by the switch tests above.
; @F1, @F2, @F3, @F4 and @FD appear only as pointer operands of the %fp.0 phi
; nodes; @callcase3, @callcase4 and @calldefault are called from the
; non-empty case blocks.
131 declare void @F1(...) local_unnamed_addr
132 declare void @F2(...) local_unnamed_addr
133 declare void @F3(...) local_unnamed_addr
134 declare void @F4(...) local_unnamed_addr
135 declare void @FD(...) local_unnamed_addr
136 declare void @callcase3(...) local_unnamed_addr
137 declare void @callcase4(...) local_unnamed_addr
138 declare void @calldefault(...) local_unnamed_addr
; Branch-weight profile metadata; each node carries five i32 weights.
; NOTE(review): the !prof attachments are not visible in this excerpt.
; Presumably !0, !1 and !2 are attached to the switch instructions of
; @f_switch, @f_switch2 and @f_switch3 respectively, with the first weight
; belonging to the default destination - confirm against the full file.
140 !0 = !{!"branch_weights", i32 5, i32 1, i32 1,i32 5, i32 5}
141 !1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1}
142 !2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1}
145 ; This tests that BFI/BPI are created without hitting any assertion in
146 ; isMergingEmptyBlockProfitable() in the case where empty blocks are removed before creating BFI/BPI.
; Globals read by @should_not_assert: @b is the i32 it loads and compares
; against 6; @a holds the pointer it loads before and inside its loops.
147 @b = common global i32 0, align 4
148 @a = common global ptr null, align 8
; @should_not_assert: loop nest containing a switch whose two cases share one
; target block (%while.cond2.loopexit) that holds nothing but an
; unconditional branch. No output checks are visible for this function in
; this excerpt.
149 define i32 @should_not_assert(i32 %i) local_unnamed_addr {
; Enter the loops only when the value loaded from @b equals 6; otherwise go
; directly to %sw.epilog.
151 %0 = load i32, ptr @b, align 4
152 %cond = icmp eq i32 %0, 6
153 br i1 %cond, label %while.cond.preheader, label %sw.epilog
155 while.cond.preheader: ; preds = %entry
; Load the pointer stored in @a once; %magicptr is its integer value and
; %arrayidx is the address of the i32 element at index 1.
156 %1 = load ptr, ptr @a, align 8
157 %magicptr = ptrtoint ptr %1 to i64
158 %arrayidx = getelementptr inbounds i32, ptr %1, i64 1
161 while.cond: ; preds = %while.cond.preheader, %land.rhs
; Pointer values 32 and 0 both leave via %while.cond2.loopexit; any other
; value continues at %land.rhs.
162 switch i64 %magicptr, label %land.rhs [
163 i64 32, label %while.cond2.loopexit
164 i64 0, label %while.cond2.loopexit
167 land.rhs: ; preds = %while.cond
; Leave the first loop when the element at %arrayidx is zero, else iterate.
168 %2 = load i32, ptr %arrayidx, align 4
169 %tobool1 = icmp eq i32 %2, 0
170 br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond
172 while.cond2thread-pre-split.loopexit: ; preds = %land.rhs
; Block consisting solely of an unconditional branch.
173 br label %while.cond2thread-pre-split
175 while.cond2thread-pre-split: ; preds = %while.cond2thread-pre-split.loopexit, %while.body4
; %.pr is the freshly reloaded pointer when arriving from %while.body4 and the
; original %1 when arriving from the first loop's exit.
176 %.pr = phi ptr [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ]
177 br label %while.cond2
179 while.cond2.loopexit: ; preds = %while.cond, %while.cond
; Block consisting solely of an unconditional branch; it is listed twice as a
; successor of %while.cond because both switch cases target it.
180 br label %while.cond2
182 while.cond2: ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split
; Second loop header: exit to %sw.epilog once the current pointer is null.
183 %3 = phi ptr [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ]
184 %tobool3 = icmp eq ptr %3, null
185 br i1 %tobool3, label %sw.epilog, label %while.body4
187 while.body4: ; preds = %while.cond2
; Call @fn2, then reload the pointer from @a for the next iteration.
188 tail call void @fn2()
189 %.pr.pre = load ptr, ptr @a, align 8
190 br label %while.cond2thread-pre-split
192 sw.epilog: ; preds = %while.cond2, %entry
; External function called from %while.body4 in @should_not_assert.
197 declare void @fn2(...) local_unnamed_addr