1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
; Positive test: the expected output below has no conditional branch. Instead a
; select on the compare picks between %C and %B, its result is stored to %A
; unconditionally, and the select carries !prof metadata.
4 define void @ifconvertstore(i32* %A, i32 %B, i32 %C, i32 %D) {
5 ; CHECK-LABEL: @ifconvertstore(
7 ; CHECK-NEXT: store i32 [[B:%.*]], i32* [[A:%.*]], align 4
8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
9 ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[C:%.*]], i32 [[B]], !prof [[PROF0:![0-9]+]]
10 ; CHECK-NEXT: store i32 [[SPEC_STORE_SELECT]], i32* [[A]], align 4
11 ; CHECK-NEXT: ret void
14 ; First store to the location.
16 %cmp = icmp sgt i32 %D, 42
17 br i1 %cmp, label %if.then, label %ret.end, !prof !0
19 ; Make sure we speculate stores like the following one. It is cheap compared to
20 ; a mispredicted branch.
29 ; Store to a different location.
; Negative test: the first store writes through %A1 while the guarded store
; writes through %A2, and the expected output keeps the conditional branch.
31 define void @noifconvertstore1(i32* %A1, i32* %A2, i32 %B, i32 %C, i32 %D) {
32 ; CHECK-LABEL: @noifconvertstore1(
34 ; CHECK-NEXT: store i32 [[B:%.*]], i32* [[A1:%.*]], align 4
35 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
36 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
38 ; CHECK-NEXT: store i32 [[C:%.*]], i32* [[A2:%.*]], align 4
39 ; CHECK-NEXT: br label [[RET_END]]
41 ; CHECK-NEXT: ret void
44 store i32 %B, i32* %A1
45 %cmp = icmp sgt i32 %D, 42
46 br i1 %cmp, label %if.then, label %ret.end
49 store i32 %C, i32* %A2
56 ; This function could store to our address, so we can't repeat the first store a second time.
57 declare void @unknown_fun()
; Negative test: in the expected output a call to @unknown_fun sits between the
; first store to %A and the compare, and the conditional branch around the
; second store is kept.
59 define void @noifconvertstore2(i32* %A, i32 %B, i32 %C, i32 %D) {
60 ; CHECK-LABEL: @noifconvertstore2(
62 ; CHECK-NEXT: store i32 [[B:%.*]], i32* [[A:%.*]], align 4
63 ; CHECK-NEXT: call void @unknown_fun()
64 ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[D:%.*]], 42
65 ; CHECK-NEXT: br i1 [[CMP6]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
67 ; CHECK-NEXT: store i32 [[C:%.*]], i32* [[A]], align 4
68 ; CHECK-NEXT: br label [[RET_END]]
70 ; CHECK-NEXT: ret void
73 ; First store to the location.
75 call void @unknown_fun()
76 %cmp6 = icmp sgt i32 %D, 42
77 br i1 %cmp6, label %if.then, label %ret.end
87 ; Make sure we don't speculate volatile stores.
; Negative test: the guarded store to %A is volatile, and the expected output
; keeps both the conditional branch and the volatile store unchanged.
89 define void @noifconvertstore_volatile(i32* %A, i32 %B, i32 %C, i32 %D) {
90 ; CHECK-LABEL: @noifconvertstore_volatile(
92 ; CHECK-NEXT: store i32 [[B:%.*]], i32* [[A:%.*]], align 4
93 ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[D:%.*]], 42
94 ; CHECK-NEXT: br i1 [[CMP6]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
96 ; CHECK-NEXT: store volatile i32 [[C:%.*]], i32* [[A]], align 4
97 ; CHECK-NEXT: br label [[RET_END]]
99 ; CHECK-NEXT: ret void
102 ; First store to the location.
103 store i32 %B, i32* %A
104 %cmp6 = icmp sgt i32 %D, 42
105 br i1 %cmp6, label %if.then, label %ret.end
108 store volatile i32 %C, i32* %A
116 ;; Speculate a store, preceded by a local, non-escaping load
; Positive test: %a is an alloca that is never passed to a call in the visible
; body. The guarded store writes %b to the same address (%arrayidx) that was
; just loaded, and the expected output replaces the branch with a select
; between %b and the loaded value followed by an unconditional store.
117 define i32 @load_before_store_noescape(i64 %i, i32 %b) {
118 ; CHECK-LABEL: @load_before_store_noescape(
120 ; CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 8
121 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[A]] to i64*
122 ; CHECK-NEXT: store i64 4294967296, i64* [[TMP0]], align 8
123 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 [[I:%.*]]
124 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
125 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], [[B:%.*]]
126 ; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[TMP1]]
127 ; CHECK-NEXT: store i32 [[SPEC_STORE_SELECT]], i32* [[ARRAYIDX]], align 4
128 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 0
129 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
130 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
131 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
132 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
133 ; CHECK-NEXT: ret i32 [[ADD]]
136 %a = alloca [2 x i32], align 8
137 %0 = bitcast [2 x i32]* %a to i64*
138 store i64 4294967296, i64* %0, align 8
139 %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 %i
140 %1 = load i32, i32* %arrayidx, align 4
141 %cmp = icmp slt i32 %1, %b
142 br i1 %cmp, label %if.then, label %if.end
145 store i32 %b, i32* %arrayidx, align 4
149 %arrayidx1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
150 %2 = load i32, i32* %arrayidx1, align 4
151 %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
152 %3 = load i32, i32* %arrayidx2, align 4
153 %add = add nsw i32 %2, %3
157 ;; Don't speculate a store, preceded by a local, escaping load
; Negative test: same shape as a load-then-guarded-store on an alloca, except
; that %a is passed to @fork_some_threads before the load. The expected output
; keeps the conditional branch around the store.
158 define i32 @load_before_store_escape(i64 %i, i32 %b) {
159 ; CHECK-LABEL: @load_before_store_escape(
161 ; CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 8
162 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[A]] to i64*
163 ; CHECK-NEXT: store i64 4294967296, i64* [[TMP0]], align 8
164 ; CHECK-NEXT: call void @fork_some_threads([2 x i32]* [[A]])
165 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 [[I:%.*]]
166 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
167 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], [[B:%.*]]
168 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
170 ; CHECK-NEXT: store i32 [[B]], i32* [[ARRAYIDX]], align 4
171 ; CHECK-NEXT: br label [[IF_END]]
173 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 0
174 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
175 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
176 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
177 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
178 ; CHECK-NEXT: call void @join_some_threads()
179 ; CHECK-NEXT: ret i32 [[ADD]]
182 %a = alloca [2 x i32], align 8
183 %0 = bitcast [2 x i32]* %a to i64*
184 store i64 4294967296, i64* %0, align 8
185 call void @fork_some_threads([2 x i32]* %a)
186 %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 %i
187 %1 = load i32, i32* %arrayidx, align 4
188 %cmp = icmp slt i32 %1, %b
189 br i1 %cmp, label %if.then, label %if.end
192 store i32 %b, i32* %arrayidx, align 4
196 %arrayidx1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
197 %2 = load i32, i32* %arrayidx1, align 4
198 %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
199 %3 = load i32, i32* %arrayidx2, align 4
200 %add = add nsw i32 %2, %3
201 call void @join_some_threads()
; External functions called by @load_before_store_escape. Only declarations are
; given, so their bodies are opaque to the pass under test.
205 declare void @fork_some_threads([2 x i32] *);
206 declare void @join_some_threads();
208 ; Don't speculate if the store is not the only instruction in the block (not counting the block's terminating branch).
; Negative test: two stores are guarded by the branch here (to %arrayidx and
; %arrayidx1), and the expected output keeps the conditional branch with both
; stores behind it.
210 define i32 @not_alone_in_block(i64 %i, i32 %b) {
211 ; CHECK-LABEL: @not_alone_in_block(
213 ; CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 8
214 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[A]] to i64*
215 ; CHECK-NEXT: store i64 4294967296, i64* [[TMP0]], align 8
216 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 [[I:%.*]]
217 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 0
218 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
219 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], [[B:%.*]]
220 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
222 ; CHECK-NEXT: store i32 [[B]], i32* [[ARRAYIDX]], align 4
223 ; CHECK-NEXT: store i32 [[B]], i32* [[ARRAYIDX1]], align 4
224 ; CHECK-NEXT: br label [[IF_END]]
226 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
227 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
228 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
229 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
230 ; CHECK-NEXT: ret i32 [[ADD]]
233 %a = alloca [2 x i32], align 8
234 %0 = bitcast [2 x i32]* %a to i64*
235 store i64 4294967296, i64* %0, align 8
236 %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 %i
237 %arrayidx1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
238 %1 = load i32, i32* %arrayidx, align 4
239 %cmp = icmp slt i32 %1, %b
240 br i1 %cmp, label %if.then, label %if.end
243 store i32 %b, i32* %arrayidx, align 4
244 store i32 %b, i32* %arrayidx1, align 4
248 %2 = load i32, i32* %arrayidx1, align 4
249 %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
250 %3 = load i32, i32* %arrayidx2, align 4
251 %add = add nsw i32 %2, %3
; Branch-weight metadata referenced as !0 by the conditional branch in
; @ifconvertstore. The assertion below expects an identical node (weights 3
; and 5, same order) to be present in the output.
255 ; CHECK: !0 = !{!"branch_weights", i32 3, i32 5}
256 !0 = !{!"branch_weights", i32 3, i32 5}