1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -passes=licm -S < %s | FileCheck %s
3 ; RUN: opt -passes='loop-mssa(licm)' -S %s | FileCheck %s
5 ; If we can prove a local is thread local, we can insert stores during
6 ; promotion which wouldn't be legal otherwise.
8 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
9 target triple = "x86_64-linux-generic"
11 @p = external global ptr
13 declare ptr @malloc(i64)
15 ; Exercise the TLS case
;; @test: heap case. The call result is marked noalias (malloc), so LICM can
;; treat the allocation as thread-local and promote the i32 at %mem to a
;; register across the loop: the CHECK lines show the initial load hoisted to
;; the preheader ([[ADDR_PROMOTED]]) and the promoted value stored back on
;; both exits (early-exit and for.cond.for.end_crit_edge) — stores that would
;; not be legal to insert if another thread could observe %mem.
;; NOTE(review): the numeric prefixes in this listing skip (e.g. 60-64);
;; labels such as for.header:/early-exit: and the closing brace are not
;; visible here — confirm against the full test file before regenerating
;; checks.
16 define ptr @test(i32 %n) {
19 ; CHECK-NEXT: [[MEM:%.*]] = call noalias dereferenceable(16) ptr @malloc(i64 16)
20 ; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]]
21 ; CHECK: for.body.lr.ph:
22 ; CHECK-NEXT: [[ADDR_PROMOTED:%.*]] = load i32, ptr [[MEM]], align 4
23 ; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
25 ; CHECK-NEXT: [[NEW1:%.*]] = phi i32 [ [[ADDR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[NEW:%.*]], [[FOR_BODY:%.*]] ]
26 ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
27 ; CHECK-NEXT: [[GUARD:%.*]] = load atomic ptr, ptr @p monotonic, align 8
28 ; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq ptr [[GUARD]], null
29 ; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]]
31 ; CHECK-NEXT: [[NEW1_LCSSA:%.*]] = phi i32 [ [[NEW1]], [[FOR_HEADER]] ]
32 ; CHECK-NEXT: store i32 [[NEW1_LCSSA]], ptr [[MEM]], align 4
33 ; CHECK-NEXT: ret ptr null
35 ; CHECK-NEXT: [[NEW]] = add i32 [[NEW1]], 1
36 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
37 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]]
38 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
39 ; CHECK: for.cond.for.end_crit_edge:
40 ; CHECK-NEXT: [[NEW_LCSSA:%.*]] = phi i32 [ [[NEW]], [[FOR_BODY]] ]
41 ; CHECK-NEXT: [[SPLIT:%.*]] = phi ptr [ [[MEM]], [[FOR_BODY]] ]
42 ; CHECK-NEXT: store i32 [[NEW_LCSSA]], ptr [[MEM]], align 4
43 ; CHECK-NEXT: ret ptr null
46 ;; ignore the required null check for simplicity
47 %mem = call dereferenceable(16) noalias ptr @malloc(i64 16)
48 br label %for.body.lr.ph
50 for.body.lr.ph: ; preds = %entry
54 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
55 %old = load i32, ptr %mem, align 4
56 ; deliberate impossible to analyze branch
;; The atomic load of the external global @p cannot be reasoned about, so the
;; loop has an exit LICM cannot prove untaken — promotion must still be legal.
57 %guard = load atomic ptr, ptr @p monotonic, align 8
58 %exitcmp = icmp eq ptr %guard, null
59 br i1 %exitcmp, label %for.body, label %early-exit
65 %new = add i32 %old, 1
66 store i32 %new, ptr %mem, align 4
67 %inc = add nsw i32 %i.02, 1
68 %cmp = icmp slt i32 %inc, %n
69 br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
71 for.cond.for.end_crit_edge: ; preds = %for.body
72 %split = phi ptr [ %mem, %for.body ]
76 ; Stack allocations can also be thread-local
;; @test2: stack case. Same loop as @test, but the buffer is an alloca rather
;; than a malloc result; an alloca is likewise thread-local, so the CHECK
;; lines show the identical promotion: load hoisted to the preheader
;; ([[ADDR_PROMOTED]]) and the promoted value stored back on both exits.
77 define ptr @test2(i32 %n) {
78 ; CHECK-LABEL: @test2(
80 ; CHECK-NEXT: [[MEM:%.*]] = alloca i8, i32 16, align 1
81 ; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]]
82 ; CHECK: for.body.lr.ph:
83 ; CHECK-NEXT: [[ADDR_PROMOTED:%.*]] = load i32, ptr [[MEM]], align 4
84 ; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
86 ; CHECK-NEXT: [[NEW1:%.*]] = phi i32 [ [[ADDR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[NEW:%.*]], [[FOR_BODY:%.*]] ]
87 ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
88 ; CHECK-NEXT: [[GUARD:%.*]] = load atomic ptr, ptr @p monotonic, align 8
89 ; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq ptr [[GUARD]], null
90 ; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]]
92 ; CHECK-NEXT: [[NEW1_LCSSA:%.*]] = phi i32 [ [[NEW1]], [[FOR_HEADER]] ]
93 ; CHECK-NEXT: store i32 [[NEW1_LCSSA]], ptr [[MEM]], align 4
94 ; CHECK-NEXT: ret ptr null
96 ; CHECK-NEXT: [[NEW]] = add i32 [[NEW1]], 1
97 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
98 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]]
99 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
100 ; CHECK: for.cond.for.end_crit_edge:
101 ; CHECK-NEXT: [[NEW_LCSSA:%.*]] = phi i32 [ [[NEW]], [[FOR_BODY]] ]
102 ; CHECK-NEXT: [[SPLIT:%.*]] = phi ptr [ [[MEM]], [[FOR_BODY]] ]
103 ; CHECK-NEXT: store i32 [[NEW_LCSSA]], ptr [[MEM]], align 4
104 ; CHECK-NEXT: ret ptr null
107 %mem = alloca i8, i32 16
108 br label %for.body.lr.ph
110 for.body.lr.ph: ; preds = %entry
114 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
115 %old = load i32, ptr %mem, align 4
116 ; deliberate impossible to analyze branch
;; Opaque atomic load of external @p gives the loop an unanalyzable exit.
117 %guard = load atomic ptr, ptr @p monotonic, align 8
118 %exitcmp = icmp eq ptr %guard, null
119 br i1 %exitcmp, label %for.body, label %early-exit
125 %new = add i32 %old, 1
126 store i32 %new, ptr %mem, align 4
127 %inc = add nsw i32 %i.02, 1
128 %cmp = icmp slt i32 %inc, %n
129 br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
131 for.cond.for.end_crit_edge: ; preds = %for.body
132 %split = phi ptr [ %mem, %for.body ]
136 declare noalias ptr @custom_malloc(i64)
138 ; Custom allocation function marked via noalias.
;; @test_custom_malloc: allocator recognized only via the noalias attribute
;; on @custom_malloc's declaration (not by name, as malloc is). The CHECK
;; lines show the same promotion as @test; note the loop-exit guard here is a
;; volatile (not atomic) load of @p.
139 define ptr @test_custom_malloc(i32 %n) {
140 ; CHECK-LABEL: @test_custom_malloc(
142 ; CHECK-NEXT: [[MEM:%.*]] = call noalias dereferenceable(16) ptr @custom_malloc(i64 16)
143 ; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]]
144 ; CHECK: for.body.lr.ph:
145 ; CHECK-NEXT: [[ADDR_PROMOTED:%.*]] = load i32, ptr [[MEM]], align 4
146 ; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
148 ; CHECK-NEXT: [[NEW1:%.*]] = phi i32 [ [[ADDR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[NEW:%.*]], [[FOR_BODY:%.*]] ]
149 ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
150 ; CHECK-NEXT: [[GUARD:%.*]] = load volatile ptr, ptr @p, align 8
151 ; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq ptr [[GUARD]], null
152 ; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]]
154 ; CHECK-NEXT: [[NEW1_LCSSA:%.*]] = phi i32 [ [[NEW1]], [[FOR_HEADER]] ]
155 ; CHECK-NEXT: store i32 [[NEW1_LCSSA]], ptr [[MEM]], align 4
156 ; CHECK-NEXT: ret ptr null
158 ; CHECK-NEXT: [[NEW]] = add i32 [[NEW1]], 1
159 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
160 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]]
161 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
162 ; CHECK: for.cond.for.end_crit_edge:
163 ; CHECK-NEXT: [[NEW_LCSSA:%.*]] = phi i32 [ [[NEW]], [[FOR_BODY]] ]
164 ; CHECK-NEXT: [[SPLIT:%.*]] = phi ptr [ [[MEM]], [[FOR_BODY]] ]
165 ; CHECK-NEXT: store i32 [[NEW_LCSSA]], ptr [[MEM]], align 4
166 ; CHECK-NEXT: ret ptr null
169 %mem = call dereferenceable(16) noalias ptr @custom_malloc(i64 16)
170 br label %for.body.lr.ph
172 for.body.lr.ph: ; preds = %entry
176 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
177 %old = load i32, ptr %mem, align 4
178 ; deliberate impossible to analyze branch
;; Volatile load of external @p gives the loop an unanalyzable exit.
179 %guard = load volatile ptr, ptr @p
180 %exitcmp = icmp eq ptr %guard, null
181 br i1 %exitcmp, label %for.body, label %early-exit
187 %new = add i32 %old, 1
188 store i32 %new, ptr %mem, align 4
189 %inc = add nsw i32 %i.02, 1
190 %cmp = icmp slt i32 %inc, %n
191 br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
193 for.cond.for.end_crit_edge: ; preds = %for.body
194 %split = phi ptr [ %mem, %for.body ]
198 declare ptr @not_malloc(i64)
200 ; Negative test - not an allocation function.
;; @test_neg_not_malloc: negative test. @not_malloc carries no noalias, so
;; the returned pointer cannot be proven thread-local. The CHECK lines show
;; no promotion: the load of [[MEM]] stays inside the loop header and the
;; store stays in the loop body; no store is inserted on the exits.
201 define ptr @test_neg_not_malloc(i32 %n) {
202 ; CHECK-LABEL: @test_neg_not_malloc(
204 ; CHECK-NEXT: [[MEM:%.*]] = call dereferenceable(16) ptr @not_malloc(i64 16)
205 ; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]]
206 ; CHECK: for.body.lr.ph:
207 ; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
209 ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
210 ; CHECK-NEXT: [[OLD:%.*]] = load i32, ptr [[MEM]], align 4
211 ; CHECK-NEXT: [[GUARD:%.*]] = load volatile ptr, ptr @p, align 8
212 ; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq ptr [[GUARD]], null
213 ; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]]
215 ; CHECK-NEXT: ret ptr null
217 ; CHECK-NEXT: [[NEW:%.*]] = add i32 [[OLD]], 1
218 ; CHECK-NEXT: store i32 [[NEW]], ptr [[MEM]], align 4
219 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
220 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]]
221 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
222 ; CHECK: for.cond.for.end_crit_edge:
223 ; CHECK-NEXT: [[SPLIT:%.*]] = phi ptr [ [[MEM]], [[FOR_BODY]] ]
224 ; CHECK-NEXT: ret ptr null
227 ;; ignore the required null check for simplicity
228 %mem = call dereferenceable(16) ptr @not_malloc(i64 16)
229 br label %for.body.lr.ph
231 for.body.lr.ph: ; preds = %entry
235 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
236 %old = load i32, ptr %mem, align 4
237 ; deliberate impossible to analyze branch
;; Volatile load of external @p gives the loop an unanalyzable exit.
238 %guard = load volatile ptr, ptr @p
239 %exitcmp = icmp eq ptr %guard, null
240 br i1 %exitcmp, label %for.body, label %early-exit
246 %new = add i32 %old, 1
247 store i32 %new, ptr %mem, align 4
248 %inc = add nsw i32 %i.02, 1
249 %cmp = icmp slt i32 %inc, %n
250 br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
252 for.cond.for.end_crit_edge: ; preds = %for.body
253 %split = phi ptr [ %mem, %for.body ]
257 ; Negative test - can't speculate load since branch
258 ; may control alignment
;; @test_neg2: negative test. Unlike @test, the load of %mem sits in for.body
;; AFTER the unanalyzable guard branch, so it does not execute on every
;; iteration and cannot be speculated into the loop header (per the file
;; comment above, the branch may control alignment). The CHECK lines show the
;; load/store pair left in place inside for.body — no promotion.
;; NOTE(review): the function's trailing lines (early-exit/for.header labels,
;; ret, closing brace) are beyond this listing — confirm in the full file.
259 define ptr @test_neg2(i32 %n) {
260 ; CHECK-LABEL: @test_neg2(
262 ; CHECK-NEXT: [[MEM:%.*]] = call noalias dereferenceable(16) ptr @malloc(i64 16)
263 ; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]]
264 ; CHECK: for.body.lr.ph:
265 ; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
267 ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
268 ; CHECK-NEXT: [[GUARD:%.*]] = load volatile ptr, ptr @p, align 8
269 ; CHECK-NEXT: [[EXITCMP:%.*]] = icmp eq ptr [[GUARD]], null
270 ; CHECK-NEXT: br i1 [[EXITCMP]], label [[FOR_BODY]], label [[EARLY_EXIT:%.*]]
272 ; CHECK-NEXT: ret ptr null
274 ; CHECK-NEXT: [[OLD:%.*]] = load i32, ptr [[MEM]], align 4
275 ; CHECK-NEXT: [[NEW:%.*]] = add i32 [[OLD]], 1
276 ; CHECK-NEXT: store i32 [[NEW]], ptr [[MEM]], align 4
277 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
278 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N:%.*]]
279 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_HEADER]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
280 ; CHECK: for.cond.for.end_crit_edge:
281 ; CHECK-NEXT: [[SPLIT:%.*]] = phi ptr [ [[MEM]], [[FOR_BODY]] ]
282 ; CHECK-NEXT: ret ptr null
285 ;; ignore the required null check for simplicity
286 %mem = call dereferenceable(16) noalias ptr @malloc(i64 16)
287 br label %for.body.lr.ph
289 for.body.lr.ph: ; preds = %entry
293 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
294 ; deliberate impossible to analyze branch
;; Volatile load of external @p gives the loop an unanalyzable exit; the
;; memory access below is guarded by it, unlike in the positive tests.
295 %guard = load volatile ptr, ptr @p
296 %exitcmp = icmp eq ptr %guard, null
297 br i1 %exitcmp, label %for.body, label %early-exit
303 %old = load i32, ptr %mem, align 4
304 %new = add i32 %old, 1
305 store i32 %new, ptr %mem, align 4
306 %inc = add nsw i32 %i.02, 1
307 %cmp = icmp slt i32 %inc, %n
308 br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
310 for.cond.for.end_crit_edge: ; preds = %for.body
311 %split = phi ptr [ %mem, %for.body ]