1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // REQUIRES: amdgpu-registered-target
4 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
6 // expected-no-diagnostics
12 #pragma omp target map(tofrom: sum)
17 for (int i
= 0; i
< N
; i
++)
20 for (int i
= 0; i
< N
; i
++)
30 #pragma omp target teams distribute parallel for map(from: result[:M])
31 for (int i
= 0; i
< M
; i
++) {
36 for (int j
= 0; j
< N
; j
++)
39 for (int j
= 0; j
< N
; j
++)
43 for (int i
= 0; i
< M
; i
++)
52 #pragma omp target teams distribute map(from: result[:M])
53 for (int i
= 0; i
< M
; i
++) {
58 #pragma omp parallel for
59 for (int j
= 0; j
< N
; j
++)
62 for (int j
= 0; j
< N
; j
++)
66 for (int i
= 0; i
< M
; i
++)
76 #pragma omp target teams distribute map(from: result[:M])
77 for (int i
= 0; i
< M
; i
++) {
81 #pragma omp parallel for
82 for (int j
= 0; j
< N
; j
++)
85 for (int j
= 0; j
< N
; j
++)
89 for (int i
= 0; i
< M
; i
++)
95 return foo1() + foo2() + foo3() + foo4();
99 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1v_l12
100 // CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] {
101 // CHECK-NEXT: entry:
102 // CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
103 // CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
104 // CHECK-NEXT: [[N:%.*]] = alloca i32, align 4, addrspace(5)
105 // CHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8, addrspace(5)
106 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
107 // CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4, addrspace(5)
108 // CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
109 // CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr
110 // CHECK-NEXT: [[N_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N]] to ptr
111 // CHECK-NEXT: [[__VLA_EXPR0_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__VLA_EXPR0]] to ptr
112 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
113 // CHECK-NEXT: [[I1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I1]] to ptr
114 // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
115 // CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
116 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8
117 // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1v_l12_kernel_environment to ptr), ptr [[DYN_PTR]])
118 // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
119 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
120 // CHECK: user_code.entry:
121 // CHECK-NEXT: store i32 10, ptr [[N_ASCAST]], align 4
122 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ASCAST]], align 4
123 // CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
124 // CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
125 // CHECK-NEXT: [[TMP5:%.*]] = add nuw i64 [[TMP4]], 3
126 // CHECK-NEXT: [[TMP6:%.*]] = udiv i64 [[TMP5]], 4
127 // CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
128 // CHECK-NEXT: [[A:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 [[TMP7]])
129 // CHECK-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR0_ASCAST]], align 8
130 // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
131 // CHECK-NEXT: br label [[FOR_COND:%.*]]
133 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I_ASCAST]], align 4
134 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[N_ASCAST]], align 4
135 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
136 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
138 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I_ASCAST]], align 4
139 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
140 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
141 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]]
142 // CHECK-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
143 // CHECK-NEXT: br label [[FOR_INC:%.*]]
145 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4
146 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1
147 // CHECK-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
148 // CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]]
149 // CHECK: worker.exit:
150 // CHECK-NEXT: ret void
152 // CHECK-NEXT: store i32 0, ptr [[I1_ASCAST]], align 4
153 // CHECK-NEXT: br label [[FOR_COND2:%.*]]
155 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I1_ASCAST]], align 4
156 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[N_ASCAST]], align 4
157 // CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP13]], [[TMP14]]
158 // CHECK-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END9:%.*]]
160 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[I1_ASCAST]], align 4
161 // CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP15]] to i64
162 // CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM5]]
163 // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
164 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4
165 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
166 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
167 // CHECK-NEXT: br label [[FOR_INC7:%.*]]
169 // CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I1_ASCAST]], align 4
170 // CHECK-NEXT: [[INC8:%.*]] = add nsw i32 [[TMP18]], 1
171 // CHECK-NEXT: store i32 [[INC8]], ptr [[I1_ASCAST]], align 4
172 // CHECK-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP15:![0-9]+]]
174 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 [[TMP7]])
175 // CHECK-NEXT: call void @__kmpc_target_deinit()
176 // CHECK-NEXT: ret void
179 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30
180 // CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR0]] {
181 // CHECK-NEXT: entry:
182 // CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
183 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
184 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
185 // CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
186 // CHECK-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
187 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
188 // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
189 // CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
190 // CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
191 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
192 // CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
193 // CHECK-NEXT: [[M_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_CASTED]] to ptr
194 // CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
195 // CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
196 // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
197 // CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
198 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
199 // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
200 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
201 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
202 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_kernel_environment to ptr), ptr [[DYN_PTR]])
203 // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
204 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
205 // CHECK: user_code.entry:
206 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
207 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
208 // CHECK-NEXT: store i32 [[TMP4]], ptr [[M_CASTED_ASCAST]], align 4
209 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[M_CASTED_ASCAST]], align 8
210 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
211 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
212 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR4:[0-9]+]]
213 // CHECK-NEXT: call void @__kmpc_target_deinit()
214 // CHECK-NEXT: ret void
215 // CHECK: worker.exit:
216 // CHECK-NEXT: ret void
219 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_omp_outlined
220 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR3:[0-9]+]] {
221 // CHECK-NEXT: entry:
222 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
223 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
224 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
225 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
226 // CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
227 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
228 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
229 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
230 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
231 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
232 // CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
233 // CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
234 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
235 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
236 // CHECK-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
237 // CHECK-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
238 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
239 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
240 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
241 // CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
242 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
243 // CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
244 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
245 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
246 // CHECK-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
247 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
248 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
249 // CHECK-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
250 // CHECK-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
251 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
252 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
253 // CHECK-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
254 // CHECK-NEXT: [[M_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_CASTED]] to ptr
255 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
256 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
257 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
258 // CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
259 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
260 // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
261 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
262 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
263 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
264 // CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
265 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
266 // CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
267 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
268 // CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
269 // CHECK-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
270 // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
271 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
272 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
273 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
274 // CHECK: omp.precond.then:
275 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
276 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
277 // CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
278 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
279 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
280 // CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
281 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
282 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
283 // CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
284 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
285 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
286 // CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
287 // CHECK-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
289 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
290 // CHECK-NEXT: br label [[COND_END:%.*]]
291 // CHECK: cond.false:
292 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
293 // CHECK-NEXT: br label [[COND_END]]
295 // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
296 // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
297 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
298 // CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
299 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
300 // CHECK: omp.inner.for.cond:
301 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
302 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
303 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], 1
304 // CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP13]], [[ADD]]
305 // CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
306 // CHECK: omp.inner.for.body:
307 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
308 // CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
309 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
310 // CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
311 // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
312 // CHECK-NEXT: store i32 [[TMP19]], ptr [[M_CASTED_ASCAST]], align 4
313 // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[M_CASTED_ASCAST]], align 8
314 // CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
315 // CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP16]] to ptr
316 // CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
317 // CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
318 // CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr
319 // CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
320 // CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
321 // CHECK-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr
322 // CHECK-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
323 // CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
324 // CHECK-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP0]] to ptr
325 // CHECK-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
326 // CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
327 // CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 8
328 // CHECK-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
329 // CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
330 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
331 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
332 // CHECK: omp.inner.for.inc:
333 // CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
334 // CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
335 // CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]]
336 // CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
337 // CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
338 // CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
339 // CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
340 // CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
341 // CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
342 // CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
343 // CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
344 // CHECK-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
345 // CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
346 // CHECK-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
347 // CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]]
348 // CHECK-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
349 // CHECK: cond.true10:
350 // CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
351 // CHECK-NEXT: br label [[COND_END12:%.*]]
352 // CHECK: cond.false11:
353 // CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
354 // CHECK-NEXT: br label [[COND_END12]]
355 // CHECK: cond.end12:
356 // CHECK-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ]
357 // CHECK-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
358 // CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
359 // CHECK-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV_ASCAST]], align 4
360 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
361 // CHECK: omp.inner.for.end:
362 // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
363 // CHECK: omp.loop.exit:
364 // CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
365 // CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
366 // CHECK-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP44]])
367 // CHECK-NEXT: br label [[OMP_PRECOND_END]]
368 // CHECK: omp.precond.end:
369 // CHECK-NEXT: ret void
372 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_omp_outlined_omp_outlined
373 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR3]] {
374 // CHECK-NEXT: entry:
375 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
376 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
377 // CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
378 // CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
379 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
380 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
381 // CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
382 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
383 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
384 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
385 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
386 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
387 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
388 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
389 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
390 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
391 // CHECK-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
392 // CHECK-NEXT: [[N:%.*]] = alloca i32, align 4, addrspace(5)
393 // CHECK-NEXT: [[SAVED_STACK:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
394 // CHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8, addrspace(5)
395 // CHECK-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
396 // CHECK-NEXT: [[J11:%.*]] = alloca i32, align 4, addrspace(5)
397 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
398 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
399 // CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
400 // CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
401 // CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
402 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
403 // CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
404 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
405 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
406 // CHECK-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
407 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
408 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
409 // CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
410 // CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
411 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
412 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
413 // CHECK-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
414 // CHECK-NEXT: [[N_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N]] to ptr
415 // CHECK-NEXT: [[SAVED_STACK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SAVED_STACK]] to ptr
416 // CHECK-NEXT: [[__VLA_EXPR0_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__VLA_EXPR0]] to ptr
417 // CHECK-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
418 // CHECK-NEXT: [[J11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J11]] to ptr
419 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
420 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
421 // CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
422 // CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
423 // CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
424 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
425 // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
426 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
427 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
428 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
429 // CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
430 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
431 // CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
432 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
433 // CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
434 // CHECK-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
435 // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
436 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
437 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
438 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
439 // CHECK: omp.precond.then:
440 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
441 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
442 // CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB_ASCAST]], align 4
443 // CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
444 // CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32
445 // CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
446 // CHECK-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32
447 // CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
448 // CHECK-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
449 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
450 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
451 // CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
452 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
453 // CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
454 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
455 // CHECK-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV_ASCAST]], align 4
456 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
457 // CHECK: omp.inner.for.cond:
458 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
459 // CHECK-NEXT: [[CONV5:%.*]] = sext i32 [[TMP11]] to i64
460 // CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
461 // CHECK-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP12]]
462 // CHECK-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
463 // CHECK: omp.inner.for.body:
464 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
465 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
466 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
467 // CHECK-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
468 // CHECK-NEXT: store i32 10, ptr [[N_ASCAST]], align 4
469 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[N_ASCAST]], align 4
470 // CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
471 // CHECK-NEXT: [[TMP16:%.*]] = call ptr addrspace(5) @llvm.stacksave.p5()
472 // CHECK-NEXT: store ptr addrspace(5) [[TMP16]], ptr [[SAVED_STACK_ASCAST]], align 4
473 // CHECK-NEXT: [[VLA7:%.*]] = alloca i32, i64 [[TMP15]], align 4, addrspace(5)
474 // CHECK-NEXT: [[VLA7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA7]] to ptr
475 // CHECK-NEXT: store i64 [[TMP15]], ptr [[__VLA_EXPR0_ASCAST]], align 8
476 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
477 // CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
478 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64
479 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]]
480 // CHECK-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX]], align 4
481 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
482 // CHECK-NEXT: br label [[FOR_COND:%.*]]
484 // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[J_ASCAST]], align 4
485 // CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ASCAST]], align 4
486 // CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP19]], [[TMP20]]
487 // CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
489 // CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[J_ASCAST]], align 4
490 // CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[J_ASCAST]], align 4
491 // CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP22]] to i64
492 // CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[VLA7_ASCAST]], i64 [[IDXPROM9]]
493 // CHECK-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX10]], align 4
494 // CHECK-NEXT: br label [[FOR_INC:%.*]]
496 // CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[J_ASCAST]], align 4
497 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP23]], 1
498 // CHECK-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4
499 // CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
501 // CHECK-NEXT: store i32 0, ptr [[J11_ASCAST]], align 4
502 // CHECK-NEXT: br label [[FOR_COND12:%.*]]
503 // CHECK: for.cond12:
504 // CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[J11_ASCAST]], align 4
505 // CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ASCAST]], align 4
506 // CHECK-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP24]], [[TMP25]]
507 // CHECK-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END22:%.*]]
508 // CHECK: for.body14:
509 // CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[J11_ASCAST]], align 4
510 // CHECK-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP26]] to i64
511 // CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[VLA7_ASCAST]], i64 [[IDXPROM15]]
512 // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4
513 // CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
514 // CHECK-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP28]] to i64
515 // CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM17]]
516 // CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
517 // CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP29]], [[TMP27]]
518 // CHECK-NEXT: store i32 [[ADD19]], ptr [[ARRAYIDX18]], align 4
519 // CHECK-NEXT: br label [[FOR_INC20:%.*]]
521 // CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[J11_ASCAST]], align 4
522 // CHECK-NEXT: [[INC21:%.*]] = add nsw i32 [[TMP30]], 1
523 // CHECK-NEXT: store i32 [[INC21]], ptr [[J11_ASCAST]], align 4
524 // CHECK-NEXT: br label [[FOR_COND12]], !llvm.loop [[LOOP17:![0-9]+]]
526 // CHECK-NEXT: [[TMP31:%.*]] = load ptr addrspace(5), ptr [[SAVED_STACK_ASCAST]], align 4
527 // CHECK-NEXT: call void @llvm.stackrestore.p5(ptr addrspace(5) [[TMP31]])
528 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
529 // CHECK: omp.body.continue:
530 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
531 // CHECK: omp.inner.for.inc:
532 // CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
533 // CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
534 // CHECK-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], [[TMP33]]
535 // CHECK-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV_ASCAST]], align 4
536 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
537 // CHECK: omp.inner.for.end:
538 // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
539 // CHECK: omp.loop.exit:
540 // CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
541 // CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
542 // CHECK-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP35]])
543 // CHECK-NEXT: br label [[OMP_PRECOND_END]]
544 // CHECK: omp.precond.end:
545 // CHECK-NEXT: ret void
548 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52
549 // CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR0]] {
550 // CHECK-NEXT: entry:
551 // CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
552 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
553 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
554 // CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
555 // CHECK-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
556 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
557 // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
558 // CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
559 // CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
560 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
561 // CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
562 // CHECK-NEXT: [[M_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_CASTED]] to ptr
563 // CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
564 // CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
565 // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
566 // CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
567 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
568 // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
569 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
570 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
571 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_kernel_environment to ptr), ptr [[DYN_PTR]])
572 // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
573 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
574 // CHECK: user_code.entry:
575 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
576 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
577 // CHECK-NEXT: store i32 [[TMP4]], ptr [[M_CASTED_ASCAST]], align 4
578 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[M_CASTED_ASCAST]], align 8
579 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
580 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
581 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR4]]
582 // CHECK-NEXT: call void @__kmpc_target_deinit()
583 // CHECK-NEXT: ret void
584 // CHECK: worker.exit:
585 // CHECK-NEXT: ret void
588 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined
589 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR3]] {
590 // CHECK-NEXT: entry:
591 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
592 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
593 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
594 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
595 // CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
596 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
597 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
598 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
599 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
600 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
601 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
602 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
603 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
604 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
605 // CHECK-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
606 // CHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8, addrspace(5)
607 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
608 // CHECK-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
609 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
610 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
611 // CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
612 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
613 // CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
614 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
615 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
616 // CHECK-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
617 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
618 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
619 // CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
620 // CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
621 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
622 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
623 // CHECK-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
624 // CHECK-NEXT: [[__VLA_EXPR0_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__VLA_EXPR0]] to ptr
625 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
626 // CHECK-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
627 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
628 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
629 // CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
630 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
631 // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
632 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
633 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
634 // CHECK-NEXT: [[N:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
635 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
636 // CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
637 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
638 // CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
639 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
640 // CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
641 // CHECK-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
642 // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
643 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
644 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
645 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
646 // CHECK: omp.precond.then:
647 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
648 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
649 // CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB_ASCAST]], align 4
650 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
651 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
652 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
653 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
654 // CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
655 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
656 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
657 // CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
658 // CHECK-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
660 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
661 // CHECK-NEXT: br label [[COND_END:%.*]]
662 // CHECK: cond.false:
663 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
664 // CHECK-NEXT: br label [[COND_END]]
666 // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
667 // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
668 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
669 // CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
670 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
671 // CHECK: omp.inner.for.cond:
672 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
673 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
674 // CHECK-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
675 // CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
676 // CHECK: omp.inner.for.body:
677 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
678 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
679 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
680 // CHECK-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4
681 // CHECK-NEXT: store i32 10, ptr [[N]], align 4
682 // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[N]], align 4
683 // CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
684 // CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
685 // CHECK-NEXT: [[TMP19:%.*]] = add nuw i64 [[TMP18]], 3
686 // CHECK-NEXT: [[TMP20:%.*]] = udiv i64 [[TMP19]], 4
687 // CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
688 // CHECK-NEXT: [[A:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 [[TMP21]])
689 // CHECK-NEXT: store i64 [[TMP17]], ptr [[__VLA_EXPR0_ASCAST]], align 8
690 // CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
691 // CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
692 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
693 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]]
694 // CHECK-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX]], align 4
695 // CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
696 // CHECK-NEXT: store ptr [[N]], ptr [[TMP24]], align 8
697 // CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
698 // CHECK-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP17]] to ptr
699 // CHECK-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
700 // CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
701 // CHECK-NEXT: store ptr [[A]], ptr [[TMP27]], align 8
702 // CHECK-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
703 // CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
704 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
705 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
706 // CHECK-NEXT: br label [[FOR_COND:%.*]]
708 // CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[J_ASCAST]], align 4
709 // CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4
710 // CHECK-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP30]], [[TMP31]]
711 // CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
713 // CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[J_ASCAST]], align 4
714 // CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP32]] to i64
715 // CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM7]]
716 // CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4
717 // CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
718 // CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP34]] to i64
719 // CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]]
720 // CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
721 // CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], [[TMP33]]
722 // CHECK-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX10]], align 4
723 // CHECK-NEXT: br label [[FOR_INC:%.*]]
725 // CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[J_ASCAST]], align 4
726 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP36]], 1
727 // CHECK-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4
728 // CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
730 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 [[TMP21]])
731 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
732 // CHECK: omp.body.continue:
733 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
734 // CHECK: omp.inner.for.inc:
735 // CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
736 // CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1
737 // CHECK-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4
738 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
739 // CHECK: omp.inner.for.end:
740 // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
741 // CHECK: omp.loop.exit:
742 // CHECK-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
743 // CHECK-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
744 // CHECK-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP39]])
745 // CHECK-NEXT: br label [[OMP_PRECOND_END]]
746 // CHECK: omp.precond.end:
747 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[N]], i64 4)
748 // CHECK-NEXT: ret void
751 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined
752 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
753 // CHECK-NEXT: entry:
754 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
755 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
756 // CHECK-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
757 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
758 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
759 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
760 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
761 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
762 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
763 // CHECK-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
764 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
765 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
766 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
767 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
768 // CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5)
769 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
770 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
771 // CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
772 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
773 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
774 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
775 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
776 // CHECK-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
777 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
778 // CHECK-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
779 // CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
780 // CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
781 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
782 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
783 // CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr
784 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
785 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
786 // CHECK-NEXT: store ptr [[N]], ptr [[N_ADDR_ASCAST]], align 8
787 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
788 // CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
789 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR_ASCAST]], align 8
790 // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
791 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
792 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
793 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
794 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
795 // CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0
796 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
797 // CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
798 // CHECK-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
799 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
800 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
801 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
802 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
803 // CHECK: omp.precond.then:
804 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
805 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
806 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
807 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
808 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
809 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
810 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
811 // CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
812 // CHECK-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
813 // CHECK: omp.dispatch.cond:
814 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
815 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
816 // CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
817 // CHECK-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
819 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
820 // CHECK-NEXT: br label [[COND_END:%.*]]
821 // CHECK: cond.false:
822 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
823 // CHECK-NEXT: br label [[COND_END]]
825 // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
826 // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
827 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
828 // CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
829 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
830 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
831 // CHECK-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
832 // CHECK-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
833 // CHECK: omp.dispatch.body:
834 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
835 // CHECK: omp.inner.for.cond:
836 // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
837 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
838 // CHECK-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
839 // CHECK-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
840 // CHECK: omp.inner.for.body:
841 // CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
842 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
843 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
844 // CHECK-NEXT: store i32 [[ADD]], ptr [[J3_ASCAST]], align 4
845 // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
846 // CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
847 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
848 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]]
849 // CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4
850 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
851 // CHECK: omp.body.continue:
852 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
853 // CHECK: omp.inner.for.inc:
854 // CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
855 // CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1
856 // CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4
857 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
858 // CHECK: omp.inner.for.end:
859 // CHECK-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
860 // CHECK: omp.dispatch.inc:
861 // CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
862 // CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
863 // CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
864 // CHECK-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
865 // CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
866 // CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
867 // CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
868 // CHECK-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
869 // CHECK-NEXT: br label [[OMP_DISPATCH_COND]]
870 // CHECK: omp.dispatch.end:
871 // CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
872 // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
873 // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP27]])
874 // CHECK-NEXT: br label [[OMP_PRECOND_END]]
875 // CHECK: omp.precond.end:
876 // CHECK-NEXT: ret void
879 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined_wrapper
880 // CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
881 // CHECK-NEXT: entry:
882 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
883 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
884 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
885 // CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
886 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
887 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
888 // CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
889 // CHECK-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
890 // CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
891 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
892 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
893 // CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
894 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
895 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
896 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
897 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
898 // CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
899 // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2
900 // CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
901 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]], i64 [[TMP6]], ptr [[TMP8]]) #[[ATTR4]]
902 // CHECK-NEXT: ret void
905 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76
906 // CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[M:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR0]] {
907 // CHECK-NEXT: entry:
908 // CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
909 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
910 // CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
911 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
912 // CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
913 // CHECK-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
914 // CHECK-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
915 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
916 // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
917 // CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
918 // CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
919 // CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
920 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
921 // CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
922 // CHECK-NEXT: [[M_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_CASTED]] to ptr
923 // CHECK-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
924 // CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
925 // CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
926 // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
927 // CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
928 // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
929 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
930 // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
931 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
932 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
933 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_kernel_environment to ptr), ptr [[DYN_PTR]])
934 // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
935 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
936 // CHECK: user_code.entry:
937 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
938 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
939 // CHECK-NEXT: store i32 [[TMP4]], ptr [[M_CASTED_ASCAST]], align 4
940 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[M_CASTED_ASCAST]], align 8
941 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
942 // CHECK-NEXT: store i32 [[TMP6]], ptr [[N_CASTED_ASCAST]], align 4
943 // CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
944 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
945 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
946 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR4]]
947 // CHECK-NEXT: call void @__kmpc_target_deinit()
948 // CHECK-NEXT: ret void
949 // CHECK: worker.exit:
950 // CHECK-NEXT: ret void
953 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined
954 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[M:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR3]] {
955 // CHECK-NEXT: entry:
956 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
957 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
958 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
959 // CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
960 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
961 // CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
962 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
963 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
964 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
965 // CHECK-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
966 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
967 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
968 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
969 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
970 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
971 // CHECK-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
972 // CHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8, addrspace(5)
973 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
974 // CHECK-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
975 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
976 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
977 // CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
978 // CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
979 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
980 // CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
981 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
982 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
983 // CHECK-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
984 // CHECK-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
985 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
986 // CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
987 // CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
988 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
989 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
990 // CHECK-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
991 // CHECK-NEXT: [[__VLA_EXPR0_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__VLA_EXPR0]] to ptr
992 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
993 // CHECK-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
994 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
995 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
996 // CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
997 // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
998 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
999 // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
1000 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
1001 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
1002 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
1003 // CHECK-NEXT: [[N1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
1004 // CHECK-NEXT: store i32 [[TMP2]], ptr [[N1]], align 4
1005 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
1006 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1007 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1008 // CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0
1009 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1010 // CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
1011 // CHECK-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
1012 // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
1013 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1014 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
1015 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1016 // CHECK: omp.precond.then:
1017 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
1018 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
1019 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
1020 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1021 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
1022 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1023 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
1024 // CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
1025 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
1026 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
1027 // CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
1028 // CHECK-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1029 // CHECK: cond.true:
1030 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
1031 // CHECK-NEXT: br label [[COND_END:%.*]]
1032 // CHECK: cond.false:
1033 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
1034 // CHECK-NEXT: br label [[COND_END]]
1036 // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
1037 // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
1038 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
1039 // CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
1040 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1041 // CHECK: omp.inner.for.cond:
1042 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1043 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
1044 // CHECK-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
1045 // CHECK-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1046 // CHECK: omp.inner.for.body:
1047 // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1048 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
1049 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1050 // CHECK-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
1051 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[N1]], align 4
1052 // CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
1053 // CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
1054 // CHECK-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 3
1055 // CHECK-NEXT: [[TMP21:%.*]] = udiv i64 [[TMP20]], 4
1056 // CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
1057 // CHECK-NEXT: [[A:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 [[TMP22]])
1058 // CHECK-NEXT: store i64 [[TMP18]], ptr [[__VLA_EXPR0_ASCAST]], align 8
1059 // CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
1060 // CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
1061 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64
1062 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]]
1063 // CHECK-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX]], align 4
1064 // CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
1065 // CHECK-NEXT: store ptr [[N1]], ptr [[TMP25]], align 8
1066 // CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
1067 // CHECK-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP18]] to ptr
1068 // CHECK-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
1069 // CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
1070 // CHECK-NEXT: store ptr [[A]], ptr [[TMP28]], align 8
1071 // CHECK-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1072 // CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
1073 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
1074 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
1075 // CHECK-NEXT: br label [[FOR_COND:%.*]]
1077 // CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[J_ASCAST]], align 4
1078 // CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[N1]], align 4
1079 // CHECK-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
1080 // CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
1082 // CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[J_ASCAST]], align 4
1083 // CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP33]] to i64
1084 // CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM8]]
1085 // CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
1086 // CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
1087 // CHECK-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP35]] to i64
1088 // CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM10]]
1089 // CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
1090 // CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[TMP34]]
1091 // CHECK-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX11]], align 4
1092 // CHECK-NEXT: br label [[FOR_INC:%.*]]
1094 // CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[J_ASCAST]], align 4
1095 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP37]], 1
1096 // CHECK-NEXT: store i32 [[INC]], ptr [[J_ASCAST]], align 4
1097 // CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
1099 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 [[TMP22]])
1100 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1101 // CHECK: omp.body.continue:
1102 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1103 // CHECK: omp.inner.for.inc:
1104 // CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1105 // CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP38]], 1
1106 // CHECK-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4
1107 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
1108 // CHECK: omp.inner.for.end:
1109 // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
1110 // CHECK: omp.loop.exit:
1111 // CHECK-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1112 // CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4
1113 // CHECK-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP40]])
1114 // CHECK-NEXT: br label [[OMP_PRECOND_END]]
1115 // CHECK: omp.precond.end:
1116 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[N1]], i64 4)
1117 // CHECK-NEXT: ret void
1120 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined
1121 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
1122 // CHECK-NEXT: entry:
1123 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1124 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1125 // CHECK-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1126 // CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
1127 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
1128 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
1129 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
1130 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
1131 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
1132 // CHECK-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
1133 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
1134 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
1135 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
1136 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
1137 // CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5)
1138 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
1139 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
1140 // CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
1141 // CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
1142 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
1143 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
1144 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
1145 // CHECK-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
1146 // CHECK-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
1147 // CHECK-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
1148 // CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
1149 // CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
1150 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
1151 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
1152 // CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr
1153 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1154 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
1155 // CHECK-NEXT: store ptr [[N]], ptr [[N_ADDR_ASCAST]], align 8
1156 // CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
1157 // CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
1158 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR_ASCAST]], align 8
1159 // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
1160 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
1161 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
1162 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1163 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1164 // CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0
1165 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
1166 // CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
1167 // CHECK-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
1168 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
1169 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
1170 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
1171 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
1172 // CHECK: omp.precond.then:
1173 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
1174 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
1175 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
1176 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1177 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
1178 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1179 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
1180 // CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
1181 // CHECK-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
1182 // CHECK: omp.dispatch.cond:
1183 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
1184 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
1185 // CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
1186 // CHECK-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1187 // CHECK: cond.true:
1188 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
1189 // CHECK-NEXT: br label [[COND_END:%.*]]
1190 // CHECK: cond.false:
1191 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
1192 // CHECK-NEXT: br label [[COND_END]]
1194 // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
1195 // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
1196 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
1197 // CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
1198 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1199 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
1200 // CHECK-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
1201 // CHECK-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
1202 // CHECK: omp.dispatch.body:
1203 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
1204 // CHECK: omp.inner.for.cond:
1205 // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1206 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
1207 // CHECK-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
1208 // CHECK-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1209 // CHECK: omp.inner.for.body:
1210 // CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1211 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
1212 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1213 // CHECK-NEXT: store i32 [[ADD]], ptr [[J3_ASCAST]], align 4
1214 // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
1215 // CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
1216 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
1217 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]]
1218 // CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4
1219 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
1220 // CHECK: omp.body.continue:
1221 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
1222 // CHECK: omp.inner.for.inc:
1223 // CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
1224 // CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1
1225 // CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4
1226 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
1227 // CHECK: omp.inner.for.end:
1228 // CHECK-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
1229 // CHECK: omp.dispatch.inc:
1230 // CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
1231 // CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1232 // CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
1233 // CHECK-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
1234 // CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
1235 // CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
1236 // CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
1237 // CHECK-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
1238 // CHECK-NEXT: br label [[OMP_DISPATCH_COND]]
1239 // CHECK: omp.dispatch.end:
1240 // CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
1241 // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
1242 // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP27]])
1243 // CHECK-NEXT: br label [[OMP_PRECOND_END]]
1244 // CHECK: omp.precond.end:
1245 // CHECK-NEXT: ret void
1248 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined_wrapper
1249 // CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7]] {
1250 // CHECK-NEXT: entry:
1251 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
1252 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
1253 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1254 // CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
1255 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
1256 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
1257 // CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
1258 // CHECK-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
1259 // CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
1260 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
1261 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
1262 // CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
1263 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
1264 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
1265 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
1266 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
1267 // CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
1268 // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2
1269 // CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
1270 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]], i64 [[TMP6]], ptr [[TMP8]]) #[[ATTR4]]
1271 // CHECK-NEXT: ret void