Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / OpenMP / nvptx_distribute_parallel_generic_mode_codegen.cpp
blob7402698af3e4c00997d274b777d15b1a2d90a214
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // Test target codegen - host bc file has to be created first.
3 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
4 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK4
5 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
6 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK5
7 // RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK5
9 // expected-no-diagnostics
10 #ifndef HEADER
11 #define HEADER
13 int a; // global mapped tofrom on the target region; its value also drives the if(a) clause below
15 int foo(int *a); // opaque callee: keeps every address-taken use of i/a/b/c/d live in the kernel
17 int main(int argc, char **argv) {
18 int b[10], c[10], d[10]; // b: firstprivate copy, c: lastprivate copy-out, d: no map clause (implicitly captured — see D arg in the generated kernel)
19 #pragma omp target teams map(tofrom:a)
20 #pragma omp distribute parallel for firstprivate(b) lastprivate(c) if(a)
21 for (int i= 0; i < argc; ++i)
22 a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]);
23 return 0;
26 #endif
27 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19
28 // CHECK4-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] {
29 // CHECK4-NEXT: entry:
30 // CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
31 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
32 // CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
33 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
34 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
35 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
36 // CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
37 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
38 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
39 // CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
40 // CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
41 // CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
42 // CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
43 // CHECK4-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
44 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
45 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
46 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8
47 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
48 // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8
49 // CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment, ptr [[DYN_PTR]])
50 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
51 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
52 // CHECK4: user_code.entry:
53 // CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
54 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
55 // CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
56 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
57 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
58 // CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
59 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR4:[0-9]+]]
60 // CHECK4-NEXT: call void @__kmpc_target_deinit()
61 // CHECK4-NEXT: ret void
62 // CHECK4: worker.exit:
63 // CHECK4-NEXT: ret void
66 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined
67 // CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] {
68 // CHECK4-NEXT: entry:
69 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
70 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
71 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
72 // CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
73 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
74 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
75 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
76 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
77 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
78 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
79 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
80 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
81 // CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
82 // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
83 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
84 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
85 // CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
86 // CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4
87 // CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8
88 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
89 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
90 // CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
91 // CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
92 // CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
93 // CHECK4-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
94 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
95 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
96 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8
97 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
98 // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8
99 // CHECK4-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40)
100 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
101 // CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
102 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
103 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
104 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
105 // CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
106 // CHECK4-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
107 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4
108 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
109 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
110 // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
111 // CHECK4: omp.precond.then:
112 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
113 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
114 // CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
115 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
116 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
117 // CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i64 40, i1 false)
118 // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
119 // CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
120 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
121 // CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
122 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
123 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
124 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
125 // CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
126 // CHECK4: cond.true:
127 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
128 // CHECK4-NEXT: br label [[COND_END:%.*]]
129 // CHECK4: cond.false:
130 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
131 // CHECK4-NEXT: br label [[COND_END]]
132 // CHECK4: cond.end:
133 // CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
134 // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
135 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
136 // CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
137 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
138 // CHECK4: omp.inner.for.cond:
139 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
140 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
141 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
142 // CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
143 // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
144 // CHECK4: omp.inner.for.body:
145 // CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
146 // CHECK4-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
147 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
148 // CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
149 // CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
150 // CHECK4-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP18]] to ptr
151 // CHECK4-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
152 // CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
153 // CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to ptr
154 // CHECK4-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
155 // CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
156 // CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP25]], align 8
157 // CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
158 // CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8
159 // CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
160 // CHECK4-NEXT: store ptr [[B4]], ptr [[TMP27]], align 8
161 // CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
162 // CHECK4-NEXT: store ptr [[C1]], ptr [[TMP28]], align 8
163 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 6
164 // CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP29]], align 8
165 // CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP2]], align 4
166 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP30]], 0
167 // CHECK4-NEXT: [[TMP31:%.*]] = zext i1 [[TOBOOL]] to i32
168 // CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
169 // CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
170 // CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 [[TMP31]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 7)
171 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
172 // CHECK4: omp.inner.for.inc:
173 // CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
174 // CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
175 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
176 // CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
177 // CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
178 // CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
179 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
180 // CHECK4-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4
181 // CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
182 // CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
183 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP38]], [[TMP39]]
184 // CHECK4-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4
185 // CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
186 // CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
187 // CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]]
188 // CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
189 // CHECK4: cond.true12:
190 // CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
191 // CHECK4-NEXT: br label [[COND_END14:%.*]]
192 // CHECK4: cond.false13:
193 // CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
194 // CHECK4-NEXT: br label [[COND_END14]]
195 // CHECK4: cond.end14:
196 // CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE12]] ], [ [[TMP43]], [[COND_FALSE13]] ]
197 // CHECK4-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4
198 // CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
199 // CHECK4-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4
200 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
201 // CHECK4: omp.inner.for.end:
202 // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
203 // CHECK4: omp.loop.exit:
204 // CHECK4-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
205 // CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4
206 // CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]])
207 // CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
208 // CHECK4-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
209 // CHECK4-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
210 // CHECK4: .omp.lastprivate.then:
211 // CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i64 40, i1 false)
212 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
213 // CHECK4: .omp.lastprivate.done:
214 // CHECK4-NEXT: br label [[OMP_PRECOND_END]]
215 // CHECK4: omp.precond.end:
216 // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 40)
217 // CHECK4-NEXT: ret void
220 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined
221 // CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] {
222 // CHECK4-NEXT: entry:
223 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
224 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
225 // CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
226 // CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
227 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
228 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
229 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
230 // CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
231 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
232 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
233 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
234 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
235 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
236 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
237 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
238 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
239 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
240 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
241 // CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
242 // CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4
243 // CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4
244 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
245 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
246 // CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
247 // CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
248 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
249 // CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
250 // CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
251 // CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
252 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
253 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
254 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
255 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
256 // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
257 // CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8
258 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
259 // CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
260 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
261 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
262 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
263 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
264 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
265 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4
266 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
267 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
268 // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
269 // CHECK4: omp.precond.then:
270 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
271 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
272 // CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4
273 // CHECK4-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
274 // CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32
275 // CHECK4-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
276 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32
277 // CHECK4-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
278 // CHECK4-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
279 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
280 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
281 // CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP2]], i64 40, i1 false)
282 // CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
283 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
284 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
285 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
286 // CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
287 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
288 // CHECK4: omp.inner.for.cond:
289 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
290 // CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP14]] to i64
291 // CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
292 // CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP15]]
293 // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
294 // CHECK4: omp.inner.for.body:
295 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
296 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
297 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
298 // CHECK4-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
299 // CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I6]]) #[[ATTR8:[0-9]+]]
300 // CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]]
301 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]]
302 // CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I6]], align 4
303 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
304 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B4]], i64 0, i64 [[IDXPROM]]
305 // CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]]
306 // CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]]
307 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[I6]], align 4
308 // CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64
309 // CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[C5]], i64 0, i64 [[IDXPROM13]]
310 // CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]]
311 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]]
312 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6]], align 4
313 // CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP19]] to i64
314 // CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM17]]
315 // CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX18]]) #[[ATTR8]]
316 // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]]
317 // CHECK4-NEXT: store i32 [[ADD20]], ptr [[TMP1]], align 4
318 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
319 // CHECK4: omp.body.continue:
320 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
321 // CHECK4: omp.inner.for.inc:
322 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
323 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
324 // CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
325 // CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4
326 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
327 // CHECK4: omp.inner.for.end:
328 // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
329 // CHECK4: omp.loop.exit:
330 // CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
331 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
332 // CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]])
333 // CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
334 // CHECK4-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
335 // CHECK4-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
336 // CHECK4: .omp.lastprivate.then:
337 // CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[C5]], i64 40, i1 false)
338 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
339 // CHECK4: .omp.lastprivate.done:
340 // CHECK4-NEXT: br label [[OMP_PRECOND_END]]
341 // CHECK4: omp.precond.end:
342 // CHECK4-NEXT: ret void
345 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19
346 // CHECK5-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] {
347 // CHECK5-NEXT: entry:
348 // CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
349 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
350 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
351 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
352 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
353 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
354 // CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
355 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
356 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
357 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
358 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
359 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
360 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
361 // CHECK5-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
362 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
363 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
364 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4
365 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
366 // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4
367 // CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment, ptr [[DYN_PTR]])
368 // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
369 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
370 // CHECK5: user_code.entry:
371 // CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
372 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
373 // CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
374 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
375 // CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
376 // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
377 // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR4:[0-9]+]]
378 // CHECK5-NEXT: call void @__kmpc_target_deinit()
379 // CHECK5-NEXT: ret void
380 // CHECK5: worker.exit:
381 // CHECK5-NEXT: ret void
384 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined
385 // CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] {
386 // CHECK5-NEXT: entry:
387 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
388 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
389 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
390 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
391 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
392 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
393 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
394 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
395 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
396 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
397 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
398 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
399 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
400 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
401 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
402 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
403 // CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
404 // CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4
405 // CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 4
406 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
407 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
408 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
409 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
410 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
411 // CHECK5-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
412 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
413 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
414 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4
415 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
416 // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4
417 // CHECK5-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 40)
418 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
419 // CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
420 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
421 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
422 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
423 // CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
424 // CHECK5-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
425 // CHECK5-NEXT: store i32 0, ptr [[I]], align 4
426 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
427 // CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
428 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
429 // CHECK5: omp.precond.then:
430 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
431 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
432 // CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
433 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
434 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
435 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i32 40, i1 false)
436 // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
437 // CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
438 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
439 // CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
440 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
441 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
442 // CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
443 // CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
444 // CHECK5: cond.true:
445 // CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
446 // CHECK5-NEXT: br label [[COND_END:%.*]]
447 // CHECK5: cond.false:
448 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
449 // CHECK5-NEXT: br label [[COND_END]]
450 // CHECK5: cond.end:
451 // CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
452 // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
453 // CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
454 // CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
455 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
456 // CHECK5: omp.inner.for.cond:
457 // CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
458 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
459 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
460 // CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
461 // CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
462 // CHECK5: omp.inner.for.body:
463 // CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
464 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
465 // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
466 // CHECK5-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP17]] to ptr
467 // CHECK5-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4
468 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
469 // CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP18]] to ptr
470 // CHECK5-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4
471 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
472 // CHECK5-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP23]], align 4
473 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
474 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4
475 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
476 // CHECK5-NEXT: store ptr [[B4]], ptr [[TMP25]], align 4
477 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 5
478 // CHECK5-NEXT: store ptr [[C1]], ptr [[TMP26]], align 4
479 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 6
480 // CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP27]], align 4
481 // CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP2]], align 4
482 // CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0
483 // CHECK5-NEXT: [[TMP29:%.*]] = zext i1 [[TOBOOL]] to i32
484 // CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
485 // CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
486 // CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 [[TMP29]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 7)
487 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
488 // CHECK5: omp.inner.for.inc:
489 // CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
490 // CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
491 // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]]
492 // CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
493 // CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
494 // CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
495 // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
496 // CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4
497 // CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
498 // CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
499 // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
500 // CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4
501 // CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
502 // CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
503 // CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]]
504 // CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
505 // CHECK5: cond.true12:
506 // CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
507 // CHECK5-NEXT: br label [[COND_END14:%.*]]
508 // CHECK5: cond.false13:
509 // CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
510 // CHECK5-NEXT: br label [[COND_END14]]
511 // CHECK5: cond.end14:
512 // CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE12]] ], [ [[TMP41]], [[COND_FALSE13]] ]
513 // CHECK5-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4
514 // CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
515 // CHECK5-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4
516 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
517 // CHECK5: omp.inner.for.end:
518 // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
519 // CHECK5: omp.loop.exit:
520 // CHECK5-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
521 // CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
522 // CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
523 // CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
524 // CHECK5-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
525 // CHECK5-NEXT: br i1 [[TMP46]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
526 // CHECK5: .omp.lastprivate.then:
527 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i32 40, i1 false)
528 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
529 // CHECK5: .omp.lastprivate.done:
530 // CHECK5-NEXT: br label [[OMP_PRECOND_END]]
531 // CHECK5: omp.precond.end:
532 // CHECK5-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 40)
533 // CHECK5-NEXT: ret void
536 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined
537 // CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] {
538 // CHECK5-NEXT: entry:
539 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
540 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
541 // CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
542 // CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
543 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
544 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
545 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
546 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
547 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
548 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
549 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
550 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
551 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
552 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
553 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
554 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
555 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
556 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
557 // CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4
558 // CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4
559 // CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4
560 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
561 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
562 // CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
563 // CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
564 // CHECK5-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
565 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
566 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
567 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
568 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
569 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
570 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
571 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
572 // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
573 // CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 4
574 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
575 // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
576 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
577 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
578 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
579 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
580 // CHECK5-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
581 // CHECK5-NEXT: store i32 0, ptr [[I]], align 4
582 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
583 // CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
584 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
585 // CHECK5: omp.precond.then:
586 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
587 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
588 // CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4
589 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
590 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
591 // CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4
592 // CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4
593 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
594 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
595 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B3]], ptr align 4 [[TMP2]], i32 40, i1 false)
596 // CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
597 // CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
598 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
599 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
600 // CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
601 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
602 // CHECK5: omp.inner.for.cond:
603 // CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
604 // CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
605 // CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]]
606 // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
607 // CHECK5: omp.inner.for.body:
608 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
609 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
610 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
611 // CHECK5-NEXT: store i32 [[ADD]], ptr [[I5]], align 4
612 // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR8:[0-9]+]]
613 // CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]]
614 // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
615 // CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I5]], align 4
616 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3]], i32 0, i32 [[TMP17]]
617 // CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]]
618 // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
619 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4
620 // CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4]], i32 0, i32 [[TMP18]]
621 // CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX11]]) #[[ATTR8]]
622 // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]]
623 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4
624 // CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]]
625 // CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]]
626 // CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]]
627 // CHECK5-NEXT: store i32 [[ADD16]], ptr [[TMP1]], align 4
628 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
629 // CHECK5: omp.body.continue:
630 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
631 // CHECK5: omp.inner.for.inc:
632 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
633 // CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
634 // CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
635 // CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4
636 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
637 // CHECK5: omp.inner.for.end:
638 // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
639 // CHECK5: omp.loop.exit:
640 // CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
641 // CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
642 // CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]])
643 // CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
644 // CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
645 // CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
646 // CHECK5: .omp.lastprivate.then:
647 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[C4]], i32 40, i1 false)
648 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
649 // CHECK5: .omp.lastprivate.done:
650 // CHECK5-NEXT: br label [[OMP_PRECOND_END]]
651 // CHECK5: omp.precond.end:
652 // CHECK5-NEXT: ret void