Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / OpenMP / nvptx_parallel_codegen.cpp
blob0fea287bce94eb708ffdb9476c276188822180f6
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // Test target codegen - host bc file has to be created first.
3 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
4 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
5 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
6 // RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -aux-triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -disable-O0-optnone | FileCheck %s --check-prefix=CHECK2
7 // expected-no-diagnostics
8 #ifndef HEADER
9 #define HEADER
11 template<typename tx>
12 tx ftemplate(int n) {
13 tx a = 0;
14 short aa = 0;
15 tx b[10];
17 #pragma omp target if(0)
19 #pragma omp parallel
21 int a = 41;
23 a += 1;
26 #pragma omp target
28 #pragma omp parallel
30 int a = 42;
32 #pragma omp parallel if(0)
34 int a = 43;
36 #pragma omp parallel if(1)
38 int a = 44;
40 a += 1;
43 #pragma omp target if(n>40)
45 #pragma omp parallel if(n>1000)
47 int a = 45;
48 #pragma omp barrier
50 a += 1;
51 aa += 1;
52 b[2] += 1;
55 #pragma omp target
57 #pragma omp parallel
59 #pragma omp critical
60 ++a;
62 ++a;
64 return a;
67 int bar(int n){
68 int a = 0;
70 a += ftemplate<int>(n);
72 return a;
75 #endif
76 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
77 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
78 // CHECK1-NEXT: entry:
79 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
80 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
81 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
82 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
83 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8
84 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
85 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
86 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment, ptr [[DYN_PTR]])
87 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
88 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
89 // CHECK1: user_code.entry:
90 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
91 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
92 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
93 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0)
94 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
95 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
96 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
97 // CHECK1-NEXT: call void @__kmpc_target_deinit()
98 // CHECK1-NEXT: ret void
99 // CHECK1: worker.exit:
100 // CHECK1-NEXT: ret void
103 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined
104 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
105 // CHECK1-NEXT: entry:
106 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
107 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
108 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
109 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
110 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
111 // CHECK1-NEXT: store i32 42, ptr [[A]], align 4
112 // CHECK1-NEXT: ret void
115 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper
116 // CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
117 // CHECK1-NEXT: entry:
118 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
119 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
120 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
121 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
122 // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
123 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
124 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
125 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
126 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
127 // CHECK1-NEXT: ret void
130 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1
131 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
132 // CHECK1-NEXT: entry:
133 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
134 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
135 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
136 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
137 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
138 // CHECK1-NEXT: store i32 43, ptr [[A]], align 4
139 // CHECK1-NEXT: ret void
142 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper
143 // CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
144 // CHECK1-NEXT: entry:
145 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
146 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
147 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
148 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
149 // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
150 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
151 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
152 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
153 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
154 // CHECK1-NEXT: ret void
157 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2
158 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
159 // CHECK1-NEXT: entry:
160 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
161 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
162 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
163 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
164 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
165 // CHECK1-NEXT: store i32 44, ptr [[A]], align 4
166 // CHECK1-NEXT: ret void
169 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper
170 // CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
171 // CHECK1-NEXT: entry:
172 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
173 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
174 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
175 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
176 // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
177 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
178 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
179 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
180 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
181 // CHECK1-NEXT: ret void
184 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43
185 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
186 // CHECK1-NEXT: entry:
187 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
188 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
189 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
190 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
191 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
192 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
193 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
194 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
195 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
196 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
197 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
198 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
199 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]])
200 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
201 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
202 // CHECK1: user_code.entry:
203 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
204 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
205 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000
206 // CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32
207 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
208 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
209 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
210 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
211 // CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2
212 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
213 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
214 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
215 // CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
216 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
217 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
218 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
219 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
220 // CHECK1-NEXT: call void @__kmpc_target_deinit()
221 // CHECK1-NEXT: ret void
222 // CHECK1: worker.exit:
223 // CHECK1-NEXT: ret void
226 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined
227 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
228 // CHECK1-NEXT: entry:
229 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
230 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
231 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
232 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
233 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
234 // CHECK1-NEXT: store i32 45, ptr [[A]], align 4
235 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
236 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
237 // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
238 // CHECK1-NEXT: ret void
241 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper
242 // CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
243 // CHECK1-NEXT: entry:
244 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
245 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
246 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
247 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
248 // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
249 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
250 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
251 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
252 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
253 // CHECK1-NEXT: ret void
256 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55
257 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0]] {
258 // CHECK1-NEXT: entry:
259 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
260 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
261 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
262 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
263 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
264 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment, ptr [[DYN_PTR]])
265 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
266 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
267 // CHECK1: user_code.entry:
268 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
269 // CHECK1-NEXT: [[A1:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
270 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A1]], align 4
271 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
272 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
273 // CHECK1-NEXT: store ptr [[A1]], ptr [[TMP3]], align 8
274 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
275 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4
276 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
277 // CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4
278 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4)
279 // CHECK1-NEXT: call void @__kmpc_target_deinit()
280 // CHECK1-NEXT: ret void
281 // CHECK1: worker.exit:
282 // CHECK1-NEXT: ret void
285 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined
286 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
287 // CHECK1-NEXT: entry:
288 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
289 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
290 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
291 // CHECK1-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4
292 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
293 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
294 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
295 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
296 // CHECK1-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
297 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
298 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
299 // CHECK1-NEXT: store i32 0, ptr [[CRITICAL_COUNTER]], align 4
300 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]]
301 // CHECK1: omp.critical.loop:
302 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4
303 // CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]]
304 // CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]]
305 // CHECK1: omp.critical.test:
306 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4
307 // CHECK1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]]
308 // CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]]
309 // CHECK1: omp.critical.body:
310 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
311 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
312 // CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var")
313 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
314 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
315 // CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
316 // CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var")
317 // CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]]
318 // CHECK1: omp.critical.sync:
319 // CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
320 // CHECK1-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1
321 // CHECK1-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER]], align 4
322 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]]
323 // CHECK1: omp.critical.exit:
324 // CHECK1-NEXT: ret void
327 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper
328 // CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
329 // CHECK1-NEXT: entry:
330 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
331 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
332 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
333 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
334 // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
335 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
336 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
337 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
338 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
339 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
340 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
341 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR3]]
342 // CHECK1-NEXT: ret void
345 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
346 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
347 // CHECK2-NEXT: entry:
348 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
349 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
350 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
351 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4
352 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4
353 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
354 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
355 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment, ptr [[DYN_PTR]])
356 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
357 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
358 // CHECK2: user_code.entry:
359 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
360 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
361 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0)
362 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0)
363 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
364 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
365 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
366 // CHECK2-NEXT: call void @__kmpc_target_deinit()
367 // CHECK2-NEXT: ret void
368 // CHECK2: worker.exit:
369 // CHECK2-NEXT: ret void
372 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined
373 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
374 // CHECK2-NEXT: entry:
375 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
376 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
377 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
378 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
379 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
380 // CHECK2-NEXT: store i32 42, ptr [[A]], align 4
381 // CHECK2-NEXT: ret void
384 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper
385 // CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
386 // CHECK2-NEXT: entry:
387 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
388 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
389 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
390 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
391 // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
392 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
393 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
394 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
395 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]]
396 // CHECK2-NEXT: ret void
399 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1
400 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
401 // CHECK2-NEXT: entry:
402 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
403 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
404 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
405 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
406 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
407 // CHECK2-NEXT: store i32 43, ptr [[A]], align 4
408 // CHECK2-NEXT: ret void
411 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper
412 // CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
413 // CHECK2-NEXT: entry:
414 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
415 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
416 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
417 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
418 // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
419 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
420 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
421 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
422 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
423 // CHECK2-NEXT: ret void
426 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2
427 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
428 // CHECK2-NEXT: entry:
429 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
430 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
431 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
432 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
433 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
434 // CHECK2-NEXT: store i32 44, ptr [[A]], align 4
435 // CHECK2-NEXT: ret void
438 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper
439 // CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
440 // CHECK2-NEXT: entry:
441 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
442 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
443 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
444 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
445 // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
446 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
447 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
448 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
449 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
450 // CHECK2-NEXT: ret void
453 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43
454 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
455 // CHECK2-NEXT: entry:
456 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
457 // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
458 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
459 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
460 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
461 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
462 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
463 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
464 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
465 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
466 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
467 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
468 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]])
469 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
470 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
471 // CHECK2: user_code.entry:
472 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
473 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
474 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000
475 // CHECK2-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32
476 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
477 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
478 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
479 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
480 // CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2
481 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
482 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
483 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
484 // CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
485 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
486 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
487 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
488 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
489 // CHECK2-NEXT: call void @__kmpc_target_deinit()
490 // CHECK2-NEXT: ret void
491 // CHECK2: worker.exit:
492 // CHECK2-NEXT: ret void
495 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined
496 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
497 // CHECK2-NEXT: entry:
498 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
499 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
500 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
501 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
502 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
503 // CHECK2-NEXT: store i32 45, ptr [[A]], align 4
504 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
505 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
506 // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
507 // CHECK2-NEXT: ret void
510 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper
511 // CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
512 // CHECK2-NEXT: entry:
513 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
514 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
515 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
516 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
517 // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
518 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
519 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
520 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
521 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
522 // CHECK2-NEXT: ret void
525 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55
526 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0]] {
527 // CHECK2-NEXT: entry:
528 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
529 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
530 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
531 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
532 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
533 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment, ptr [[DYN_PTR]])
534 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
535 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
536 // CHECK2: user_code.entry:
537 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
538 // CHECK2-NEXT: [[A1:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 4)
539 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[A1]], align 4
540 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
541 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
542 // CHECK2-NEXT: store ptr [[A1]], ptr [[TMP3]], align 4
543 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
544 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4
545 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
546 // CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4
547 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4)
548 // CHECK2-NEXT: call void @__kmpc_target_deinit()
549 // CHECK2-NEXT: ret void
550 // CHECK2: worker.exit:
551 // CHECK2-NEXT: ret void
554 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined
555 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
556 // CHECK2-NEXT: entry:
557 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
558 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
559 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
560 // CHECK2-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4
561 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
562 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
563 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
564 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
565 // CHECK2-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
566 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
567 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
568 // CHECK2-NEXT: store i32 0, ptr [[CRITICAL_COUNTER]], align 4
569 // CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]]
570 // CHECK2: omp.critical.loop:
571 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4
572 // CHECK2-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]]
573 // CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]]
574 // CHECK2: omp.critical.test:
575 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4
576 // CHECK2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]]
577 // CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]]
578 // CHECK2: omp.critical.body:
579 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
580 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
581 // CHECK2-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var")
582 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
583 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
584 // CHECK2-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
585 // CHECK2-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var")
586 // CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]]
587 // CHECK2: omp.critical.sync:
588 // CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
589 // CHECK2-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1
590 // CHECK2-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER]], align 4
591 // CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]]
592 // CHECK2: omp.critical.exit:
593 // CHECK2-NEXT: ret void
596 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper
597 // CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
598 // CHECK2-NEXT: entry:
599 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
600 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
601 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
602 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
603 // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
604 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
605 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
606 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
607 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
608 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
609 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
610 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR2]]
611 // CHECK2-NEXT: ret void