Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / OpenMP / nvptx_teams_codegen.cpp
blobc71f5da40a1479d1a982ecab6b851a061649ba83
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // Test target codegen - host bc file has to be created first.
3 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
4 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
5 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
6 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
7 // expected-no-diagnostics
8 #ifndef HEADER
9 #define HEADER
11 #ifdef CK1
13 template <typename T>
14 int tmain(T argc) {
15 #pragma omp target
16 #pragma omp teams
17 argc = 0;
18 return 0;
22 int main (int argc, char **argv) {
23 #pragma omp target
24 #pragma omp teams
26 argc = 0;
28 return tmain(argv);
32 // only nvptx side: do not outline teams region and do not call fork_teams
35 // target region in template
39 #endif // CK1
41 // Test target codegen - host bc file has to be created first.
42 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
43 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK3
44 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
45 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4
46 // expected-no-diagnostics
47 #ifdef CK2
49 template <typename T>
50 int tmain(T argc) {
51 int a = 10;
52 int b = 5;
53 #pragma omp target
54 #pragma omp teams num_teams(a) thread_limit(b)
56 argc = 0;
58 return 0;
61 int main (int argc, char **argv) {
62 int a = 20;
63 int b = 5;
64 #pragma omp target
65 #pragma omp teams num_teams(a) thread_limit(b)
67 argc = 0;
69 return tmain(argv);
77 #endif // CK2
78 #endif
79 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
80 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
81 // CHECK1-NEXT: entry:
82 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
83 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
84 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
85 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
86 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
87 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
88 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]])
89 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
90 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
91 // CHECK1: user_code.entry:
92 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
93 // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
94 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
95 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
96 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
97 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
98 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
99 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
100 // CHECK1-NEXT: call void @__kmpc_target_deinit()
101 // CHECK1-NEXT: ret void
102 // CHECK1: worker.exit:
103 // CHECK1-NEXT: ret void
106 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined
107 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
108 // CHECK1-NEXT: entry:
109 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
110 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
111 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
112 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
113 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
114 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
115 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
116 // CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4
117 // CHECK1-NEXT: ret void
120 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15
121 // CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[ARGC:%.*]]) #[[ATTR0]] {
122 // CHECK1-NEXT: entry:
123 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
124 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
125 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
126 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
127 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
128 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
129 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment, ptr [[DYN_PTR]])
130 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
131 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
132 // CHECK1: user_code.entry:
133 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
134 // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
135 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8
136 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
137 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
138 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
139 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
140 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
141 // CHECK1-NEXT: call void @__kmpc_target_deinit()
142 // CHECK1-NEXT: ret void
143 // CHECK1: worker.exit:
144 // CHECK1-NEXT: ret void
147 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined
148 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] {
149 // CHECK1-NEXT: entry:
150 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
151 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
152 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
153 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
154 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
155 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
156 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
157 // CHECK1-NEXT: store ptr null, ptr [[TMP0]], align 8
158 // CHECK1-NEXT: ret void
161 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
162 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
163 // CHECK2-NEXT: entry:
164 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
165 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
166 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
167 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
168 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
169 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
170 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]])
171 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
172 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
173 // CHECK2: user_code.entry:
174 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
175 // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
176 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
177 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
178 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
179 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
180 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
181 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
182 // CHECK2-NEXT: call void @__kmpc_target_deinit()
183 // CHECK2-NEXT: ret void
184 // CHECK2: worker.exit:
185 // CHECK2-NEXT: ret void
188 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined
189 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
190 // CHECK2-NEXT: entry:
191 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
192 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
193 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
194 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
195 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
196 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
197 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
198 // CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4
199 // CHECK2-NEXT: ret void
202 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15
203 // CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[ARGC:%.*]]) #[[ATTR0]] {
204 // CHECK2-NEXT: entry:
205 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
206 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
207 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
208 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
209 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
210 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
211 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment, ptr [[DYN_PTR]])
212 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
213 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
214 // CHECK2: user_code.entry:
215 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
216 // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
217 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 4
218 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
219 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
220 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
221 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
222 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
223 // CHECK2-NEXT: call void @__kmpc_target_deinit()
224 // CHECK2-NEXT: ret void
225 // CHECK2: worker.exit:
226 // CHECK2-NEXT: ret void
229 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined
230 // CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] {
231 // CHECK2-NEXT: entry:
232 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
233 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
234 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
235 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
236 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
237 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
238 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
239 // CHECK2-NEXT: store ptr null, ptr [[TMP0]], align 4
240 // CHECK2-NEXT: ret void
243 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
244 // CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
245 // CHECK3-NEXT: entry:
246 // CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
247 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
248 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
249 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
250 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
251 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
252 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
253 // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
254 // CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
255 // CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
256 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment, ptr [[DYN_PTR]])
257 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
258 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
259 // CHECK3: user_code.entry:
260 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
261 // CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
262 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
263 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
264 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
265 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
266 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
267 // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
268 // CHECK3-NEXT: call void @__kmpc_target_deinit()
269 // CHECK3-NEXT: ret void
270 // CHECK3: worker.exit:
271 // CHECK3-NEXT: ret void
274 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined
275 // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
276 // CHECK3-NEXT: entry:
277 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
278 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
279 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
280 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
281 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
282 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
283 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
284 // CHECK3-NEXT: store i32 0, ptr [[TMP0]], align 4
285 // CHECK3-NEXT: ret void
288 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53
289 // CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], ptr noundef [[ARGC:%.*]]) #[[ATTR0]] {
290 // CHECK3-NEXT: entry:
291 // CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
292 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
293 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
294 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
295 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
296 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
297 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
298 // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
299 // CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
300 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
301 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment, ptr [[DYN_PTR]])
302 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
303 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
304 // CHECK3: user_code.entry:
305 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
306 // CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
307 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8
308 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
309 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
310 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
311 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
312 // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
313 // CHECK3-NEXT: call void @__kmpc_target_deinit()
314 // CHECK3-NEXT: ret void
315 // CHECK3: worker.exit:
316 // CHECK3-NEXT: ret void
319 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined
320 // CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] {
321 // CHECK3-NEXT: entry:
322 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
323 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
324 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
325 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
326 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
327 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
328 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
329 // CHECK3-NEXT: store ptr null, ptr [[TMP0]], align 8
330 // CHECK3-NEXT: ret void
333 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
334 // CHECK4-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
335 // CHECK4-NEXT: entry:
336 // CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
337 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
338 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
339 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
340 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
341 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
342 // CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
343 // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
344 // CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
345 // CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
346 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment, ptr [[DYN_PTR]])
347 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
348 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
349 // CHECK4: user_code.entry:
350 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
351 // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
352 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
353 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
354 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
355 // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
356 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
357 // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
358 // CHECK4-NEXT: call void @__kmpc_target_deinit()
359 // CHECK4-NEXT: ret void
360 // CHECK4: worker.exit:
361 // CHECK4-NEXT: ret void
364 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined
365 // CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
366 // CHECK4-NEXT: entry:
367 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
368 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
369 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
370 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
371 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
372 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
373 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
374 // CHECK4-NEXT: store i32 0, ptr [[TMP0]], align 4
375 // CHECK4-NEXT: ret void
378 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53
379 // CHECK4-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], ptr noundef [[ARGC:%.*]]) #[[ATTR0]] {
380 // CHECK4-NEXT: entry:
381 // CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
382 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
383 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
384 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
385 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
386 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
387 // CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
388 // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
389 // CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
390 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
391 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment, ptr [[DYN_PTR]])
392 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
393 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
394 // CHECK4: user_code.entry:
395 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
396 // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
397 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 4
398 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
399 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
400 // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
401 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
402 // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
403 // CHECK4-NEXT: call void @__kmpc_target_deinit()
404 // CHECK4-NEXT: ret void
405 // CHECK4: worker.exit:
406 // CHECK4-NEXT: ret void
409 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined
410 // CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] {
411 // CHECK4-NEXT: entry:
412 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
413 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
414 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
415 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
416 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
417 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
418 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
419 // CHECK4-NEXT: store ptr null, ptr [[TMP0]], align 4
420 // CHECK4-NEXT: ret void