Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / OpenMP / nvptx_target_parallel_proc_bind_codegen.cpp
blobbfcdf9b5b1839b932d2aea1e49ea545dccdb4b01
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // Test target codegen - host bc file has to be created first.
3 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
4 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK45-64
5 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
6 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32
7 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32-EX
9 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
10 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64
11 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
12 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32
13 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32-EX
15 // expected-no-diagnostics
16 #ifndef HEADER
17 #define HEADER
19 // Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode.
21 template<typename tx>
22 tx ftemplate(int n) {
23 tx a = 0;
24 short aa = 0;
25 tx b[10];
27 #pragma omp target parallel proc_bind(master)
31 #pragma omp target parallel proc_bind(spread)
33 aa += 1;
36 #pragma omp target parallel proc_bind(close)
38 a += 1;
39 aa += 1;
40 b[2] += 1;
43 return a;
46 int bar(int n){
47 int a = 0;
49 a += ftemplate<int>(n);
51 return a;
54 #endif
55 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
56 // CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
57 // CHECK45-64-NEXT: entry:
58 // CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
59 // CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
60 // CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
61 // CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
62 // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
63 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
64 // CHECK45-64: user_code.entry:
65 // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
66 // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
67 // CHECK45-64-NEXT: call void @__kmpc_target_deinit()
68 // CHECK45-64-NEXT: ret void
69 // CHECK45-64: worker.exit:
70 // CHECK45-64-NEXT: ret void
73 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
74 // CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
75 // CHECK45-64-NEXT: entry:
76 // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
77 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
78 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
79 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
80 // CHECK45-64-NEXT: ret void
83 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
84 // CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
85 // CHECK45-64-NEXT: entry:
86 // CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
87 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
88 // CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
89 // CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
90 // CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
91 // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
92 // CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
93 // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
94 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
95 // CHECK45-64: user_code.entry:
96 // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
97 // CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
98 // CHECK45-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
99 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8
100 // CHECK45-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
101 // CHECK45-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
102 // CHECK45-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
103 // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
104 // CHECK45-64-NEXT: call void @__kmpc_target_deinit()
105 // CHECK45-64-NEXT: ret void
106 // CHECK45-64: worker.exit:
107 // CHECK45-64-NEXT: ret void
110 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
111 // CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] {
112 // CHECK45-64-NEXT: entry:
113 // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
114 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
115 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
116 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
117 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
118 // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
119 // CHECK45-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
120 // CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
121 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
122 // CHECK45-64-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
123 // CHECK45-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
124 // CHECK45-64-NEXT: ret void
127 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
128 // CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
129 // CHECK45-64-NEXT: entry:
130 // CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
131 // CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
132 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
133 // CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
134 // CHECK45-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
135 // CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
136 // CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
137 // CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
138 // CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
139 // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
140 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
141 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
142 // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
143 // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
144 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
145 // CHECK45-64: user_code.entry:
146 // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
147 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
148 // CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
149 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8
150 // CHECK45-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
151 // CHECK45-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
152 // CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8
153 // CHECK45-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
154 // CHECK45-64-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP4]] to ptr
155 // CHECK45-64-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
156 // CHECK45-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
157 // CHECK45-64-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr
158 // CHECK45-64-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8
159 // CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
160 // CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
161 // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
162 // CHECK45-64-NEXT: call void @__kmpc_target_deinit()
163 // CHECK45-64-NEXT: ret void
164 // CHECK45-64: worker.exit:
165 // CHECK45-64-NEXT: ret void
168 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
169 // CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
170 // CHECK45-64-NEXT: entry:
171 // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
172 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
173 // CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
174 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
175 // CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
176 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
177 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
178 // CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
179 // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
180 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
181 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
182 // CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
183 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
184 // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
185 // CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
186 // CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
187 // CHECK45-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
188 // CHECK45-64-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
189 // CHECK45-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
190 // CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
191 // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
192 // CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
193 // CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
194 // CHECK45-64-NEXT: ret void
197 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
198 // CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
199 // CHECK45-32-NEXT: entry:
200 // CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
201 // CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
202 // CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
203 // CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
204 // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
205 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
206 // CHECK45-32: user_code.entry:
207 // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
208 // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
209 // CHECK45-32-NEXT: call void @__kmpc_target_deinit()
210 // CHECK45-32-NEXT: ret void
211 // CHECK45-32: worker.exit:
212 // CHECK45-32-NEXT: ret void
215 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
216 // CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
217 // CHECK45-32-NEXT: entry:
218 // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
219 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
220 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
221 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
222 // CHECK45-32-NEXT: ret void
225 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
226 // CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
227 // CHECK45-32-NEXT: entry:
228 // CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
229 // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
230 // CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
231 // CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
232 // CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
233 // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
234 // CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
235 // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
236 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
237 // CHECK45-32: user_code.entry:
238 // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
239 // CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
240 // CHECK45-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
241 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
242 // CHECK45-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
243 // CHECK45-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
244 // CHECK45-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
245 // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
246 // CHECK45-32-NEXT: call void @__kmpc_target_deinit()
247 // CHECK45-32-NEXT: ret void
248 // CHECK45-32: worker.exit:
249 // CHECK45-32-NEXT: ret void
252 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
253 // CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
254 // CHECK45-32-NEXT: entry:
255 // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
256 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
257 // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
258 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
259 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
260 // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
261 // CHECK45-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
262 // CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
263 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
264 // CHECK45-32-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
265 // CHECK45-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
266 // CHECK45-32-NEXT: ret void
269 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
270 // CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
271 // CHECK45-32-NEXT: entry:
272 // CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
273 // CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
274 // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
275 // CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
276 // CHECK45-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
277 // CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
278 // CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
279 // CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
280 // CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
281 // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
282 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
283 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
284 // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
285 // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
286 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
287 // CHECK45-32: user_code.entry:
288 // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
289 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
290 // CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
291 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
292 // CHECK45-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
293 // CHECK45-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
294 // CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
295 // CHECK45-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
296 // CHECK45-32-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
297 // CHECK45-32-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
298 // CHECK45-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
299 // CHECK45-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
300 // CHECK45-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
301 // CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
302 // CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
303 // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
304 // CHECK45-32-NEXT: call void @__kmpc_target_deinit()
305 // CHECK45-32-NEXT: ret void
306 // CHECK45-32: worker.exit:
307 // CHECK45-32-NEXT: ret void
310 // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
311 // CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
312 // CHECK45-32-NEXT: entry:
313 // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
314 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
315 // CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
316 // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
317 // CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
318 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
319 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
320 // CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
321 // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
322 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
323 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
324 // CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
325 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
326 // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
327 // CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
328 // CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
329 // CHECK45-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
330 // CHECK45-32-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
331 // CHECK45-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
332 // CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
333 // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
334 // CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
335 // CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
336 // CHECK45-32-NEXT: ret void
339 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
340 // CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
341 // CHECK45-32-EX-NEXT: entry:
342 // CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
343 // CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
344 // CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
345 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
346 // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
347 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
348 // CHECK45-32-EX: user_code.entry:
349 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
350 // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
351 // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
352 // CHECK45-32-EX-NEXT: ret void
353 // CHECK45-32-EX: worker.exit:
354 // CHECK45-32-EX-NEXT: ret void
357 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
358 // CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
359 // CHECK45-32-EX-NEXT: entry:
360 // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
361 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
362 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
363 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
364 // CHECK45-32-EX-NEXT: ret void
367 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
368 // CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
369 // CHECK45-32-EX-NEXT: entry:
370 // CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
371 // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
372 // CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
373 // CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
374 // CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
375 // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
376 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
377 // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
378 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
379 // CHECK45-32-EX: user_code.entry:
380 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
381 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
382 // CHECK45-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
383 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
384 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
385 // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
386 // CHECK45-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
387 // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
388 // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
389 // CHECK45-32-EX-NEXT: ret void
390 // CHECK45-32-EX: worker.exit:
391 // CHECK45-32-EX-NEXT: ret void
394 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
395 // CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
396 // CHECK45-32-EX-NEXT: entry:
397 // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
398 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
399 // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
400 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
401 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
402 // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
403 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
404 // CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
405 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
406 // CHECK45-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
407 // CHECK45-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
408 // CHECK45-32-EX-NEXT: ret void
411 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
412 // CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
413 // CHECK45-32-EX-NEXT: entry:
414 // CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
415 // CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
416 // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
417 // CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
418 // CHECK45-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
419 // CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
420 // CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
421 // CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
422 // CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
423 // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
424 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
425 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
426 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
427 // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
428 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
429 // CHECK45-32-EX: user_code.entry:
430 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
431 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
432 // CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
433 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
434 // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
435 // CHECK45-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
436 // CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
437 // CHECK45-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
438 // CHECK45-32-EX-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
439 // CHECK45-32-EX-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
440 // CHECK45-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
441 // CHECK45-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
442 // CHECK45-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
443 // CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
444 // CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
445 // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
446 // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
447 // CHECK45-32-EX-NEXT: ret void
448 // CHECK45-32-EX: worker.exit:
449 // CHECK45-32-EX-NEXT: ret void
452 // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
453 // CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
454 // CHECK45-32-EX-NEXT: entry:
455 // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
456 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
457 // CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
458 // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
459 // CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
460 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
461 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
462 // CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
463 // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
464 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
465 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
466 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
467 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
468 // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
469 // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
470 // CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
471 // CHECK45-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
472 // CHECK45-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
473 // CHECK45-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
474 // CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
475 // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
476 // CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
477 // CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
478 // CHECK45-32-EX-NEXT: ret void
481 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
482 // CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
483 // CHECK-64-NEXT: entry:
484 // CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
485 // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
486 // CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
487 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
488 // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
489 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
490 // CHECK-64: user_code.entry:
491 // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
492 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
493 // CHECK-64-NEXT: call void @__kmpc_target_deinit()
494 // CHECK-64-NEXT: ret void
495 // CHECK-64: worker.exit:
496 // CHECK-64-NEXT: ret void
499 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
500 // CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
501 // CHECK-64-NEXT: entry:
502 // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
503 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
504 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
505 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
506 // CHECK-64-NEXT: ret void
509 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
510 // CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
511 // CHECK-64-NEXT: entry:
512 // CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
513 // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
514 // CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
515 // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
516 // CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
517 // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
518 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
519 // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
520 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
521 // CHECK-64: user_code.entry:
522 // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
523 // CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
524 // CHECK-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
525 // CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8
526 // CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
527 // CHECK-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
528 // CHECK-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
529 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
530 // CHECK-64-NEXT: call void @__kmpc_target_deinit()
531 // CHECK-64-NEXT: ret void
532 // CHECK-64: worker.exit:
533 // CHECK-64-NEXT: ret void
536 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
537 // CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] {
538 // CHECK-64-NEXT: entry:
539 // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
540 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
541 // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
542 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
543 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
544 // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
545 // CHECK-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
546 // CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
547 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
548 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
549 // CHECK-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
550 // CHECK-64-NEXT: ret void
553 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
554 // CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
555 // CHECK-64-NEXT: entry:
556 // CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
557 // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
558 // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
559 // CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
560 // CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
561 // CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
562 // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
563 // CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
564 // CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
565 // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
566 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
567 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
568 // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
569 // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
570 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
571 // CHECK-64: user_code.entry:
572 // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
573 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
574 // CHECK-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
575 // CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8
576 // CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
577 // CHECK-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
578 // CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8
579 // CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
580 // CHECK-64-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP4]] to ptr
581 // CHECK-64-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
582 // CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
583 // CHECK-64-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr
584 // CHECK-64-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8
585 // CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
586 // CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
587 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
588 // CHECK-64-NEXT: call void @__kmpc_target_deinit()
589 // CHECK-64-NEXT: ret void
590 // CHECK-64: worker.exit:
591 // CHECK-64-NEXT: ret void
594 // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
595 // CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
596 // CHECK-64-NEXT: entry:
597 // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
598 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
599 // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
600 // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
601 // CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
602 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
603 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
604 // CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
605 // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
606 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
607 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
608 // CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
609 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
610 // CHECK-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
611 // CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
612 // CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
613 // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
614 // CHECK-64-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
615 // CHECK-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
616 // CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
617 // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
618 // CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
619 // CHECK-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
620 // CHECK-64-NEXT: ret void
623 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
624 // CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
625 // CHECK-32-NEXT: entry:
626 // CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
627 // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
628 // CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
629 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
630 // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
631 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
632 // CHECK-32: user_code.entry:
633 // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
634 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
635 // CHECK-32-NEXT: call void @__kmpc_target_deinit()
636 // CHECK-32-NEXT: ret void
637 // CHECK-32: worker.exit:
638 // CHECK-32-NEXT: ret void
641 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
642 // CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
643 // CHECK-32-NEXT: entry:
644 // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
645 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
646 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
647 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
648 // CHECK-32-NEXT: ret void
651 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
652 // CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
653 // CHECK-32-NEXT: entry:
654 // CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
655 // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
656 // CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
657 // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
658 // CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
659 // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
660 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
661 // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
662 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
663 // CHECK-32: user_code.entry:
664 // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
665 // CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
666 // CHECK-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
667 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
668 // CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
669 // CHECK-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
670 // CHECK-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
671 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
672 // CHECK-32-NEXT: call void @__kmpc_target_deinit()
673 // CHECK-32-NEXT: ret void
674 // CHECK-32: worker.exit:
675 // CHECK-32-NEXT: ret void
678 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
679 // CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
680 // CHECK-32-NEXT: entry:
681 // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
682 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
683 // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
684 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
685 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
686 // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
687 // CHECK-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
688 // CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
689 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
690 // CHECK-32-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
691 // CHECK-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
692 // CHECK-32-NEXT: ret void
695 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
696 // CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
697 // CHECK-32-NEXT: entry:
698 // CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
699 // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
700 // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
701 // CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
702 // CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
703 // CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
704 // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
705 // CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
706 // CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
707 // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
708 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
709 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
710 // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
711 // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
712 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
713 // CHECK-32: user_code.entry:
714 // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
715 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
716 // CHECK-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
717 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
718 // CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
719 // CHECK-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
720 // CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
721 // CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
722 // CHECK-32-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
723 // CHECK-32-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
724 // CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
725 // CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
726 // CHECK-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
727 // CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
728 // CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
729 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
730 // CHECK-32-NEXT: call void @__kmpc_target_deinit()
731 // CHECK-32-NEXT: ret void
732 // CHECK-32: worker.exit:
733 // CHECK-32-NEXT: ret void
736 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
737 // CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
738 // CHECK-32-NEXT: entry:
739 // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
740 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
741 // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
742 // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
743 // CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
744 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
745 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
746 // CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
747 // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
748 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
749 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
750 // CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
751 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
752 // CHECK-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
753 // CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
754 // CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
755 // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
756 // CHECK-32-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
757 // CHECK-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
758 // CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
759 // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
760 // CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
761 // CHECK-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
762 // CHECK-32-NEXT: ret void
765 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
766 // CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
767 // CHECK-32-EX-NEXT: entry:
768 // CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
769 // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
770 // CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
771 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
772 // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
773 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
774 // CHECK-32-EX: user_code.entry:
775 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
776 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
777 // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
778 // CHECK-32-EX-NEXT: ret void
779 // CHECK-32-EX: worker.exit:
780 // CHECK-32-EX-NEXT: ret void
783 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
784 // CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
785 // CHECK-32-EX-NEXT: entry:
786 // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
787 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
788 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
789 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
790 // CHECK-32-EX-NEXT: ret void
793 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
794 // CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
795 // CHECK-32-EX-NEXT: entry:
796 // CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
797 // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
798 // CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
799 // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
800 // CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
801 // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
802 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
803 // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
804 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
805 // CHECK-32-EX: user_code.entry:
806 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
807 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
808 // CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
809 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
810 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
811 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
812 // CHECK-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
813 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
814 // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
815 // CHECK-32-EX-NEXT: ret void
816 // CHECK-32-EX: worker.exit:
817 // CHECK-32-EX-NEXT: ret void
820 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
821 // CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
822 // CHECK-32-EX-NEXT: entry:
823 // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
824 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
825 // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
826 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
827 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
828 // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
829 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
830 // CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
831 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
832 // CHECK-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
833 // CHECK-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
834 // CHECK-32-EX-NEXT: ret void
837 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
838 // CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
839 // CHECK-32-EX-NEXT: entry:
840 // CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
841 // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
842 // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
843 // CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
844 // CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
845 // CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
846 // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
847 // CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
848 // CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
849 // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
850 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
851 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
852 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
853 // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
854 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
855 // CHECK-32-EX: user_code.entry:
856 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
857 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
858 // CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
859 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
860 // CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
861 // CHECK-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
862 // CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
863 // CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
864 // CHECK-32-EX-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
865 // CHECK-32-EX-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
866 // CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
867 // CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
868 // CHECK-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
869 // CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
870 // CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
871 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
872 // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
873 // CHECK-32-EX-NEXT: ret void
874 // CHECK-32-EX: worker.exit:
875 // CHECK-32-EX-NEXT: ret void
878 // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
879 // CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
880 // CHECK-32-EX-NEXT: entry:
881 // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
882 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
883 // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
884 // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
885 // CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
886 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
887 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
888 // CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
889 // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
890 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
891 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
892 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
893 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
894 // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
895 // CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
896 // CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
897 // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
898 // CHECK-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
899 // CHECK-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
900 // CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
901 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
902 // CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
903 // CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
904 // CHECK-32-EX-NEXT: ret void